1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 2; RUN: opt -passes=loop-vectorize \ 3; RUN: -force-tail-folding-style=data-with-evl \ 4; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ 5; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefixes=IF-EVL-OUTLOOP 6 7; RUN: opt -passes=loop-vectorize \ 8; RUN: -prefer-inloop-reductions \ 9; RUN: -force-tail-folding-style=data-with-evl \ 10; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ 11; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefixes=IF-EVL-INLOOP 12 13; RUN: opt -passes=loop-vectorize \ 14; RUN: -force-tail-folding-style=none \ 15; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ 16; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefixes=NO-VP-OUTLOOP 17 18; RUN: opt -passes=loop-vectorize \ 19; RUN: -prefer-inloop-reductions \ 20; RUN: -force-tail-folding-style=none \ 21; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ 22; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefixes=NO-VP-INLOOP 23 24define i32 @cond_add(ptr %a, i64 %n, i32 %start) { 25; IF-EVL-OUTLOOP-LABEL: define i32 @cond_add( 26; IF-EVL-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0:[0-9]+]] { 27; IF-EVL-OUTLOOP-NEXT: entry: 28; IF-EVL-OUTLOOP-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]] 29; IF-EVL-OUTLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 30; IF-EVL-OUTLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 31; IF-EVL-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 32; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 33; IF-EVL-OUTLOOP: vector.ph: 34; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 35; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 36; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 37; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 38; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 39; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 40; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 41; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 42; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0 43; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] 44; IF-EVL-OUTLOOP: vector.body: 45; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 46; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT1:%.*]], [[VECTOR_BODY]] ] 47; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] 48; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = sub i64 [[N]], [[EVL_BASED_IV1]] 49; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 4, i1 true) 50; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV1]], 0 51; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] 52; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 53; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]]) 54; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3) 55; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP18]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer, i32 [[TMP11]]) 56; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[TMP19]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]]) 57; IF-EVL-OUTLOOP-NEXT: [[TMP20]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]]) 58; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = zext i32 [[TMP11]] to i64 59; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP22]], [[EVL_BASED_IV1]] 60; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP8]] 61; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N_VEC]] 62; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 63; IF-EVL-OUTLOOP: middle.block: 64; IF-EVL-OUTLOOP-NEXT: [[TMP24:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP20]]) 65; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 66; IF-EVL-OUTLOOP: scalar.ph: 67; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 68; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP24]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 69; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] 70; IF-EVL-OUTLOOP: for.body: 71; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 72; IF-EVL-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 73; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 74; IF-EVL-OUTLOOP-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 75; IF-EVL-OUTLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP27]], 3 76; IF-EVL-OUTLOOP-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[TMP27]], i32 0 77; IF-EVL-OUTLOOP-NEXT: [[ADD]] = add nsw i32 [[SELECT]], [[RDX]] 78; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 79; IF-EVL-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 80; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 81; IF-EVL-OUTLOOP: for.end: 82; IF-EVL-OUTLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ] 83; IF-EVL-OUTLOOP-NEXT: ret i32 [[ADD_LCSSA]] 84; 85; IF-EVL-INLOOP-LABEL: define i32 @cond_add( 86; IF-EVL-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0:[0-9]+]] { 87; IF-EVL-INLOOP-NEXT: entry: 88; IF-EVL-INLOOP-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]] 89; IF-EVL-INLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 90; IF-EVL-INLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 91; IF-EVL-INLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 92; IF-EVL-INLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 93; IF-EVL-INLOOP: vector.ph: 94; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() 95; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 96; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 97; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]] 98; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]] 99; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 100; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() 101; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 102; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] 103; IF-EVL-INLOOP: vector.body: 104; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 105; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 106; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] 107; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 108; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true) 109; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 110; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]] 111; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 112; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) 113; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3) 114; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP19]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer, i32 [[TMP12]]) 115; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) 116; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]] 117; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64 118; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] 119; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] 120; IF-EVL-INLOOP-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 121; IF-EVL-INLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 122; IF-EVL-INLOOP: middle.block: 123; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 124; IF-EVL-INLOOP: scalar.ph: 125; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 126; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 127; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] 128; IF-EVL-INLOOP: for.body: 129; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 130; IF-EVL-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 131; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 132; IF-EVL-INLOOP-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 133; IF-EVL-INLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP25]], 3 134; IF-EVL-INLOOP-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[TMP25]], i32 0 135; IF-EVL-INLOOP-NEXT: [[ADD]] = add nsw i32 [[SELECT]], [[RDX]] 136; IF-EVL-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 137; IF-EVL-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 138; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 139; IF-EVL-INLOOP: for.end: 140; IF-EVL-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] 141; IF-EVL-INLOOP-NEXT: ret i32 [[ADD_LCSSA]] 142; 143; NO-VP-OUTLOOP-LABEL: define i32 @cond_add( 144; NO-VP-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0:[0-9]+]] { 145; NO-VP-OUTLOOP-NEXT: entry: 146; NO-VP-OUTLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 147; NO-VP-OUTLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 148; NO-VP-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[N]], [[TMP2]] 149; NO-VP-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 150; NO-VP-OUTLOOP: vector.ph: 151; NO-VP-OUTLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() 152; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 153; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]] 154; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 155; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() 156; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 157; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0 158; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] 159; NO-VP-OUTLOOP: vector.body: 160; NO-VP-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 161; NO-VP-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP11]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] 162; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 163; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] 164; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 165; NO-VP-OUTLOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP14]], align 4 166; NO-VP-OUTLOOP-NEXT: [[TMP21:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 3) 167; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[TMP21]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], <vscale x 4 x i32> zeroinitializer 168; NO-VP-OUTLOOP-NEXT: [[TMP17]] = add <vscale x 4 x i32> [[TMP16]], [[VEC_PHI]] 169; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] 170; NO-VP-OUTLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 171; NO-VP-OUTLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 172; NO-VP-OUTLOOP: middle.block: 173; NO-VP-OUTLOOP-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP17]]) 174; NO-VP-OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 175; NO-VP-OUTLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 176; NO-VP-OUTLOOP: scalar.ph: 177; NO-VP-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 178; NO-VP-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 179; NO-VP-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] 180; NO-VP-OUTLOOP: for.body: 181; NO-VP-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 182; NO-VP-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 183; NO-VP-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 184; NO-VP-OUTLOOP-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 185; NO-VP-OUTLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 3 186; NO-VP-OUTLOOP-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[TMP15]], i32 0 187; NO-VP-OUTLOOP-NEXT: [[ADD]] = add nsw i32 [[SELECT]], [[RDX]] 188; NO-VP-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 189; NO-VP-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 190; NO-VP-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 191; NO-VP-OUTLOOP: for.end: 192; NO-VP-OUTLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] 193; NO-VP-OUTLOOP-NEXT: ret i32 [[ADD_LCSSA]] 194; 195; NO-VP-INLOOP-LABEL: define i32 @cond_add( 196; NO-VP-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0:[0-9]+]] { 197; NO-VP-INLOOP-NEXT: entry: 198; NO-VP-INLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 199; NO-VP-INLOOP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 200; NO-VP-INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] 201; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 202; NO-VP-INLOOP: vector.ph: 203; NO-VP-INLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 204; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 205; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 206; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 207; NO-VP-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 208; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 209; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] 210; NO-VP-INLOOP: vector.body: 211; NO-VP-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 212; NO-VP-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] 213; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 214; NO-VP-INLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] 215; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 216; NO-VP-INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 217; NO-VP-INLOOP-NEXT: [[TMP9:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) 218; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> zeroinitializer 219; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]]) 220; NO-VP-INLOOP-NEXT: [[TMP12]] = add i32 [[TMP11]], [[VEC_PHI]] 221; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 222; NO-VP-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 223; NO-VP-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 224; NO-VP-INLOOP: middle.block: 225; NO-VP-INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 226; NO-VP-INLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 227; NO-VP-INLOOP: scalar.ph: 228; NO-VP-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 229; NO-VP-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 230; NO-VP-INLOOP-NEXT: br label [[FOR_BODY:%.*]] 231; NO-VP-INLOOP: for.body: 232; NO-VP-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 233; NO-VP-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 234; NO-VP-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 235; NO-VP-INLOOP-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 236; NO-VP-INLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 3 237; NO-VP-INLOOP-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[TMP14]], i32 0 238; NO-VP-INLOOP-NEXT: [[ADD]] = add nsw i32 [[SELECT]], [[RDX]] 239; NO-VP-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 240; NO-VP-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 241; NO-VP-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 242; NO-VP-INLOOP: for.end: 243; NO-VP-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 244; NO-VP-INLOOP-NEXT: ret i32 [[ADD_LCSSA]] 245; 246entry: 247 br label %for.body 248 249for.body: 250 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 251 %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ] 252 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 253 %0 = load i32, ptr %arrayidx, align 4 254 %cmp = icmp sgt i32 %0, 3 255 %select = select i1 %cmp, i32 %0, i32 0 256 %add = add nsw i32 %select, %rdx 257 %iv.next = add nuw nsw i64 %iv, 1 258 %exitcond.not = icmp eq i64 %iv.next, %n 259 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 260 261for.end: 262 ret i32 %add 263} 264 265define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { 266; IF-EVL-OUTLOOP-LABEL: define i32 @cond_add_pred( 267; IF-EVL-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { 268; IF-EVL-OUTLOOP-NEXT: entry: 269; IF-EVL-OUTLOOP-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]] 270; IF-EVL-OUTLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 271; IF-EVL-OUTLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 272; IF-EVL-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 273; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 274; IF-EVL-OUTLOOP: vector.ph: 275; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 276; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 277; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 278; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]] 279; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] 280; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 281; IF-EVL-OUTLOOP-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 282; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() 283; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 284; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0 285; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 286; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer 287; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] 288; IF-EVL-OUTLOOP: vector.body: 289; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 290; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 291; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] 292; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 293; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 4, i1 true) 294; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0 295; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[EVL_BASED_IV]], i64 0 296; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer 297; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() 298; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP13]] 299; IF-EVL-OUTLOOP-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP14]] 300; IF-EVL-OUTLOOP-NEXT: [[TMP15:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]] 301; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] 302; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 303; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]]) 304; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3) 305; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]]) 306; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = xor <vscale x 4 x i1> [[TMP18]], splat (i1 true) 307; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[TMP20]], <vscale x 4 x i1> zeroinitializer 308; IF-EVL-OUTLOOP-NEXT: [[PREDPHI1:%.*]] = select <vscale x 4 x i1> [[TMP21]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP19]] 309; IF-EVL-OUTLOOP-NEXT: [[PREDPHI]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[PREDPHI1]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]]) 310; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP11]] to i64 311; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] 312; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] 313; IF-EVL-OUTLOOP-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 314; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 315; IF-EVL-OUTLOOP: middle.block: 316; IF-EVL-OUTLOOP-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PREDPHI]]) 317; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 318; IF-EVL-OUTLOOP: scalar.ph: 319; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 320; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP27]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 321; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] 322; IF-EVL-OUTLOOP: for.body: 323; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 324; IF-EVL-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RDX_ADD:%.*]], [[FOR_INC]] ] 325; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 326; IF-EVL-OUTLOOP-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 327; IF-EVL-OUTLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP28]], 3 328; IF-EVL-OUTLOOP-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[FOR_INC]] 329; IF-EVL-OUTLOOP: if.then: 330; IF-EVL-OUTLOOP-NEXT: [[ADD_PRED:%.*]] = add nsw i32 [[RDX]], [[TMP28]] 331; IF-EVL-OUTLOOP-NEXT: br label [[FOR_INC]] 332; IF-EVL-OUTLOOP: for.inc: 333; IF-EVL-OUTLOOP-NEXT: [[RDX_ADD]] = phi i32 [ [[ADD_PRED]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ] 334; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 335; IF-EVL-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 336; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 337; IF-EVL-OUTLOOP: for.end: 338; IF-EVL-OUTLOOP-NEXT: [[RDX_ADD_LCSSA:%.*]] = phi i32 [ [[RDX_ADD]], [[FOR_INC]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ] 339; IF-EVL-OUTLOOP-NEXT: ret i32 [[RDX_ADD_LCSSA]] 340; 341; IF-EVL-INLOOP-LABEL: define i32 @cond_add_pred( 342; IF-EVL-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { 343; IF-EVL-INLOOP-NEXT: entry: 344; IF-EVL-INLOOP-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]] 345; IF-EVL-INLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 346; IF-EVL-INLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 347; IF-EVL-INLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] 348; IF-EVL-INLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 349; IF-EVL-INLOOP: vector.ph: 350; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() 351; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 352; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 353; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]] 354; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]] 355; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 356; IF-EVL-INLOOP-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 357; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() 358; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 359; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 360; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer 361; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] 362; IF-EVL-INLOOP: vector.body: 363; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 364; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 365; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] 366; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] 367; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true) 368; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 369; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[EVL_BASED_IV]], i64 0 370; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer 371; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() 372; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP14]] 373; IF-EVL-INLOOP-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP15]] 374; IF-EVL-INLOOP-NEXT: [[TMP16:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]] 375; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]] 376; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 377; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) 378; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3) 379; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP19]], <vscale x 4 x i1> zeroinitializer 380; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> [[TMP20]], i32 [[TMP12]]) 381; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]] 382; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64 383; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] 384; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] 385; IF-EVL-INLOOP-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 386; IF-EVL-INLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 387; IF-EVL-INLOOP: middle.block: 388; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 389; IF-EVL-INLOOP: scalar.ph: 390; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 391; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 392; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] 393; IF-EVL-INLOOP: for.body: 394; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 395; IF-EVL-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RDX_ADD:%.*]], [[FOR_INC]] ] 396; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 397; IF-EVL-INLOOP-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 398; IF-EVL-INLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP25]], 3 399; IF-EVL-INLOOP-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[FOR_INC]] 400; IF-EVL-INLOOP: if.then: 401; IF-EVL-INLOOP-NEXT: [[ADD_PRED:%.*]] = add nsw i32 [[RDX]], [[TMP25]] 402; IF-EVL-INLOOP-NEXT: br label [[FOR_INC]] 403; IF-EVL-INLOOP: for.inc: 404; IF-EVL-INLOOP-NEXT: [[RDX_ADD]] = phi i32 [ [[ADD_PRED]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ] 405; IF-EVL-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 406; IF-EVL-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 407; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 408; IF-EVL-INLOOP: for.end: 409; IF-EVL-INLOOP-NEXT: [[RDX_ADD_LCSSA:%.*]] = phi i32 [ [[RDX_ADD]], [[FOR_INC]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] 410; IF-EVL-INLOOP-NEXT: ret i32 [[RDX_ADD_LCSSA]] 411; 412; NO-VP-OUTLOOP-LABEL: define i32 @cond_add_pred( 413; NO-VP-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { 414; NO-VP-OUTLOOP-NEXT: entry: 415; NO-VP-OUTLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 416; NO-VP-OUTLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 417; NO-VP-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[N]], [[TMP2]] 418; NO-VP-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 419; NO-VP-OUTLOOP: vector.ph: 420; NO-VP-OUTLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() 421; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 422; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]] 423; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 424; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() 425; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 426; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0 427; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] 428; NO-VP-OUTLOOP: vector.body: 429; NO-VP-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 430; NO-VP-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP11]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] 431; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 432; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] 433; NO-VP-OUTLOOP-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 434; NO-VP-OUTLOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP21]], align 4 435; NO-VP-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 3) 436; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]] 437; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[TMP18]], splat (i1 true) 438; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP17]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP16]] 439; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] 440; NO-VP-OUTLOOP-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 441; NO-VP-OUTLOOP-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 442; NO-VP-OUTLOOP: middle.block: 443; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PREDPHI]]) 444; NO-VP-OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 445; NO-VP-OUTLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 446; NO-VP-OUTLOOP: scalar.ph: 447; NO-VP-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 448; NO-VP-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 449; NO-VP-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] 450; NO-VP-OUTLOOP: for.body: 451; NO-VP-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 452; NO-VP-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RDX_ADD:%.*]], [[FOR_INC]] ] 453; NO-VP-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 454; NO-VP-OUTLOOP-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 455; NO-VP-OUTLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 3 456; NO-VP-OUTLOOP-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[FOR_INC]] 457; NO-VP-OUTLOOP: if.then: 458; NO-VP-OUTLOOP-NEXT: [[ADD_PRED:%.*]] = add nsw i32 [[RDX]], [[TMP15]] 459; NO-VP-OUTLOOP-NEXT: br label [[FOR_INC]] 460; NO-VP-OUTLOOP: for.inc: 461; NO-VP-OUTLOOP-NEXT: [[RDX_ADD]] = phi i32 [ [[ADD_PRED]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ] 462; NO-VP-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 463; NO-VP-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 464; NO-VP-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 465; NO-VP-OUTLOOP: for.end: 466; NO-VP-OUTLOOP-NEXT: [[RDX_ADD_LCSSA:%.*]] = phi i32 [ [[RDX_ADD]], [[FOR_INC]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] 467; NO-VP-OUTLOOP-NEXT: ret i32 [[RDX_ADD_LCSSA]] 468; 469; NO-VP-INLOOP-LABEL: define i32 @cond_add_pred( 470; NO-VP-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { 471; NO-VP-INLOOP-NEXT: entry: 472; NO-VP-INLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 473; NO-VP-INLOOP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 474; NO-VP-INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] 475; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 476; NO-VP-INLOOP: vector.ph: 477; NO-VP-INLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 478; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 479; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 480; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 481; NO-VP-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 482; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 483; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] 484; NO-VP-INLOOP: vector.body: 485; NO-VP-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 486; NO-VP-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] 487; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 488; NO-VP-INLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] 489; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 490; NO-VP-INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 491; NO-VP-INLOOP-NEXT: [[TMP9:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) 492; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> zeroinitializer 493; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]]) 494; NO-VP-INLOOP-NEXT: [[TMP12]] = add i32 [[TMP11]], [[VEC_PHI]] 495; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 496; NO-VP-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 497; NO-VP-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 498; NO-VP-INLOOP: middle.block: 499; NO-VP-INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 500; NO-VP-INLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 501; NO-VP-INLOOP: scalar.ph: 502; NO-VP-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 503; NO-VP-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 504; NO-VP-INLOOP-NEXT: br label [[FOR_BODY:%.*]] 505; NO-VP-INLOOP: for.body: 506; NO-VP-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 507; NO-VP-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RDX_ADD:%.*]], [[FOR_INC]] ] 508; NO-VP-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 509; NO-VP-INLOOP-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 510; NO-VP-INLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 3 511; NO-VP-INLOOP-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[FOR_INC]] 512; NO-VP-INLOOP: if.then: 513; NO-VP-INLOOP-NEXT: [[ADD_PRED:%.*]] = add nsw i32 [[RDX]], [[TMP14]] 514; NO-VP-INLOOP-NEXT: br label [[FOR_INC]] 515; NO-VP-INLOOP: for.inc: 516; NO-VP-INLOOP-NEXT: [[RDX_ADD]] = phi i32 [ [[ADD_PRED]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ] 517; NO-VP-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 518; NO-VP-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 519; NO-VP-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 520; NO-VP-INLOOP: for.end: 521; NO-VP-INLOOP-NEXT: [[RDX_ADD_LCSSA:%.*]] = phi i32 [ [[RDX_ADD]], [[FOR_INC]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 522; NO-VP-INLOOP-NEXT: ret i32 [[RDX_ADD_LCSSA]] 523; 524entry: 525 br label %for.body 526 527for.body: 528 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] 529 %rdx = phi i32 [ %start, %entry ], [ %rdx.add, %for.inc ] 530 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 531 %0 = load i32, ptr %arrayidx, align 4 532 %cmp = icmp sgt i32 %0, 3 533 br i1 %cmp, label %if.then, label %for.inc 534 535if.then: 536 %add.pred = add nsw i32 %rdx, %0 537 br label %for.inc 538 539for.inc: 540 %rdx.add = phi i32 [ %add.pred, %if.then ], [ %rdx, %for.body ] 541 %iv.next = add nuw nsw i64 %iv, 1 542 %exitcond.not = icmp eq i64 %iv.next, %n 543 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 544 545for.end: 546 ret i32 %rdx.add 547} 548 549define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { 550; IF-EVL-OUTLOOP-LABEL: define i32 @step_cond_add( 551; IF-EVL-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { 552; IF-EVL-OUTLOOP-NEXT: entry: 553; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] 554; IF-EVL-OUTLOOP: for.body: 555; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 556; IF-EVL-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[ADD:%.*]], [[VECTOR_BODY]] ] 557; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]] 558; IF-EVL-OUTLOOP-NEXT: [[TMP37:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 559; IF-EVL-OUTLOOP-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[EVL_BASED_IV]] to i32 560; IF-EVL-OUTLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP37]], [[IV_TRUNC]] 561; IF-EVL-OUTLOOP-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[TMP37]], i32 0 562; IF-EVL-OUTLOOP-NEXT: [[ADD]] = add nsw i32 [[SELECT]], [[RDX]] 563; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw nsw i64 [[EVL_BASED_IV]], 1 564; IF-EVL-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] 565; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 566; IF-EVL-OUTLOOP: for.end: 567; IF-EVL-OUTLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[VECTOR_BODY]] ] 568; IF-EVL-OUTLOOP-NEXT: ret i32 [[ADD_LCSSA]] 569; 570; IF-EVL-INLOOP-LABEL: define i32 @step_cond_add( 571; IF-EVL-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { 572; IF-EVL-INLOOP-NEXT: entry: 573; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] 574; IF-EVL-INLOOP: for.body: 575; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] 576; IF-EVL-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[ADD:%.*]], [[VECTOR_BODY]] ] 577; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]] 578; IF-EVL-INLOOP-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 579; IF-EVL-INLOOP-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[EVL_BASED_IV]] to i32 580; IF-EVL-INLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP28]], [[IV_TRUNC]] 581; IF-EVL-INLOOP-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[TMP28]], i32 0 582; IF-EVL-INLOOP-NEXT: [[ADD]] = add nsw i32 [[SELECT]], [[RDX]] 583; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw nsw i64 [[EVL_BASED_IV]], 1 584; IF-EVL-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] 585; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 586; IF-EVL-INLOOP: for.end: 587; IF-EVL-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[VECTOR_BODY]] ] 588; IF-EVL-INLOOP-NEXT: ret i32 [[ADD_LCSSA]] 589; 590; NO-VP-OUTLOOP-LABEL: define i32 @step_cond_add( 591; NO-VP-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { 592; NO-VP-OUTLOOP-NEXT: entry: 593; NO-VP-OUTLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 594; NO-VP-OUTLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 595; NO-VP-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[N]], [[TMP2]] 596; NO-VP-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 597; NO-VP-OUTLOOP: vector.ph: 598; NO-VP-OUTLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() 599; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 600; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]] 601; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 602; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() 603; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 604; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0 605; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() 606; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1) 607; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]] 608; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32 609; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]] 610; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP17]], i64 0 611; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 612; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] 613; NO-VP-OUTLOOP: vector.body: 614; NO-VP-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 615; NO-VP-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP11]], [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] 616; NO-VP-OUTLOOP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 617; NO-VP-OUTLOOP-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 0 618; NO-VP-OUTLOOP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP18]] 619; NO-VP-OUTLOOP-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 620; NO-VP-OUTLOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP20]], align 4 621; NO-VP-OUTLOOP-NEXT: [[TMP27:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_IND]] 622; NO-VP-OUTLOOP-NEXT: [[TMP22:%.*]] = select <vscale x 4 x i1> [[TMP27]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], <vscale x 4 x i32> zeroinitializer 623; NO-VP-OUTLOOP-NEXT: [[TMP23]] = add <vscale x 4 x i32> [[TMP22]], [[VEC_PHI]] 624; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] 625; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]] 626; NO-VP-OUTLOOP-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 627; NO-VP-OUTLOOP-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 628; NO-VP-OUTLOOP: middle.block: 629; NO-VP-OUTLOOP-NEXT: [[TMP26:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP23]]) 630; NO-VP-OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 631; NO-VP-OUTLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 632; NO-VP-OUTLOOP: scalar.ph: 633; NO-VP-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 634; NO-VP-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP26]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 635; NO-VP-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] 636; NO-VP-OUTLOOP: for.body: 637; NO-VP-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 638; NO-VP-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 639; NO-VP-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 640; NO-VP-OUTLOOP-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 641; NO-VP-OUTLOOP-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 642; NO-VP-OUTLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], [[IV_TRUNC]] 643; NO-VP-OUTLOOP-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[TMP21]], i32 0 644; NO-VP-OUTLOOP-NEXT: [[ADD]] = add nsw i32 [[SELECT]], [[RDX]] 645; NO-VP-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 646; NO-VP-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 647; NO-VP-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 648; NO-VP-OUTLOOP: for.end: 649; NO-VP-OUTLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ] 650; NO-VP-OUTLOOP-NEXT: ret i32 [[ADD_LCSSA]] 651; 652; NO-VP-INLOOP-LABEL: define i32 @step_cond_add( 653; NO-VP-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { 654; NO-VP-INLOOP-NEXT: entry: 655; NO-VP-INLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 656; NO-VP-INLOOP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 657; NO-VP-INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] 658; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 659; NO-VP-INLOOP: vector.ph: 660; NO-VP-INLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 661; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 662; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 663; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 664; NO-VP-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 665; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 666; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() 667; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i32> [[TMP6]], splat (i32 1) 668; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP8]] 669; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP5]] to i32 670; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP10]] 671; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP11]], i64 0 672; NO-VP-INLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 673; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] 674; NO-VP-INLOOP: vector.body: 675; NO-VP-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 676; NO-VP-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] 677; NO-VP-INLOOP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 678; NO-VP-INLOOP-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 679; NO-VP-INLOOP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] 680; NO-VP-INLOOP-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 681; NO-VP-INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP14]], align 4 682; NO-VP-INLOOP-NEXT: [[TMP15:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_IND]] 683; NO-VP-INLOOP-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> zeroinitializer 684; NO-VP-INLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP16]]) 685; NO-VP-INLOOP-NEXT: [[TMP18]] = add i32 [[TMP17]], [[VEC_PHI]] 686; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 687; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]] 688; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 689; NO-VP-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 690; NO-VP-INLOOP: middle.block: 691; NO-VP-INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 692; NO-VP-INLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 693; NO-VP-INLOOP: scalar.ph: 694; NO-VP-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 695; NO-VP-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 696; NO-VP-INLOOP-NEXT: br label [[FOR_BODY:%.*]] 697; NO-VP-INLOOP: for.body: 698; NO-VP-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 699; NO-VP-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 700; NO-VP-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 701; NO-VP-INLOOP-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 702; NO-VP-INLOOP-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 703; NO-VP-INLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], [[IV_TRUNC]] 704; NO-VP-INLOOP-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[TMP20]], i32 0 705; NO-VP-INLOOP-NEXT: [[ADD]] = add nsw i32 [[SELECT]], [[RDX]] 706; NO-VP-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 707; NO-VP-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 708; NO-VP-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 709; NO-VP-INLOOP: for.end: 710; NO-VP-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] 711; NO-VP-INLOOP-NEXT: ret i32 [[ADD_LCSSA]] 712; 713entry: 714 br label %for.body 715 716for.body: 717 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 718 %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ] 719 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 720 %0 = load i32, ptr %arrayidx, align 4 721 %iv.trunc = trunc i64 %iv to i32 722 %cmp = icmp sgt i32 %0, %iv.trunc 723 %select = select i1 %cmp, i32 %0, i32 0 724 %add = add nsw i32 %select, %rdx 725 %iv.next = add nuw nsw i64 %iv, 1 726 %exitcond.not = icmp eq i64 %iv.next, %n 727 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 728 729for.end: 730 ret i32 %add 731} 732 733define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { 734; IF-EVL-OUTLOOP-LABEL: define i32 @step_cond_add_pred( 735; IF-EVL-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { 736; IF-EVL-OUTLOOP-NEXT: entry: 737; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] 738; IF-EVL-OUTLOOP: for.body: 739; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[MIDDLE_BLOCK:%.*]] ] 740; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP37:%.*]], [[MIDDLE_BLOCK]] ] 741; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 742; IF-EVL-OUTLOOP-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 743; IF-EVL-OUTLOOP-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 744; IF-EVL-OUTLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP38]], [[IV_TRUNC]] 745; IF-EVL-OUTLOOP-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[MIDDLE_BLOCK]] 746; IF-EVL-OUTLOOP: if.then: 747; IF-EVL-OUTLOOP-NEXT: [[ADD_PRED:%.*]] = add nsw i32 [[BC_MERGE_RDX]], [[TMP38]] 748; IF-EVL-OUTLOOP-NEXT: br label [[MIDDLE_BLOCK]] 749; IF-EVL-OUTLOOP: for.inc: 750; IF-EVL-OUTLOOP-NEXT: [[TMP37]] = phi i32 [ [[ADD_PRED]], [[IF_THEN]] ], [ [[BC_MERGE_RDX]], [[VECTOR_BODY]] ] 751; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 752; IF-EVL-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 753; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6]] 754; IF-EVL-OUTLOOP: for.end: 755; IF-EVL-OUTLOOP-NEXT: [[RDX_ADD_LCSSA:%.*]] = phi i32 [ [[TMP37]], [[MIDDLE_BLOCK]] ] 756; IF-EVL-OUTLOOP-NEXT: ret i32 [[RDX_ADD_LCSSA]] 757; 758; IF-EVL-INLOOP-LABEL: define i32 @step_cond_add_pred( 759; IF-EVL-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { 760; IF-EVL-INLOOP-NEXT: entry: 761; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] 762; IF-EVL-INLOOP: for.body: 763; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[MIDDLE_BLOCK:%.*]] ] 764; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP32:%.*]], [[MIDDLE_BLOCK]] ] 765; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 766; IF-EVL-INLOOP-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 767; IF-EVL-INLOOP-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 768; IF-EVL-INLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP35]], [[IV_TRUNC]] 769; IF-EVL-INLOOP-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[MIDDLE_BLOCK]] 770; IF-EVL-INLOOP: if.then: 771; IF-EVL-INLOOP-NEXT: [[ADD_PRED:%.*]] = add nsw i32 [[BC_MERGE_RDX]], [[TMP35]] 772; IF-EVL-INLOOP-NEXT: br label [[MIDDLE_BLOCK]] 773; IF-EVL-INLOOP: for.inc: 774; IF-EVL-INLOOP-NEXT: [[TMP32]] = phi i32 [ [[ADD_PRED]], [[IF_THEN]] ], [ [[BC_MERGE_RDX]], [[VECTOR_BODY]] ] 775; IF-EVL-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 776; IF-EVL-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 777; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6]] 778; IF-EVL-INLOOP: for.end: 779; IF-EVL-INLOOP-NEXT: [[RDX_ADD_LCSSA:%.*]] = phi i32 [ [[TMP32]], [[MIDDLE_BLOCK]] ] 780; IF-EVL-INLOOP-NEXT: ret i32 [[RDX_ADD_LCSSA]] 781; 782; NO-VP-OUTLOOP-LABEL: define i32 @step_cond_add_pred( 783; NO-VP-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { 784; NO-VP-OUTLOOP-NEXT: entry: 785; NO-VP-OUTLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() 786; NO-VP-OUTLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 787; NO-VP-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[N]], [[TMP2]] 788; NO-VP-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 789; NO-VP-OUTLOOP: vector.ph: 790; NO-VP-OUTLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() 791; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 792; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]] 793; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 794; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() 795; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 796; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0 797; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() 798; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1) 799; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]] 800; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32 801; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]] 802; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP17]], i64 0 803; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 804; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] 805; NO-VP-OUTLOOP: vector.body: 806; NO-VP-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 807; NO-VP-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP11]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] 808; NO-VP-OUTLOOP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 809; NO-VP-OUTLOOP-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 0 810; NO-VP-OUTLOOP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP18]] 811; NO-VP-OUTLOOP-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 812; NO-VP-OUTLOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP27]], align 4 813; NO-VP-OUTLOOP-NEXT: [[TMP28:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_IND]] 814; NO-VP-OUTLOOP-NEXT: [[TMP22:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]] 815; NO-VP-OUTLOOP-NEXT: [[TMP23:%.*]] = xor <vscale x 4 x i1> [[TMP28]], splat (i1 true) 816; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP23]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP22]] 817; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] 818; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]] 819; NO-VP-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 820; NO-VP-OUTLOOP-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 821; NO-VP-OUTLOOP: middle.block: 822; NO-VP-OUTLOOP-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PREDPHI]]) 823; NO-VP-OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 824; NO-VP-OUTLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 825; NO-VP-OUTLOOP: scalar.ph: 826; NO-VP-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 827; NO-VP-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 828; NO-VP-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] 829; NO-VP-OUTLOOP: for.body: 830; NO-VP-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 831; NO-VP-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RDX_ADD:%.*]], [[FOR_INC]] ] 832; NO-VP-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 833; NO-VP-OUTLOOP-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 834; NO-VP-OUTLOOP-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 835; NO-VP-OUTLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], [[IV_TRUNC]] 836; NO-VP-OUTLOOP-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[FOR_INC]] 837; NO-VP-OUTLOOP: if.then: 838; NO-VP-OUTLOOP-NEXT: [[ADD_PRED:%.*]] = add nsw i32 [[RDX]], [[TMP21]] 839; NO-VP-OUTLOOP-NEXT: br label [[FOR_INC]] 840; NO-VP-OUTLOOP: for.inc: 841; NO-VP-OUTLOOP-NEXT: [[RDX_ADD]] = phi i32 [ [[ADD_PRED]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ] 842; NO-VP-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 843; NO-VP-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 844; NO-VP-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 845; NO-VP-OUTLOOP: for.end: 846; NO-VP-OUTLOOP-NEXT: [[RDX_ADD_LCSSA:%.*]] = phi i32 [ [[RDX_ADD]], [[FOR_INC]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] 847; NO-VP-OUTLOOP-NEXT: ret i32 [[RDX_ADD_LCSSA]] 848; 849; NO-VP-INLOOP-LABEL: define i32 @step_cond_add_pred( 850; NO-VP-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { 851; NO-VP-INLOOP-NEXT: entry: 852; NO-VP-INLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 853; NO-VP-INLOOP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 854; NO-VP-INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] 855; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 856; NO-VP-INLOOP: vector.ph: 857; NO-VP-INLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 858; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 859; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 860; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 861; NO-VP-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 862; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 863; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() 864; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i32> [[TMP6]], splat (i32 1) 865; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP8]] 866; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP5]] to i32 867; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP10]] 868; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP11]], i64 0 869; NO-VP-INLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 870; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] 871; NO-VP-INLOOP: vector.body: 872; NO-VP-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 873; NO-VP-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] 874; NO-VP-INLOOP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 875; NO-VP-INLOOP-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 876; NO-VP-INLOOP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] 877; NO-VP-INLOOP-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 878; NO-VP-INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP14]], align 4 879; NO-VP-INLOOP-NEXT: [[TMP15:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_IND]] 880; NO-VP-INLOOP-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> zeroinitializer 881; NO-VP-INLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP16]]) 882; NO-VP-INLOOP-NEXT: [[TMP18]] = add i32 [[TMP17]], [[VEC_PHI]] 883; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 884; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]] 885; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 886; NO-VP-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 887; NO-VP-INLOOP: middle.block: 888; NO-VP-INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 889; NO-VP-INLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 890; NO-VP-INLOOP: scalar.ph: 891; NO-VP-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 892; NO-VP-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] 893; NO-VP-INLOOP-NEXT: br label [[FOR_BODY:%.*]] 894; NO-VP-INLOOP: for.body: 895; NO-VP-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 896; NO-VP-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RDX_ADD:%.*]], [[FOR_INC]] ] 897; NO-VP-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 898; NO-VP-INLOOP-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 899; NO-VP-INLOOP-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 900; NO-VP-INLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], [[IV_TRUNC]] 901; NO-VP-INLOOP-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[FOR_INC]] 902; NO-VP-INLOOP: if.then: 903; NO-VP-INLOOP-NEXT: [[ADD_PRED:%.*]] = add nsw i32 [[RDX]], [[TMP20]] 904; NO-VP-INLOOP-NEXT: br label [[FOR_INC]] 905; NO-VP-INLOOP: for.inc: 906; NO-VP-INLOOP-NEXT: [[RDX_ADD]] = phi i32 [ [[ADD_PRED]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ] 907; NO-VP-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 908; NO-VP-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 909; NO-VP-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 910; NO-VP-INLOOP: for.end: 911; NO-VP-INLOOP-NEXT: [[RDX_ADD_LCSSA:%.*]] = phi i32 [ [[RDX_ADD]], [[FOR_INC]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] 912; NO-VP-INLOOP-NEXT: ret i32 [[RDX_ADD_LCSSA]] 913; 914entry: 915 br label %for.body 916 917for.body: 918 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] 919 %rdx = phi i32 [ %start, %entry ], [ %rdx.add, %for.inc ] 920 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 921 %0 = load i32, ptr %arrayidx, align 4 922 %iv.trunc = trunc i64 %iv to i32 923 %cmp = icmp sgt i32 %0, %iv.trunc 924 br i1 %cmp, label %if.then, label %for.inc 925 926if.then: 927 %add.pred = add nsw i32 %rdx, %0 928 br label %for.inc 929 930for.inc: 931 %rdx.add = phi i32 [ %add.pred, %if.then ], [ %rdx, %for.body ] 932 %iv.next = add nuw nsw i64 %iv, 1 933 %exitcond.not = icmp eq i64 %iv.next, %n 934 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 935 936for.end: 937 ret i32 %rdx.add 938} 939 940!0 = distinct !{!0, !1} 941!1 = !{!"llvm.loop.vectorize.enable", i1 true} 942;. 943; IF-EVL-OUTLOOP: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} 944; IF-EVL-OUTLOOP: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} 945; IF-EVL-OUTLOOP: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} 946; IF-EVL-OUTLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} 947; IF-EVL-OUTLOOP: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} 948; IF-EVL-OUTLOOP: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} 949; IF-EVL-OUTLOOP: [[LOOP6]] = distinct !{[[LOOP6]], [[META7:![0-9]+]]} 950; IF-EVL-OUTLOOP: [[META7]] = !{!"llvm.loop.vectorize.enable", i1 true} 951;. 952; IF-EVL-INLOOP: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} 953; IF-EVL-INLOOP: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} 954; IF-EVL-INLOOP: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} 955; IF-EVL-INLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} 956; IF-EVL-INLOOP: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} 957; IF-EVL-INLOOP: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} 958; IF-EVL-INLOOP: [[LOOP6]] = distinct !{[[LOOP6]], [[META7:![0-9]+]]} 959; IF-EVL-INLOOP: [[META7]] = !{!"llvm.loop.vectorize.enable", i1 true} 960;. 961; NO-VP-OUTLOOP: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} 962; NO-VP-OUTLOOP: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} 963; NO-VP-OUTLOOP: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} 964; NO-VP-OUTLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} 965; NO-VP-OUTLOOP: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} 966; NO-VP-OUTLOOP: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} 967; NO-VP-OUTLOOP: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} 968; NO-VP-OUTLOOP: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} 969; NO-VP-OUTLOOP: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} 970; NO-VP-OUTLOOP: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} 971;. 972; NO-VP-INLOOP: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} 973; NO-VP-INLOOP: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} 974; NO-VP-INLOOP: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} 975; NO-VP-INLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} 976; NO-VP-INLOOP: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} 977; NO-VP-INLOOP: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} 978; NO-VP-INLOOP: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} 979; NO-VP-INLOOP: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} 980; NO-VP-INLOOP: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} 981; NO-VP-INLOOP: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} 982;. 983