; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -passes=loop-vectorize \
; RUN: -force-tail-folding-style=data-with-evl \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefixes=IF-EVL-OUTLOOP

; RUN: opt -passes=loop-vectorize \
; RUN: -prefer-inloop-reductions \
; RUN: -force-tail-folding-style=data-with-evl \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefixes=IF-EVL-INLOOP

; RUN: opt -passes=loop-vectorize \
; RUN: -force-tail-folding-style=none \
; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefixes=NO-VP-OUTLOOP

; RUN: opt -passes=loop-vectorize \
; RUN: -prefer-inloop-reductions \
; RUN: -force-tail-folding-style=none \
; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefixes=NO-VP-INLOOP

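; This test exercises an integer add reduction whose intermediate value is
; also stored to memory on every iteration: the scalar source loop writes the
; running sum to %addr, which may alias the input array %a. Vectorization
; therefore requires a runtime alias check (vector.memcheck), and the
; intermediate store is sunk out of the vector loop so that only the final
; reduction result is stored once, in middle.block.
;
; The four RUN configurations cover out-of-loop and in-loop reductions (via
; -prefer-inloop-reductions), each with EVL-based tail folding
; (data-with-evl) and without tail folding (scalar epilogue).
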
define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) {
; IF-EVL-OUTLOOP-LABEL: define void @reduction_intermediate_store(
; IF-EVL-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
; IF-EVL-OUTLOOP-NEXT: entry:
; IF-EVL-OUTLOOP-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-OUTLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-OUTLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; IF-EVL-OUTLOOP: vector.memcheck:
; IF-EVL-OUTLOOP-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ADDR]], i64 4
; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = shl i64 [[N]], 2
; IF-EVL-OUTLOOP-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
; IF-EVL-OUTLOOP-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[ADDR]], [[SCEVGEP1]]
; IF-EVL-OUTLOOP-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
; IF-EVL-OUTLOOP-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; IF-EVL-OUTLOOP-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[ENTRY:%.*]]
; IF-EVL-OUTLOOP: vector.ph:
; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP7]]
; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL-OUTLOOP: vector.body:
; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP10]], [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META0:![0-9]+]]
; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-OUTLOOP-NEXT: [[TMP19]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP12]])
; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[TMP9]]
; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]])
; IF-EVL-OUTLOOP-NEXT: store i32 [[TMP23]], ptr [[ADDR]], align 4, !alias.scope [[META6:![0-9]+]], !noalias [[META0]]
; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IF-EVL-OUTLOOP: scalar.ph:
; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY1:%.*]] ]
; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ [[START]], [[VECTOR_MEMCHECK]] ], [ [[START]], [[ENTRY1]] ]
; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY1:%.*]]
; IF-EVL-OUTLOOP: for.body:
; IF-EVL-OUTLOOP-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ]
; IF-EVL-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY1]] ]
; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV1]]
; IF-EVL-OUTLOOP-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; IF-EVL-OUTLOOP-NEXT: [[ADD]] = add nsw i32 [[TMP27]], [[RDX]]
; IF-EVL-OUTLOOP-NEXT: store i32 [[ADD]], ptr [[ADDR]], align 4
; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
; IF-EVL-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]]
; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP8:![0-9]+]]
; IF-EVL-OUTLOOP: for.end:
; IF-EVL-OUTLOOP-NEXT: ret void
;
; IF-EVL-INLOOP-LABEL: define void @reduction_intermediate_store(
; IF-EVL-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
; IF-EVL-INLOOP-NEXT: entry:
; IF-EVL-INLOOP-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-INLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-INLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-INLOOP-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-INLOOP-NEXT: br i1 [[TMP4]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; IF-EVL-INLOOP: vector.memcheck:
; IF-EVL-INLOOP-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ADDR]], i64 4
; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = shl i64 [[N]], 2
; IF-EVL-INLOOP-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
; IF-EVL-INLOOP-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[ADDR]], [[SCEVGEP1]]
; IF-EVL-INLOOP-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
; IF-EVL-INLOOP-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; IF-EVL-INLOOP-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; IF-EVL-INLOOP: vector.ph:
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = sub i64 [[TMP9]], 1
; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP10]]
; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP9]]
; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-INLOOP: vector.body:
; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP13]], i32 4, i1 true)
; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]]
; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]]), !alias.scope [[META0:![0-9]+]]
; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP14]] to i64
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]]
; IF-EVL-INLOOP-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-INLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: store i32 [[TMP22]], ptr [[ADDR]], align 4, !alias.scope [[META6:![0-9]+]], !noalias [[META0]]
; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IF-EVL-INLOOP: scalar.ph:
; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ [[START]], [[VECTOR_MEMCHECK]] ], [ [[START]], [[ENTRY]] ]
; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL-INLOOP: for.body:
; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-INLOOP-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; IF-EVL-INLOOP-NEXT: [[ADD]] = add nsw i32 [[TMP25]], [[RDX]]
; IF-EVL-INLOOP-NEXT: store i32 [[ADD]], ptr [[ADDR]], align 4
; IF-EVL-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; IF-EVL-INLOOP: for.end:
; IF-EVL-INLOOP-NEXT: ret void
;
; NO-VP-OUTLOOP-LABEL: define void @reduction_intermediate_store(
; NO-VP-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
; NO-VP-OUTLOOP-NEXT: entry:
; NO-VP-OUTLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-OUTLOOP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; NO-VP-OUTLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; NO-VP-OUTLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; NO-VP-OUTLOOP: vector.memcheck:
; NO-VP-OUTLOOP-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ADDR]], i64 4
; NO-VP-OUTLOOP-NEXT: [[TMP3:%.*]] = shl i64 [[N]], 2
; NO-VP-OUTLOOP-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
; NO-VP-OUTLOOP-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[ADDR]], [[SCEVGEP1]]
; NO-VP-OUTLOOP-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
; NO-VP-OUTLOOP-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; NO-VP-OUTLOOP-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NO-VP-OUTLOOP: vector.ph:
; NO-VP-OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-OUTLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]]
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-OUTLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; NO-VP-OUTLOOP-NEXT: [[TMP8:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP-OUTLOOP: vector.body:
; NO-VP-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP8]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]]
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
; NO-VP-OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP11]], align 4, !alias.scope [[META0:![0-9]+]]
; NO-VP-OUTLOOP-NEXT: [[TMP12]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-OUTLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; NO-VP-OUTLOOP: middle.block:
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP12]])
; NO-VP-OUTLOOP-NEXT: store i32 [[TMP14]], ptr [[ADDR]], align 4, !alias.scope [[META6:![0-9]+]], !noalias [[META0]]
; NO-VP-OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-OUTLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP-OUTLOOP: scalar.ph:
; NO-VP-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ [[START]], [[VECTOR_MEMCHECK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]]
; NO-VP-OUTLOOP: for.body:
; NO-VP-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; NO-VP-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-OUTLOOP-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; NO-VP-OUTLOOP-NEXT: [[ADD]] = add nsw i32 [[TMP15]], [[RDX]]
; NO-VP-OUTLOOP-NEXT: store i32 [[ADD]], ptr [[ADDR]], align 4
; NO-VP-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; NO-VP-OUTLOOP: for.end:
; NO-VP-OUTLOOP-NEXT: ret void
;
; NO-VP-INLOOP-LABEL: define void @reduction_intermediate_store(
; NO-VP-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
; NO-VP-INLOOP-NEXT: entry:
; NO-VP-INLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-INLOOP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; NO-VP-INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; NO-VP-INLOOP: vector.memcheck:
; NO-VP-INLOOP-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ADDR]], i64 4
; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = shl i64 [[N]], 2
; NO-VP-INLOOP-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
; NO-VP-INLOOP-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[ADDR]], [[SCEVGEP1]]
; NO-VP-INLOOP-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
; NO-VP-INLOOP-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; NO-VP-INLOOP-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NO-VP-INLOOP: vector.ph:
; NO-VP-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]]
; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-INLOOP-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP-INLOOP: vector.body:
; NO-VP-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
; NO-VP-INLOOP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]]
; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
; NO-VP-INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP10]], align 4, !alias.scope [[META0:![0-9]+]]
; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
; NO-VP-INLOOP-NEXT: [[TMP12]] = add i32 [[TMP11]], [[VEC_PHI]]
; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; NO-VP-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; NO-VP-INLOOP: middle.block:
; NO-VP-INLOOP-NEXT: store i32 [[TMP12]], ptr [[ADDR]], align 4, !alias.scope [[META6:![0-9]+]], !noalias [[META0]]
; NO-VP-INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-INLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP-INLOOP: scalar.ph:
; NO-VP-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[VECTOR_MEMCHECK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-INLOOP-NEXT: br label [[FOR_BODY:%.*]]
; NO-VP-INLOOP: for.body:
; NO-VP-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; NO-VP-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-INLOOP-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; NO-VP-INLOOP-NEXT: [[ADD]] = add nsw i32 [[TMP14]], [[RDX]]
; NO-VP-INLOOP-NEXT: store i32 [[ADD]], ptr [[ADDR]], align 4
; NO-VP-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; NO-VP-INLOOP: for.end:
; NO-VP-INLOOP-NEXT: ret void
;
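; Scalar source loop: the running sum %add is stored to %addr on every
; iteration, creating the potential aliasing with the loads from %a that the
; vector.memcheck blocks above guard against.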
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %add = add nsw i32 %0, %rdx
  store i32 %add, ptr %addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret void
}

!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.enable", i1 true}
;.
; IF-EVL-OUTLOOP: [[META0]] = !{[[META1:![0-9]+]]}
; IF-EVL-OUTLOOP: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
; IF-EVL-OUTLOOP: [[META2]] = distinct !{[[META2]], !"LVerDomain"}
; IF-EVL-OUTLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]], [[META5:![0-9]+]]}
; IF-EVL-OUTLOOP: [[META4]] = !{!"llvm.loop.isvectorized", i32 1}
; IF-EVL-OUTLOOP: [[META5]] = !{!"llvm.loop.unroll.runtime.disable"}
; IF-EVL-OUTLOOP: [[META6]] = !{[[META7:![0-9]+]]}
; IF-EVL-OUTLOOP: [[META7]] = distinct !{[[META7]], [[META2]]}
; IF-EVL-OUTLOOP: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
;.
; IF-EVL-INLOOP: [[META0]] = !{[[META1:![0-9]+]]}
; IF-EVL-INLOOP: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
; IF-EVL-INLOOP: [[META2]] = distinct !{[[META2]], !"LVerDomain"}
; IF-EVL-INLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]], [[META5:![0-9]+]]}
; IF-EVL-INLOOP: [[META4]] = !{!"llvm.loop.isvectorized", i32 1}
; IF-EVL-INLOOP: [[META5]] = !{!"llvm.loop.unroll.runtime.disable"}
; IF-EVL-INLOOP: [[META6]] = !{[[META7:![0-9]+]]}
; IF-EVL-INLOOP: [[META7]] = distinct !{[[META7]], [[META2]]}
; IF-EVL-INLOOP: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
;.
; NO-VP-OUTLOOP: [[META0]] = !{[[META1:![0-9]+]]}
; NO-VP-OUTLOOP: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
; NO-VP-OUTLOOP: [[META2]] = distinct !{[[META2]], !"LVerDomain"}
; NO-VP-OUTLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]], [[META5:![0-9]+]]}
; NO-VP-OUTLOOP: [[META4]] = !{!"llvm.loop.isvectorized", i32 1}
; NO-VP-OUTLOOP: [[META5]] = !{!"llvm.loop.unroll.runtime.disable"}
; NO-VP-OUTLOOP: [[META6]] = !{[[META7:![0-9]+]]}
; NO-VP-OUTLOOP: [[META7]] = distinct !{[[META7]], [[META2]]}
; NO-VP-OUTLOOP: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
;.
; NO-VP-INLOOP: [[META0]] = !{[[META1:![0-9]+]]}
; NO-VP-INLOOP: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
; NO-VP-INLOOP: [[META2]] = distinct !{[[META2]], !"LVerDomain"}
; NO-VP-INLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]], [[META5:![0-9]+]]}
; NO-VP-INLOOP: [[META4]] = !{!"llvm.loop.isvectorized", i32 1}
; NO-VP-INLOOP: [[META5]] = !{!"llvm.loop.unroll.runtime.disable"}
; NO-VP-INLOOP: [[META6]] = !{[[META7:![0-9]+]]}
; NO-VP-INLOOP: [[META7]] = distinct !{[[META7]], [[META2]]}
; NO-VP-INLOOP: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
;.