; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; REQUIRES: asserts
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=0 -prefer-predicate-over-epilogue=scalar-epilogue \
; RUN:   -debug-only=loop-vectorize -force-target-instruction-cost=1 -S 2>%t | FileCheck %s --check-prefix=CHECK
; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-force-VF=8 \
; RUN:   -debug-only=loop-vectorize -S 2>%t | FileCheck %s --check-prefix=CHECK-VF8
; RUN: cat %t | FileCheck %s --check-prefix=DEBUG-FORCED

target triple = "aarch64-linux-gnu"

; DEBUG: LV: Checking a loop in 'main_vf_vscale_x_16'
; DEBUG: Create Skeleton for epilogue vectorized loop (first pass)
; DEBUG: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:vscale x 8, Epilogue Loop UF:1

; DEBUG-FORCED: LV: Checking a loop in 'main_vf_vscale_x_16'
; DEBUG-FORCED: LEV: Epilogue vectorization factor is forced.
; DEBUG-FORCED: Create Skeleton for epilogue vectorized loop (first pass)
; DEBUG-FORCED: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1

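; Note: in the first RUN configuration the cost model is left to choose the
; epilogue VF for this function and picks vscale x 8 (half of the main loop's
; vscale x 16, as the DEBUG lines above expect). The second RUN configuration
; forces a fixed-width epilogue VF of 8 via -epilogue-vectorization-force-VF=8,
; which is what the DEBUG-FORCED lines and the CHECK-VF8 prefix check.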
define void @main_vf_vscale_x_16(ptr %A) #0 {
; CHECK-LABEL: @main_vf_vscale_x_16(
; CHECK-NEXT:  iter.check:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 8
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK:       vector.main.loop.iter.check:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 32
; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 1024, [[TMP3]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 32
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP5]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 32
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP8]]
; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 0
; CHECK-NEXT:    [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP18:%.*]] = mul i64 [[TMP17]], 16
; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 [[TMP18]]
; CHECK-NEXT:    store <vscale x 16 x i8> splat (i8 1), ptr [[TMP16]], align 1
; CHECK-NEXT:    store <vscale x 16 x i8> splat (i8 1), ptr [[TMP19]], align 1
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK:       vec.epilog.iter.check:
; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
; CHECK-NEXT:    [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP22:%.*]] = mul i64 [[TMP21]], 8
; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP22]]
; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK:       vec.epilog.ph:
; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT:    [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP24:%.*]] = mul i64 [[TMP23]], 8
; CHECK-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 1024, [[TMP24]]
; CHECK-NEXT:    [[N_VEC3:%.*]] = sub i64 1024, [[N_MOD_VF2]]
; CHECK-NEXT:    [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP26:%.*]] = mul i64 [[TMP25]], 8
; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK:       vec.epilog.vector.body:
; CHECK-NEXT:    [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP27:%.*]] = add i64 [[INDEX5]], 0
; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP27]]
; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP28]], i32 0
; CHECK-NEXT:    store <vscale x 8 x i8> splat (i8 1), ptr [[TMP29]], align 1
; CHECK-NEXT:    [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP26]]
; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
; CHECK-NEXT:    br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       vec.epilog.middle.block:
; CHECK-NEXT:    [[CMP_N4:%.*]] = icmp eq i64 1024, [[N_VEC3]]
; CHECK-NEXT:    br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK:       vec.epilog.scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    store i8 1, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
; CHECK-VF8-LABEL: @main_vf_vscale_x_16(
; CHECK-VF8-NEXT:  iter.check:
; CHECK-VF8-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK-VF8:       vector.main.loop.iter.check:
; CHECK-VF8-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 32
; CHECK-VF8-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; CHECK-VF8-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8:       vector.ph:
; CHECK-VF8-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 32
; CHECK-VF8-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-VF8-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-VF8-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 32
; CHECK-VF8-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK-VF8:       vector.body:
; CHECK-VF8-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-VF8-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-VF8-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP6]]
; CHECK-VF8-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0
; CHECK-VF8-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 16
; CHECK-VF8-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[TMP16]]
; CHECK-VF8-NEXT:    store <vscale x 16 x i8> splat (i8 1), ptr [[TMP14]], align 1
; CHECK-VF8-NEXT:    store <vscale x 16 x i8> splat (i8 1), ptr [[TMP17]], align 1
; CHECK-VF8-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-VF8-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF8-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-VF8:       middle.block:
; CHECK-VF8-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-VF8-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK-VF8:       vec.epilog.iter.check:
; CHECK-VF8-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
; CHECK-VF8-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
; CHECK-VF8-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK-VF8:       vec.epilog.ph:
; CHECK-VF8-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-VF8-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK-VF8:       vec.epilog.vector.body:
; CHECK-VF8-NEXT:    [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-VF8-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX1]], 0
; CHECK-VF8-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP19]]
; CHECK-VF8-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i32 0
; CHECK-VF8-NEXT:    store <8 x i8> splat (i8 1), ptr [[TMP21]], align 1
; CHECK-VF8-NEXT:    [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
; CHECK-VF8-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
; CHECK-VF8-NEXT:    br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-VF8:       vec.epilog.middle.block:
; CHECK-VF8-NEXT:    br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK-VF8:       vec.epilog.scalar.ph:
; CHECK-VF8-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-VF8-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK-VF8:       for.body:
; CHECK-VF8-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-VF8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; CHECK-VF8-NEXT:    store i8 1, ptr [[ARRAYIDX]], align 1
; CHECK-VF8-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF8-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024
; CHECK-VF8-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-VF8:       exit:
; CHECK-VF8-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %iv
  store i8 1, ptr %arrayidx, align 1
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp ne i64 %iv.next, 1024
  br i1 %exitcond, label %for.body, label %exit

exit:
  ret void
}


; DEBUG: LV: Checking a loop in 'main_vf_vscale_x_2'
; DEBUG: Create Skeleton for epilogue vectorized loop (first pass)
; DEBUG: Main Loop VF:vscale x 2, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1

; DEBUG-FORCED: LV: Checking a loop in 'main_vf_vscale_x_2'
; DEBUG-FORCED: LEV: Epilogue vectorization factor is forced.
; DEBUG-FORCED: Create Skeleton for epilogue vectorized loop (first pass)
; DEBUG-FORCED: Main Loop VF:vscale x 2, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1

; When the vector.body uses VF=vscale x 1 (or VF=vscale x 2 because
; that's the minimum supported VF by SVE), we could still use a wide
; fixed-width VF=8 for the epilogue if the vectors are known to be
; sufficiently wide. This information can be deduced from vscale_range or
; VScaleForTuning (set by mcpu/mtune).
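; For example, with the vscale_range(8, 8) attribute on the function below,
; vscale is known to be exactly 8, so each <vscale x 2 x i64> main-loop vector
; holds 8 * 2 = 16 elements; the fixed-width <8 x i64> epilogue store checked
; below is therefore only half as wide as a single main-loop vector.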
define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) {
; CHECK-LABEL: @main_vf_vscale_x_2(
; CHECK-NEXT:  iter.check:
; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK:       vector.main.loop.iter.check:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 2
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]]
; CHECK-NEXT:    store <vscale x 2 x i64> splat (i64 1), ptr [[TMP14]], align 1
; CHECK-NEXT:    store <vscale x 2 x i64> splat (i64 1), ptr [[TMP17]], align 1
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK:       vec.epilog.iter.check:
; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK:       vec.epilog.ph:
; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK:       vec.epilog.vector.body:
; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX1]], 0
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0
; CHECK-NEXT:    store <8 x i64> splat (i64 1), ptr [[TMP21]], align 1
; CHECK-NEXT:    [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
; CHECK-NEXT:    br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       vec.epilog.middle.block:
; CHECK-NEXT:    br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK:       vec.epilog.scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    store i64 1, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
; CHECK-VF8-LABEL: @main_vf_vscale_x_2(
; CHECK-VF8-NEXT:  iter.check:
; CHECK-VF8-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK-VF8:       vector.main.loop.iter.check:
; CHECK-VF8-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-VF8-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; CHECK-VF8-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8:       vector.ph:
; CHECK-VF8-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-VF8-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-VF8-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-VF8-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-VF8-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK-VF8:       vector.body:
; CHECK-VF8-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-VF8-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-VF8-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]]
; CHECK-VF8-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
; CHECK-VF8-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 2
; CHECK-VF8-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]]
; CHECK-VF8-NEXT:    store <vscale x 2 x i64> splat (i64 1), ptr [[TMP14]], align 1
; CHECK-VF8-NEXT:    store <vscale x 2 x i64> splat (i64 1), ptr [[TMP17]], align 1
; CHECK-VF8-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-VF8-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF8-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-VF8:       middle.block:
; CHECK-VF8-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-VF8-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK-VF8:       vec.epilog.iter.check:
; CHECK-VF8-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
; CHECK-VF8-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
; CHECK-VF8-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK-VF8:       vec.epilog.ph:
; CHECK-VF8-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-VF8-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK-VF8:       vec.epilog.vector.body:
; CHECK-VF8-NEXT:    [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-VF8-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX1]], 0
; CHECK-VF8-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP19]]
; CHECK-VF8-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0
; CHECK-VF8-NEXT:    store <8 x i64> splat (i64 1), ptr [[TMP21]], align 1
; CHECK-VF8-NEXT:    [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
; CHECK-VF8-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
; CHECK-VF8-NEXT:    br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-VF8:       vec.epilog.middle.block:
; CHECK-VF8-NEXT:    br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK-VF8:       vec.epilog.scalar.ph:
; CHECK-VF8-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-VF8-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK-VF8:       for.body:
; CHECK-VF8-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-VF8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF8-NEXT:    store i64 1, ptr [[ARRAYIDX]], align 1
; CHECK-VF8-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF8-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024
; CHECK-VF8-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-VF8:       exit:
; CHECK-VF8-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %A, i64 %iv
  store i64 1, ptr %arrayidx, align 1
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp ne i64 %iv.next, 1024
  br i1 %exitcond, label %for.body, label %exit

exit:
  ret void
}

; FIXME: The epilogue loop is currently miscompiled: the pointer induction
; uses an incorrect resume value.
define void @test_pr57912_pointer_induction(ptr %start) #0 {
; CHECK-LABEL: @test_pr57912_pointer_induction(
; CHECK-NEXT:  iter.check:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 8
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 10000, [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK:       vector.main.loop.iter.check:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 32
; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 10000, [[TMP3]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 32
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP5]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 32
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP8]]
; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP14]], i32 0
; CHECK-NEXT:    [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP18:%.*]] = mul i64 [[TMP17]], 16
; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP14]], i64 [[TMP18]]
; CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[TMP16]], align 1
; CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[TMP19]], align 1
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK:       vec.epilog.iter.check:
; CHECK-NEXT:    [[IND_END4:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 10000, [[N_VEC]]
; CHECK-NEXT:    [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP22:%.*]] = mul i64 [[TMP21]], 8
; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP22]]
; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK:       vec.epilog.ph:
; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT:    [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP24:%.*]] = mul i64 [[TMP23]], 8
; CHECK-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 10000, [[TMP24]]
; CHECK-NEXT:    [[N_VEC3:%.*]] = sub i64 10000, [[N_MOD_VF2]]
; CHECK-NEXT:    [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP26:%.*]] = mul i64 [[TMP25]], 8
; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC3]]
; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK:       vec.epilog.vector.body:
; CHECK-NEXT:    [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP27:%.*]] = add i64 [[INDEX7]], 0
; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP27]]
; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[TMP28]], i32 0
; CHECK-NEXT:    store <vscale x 8 x i8> zeroinitializer, ptr [[TMP29]], align 1
; CHECK-NEXT:    [[INDEX_NEXT8]] = add nuw i64 [[INDEX7]], [[TMP26]]
; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]]
; CHECK-NEXT:    br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       vec.epilog.middle.block:
; CHECK-NEXT:    [[CMP_N6:%.*]] = icmp eq i64 10000, [[N_VEC3]]
; CHECK-NEXT:    br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK:       vec.epilog.scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT:    [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[START]], [[ITER_CHECK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    store i8 0, ptr [[PTR_IV]], align 1
; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 1
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 10000
; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
; CHECK-VF8-LABEL: @test_pr57912_pointer_induction(
; CHECK-VF8-NEXT:  iter.check:
; CHECK-VF8-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK-VF8:       vector.main.loop.iter.check:
; CHECK-VF8-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 32
; CHECK-VF8-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 10000, [[TMP1]]
; CHECK-VF8-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8:       vector.ph:
; CHECK-VF8-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 32
; CHECK-VF8-NEXT:    [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP3]]
; CHECK-VF8-NEXT:    [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]]
; CHECK-VF8-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 32
; CHECK-VF8-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK-VF8:       vector.body:
; CHECK-VF8-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-VF8-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-VF8-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP6]]
; CHECK-VF8-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0
; CHECK-VF8-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 16
; CHECK-VF8-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP16]]
; CHECK-VF8-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[TMP14]], align 1
; CHECK-VF8-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[TMP17]], align 1
; CHECK-VF8-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-VF8-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF8-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK-VF8:       middle.block:
; CHECK-VF8-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]]
; CHECK-VF8-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK-VF8:       vec.epilog.iter.check:
; CHECK-VF8-NEXT:    [[IND_END1:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
; CHECK-VF8-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 10000, [[N_VEC]]
; CHECK-VF8-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
; CHECK-VF8-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK-VF8:       vec.epilog.ph:
; CHECK-VF8-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-VF8-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 10000
; CHECK-VF8-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK-VF8:       vec.epilog.vector.body:
; CHECK-VF8-NEXT:    [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-VF8-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX3]], 0
; CHECK-VF8-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP19]]
; CHECK-VF8-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i32 0
; CHECK-VF8-NEXT:    store <8 x i8> zeroinitializer, ptr [[TMP21]], align 1
; CHECK-VF8-NEXT:    [[INDEX_NEXT4]] = add nuw i64 [[INDEX3]], 8
; CHECK-VF8-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 10000
; CHECK-VF8-NEXT:    br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK-VF8:       vec.epilog.middle.block:
; CHECK-VF8-NEXT:    br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK-VF8:       vec.epilog.scalar.ph:
; CHECK-VF8-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-VF8-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[START]], [[ITER_CHECK]] ], [ [[IND_END1]], [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-VF8-NEXT:    br label [[LOOP:%.*]]
; CHECK-VF8:       loop:
; CHECK-VF8-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-VF8-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-VF8-NEXT:    store i8 0, ptr [[PTR_IV]], align 1
; CHECK-VF8-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 1
; CHECK-VF8-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF8-NEXT:    [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 10000
; CHECK-VF8-NEXT:    br i1 [[CMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-VF8:       exit:
; CHECK-VF8-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop ]
  store i8 0, ptr %ptr.iv, align 1
  %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 1
  %iv.next = add nuw nsw i64 %iv, 1
  %cmp = icmp eq i64 %iv.next, 10000
  br i1 %cmp, label %exit, label %loop

exit:
  ret void
}

attributes #0 = { "target-features"="+sve" }