; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s --check-prefixes=CHECK

; Early-exit loop tests for the loop vectorizer on AArch64. Every function
; below contains a loop with a data-dependent early exit in addition to the
; counted exit in the loop latch.

target triple = "aarch64-unknown-linux-gnu"

declare void @init_mem(ptr, i64);

; Both exits branch to the same block. The early exit is taken when the bytes
; loaded from %p1 and %p2 differ; the function returns the index at that point,
; or 67 when leaving through the latch. The expected output keeps the loop
; scalar.
define i64 @same_exit_block_pre_inc_use1() #1 {
; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = alloca [1024 x i8], align 4
; CHECK-NEXT:    [[P2:%.*]] = alloca [1024 x i8], align 4
; CHECK-NEXT:    call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT:    call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
; CHECK:       loop.inc:
; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
; CHECK:       loop.end:
; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
; CHECK-NEXT:    ret i64 [[RETVAL]]
;
entry:
  %p1 = alloca [1024 x i8]
  %p2 = alloca [1024 x i8]
  call void @init_mem(ptr %p1, i64 1024)
  call void @init_mem(ptr %p2, i64 1024)
  br label %loop

loop:
  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
  %ld1 = load i8, ptr %arrayidx, align 1
  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
  %ld2 = load i8, ptr %arrayidx1, align 1
  %cmp3 = icmp eq i8 %ld1, %ld2
  br i1 %cmp3, label %loop.inc, label %loop.end

loop.inc:
  %index.next = add i64 %index, 1
  %exitcond = icmp ne i64 %index.next, 67
  br i1 %exitcond, label %loop, label %loop.end

loop.end:
  %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
  ret i64 %retval
}


; In this example the early exit block appears in the list of ExitNotTaken
; SCEVs, but is not computable.
; The early-exit condition compares the induction variable itself against the
; value loaded from %p1. The expected output keeps the loop scalar.
define i64 @same_exit_block_pre_inc_use4() {
; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use4() {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = alloca [1024 x i64], align 8
; CHECK-NEXT:    [[P2:%.*]] = alloca [1024 x i64], align 8
; CHECK-NEXT:    call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT:    call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT:    [[LD1:%.*]] = load i64, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[CMP3:%.*]] = icmp ult i64 [[INDEX]], [[LD1]]
; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
; CHECK:       loop.inc:
; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
; CHECK:       loop.end:
; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
; CHECK-NEXT:    ret i64 [[RETVAL]]
;
entry:
  %p1 = alloca [1024 x i64]
  %p2 = alloca [1024 x i64]
  call void @init_mem(ptr %p1, i64 1024)
  call void @init_mem(ptr %p2, i64 1024)
  br label %loop

loop:
  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
  %arrayidx = getelementptr inbounds i64, ptr %p1, i64 %index
  %ld1 = load i64, ptr %arrayidx, align 1
  %cmp3 = icmp ult i64 %index, %ld1
  br i1 %cmp3, label %loop.inc, label %loop.end

loop.inc:
  %index.next = add i64 %index, 1
  %exitcond = icmp ne i64 %index.next, 67
  br i1 %exitcond, label %loop, label %loop.end

loop.end:
  %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
  ret i64 %retval
}


; The early-exit condition is computed from a call to llvm.sqrt.f32 on the
; loaded value. The expected output keeps the loop scalar.
define i64 @loop_contains_safe_call() #1 {
; CHECK-LABEL: define i64 @loop_contains_safe_call(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = alloca [1024 x i8], align 4
; CHECK-NEXT:    [[P2:%.*]] = alloca [1024 x i8], align 4
; CHECK-NEXT:    call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT:    call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT:    [[LD1:%.*]] = load float, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[SQRT:%.*]] = tail call fast float @llvm.sqrt.f32(float [[LD1]])
; CHECK-NEXT:    [[CMP:%.*]] = fcmp fast ult float [[SQRT]], 3.000000e+00
; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
; CHECK:       loop.inc:
; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
; CHECK:       loop.end:
; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
; CHECK-NEXT:    ret i64 [[RETVAL]]
;
entry:
  %p1 = alloca [1024 x i8]
  %p2 = alloca [1024 x i8]
  call void @init_mem(ptr %p1, i64 1024)
  call void @init_mem(ptr %p2, i64 1024)
  br label %loop

loop:
  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
  %arrayidx = getelementptr inbounds float, ptr %p1, i64 %index
  %ld1 = load float, ptr %arrayidx, align 1
  %sqrt = tail call fast float @llvm.sqrt.f32(float %ld1)
  %cmp = fcmp fast ult float %sqrt, 3.0e+00
  br i1 %cmp, label %loop.inc, label %loop.end

loop.inc:
  %index.next = add i64 %index, 1
  %exitcond = icmp ne i64 %index.next, 67
  br i1 %exitcond, label %loop, label %loop.end

loop.end:
  %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
  ret i64 %retval
}


; The early-exit condition is computed from a udiv of the loaded value by the
; non-zero constant 20000. The expected output keeps the loop scalar.
define i64 @loop_contains_safe_div() #1 {
; CHECK-LABEL: define i64 @loop_contains_safe_div(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = alloca [1024 x i8], align 4
; CHECK-NEXT:    [[P2:%.*]] = alloca [1024 x i8], align 4
; CHECK-NEXT:    call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT:    call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT:    [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[DIV:%.*]] = udiv i32 [[LD1]], 20000
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[DIV]], 1
; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
; CHECK:       loop.inc:
; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
; CHECK:       loop.end:
; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
; CHECK-NEXT:    ret i64 [[RETVAL]]
;
entry:
  %p1 = alloca [1024 x i8]
  %p2 = alloca [1024 x i8]
  call void @init_mem(ptr %p1, i64 1024)
  call void @init_mem(ptr %p2, i64 1024)
  br label %loop

loop:
  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index
  %ld1 = load i32, ptr %arrayidx, align 1
  %div = udiv i32 %ld1, 20000
  %cmp = icmp eq i32 %div, 1
  br i1 %cmp, label %loop.inc, label %loop.end

loop.inc:
  %index.next = add i64 %index, 1
  %exitcond = icmp ne i64 %index.next, 67
  br i1 %exitcond, label %loop, label %loop.end

loop.end:
  %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
  ret i64 %retval
}


; The latch block loads from %p2 after the early-exit branch, and that loaded
; value is what the function returns when the loop leaves through the latch.
; The expected output keeps the loop scalar.
define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(8) %p2) {
; CHECK-LABEL: define i64 @loop_contains_load_after_early_exit(
; CHECK-SAME: ptr align 8 dereferenceable(1024) [[P2:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = alloca [1024 x i8], align 4
; CHECK-NEXT:    call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT:    [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[LD1]], 1
; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
; CHECK:       loop.inc:
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[INDEX]]
; CHECK-NEXT:    [[LD2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
; CHECK:       loop.end:
; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[LD2]], [[LOOP_INC]] ]
; CHECK-NEXT:    ret i64 [[RETVAL]]
;
entry:
  %p1 = alloca [1024 x i8]
  call void @init_mem(ptr %p1, i64 1024)
  br label %loop

loop:
  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index
  %ld1 = load i32, ptr %arrayidx, align 1
  %cmp = icmp eq i32 %ld1, 1
  br i1 %cmp, label %loop.inc, label %loop.end

loop.inc:
  %arrayidx2 = getelementptr inbounds i64, ptr %p2, i64 %index
  %ld2 = load i64, ptr %arrayidx2, align 8
  %index.next = add i64 %index, 1
  %exitcond = icmp ne i64 %index.next, 67
  br i1 %exitcond, label %loop, label %loop.end

loop.end:
  %retval = phi i64 [ %index, %loop ], [ %ld2, %loop.inc ]
  ret i64 %retval
}


; The form of the induction variables requires SCEV predicates.
; The latch compares the zero-extended i8 counter %ind against %end.clamped
; while a separate i64 counter indexes memory. The two exits lead to different
; blocks (%found and %exit). The expected output contains a vector.scevcheck
; block and a vector loop operating on <4 x i32>.
define i32 @diff_exit_block_needs_scev_check(i32 %end) {
; CHECK-LABEL: define i32 @diff_exit_block_needs_scev_check(
; CHECK-SAME: i32 [[END:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = alloca [1024 x i32], align 4
; CHECK-NEXT:    [[P2:%.*]] = alloca [1024 x i32], align 4
; CHECK-NEXT:    call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT:    call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT:    [[END_CLAMPED:%.*]] = and i32 [[END]], 1023
; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[END]] to i10
; CHECK-NEXT:    [[TMP1:%.*]] = zext i10 [[TMP0]] to i64
; CHECK-NEXT:    [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1)
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 12
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK:       vector.scevcheck:
; CHECK-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END_CLAMPED]], i32 1)
; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i32 [[UMAX]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
; CHECK-NEXT:    [[TMP4:%.*]] = add i8 1, [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i32 [[TMP2]], 255
; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
; CHECK-NEXT:    br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[N_VEC]] to i8
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[TMP9]]
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4
; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]])
; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.split:
; CHECK-NEXT:    br i1 [[TMP15]], label [[FOUND:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[GEP_IND]]
; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[GEP_IND]]
; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[CMP_EARLY:%.*]] = icmp eq i32 [[TMP18]], [[TMP19]]
; CHECK-NEXT:    br i1 [[CMP_EARLY]], label [[FOUND]], label [[FOR_INC]]
; CHECK:       for.inc:
; CHECK-NEXT:    [[IND_NEXT]] = add i8 [[IND]], 1
; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32
; CHECK-NEXT:    [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       found:
; CHECK-NEXT:    ret i32 1
; CHECK:       exit:
; CHECK-NEXT:    ret i32 0
;
entry:
  %p1 = alloca [1024 x i32]
  %p2 = alloca [1024 x i32]
  call void @init_mem(ptr %p1, i64 1024)
  call void @init_mem(ptr %p2, i64 1024)
  %end.clamped = and i32 %end, 1023
  br label %for.body

for.body:
  %ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ]
  %gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ]
  %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %gep.ind
  %0 = load i32, ptr %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %gep.ind
  %1 = load i32, ptr %arrayidx2, align 4
  %cmp.early = icmp eq i32 %0, %1
  br i1 %cmp.early, label %found, label %for.inc

for.inc:
  %ind.next = add i8 %ind, 1
  %conv = zext i8 %ind.next to i32
  %gep.ind.next = add i64 %gep.ind, 1
  %cmp = icmp ult i32 %conv, %end.clamped
  br i1 %cmp, label %for.body, label %exit

found:
  ret i32 1

exit:
  ret i32 0
}

%my.struct = type { i8, i8 }

; Each iteration loads the first i8 field of one %my.struct element, so
; consecutive iterations read memory two bytes apart. The expected output keeps
; the loop scalar.
define i64 @same_exit_block_requires_interleaving() {
; CHECK-LABEL: define i64 @same_exit_block_requires_interleaving() {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = alloca [128 x %my.struct], align 8
; CHECK-NEXT:    call void @init_mem(ptr [[P1]], i64 256)
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [128 x %my.struct], ptr [[P1]], i64 0, i64 [[INDEX]]
; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_LATCH]], label [[LOOP_END:%.*]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 69
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
; CHECK:       loop.end:
; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP_LATCH]] ], [ 1, [[LOOP]] ]
; CHECK-NEXT:    ret i64 [[RETVAL]]
;
entry:
  %p1 = alloca [128 x %my.struct]
  call void @init_mem(ptr %p1, i64 256)
  br label %loop

loop:
  %index = phi i64 [ %index.next, %loop.latch ], [ 3, %entry ]
  %arrayidx = getelementptr inbounds [128 x %my.struct], ptr %p1, i64 0, i64 %index
  %ld1 = load i8, ptr %arrayidx, align 1
  %cmp3 = icmp eq i8 %ld1, 3
  br i1 %cmp3, label %loop.latch, label %loop.end

loop.latch:
  %index.next = add i64 %index, 1
  %exitcond = icmp ne i64 %index.next, 69
  br i1 %exitcond, label %loop, label %loop.end

loop.end:
  %retval = phi i64 [ 0, %loop.latch ], [ 1, %loop ]
  ret i64 %retval
}

declare i32 @foo(i32) readonly
declare <vscale x 4 x i32> @foo_vec(<vscale x 4 x i32>)

attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
attributes #1 = { "target-features"="+sve" vscale_range(1,16) }
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
;.