; RUN: opt %s -passes=loop-vectorize -hoist-runtime-checks=false -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Both accesses have the same step and access size, so a single difference
; check is sufficient instead of a full overlap check.
define void @same_step_and_size(ptr %a, ptr %b, i64 %n) {
; CHECK-LABEL: @same_step_and_size(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A:%.*]] to i64
; CHECK-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B:%.*]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[B1]], [[A2]]
; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
; CHECK-NEXT:    br i1 [[DIFF_CHECK]], label %scalar.ph, label %vector.ph
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
  %l = load i32, ptr %gep.a
  %mul = mul nsw i32 %l, 3
  %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
  store i32 %mul, ptr %gep.b
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; The difference check can also be used when neither of the two accesses
; dominates the other.
define void @same_step_and_size_no_dominance_between_accesses(ptr %a, ptr %b, i64 %n, i64 %x) {
; CHECK-LABEL: @same_step_and_size_no_dominance_between_accesses(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[B2:%.*]] = ptrtoint ptr [[B:%.*]] to i64
; CHECK-NEXT:    [[A1:%.*]] = ptrtoint ptr [[A:%.*]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[A1]], [[B2]]
; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
; CHECK-NEXT:    br i1 [[DIFF_CHECK]], label %scalar.ph, label %vector.ph
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %cmp = icmp ne i64 %iv, %x
  br i1 %cmp, label %then, label %else

then:
  %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 0, ptr %gep.a
  br label %loop.latch

else:
  %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
  store i32 10, ptr %gep.b
  br label %loop.latch

loop.latch:
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; The accesses have different steps and different access sizes, so a full
; overlap check comparing the accessed address ranges is required.
define void @different_steps_and_different_access_sizes(ptr %a, ptr %b, i64 %n) {
; CHECK-LABEL: @different_steps_and_different_access_sizes(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[N_SHL_2:%.]] = shl i64 %n, 2
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr %b, i64 [[N_SHL_2]]
; CHECK-NEXT:    [[N_SHL_1:%.]] = shl i64 %n, 1
; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i8, ptr %a, i64 [[N_SHL_1]]
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr %b, [[SCEVGEP4]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr %a, [[SCEVGEP]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label %scalar.ph, label %vector.ph
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.a = getelementptr inbounds i16, ptr %a, i64 %iv
  %l = load i16, ptr %gep.a
  %l.ext = sext i16 %l to i32
  %mul = mul nsw i32 %l.ext, 3
  %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
  store i32 %mul, ptr %gep.b
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; The steps of both accesses match, but the load reads the second i16 of each
; [2 x i16] element, so its start is offset by 2 bytes. A difference check
; adjusted for that offset is used.
define void @steps_match_but_different_access_sizes_1(ptr %a, ptr %b, i64 %n) {
; CHECK-LABEL: @steps_match_but_different_access_sizes_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A:%.*]] to i64
; CHECK-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B:%.*]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[B1]], -2
; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[A2]]
; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
; CHECK-NEXT:    br i1 [[DIFF_CHECK]], label %scalar.ph, label %vector.ph
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.a = getelementptr inbounds [2 x i16], ptr %a, i64 %iv, i64 1
  %l = load i16, ptr %gep.a
  %l.ext = sext i16 %l to i32
  %mul = mul nsw i32 %l.ext, 3
  %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
  store i32 %mul, ptr %gep.b
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; Same as @steps_match_but_different_access_sizes_1, but with source and sink
; accesses flipped.
define void @steps_match_but_different_access_sizes_2(ptr %a, ptr %b, i64 %n) {
; CHECK-LABEL: @steps_match_but_different_access_sizes_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[B2:%.*]] = ptrtoint ptr [[B:%.*]] to i64
; CHECK-NEXT:    [[A1:%.*]] = ptrtoint ptr [[A:%.*]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[A1]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[B2]]
; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
; CHECK-NEXT:    br i1 [[DIFF_CHECK]], label %scalar.ph, label %vector.ph
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
  %l = load i32, ptr %gep.b
  %mul = mul nsw i32 %l, 3
  %gep.a = getelementptr inbounds [2 x i16], ptr %a, i64 %iv, i64 1
  %trunc = trunc i32 %mul to i16
  store i16 %trunc, ptr %gep.a
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; Full no-overlap checks are required instead of difference checks, as one of
; the add-recs used is invariant in the inner loop.
; Test case for PR57315.
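; Roughly equivalent C for the loop nest below (an illustrative sketch; note
; the IR executes both loops at least once):
;
;   for (long i = 0; i < n; i++)
;     for (long j = 0; j < n; j++)
;       a[i] = b[j] - 10;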
define void @nested_loop_outer_iv_addrec_invariant_in_inner1(ptr %a, ptr %b, i64 %n) {
; CHECK-LABEL: @nested_loop_outer_iv_addrec_invariant_in_inner1(
; CHECK:       entry:
; CHECK-NEXT:    [[N_SHL_2:%.]] = shl i64 %n, 2
; CHECK-NEXT:    [[B_GEP_UPPER:%.*]] = getelementptr i8, ptr %b, i64 [[N_SHL_2]]
; CHECK-NEXT:    br label %outer

; CHECK:       outer.header:
; CHECK:         [[OUTER_IV_SHL_2:%.]] = shl i64 %outer.iv, 2
; CHECK-NEXT:    [[A_GEP_UPPER:%.*]] = getelementptr nuw i8, ptr %a, i64 [[OUTER_IV_SHL_2]]
; CHECK-NEXT:    [[OUTER_IV_4:%.]] = add i64 [[OUTER_IV_SHL_2]], 4
; CHECK-NEXT:    [[A_GEP_UPPER_4:%.*]] = getelementptr i8, ptr %a, i64 [[OUTER_IV_4]]
; CHECK:         [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck

; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[A_GEP_UPPER]], [[B_GEP_UPPER]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr %b, [[A_GEP_UPPER_4]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label %scalar.ph, label %vector.ph
;
entry:
  br label %outer.header

outer.header:
  %outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ]
  %gep.a = getelementptr inbounds i32, ptr %a, i64 %outer.iv
  br label %inner.body

inner.body:
  %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner.body ]
  %gep.b = getelementptr inbounds i32, ptr %b, i64 %inner.iv
  %l = load i32, ptr %gep.b, align 4
  %sub = sub i32 %l, 10
  store i32 %sub, ptr %gep.a, align 4
  %inner.iv.next = add nuw nsw i64 %inner.iv, 1
  %inner.cond = icmp eq i64 %inner.iv.next, %n
  br i1 %inner.cond, label %outer.latch, label %inner.body

outer.latch:
  %outer.iv.next = add nuw nsw i64 %outer.iv, 1
  %outer.cond = icmp eq i64 %outer.iv.next, %n
  br i1 %outer.cond, label %exit, label %outer.header

exit:
  ret void
}

; Same as @nested_loop_outer_iv_addrec_invariant_in_inner1 but with dependence
; sink and source swapped.
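; Roughly equivalent C for the loop nest below (illustrative sketch):
;
;   for (long i = 0; i < n; i++)
;     for (long j = 0; j < n; j++)
;       b[j] = a[i] - 10;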
define void @nested_loop_outer_iv_addrec_invariant_in_inner2(ptr %a, ptr %b, i64 %n) {
; CHECK-LABEL: @nested_loop_outer_iv_addrec_invariant_in_inner2(
; CHECK:       entry:
; CHECK-NEXT:    [[N_SHL_2:%.]] = shl i64 %n, 2
; CHECK-NEXT:    [[B_GEP_UPPER:%.*]] = getelementptr i8, ptr %b, i64 [[N_SHL_2]]
; CHECK-NEXT:    br label %outer

; CHECK:       outer.header:
; CHECK:         [[OUTER_IV_SHL_2:%.]] = shl i64 %outer.iv, 2
; CHECK-NEXT:    [[A_GEP_UPPER:%.*]] = getelementptr nuw i8, ptr %a, i64 [[OUTER_IV_SHL_2]]
; CHECK-NEXT:    [[OUTER_IV_4:%.]] = add i64 [[OUTER_IV_SHL_2]], 4
; CHECK-NEXT:    [[A_GEP_UPPER_4:%.*]] = getelementptr i8, ptr %a, i64 [[OUTER_IV_4]]
; CHECK:         [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck

; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr %b, [[A_GEP_UPPER_4]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[A_GEP_UPPER]], [[B_GEP_UPPER]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label %scalar.ph, label %vector.ph
;
entry:
  br label %outer.header

outer.header:
  %outer.iv = phi i64 [ %outer.iv.next, %outer.latch ], [ 0, %entry ]
  %gep.a = getelementptr inbounds i32, ptr %a, i64 %outer.iv
  br label %inner.body

inner.body:
  %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner.body ]
  %l = load i32, ptr %gep.a, align 4
  %sub = sub i32 %l, 10
  %gep.b = getelementptr inbounds i32, ptr %b, i64 %inner.iv
  store i32 %sub, ptr %gep.b, align 4
  %inner.iv.next = add nuw nsw i64 %inner.iv, 1
  %inner.cond = icmp eq i64 %inner.iv.next, %n
  br i1 %inner.cond, label %outer.latch, label %inner.body

outer.latch:
  %outer.iv.next = add nuw nsw i64 %outer.iv, 1
  %outer.cond = icmp eq i64 %outer.iv.next, %n
  br i1 %outer.cond, label %exit, label %outer.header

exit:
  ret void
}

; Test case where the AddRecs for the pointers in the inner loop have the
; AddRec of the outer loop as their start value. It is sufficient to subtract
; the start values (%dst, %src) of the outer AddRecs.
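; Roughly equivalent C for the loop nest below (illustrative sketch):
;
;   for (long i = 0; i < m; i++)
;     for (long j = 0; j < n; j++)
;       dst[i * n + j] = src[i * n + j] + 10;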
define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i64 noundef %m, i64 noundef %n) {
; CHECK-LABEL: @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SRC2:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
; CHECK-NEXT:    [[DST1:%.*]] = ptrtoint ptr [[DST:%.*]] to i64
; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[DST1]], [[SRC2]]
; CHECK-NEXT:    br label [[OUTER_LOOP:%.*]]
; CHECK:       outer.loop:
; CHECK-NEXT:    [[OUTER_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[INNER_EXIT:%.*]] ]
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[OUTER_IV]], [[N]]
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[SUB]], 16
; CHECK-NEXT:    br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
;
entry:
  br label %outer.loop

outer.loop:
  %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.exit ]
  %mul = mul nsw i64 %outer.iv, %n
  br label %inner.loop

inner.loop:
  %iv.inner = phi i64 [ 0, %outer.loop ], [ %iv.inner.next, %inner.loop ]
  %idx = add nuw nsw i64 %iv.inner, %mul
  %gep.src = getelementptr inbounds i32, ptr %src, i64 %idx
  %l = load i32, ptr %gep.src, align 4
  %gep.dst = getelementptr inbounds i32, ptr %dst, i64 %idx
  %add = add nsw i32 %l, 10
  store i32 %add, ptr %gep.dst, align 4
  %iv.inner.next = add nuw nsw i64 %iv.inner, 1
  %inner.exit.cond = icmp eq i64 %iv.inner.next, %n
  br i1 %inner.exit.cond, label %inner.exit, label %inner.loop

inner.exit:
  %outer.iv.next = add nuw nsw i64 %outer.iv, 1
  %outer.exit.cond = icmp eq i64 %outer.iv.next, %m
  br i1 %outer.exit.cond, label %outer.exit, label %outer.loop

outer.exit:
  ret void
}

; Make sure difference checks are created for all access pairs when
; vectorization is retried with runtime checks.
define void @use_diff_checks_when_retrying_with_rt_checks(i64 %off, ptr %dst, ptr %src) {
; CHECK-LABEL: @use_diff_checks_when_retrying_with_rt_checks(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SRC2:%.*]] = ptrtoint ptr %src to i64
; CHECK-NEXT:    [[DST1:%.*]] = ptrtoint ptr %dst to i64
; CHECK-NEXT:    br i1 false, label %scalar.ph, label %vector.memcheck
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[TMP0:%.*]] = mul i64 %off, -8
; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 %off, 3
; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[DST1]], [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[SRC2]]
; CHECK-NEXT:    [[DIFF_CHECK3:%.*]] = icmp ult i64 [[TMP3]], 32
; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK3]]
; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[SRC2]], 8
; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP4]], [[DST1]]
; CHECK-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP5]], [[TMP1]]
; CHECK-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP6]], 32
; CHECK-NEXT:    [[CONFLICT_RDX5:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK4]]
; CHECK-NEXT:    [[TMP7:%.*]] = sub i64 [[DST1]], [[SRC2]]
; CHECK-NEXT:    [[DIFF_CHECK6:%.*]] = icmp ult i64 [[TMP7]], 32
; CHECK-NEXT:    [[CONFLICT_RDX7:%.*]] = or i1 [[CONFLICT_RDX5]], [[DIFF_CHECK6]]
; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[DST1]], -8
; CHECK-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP8]], [[SRC2]]
; CHECK-NEXT:    [[DIFF_CHECK8:%.*]] = icmp ult i64 [[TMP9]], 32
; CHECK-NEXT:    [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX7]], [[DIFF_CHECK8]]
; CHECK-NEXT:    br i1 [[CONFLICT_RDX9]], label %scalar.ph, label %vector.ph
; CHECK:       vector.ph:
; CHECK-NEXT:    br label %vector.body
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %iv.off = add i64 %off, %iv
  %gep.src = getelementptr i64, ptr %src, i64 %iv
  %l.0 = load i64, ptr %gep.src, align 8
  %gep.dst.off = getelementptr i64, ptr %dst, i64 %iv.off
  store i64 %l.0, ptr %gep.dst.off, align 8
  %gep.src.8 = getelementptr i8, ptr %gep.src, i64 8
  %l.1 = load i64, ptr %gep.src.8, align 8
  %gep.dst.iv = getelementptr i64, ptr %dst, i64 %iv
  store i64 %l.1, ptr %gep.dst.iv, align 8
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv.next, 1000
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}