; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S -disable-output -passes='print<access-info>' %s 2>&1 | FileCheck %s

;
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; A forwarding in the presence of symbolic strides.
define void @single_stride(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
; CHECK-LABEL: 'single_stride'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT:      Backward loop carried data dependence.
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        Backward:
; CHECK-NEXT:            %load = load i32, ptr %gep.A, align 4 ->
; CHECK-NEXT:            store i32 %add, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      Equal predicate: %stride == 1
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %gep.A = getelementptr inbounds i32, ptr %A, i64 %mul:
; CHECK-NEXT:        {%A,+,(4 * %stride)}<%loop>
; CHECK-NEXT:        --> {%A,+,4}<%loop>
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul i64 %iv, %stride
  %gep.A = getelementptr inbounds i32, ptr %A, i64 %mul
  %load = load i32, ptr %gep.A, align 4
  %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
  %load_1 = load i32, ptr %gep.B, align 4
  %add = add i32 %load_1, %load
  %iv.next = add nuw nsw i64 %iv, 1
  %gep.A.next = getelementptr inbounds i32, ptr %A, i64 %iv.next
  store i32 %add, ptr %gep.A.next, align 4
  %exitcond = icmp eq i64 %iv.next, %N
  br i1 %exitcond, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void
}

; A forwarding in the presence of symbolic strides,
; with nusw instead of inbounds on the GEPs.
define void @single_stride_nusw(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
; CHECK-LABEL: 'single_stride_nusw'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT:      Backward loop carried data dependence.
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        Backward:
; CHECK-NEXT:            %load = load i32, ptr %gep.A, align 4 ->
; CHECK-NEXT:            store i32 %add, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      Equal predicate: %stride == 1
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %gep.A = getelementptr nusw i32, ptr %A, i64 %mul:
; CHECK-NEXT:        {%A,+,(4 * %stride)}<%loop>
; CHECK-NEXT:        --> {%A,+,4}<%loop>
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul i64 %iv, %stride
  %gep.A = getelementptr nusw i32, ptr %A, i64 %mul
  %load = load i32, ptr %gep.A, align 4
  %gep.B = getelementptr nusw i32, ptr %B, i64 %iv
  %load_1 = load i32, ptr %gep.B, align 4
  %add = add i32 %load_1, %load
  %iv.next = add nuw nsw i64 %iv, 1
  %gep.A.next = getelementptr nusw i32, ptr %A, i64 %iv.next
  store i32 %add, ptr %gep.A.next, align 4
  %exitcond = icmp eq i64 %iv.next, %N
  br i1 %exitcond, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void
}

; Similar to @single_stride, but with struct types.
define void @single_stride_struct(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
; CHECK-LABEL: 'single_stride_struct'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT:      Backward loop carried data dependence.
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        Backward:
; CHECK-NEXT:            %load = load { i32, i8 }, ptr %gep.A, align 4 ->
; CHECK-NEXT:            store { i32, i8 } %ins, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      Equal predicate: %stride == 1
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %gep.A = getelementptr inbounds { i32, i8 }, ptr %A, i64 %mul:
; CHECK-NEXT:        {%A,+,(8 * %stride)}<%loop>
; CHECK-NEXT:        --> {%A,+,8}<%loop>
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul i64 %iv, %stride
  %gep.A = getelementptr inbounds { i32, i8 }, ptr %A, i64 %mul
  %load = load { i32, i8 }, ptr %gep.A, align 4
  %gep.B = getelementptr inbounds { i32, i8 }, ptr %B, i64 %iv
  %load_1 = load { i32, i8 }, ptr %gep.B, align 4
  %v1 = extractvalue { i32, i8 } %load, 0
  %v2 = extractvalue { i32, i8 } %load_1, 0
  %add = add i32 %v1, %v2
  %ins = insertvalue { i32, i8 } undef, i32 %add, 0
  %iv.next = add nuw nsw i64 %iv, 1
  %gep.A.next = getelementptr inbounds { i32, i8 }, ptr %A, i64 %iv.next
  store { i32, i8 } %ins, ptr %gep.A.next, align 4
  %exitcond = icmp eq i64 %iv.next, %N
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

define void @single_stride_castexpr(i32 %offset, ptr %src, ptr %dst, i1 %cond) {
; CHECK-LABEL: 'single_stride_castexpr'
; CHECK-NEXT:    inner.loop:
; CHECK-NEXT:      Memory dependences are safe with run-time checks
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Check 0:
; CHECK-NEXT:        Comparing group ([[GRP1:0x[0-9a-f]+]]):
; CHECK-NEXT:          %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2
; CHECK-NEXT:        Against group ([[GRP2:0x[0-9a-f]+]]):
; CHECK-NEXT:          %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv.3
; CHECK-NEXT:      Grouped accesses:
; CHECK-NEXT:        Group [[GRP1]]:
; CHECK-NEXT:          (Low: ((4 * %iv.1) + %dst) High: (804 + (4 * %iv.1) + %dst))
; CHECK-NEXT:            Member: {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT:        Group [[GRP2]]:
; CHECK-NEXT:          (Low: %src High: (804 + %src))
; CHECK-NEXT:            Member: {%src,+,4}<nuw><%inner.loop>
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      Equal predicate: %offset == 1
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2:
; CHECK-NEXT:        {((4 * %iv.1) + %dst),+,(4 * (sext i32 %offset to i64))<nsw>}<%inner.loop>
; CHECK-NEXT:        --> {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT:    outer.header:
; CHECK-NEXT:      Report: loop is not the innermost loop
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
;
entry:
  %offset.ext = sext i32 %offset to i64
  br label %outer.header

outer.header:
  %iv.1 = phi i64 [ 0, %entry ], [ %iv.2.next, %inner.loop ]
  br i1 %cond, label %inner.loop, label %exit

inner.loop:
  %iv.2 = phi i64 [ %iv.1, %outer.header ], [ %iv.2.next, %inner.loop ]
  %iv.3 = phi i32 [ 0, %outer.header ], [ %iv.3.next, %inner.loop ]
  %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv.3
  %load = load i32, ptr %gep.src, align 8
  %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2
  store i32 %load, ptr %gep.dst, align 8
  %iv.2.next = add i64 %iv.2, %offset.ext
  %iv.3.next = add i32 %iv.3, 1
  %ec = icmp eq i32 %iv.3, 200
  br i1 %ec, label %outer.header, label %inner.loop

exit:
  ret void
}

define void @single_stride_castexpr_multiuse(i32 %offset, ptr %src, ptr %dst, i1 %cond) {
; CHECK-LABEL: 'single_stride_castexpr_multiuse'
; CHECK-NEXT:    inner.loop:
; CHECK-NEXT:      Memory dependences are safe with run-time checks
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Check 0:
; CHECK-NEXT:        Comparing group ([[GRP3:0x[0-9a-f]+]]):
; CHECK-NEXT:          %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2
; CHECK-NEXT:        Against group ([[GRP4:0x[0-9a-f]+]]):
; CHECK-NEXT:          %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv.3
; CHECK-NEXT:      Grouped accesses:
; CHECK-NEXT:        Group [[GRP3]]:
; CHECK-NEXT:          (Low: ((4 * %iv.1) + %dst) High: (804 + (4 * %iv.1) + (-4 * (zext i32 %offset to i64))<nsw> + %dst))
; CHECK-NEXT:            Member: {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT:        Group [[GRP4]]:
; CHECK-NEXT:          (Low: (4 + %src) High: (808 + (-4 * (zext i32 %offset to i64))<nsw> + %src))
; CHECK-NEXT:            Member: {(4 + %src),+,4}<%inner.loop>
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      Equal predicate: %offset == 1
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv.3:
; CHECK-NEXT:        {((4 * (zext i32 %offset to i64))<nuw><nsw> + %src),+,4}<%inner.loop>
; CHECK-NEXT:        --> {(4 + %src),+,4}<%inner.loop>
; CHECK-NEXT:      [PSE] %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2:
; CHECK-NEXT:        {((4 * %iv.1) + %dst),+,(4 * (sext i32 %offset to i64))<nsw>}<%inner.loop>
; CHECK-NEXT:        --> {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT:    outer.header:
; CHECK-NEXT:      Report: loop is not the innermost loop
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
;
entry:
  %offset.ext = sext i32 %offset to i64
  %offset.zext = zext i32 %offset to i64
  br label %outer.header

outer.header:
  %iv.1 = phi i64 [ 0, %entry ], [ %iv.2.next, %inner.loop ]
  br i1 %cond, label %inner.loop, label %exit

inner.loop:
  %iv.2 = phi i64 [ %iv.1, %outer.header ], [ %iv.2.next, %inner.loop ]
  %iv.3 = phi i64 [ %offset.zext, %outer.header ], [ %iv.3.next, %inner.loop ]
  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv.3
  %load = load i32, ptr %gep.src, align 8
  %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2
  store i32 %load, ptr %gep.dst, align 8
  %iv.2.next = add i64 %iv.2, %offset.ext
  %iv.3.next = add i64 %iv.3, 1
  %ec = icmp eq i64 %iv.3, 200
  br i1 %ec, label %outer.header, label %inner.loop

exit:
  ret void
}

define double @single_iteration_unknown_stride(i32 %x, ptr %y, i1 %cond) {
; CHECK-LABEL: 'single_iteration_unknown_stride'
; CHECK-NEXT:    loop.body:
; CHECK-NEXT:      Memory dependences are safe
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      Equal predicate: %x == 1
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %gep10 = getelementptr double, ptr %gep8, i64 %mul:
; CHECK-NEXT:        {(8 + %y),+,(8 * (sext i32 %x to i64))<nsw>}<%loop.body>
; CHECK-NEXT:        --> {(8 + %y),+,8}<%loop.body>
;
entry:
  br i1 %cond, label %noloop.exit, label %loop.ph

loop.ph:                                          ; preds = %entry
  %sext7 = sext i32 %x to i64
  %gep8 = getelementptr i8, ptr %y, i64 8
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %loop.ph
  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
  %mul = mul i64 %iv, %sext7
  %gep10 = getelementptr double, ptr %gep8, i64 %mul
  %load11 = load double, ptr %gep10, align 8
  store double %load11, ptr %y, align 8
  %iv.next = add i64 %iv, 1
  %icmp = icmp eq i64 %iv, 0
  br i1 %icmp, label %loop.exit, label %loop.body

noloop.exit:                                      ; preds = %entry
  %sext = sext i32 %x to i64
  %gep = getelementptr double, ptr %y, i64 %sext
  %load5 = load double, ptr %gep, align 8
  ret double %load5

loop.exit:                                        ; preds = %loop.body
  %sext2 = sext i32 %x to i64
  %gep2 = getelementptr double, ptr %y, i64 %sext2
  %load6 = load double, ptr %gep2, align 8
  ret double %load6
}

; A loop with two symbolic strides.
define void @two_strides(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride.1, i64 %stride.2) {
; CHECK-LABEL: 'two_strides'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT:      Backward loop carried data dependence.
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        Backward:
; CHECK-NEXT:            %load = load i32, ptr %gep.A, align 4 ->
; CHECK-NEXT:            store i32 %add, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      Equal predicate: %stride.2 == 1
; CHECK-NEXT:      Equal predicate: %stride.1 == 1
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %gep.A = getelementptr inbounds i32, ptr %A, i64 %mul:
; CHECK-NEXT:        {%A,+,(4 * %stride.1)}<%loop>
; CHECK-NEXT:        --> {%A,+,4}<%loop>
; CHECK-NEXT:      [PSE] %gep.A.next = getelementptr inbounds i32, ptr %A, i64 %mul.2:
; CHECK-NEXT:        {((4 * %stride.2) + %A),+,(4 * %stride.2)}<%loop>
; CHECK-NEXT:        --> {(4 + %A),+,4}<%loop>
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul i64 %iv, %stride.1
  %gep.A = getelementptr inbounds i32, ptr %A, i64 %mul
  %load = load i32, ptr %gep.A, align 4
  %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
  %load_1 = load i32, ptr %gep.B, align 4
  %add = add i32 %load_1, %load
  %iv.next = add nuw nsw i64 %iv, 1
  %mul.2 = mul i64 %iv.next, %stride.2
  %gep.A.next = getelementptr inbounds i32, ptr %A, i64 %mul.2
  store i32 %add, ptr %gep.A.next, align 4
  %exitcond = icmp eq i64 %iv.next, %N
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

define void @single_stride_used_for_trip_count(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
; CHECK-LABEL: 'single_stride_used_for_trip_count'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT:      Unsafe indirect dependence.
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:        IndirectUnsafe:
; CHECK-NEXT:            %load = load i32, ptr %gep.A, align 4 ->
; CHECK-NEXT:            store i32 %add, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul i64 %iv, %stride
  %gep.A = getelementptr inbounds i32, ptr %A, i64 %mul
  %load = load i32, ptr %gep.A, align 4
  %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
  %load_1 = load i32, ptr %gep.B, align 4
  %add = add i32 %load_1, %load
  %iv.next = add nuw nsw i64 %iv, 1
  %gep.A.next = getelementptr inbounds i32, ptr %A, i64 %iv.next
  store i32 %add, ptr %gep.A.next, align 4
  %exitcond = icmp eq i64 %iv.next, %stride
  br i1 %exitcond, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void
}

; Check the scenario where we have an unknown Stride, which happens to also be
; the loop iteration count. If we speculate Stride==1, it implies that the loop
; will iterate no more than a single iteration.
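;
; A rough C equivalent of the pattern exercised below (an illustrative sketch
; only; the function name fn is made up and types are chosen to match the IR):
;
;   void fn(int N, int *A, short *B, int j) {
;     for (int i = 0; i < N; i++)
;       *A = B[i * N + j];
;   }
;
; Note that the checks below contain no "Equal predicate: %N == 1" assumption:
; speculating the stride here would restrict the loop to a single iteration, so
; the analysis relies on run-time checks and a no-self-wrap assumption instead.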
define void @unknown_stride_equalto_tc(i32 %N, ptr %A, ptr %B, i32 %j) {
; CHECK-LABEL: 'unknown_stride_equalto_tc'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Memory dependences are safe with run-time checks
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Check 0:
; CHECK-NEXT:        Comparing group ([[GRP5:0x[0-9a-f]+]]):
; CHECK-NEXT:          ptr %A
; CHECK-NEXT:        Against group ([[GRP6:0x[0-9a-f]+]]):
; CHECK-NEXT:          %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
; CHECK-NEXT:      Grouped accesses:
; CHECK-NEXT:        Group [[GRP5]]:
; CHECK-NEXT:          (Low: %A High: (4 + %A))
; CHECK-NEXT:            Member: %A
; CHECK-NEXT:        Group [[GRP6]]:
; CHECK-NEXT:          (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + %N) to i64) * (sext i32 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + %N) to i64) * (sext i32 %N to i64)) + %B))))
; CHECK-NEXT:            Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 %N to i64))<nsw>}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      {%j,+,%N}<%loop> Added Flags: <nssw>
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
; CHECK-NEXT:        ((2 * (sext i32 {%j,+,%N}<%loop> to i64))<nsw> + %B)
; CHECK-NEXT:        --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 %N to i64))<nsw>}<%loop>
;
entry:
  %cmp = icmp eq i32 %N, 0
  br i1 %cmp, label %exit, label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul i32 %iv, %N
  %add = add i32 %mul, %j
  %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
  %load = load i16, ptr %arrayidx
  %sext = sext i16 %load to i32
  store i32 %sext, ptr %A
  %iv.next = add nuw i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, %N
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; Check the scenario where we have an unknown Stride, which happens to also be
; the loop iteration count, but the TC is zero-extended from a narrower type.
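;
; A rough C equivalent (an illustrative sketch only; the function name fn is
; made up). The trip count is the zero-extension of the narrower %N:
;
;   void fn(unsigned short N, int *A, short *B, int j) {
;     for (unsigned i = 0; i < (unsigned)N; i++)
;       *A = B[i * N + j];
;   }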
define void @unknown_stride_equalto_zext_tc(i16 zeroext %N, ptr %A, ptr %B, i32 %j) {
; CHECK-LABEL: 'unknown_stride_equalto_zext_tc'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Memory dependences are safe with run-time checks
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Check 0:
; CHECK-NEXT:        Comparing group ([[GRP7:0x[0-9a-f]+]]):
; CHECK-NEXT:          ptr %A
; CHECK-NEXT:        Against group ([[GRP8:0x[0-9a-f]+]]):
; CHECK-NEXT:          %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
; CHECK-NEXT:      Grouped accesses:
; CHECK-NEXT:        Group [[GRP7]]:
; CHECK-NEXT:          (Low: %A High: (4 + %A))
; CHECK-NEXT:            Member: %A
; CHECK-NEXT:        Group [[GRP8]]:
; CHECK-NEXT:          (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (zext i16 %N to i32))<nsw> to i64) * (zext i16 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (zext i16 %N to i32))<nsw> to i64) * (zext i16 %N to i64)) + %B))))
; CHECK-NEXT:            Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (zext i16 %N to i64))<nuw><nsw>}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      {%j,+,(zext i16 %N to i32)}<nw><%loop> Added Flags: <nssw>
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
; CHECK-NEXT:        ((2 * (sext i32 {%j,+,(zext i16 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
; CHECK-NEXT:        --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (zext i16 %N to i64))<nuw><nsw>}<%loop>
;
entry:
  %N.ext = zext i16 %N to i32
  %cmp = icmp eq i16 %N, 0
  br i1 %cmp, label %exit, label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul nuw i32 %iv, %N.ext
  %add = add i32 %mul, %j
  %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
  %load = load i16, ptr %arrayidx
  %sext = sext i16 %load to i32
  store i32 %sext, ptr %A
  %iv.next = add nuw nsw i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, %N.ext
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; Check the scenario where we have an unknown Stride, which happens to also be
; the loop iteration count, but the TC is sign-extended from a narrower type.
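;
; A rough C equivalent (an illustrative sketch only; the function name fn is
; made up). The trip count is the sign-extension of the narrower %N:
;
;   void fn(short N, int *A, short *B, int j) {
;     for (int i = 0; i < (int)N; i++)
;       *A = B[i * N + j];
;   }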
define void @unknown_stride_equalto_sext_tc(i16 %N, ptr %A, ptr %B, i32 %j) {
; CHECK-LABEL: 'unknown_stride_equalto_sext_tc'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Memory dependences are safe with run-time checks
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Check 0:
; CHECK-NEXT:        Comparing group ([[GRP9:0x[0-9a-f]+]]):
; CHECK-NEXT:          ptr %A
; CHECK-NEXT:        Against group ([[GRP10:0x[0-9a-f]+]]):
; CHECK-NEXT:          %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
; CHECK-NEXT:      Grouped accesses:
; CHECK-NEXT:        Group [[GRP9]]:
; CHECK-NEXT:          (Low: %A High: (4 + %A))
; CHECK-NEXT:            Member: %A
; CHECK-NEXT:        Group [[GRP10]]:
; CHECK-NEXT:          (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (sext i16 %N to i32))<nsw> to i64) * (sext i16 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (sext i16 %N to i32))<nsw> to i64) * (sext i16 %N to i64)) + %B))))
; CHECK-NEXT:            Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i16 %N to i64))<nsw>}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      {%j,+,(sext i16 %N to i32)}<nw><%loop> Added Flags: <nssw>
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
; CHECK-NEXT:        ((2 * (sext i32 {%j,+,(sext i16 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
; CHECK-NEXT:        --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i16 %N to i64))<nsw>}<%loop>
;
entry:
  %N.ext = sext i16 %N to i32
  %cmp = icmp eq i16 %N, 0
  br i1 %cmp, label %exit, label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul nuw i32 %iv, %N.ext
  %add = add i32 %mul, %j
  %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
  %load = load i16, ptr %arrayidx
  %sext = sext i16 %load to i32
  store i32 %sext, ptr %A
  %iv.next = add nuw nsw i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, %N.ext
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; Check the scenario where we have an unknown Stride, which happens to also be
; the loop iteration count, but the TC is truncated from a wider type.
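;
; A rough C equivalent (an illustrative sketch only; the function name fn and
; the local n are made up). The trip count is %N truncated to a narrower type:
;
;   void fn(long long N, int *A, short *B, int j) {
;     int n = (int)N;  /* stride and trip count truncated from i64 */
;     for (int i = 0; i < n; i++)
;       *A = B[i * n + j];
;   }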
define void @unknown_stride_equalto_trunc_tc(i64 %N, ptr %A, ptr %B, i32 %j) {
; CHECK-LABEL: 'unknown_stride_equalto_trunc_tc'
; CHECK-NEXT:    loop:
; CHECK-NEXT:      Memory dependences are safe with run-time checks
; CHECK-NEXT:      Dependences:
; CHECK-NEXT:      Run-time memory checks:
; CHECK-NEXT:      Check 0:
; CHECK-NEXT:        Comparing group ([[GRP11:0x[0-9a-f]+]]):
; CHECK-NEXT:          ptr %A
; CHECK-NEXT:        Against group ([[GRP12:0x[0-9a-f]+]]):
; CHECK-NEXT:          %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
; CHECK-NEXT:      Grouped accesses:
; CHECK-NEXT:        Group [[GRP11]]:
; CHECK-NEXT:          (Low: %A High: (4 + %A))
; CHECK-NEXT:            Member: %A
; CHECK-NEXT:        Group [[GRP12]]:
; CHECK-NEXT:          (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (trunc i64 %N to i32)) to i64) * (sext i32 (trunc i64 %N to i32) to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (trunc i64 %N to i32)) to i64) * (sext i32 (trunc i64 %N to i32) to i64)) + %B))))
; CHECK-NEXT:            Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 (trunc i64 %N to i32) to i64))<nsw>}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT:      SCEV assumptions:
; CHECK-NEXT:      {%j,+,(trunc i64 %N to i32)}<nw><%loop> Added Flags: <nssw>
; CHECK-EMPTY:
; CHECK-NEXT:      Expressions re-written:
; CHECK-NEXT:      [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
; CHECK-NEXT:        ((2 * (sext i32 {%j,+,(trunc i64 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
; CHECK-NEXT:        --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 (trunc i64 %N to i32) to i64))<nsw>}<%loop>
;
entry:
  %N.trunc = trunc i64 %N to i32
  %cmp = icmp eq i64 %N, 0
  br i1 %cmp, label %exit, label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %mul = mul nuw i32 %iv, %N.trunc
  %add = add i32 %mul, %j
  %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
  %load = load i16, ptr %arrayidx
  %sext = sext i16 %load to i32
  store i32 %sext, ptr %A
  %iv.next = add nuw nsw i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, %N.trunc
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}