; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=loop-predication -loop-predication-enable-iv-truncation=true < %s 2>&1 | FileCheck %s
; RUN: opt -S -passes='require<scalar-evolution>,loop-mssa(loop-predication)' -verify-memoryssa < %s 2>&1 | FileCheck %s

; Loop-predication tests in which the induction variable (IV) is wider than
; the type used by the guarded range checks. Every loop below truncates an
; i64 IV before comparing it against a narrower length. The first invocation
; above runs the pass with IV truncation explicitly enabled; the second runs
; it inside a loop-mssa pipeline with MemorySSA verification turned on. Both
; invocations are matched against the same set of assertions.

declare void @llvm.experimental.guard(i1, ...)

declare i32 @length(ptr)

declare i16 @short_length(ptr)

; Range checks are performed in i16 and i32, while the IV is of type i64.
; The loop can be predicated because the IV only takes the values 0..15
; (it starts at 0 and the latch is `icmp ult i64 %iv.next, 16`), which fits
; in both i16 and i32.
; Expected result, per the assertions below: the guard is fed a loop-invariant
; condition computed in the entry block, and the original per-iteration wide
; check is only passed to llvm.assume.
define i64 @iv_wider_type_rc_two_narrow_types(i32 %offA, i16 %offB, ptr %arrA, ptr %arrB) {
; CHECK-LABEL: @iv_wider_type_rc_two_narrow_types(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LENGTHA:%.*]] = call i32 @length(ptr [[ARRA:%.*]])
; CHECK-NEXT:    [[LENGTHB:%.*]] = call i16 @short_length(ptr [[ARRB:%.*]])
; CHECK-NEXT:    [[TMP0:%.*]] = sub i16 [[LENGTHB]], [[OFFB:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i16 16, [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i16 [[OFFB]], [[LENGTHB]]
; CHECK-NEXT:    [[TMP3:%.*]] = and i1 [[TMP2]], [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = freeze i1 [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = sub i32 [[LENGTHA]], [[OFFA:%.*]]
; CHECK-NEXT:    [[TMP6:%.*]] = icmp ule i32 16, [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[OFFA]], [[LENGTHA]]
; CHECK-NEXT:    [[TMP8:%.*]] = and i1 [[TMP7]], [[TMP6]]
; CHECK-NEXT:    [[TMP9:%.*]] = freeze i1 [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = and i1 [[TMP4]], [[TMP9]]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[IV_TRUNC_32:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT:    [[IV_TRUNC_16:%.*]] = trunc i64 [[IV]] to i16
; CHECK-NEXT:    [[INDEXA:%.*]] = add i32 [[IV_TRUNC_32]], [[OFFA]]
; CHECK-NEXT:    [[INDEXB:%.*]] = add i16 [[IV_TRUNC_16]], [[OFFB]]
; CHECK-NEXT:    [[RCA:%.*]] = icmp ult i32 [[INDEXA]], [[LENGTHA]]
; CHECK-NEXT:    [[RCB:%.*]] = icmp ult i16 [[INDEXB]], [[LENGTHB]]
; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[RCA]], [[RCB]]
; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP10]], i32 9) [ "deopt"() ]
; CHECK-NEXT:    call void @llvm.assume(i1 [[WIDE_CHK]])
; CHECK-NEXT:    [[INDEXA_EXT:%.*]] = zext i32 [[INDEXA]] to i64
; CHECK-NEXT:    [[ADDRA:%.*]] = getelementptr inbounds i8, ptr [[ARRA]], i64 [[INDEXA_EXT]]
; CHECK-NEXT:    [[ELTA:%.*]] = load i8, ptr [[ADDRA]], align 1
; CHECK-NEXT:    [[INDEXB_EXT:%.*]] = zext i16 [[INDEXB]] to i64
; CHECK-NEXT:    [[ADDRB:%.*]] = getelementptr inbounds i8, ptr [[ARRB]], i64 [[INDEXB_EXT]]
; CHECK-NEXT:    store i8 [[ELTA]], ptr [[ADDRB]], align 1
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[LATCH_CHECK:%.*]] = icmp ult i64 [[IV_NEXT]], 16
; CHECK-NEXT:    br i1 [[LATCH_CHECK]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], [[LOOP]] ]
; CHECK-NEXT:    ret i64 [[IV_LCSSA]]
;
entry:
  %lengthA = call i32 @length(ptr %arrA)
  %lengthB = call i16 @short_length(ptr %arrB)
  br label %loop

loop:
  %iv = phi i64 [0, %entry ], [ %iv.next, %loop ]
  ; The same i64 IV is truncated to each range-check type.
  %iv.trunc.32 = trunc i64 %iv to i32
  %iv.trunc.16 = trunc i64 %iv to i16
  %indexA = add i32 %iv.trunc.32, %offA
  %indexB = add i16 %iv.trunc.16, %offB
  %rcA = icmp ult i32 %indexA, %lengthA
  %rcB = icmp ult i16 %indexB, %lengthB
  ; Both range checks are and-ed into the single condition that is guarded.
  %wide.chk = and i1 %rcA, %rcB
  call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk, i32 9) [ "deopt"() ]
  %indexA.ext = zext i32 %indexA to i64
  %addrA = getelementptr inbounds i8, ptr %arrA, i64 %indexA.ext
  %eltA = load i8, ptr %addrA
  %indexB.ext = zext i16 %indexB to i64
  %addrB = getelementptr inbounds i8, ptr %arrB, i64 %indexB.ext
  store i8 %eltA, ptr %addrB
  %iv.next = add nuw nsw i64 %iv, 1
  %latch.check = icmp ult i64 %iv.next, 16
  br i1 %latch.check, label %loop, label %exit

exit:
  ret i64 %iv
}


; Consider an IV of type long (i64) used for array accesses whose indices are
; computed in i32.
; The IV is of type i64, while the range checks compare operands of type i32
; (%rcA, %rcB) and of type i64 (%rcIV, which checks the IV against %max).
; The latch here compares %iv itself (not %iv.next) against 15.
; Expected result, per the assertions below: all three range checks are
; replaced by loop-invariant conditions computed in the entry block and and-ed
; into the guard condition; the original wide check is only passed to
; llvm.assume.
define i64 @iv_rc_different_types(i32 %offA, i32 %offB, ptr %arrA, ptr %arrB, i64 %max)
; CHECK-LABEL: @iv_rc_different_types(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LENGTHA:%.*]] = call i32 @length(ptr [[ARRA:%.*]])
; CHECK-NEXT:    [[LENGTHB:%.*]] = call i32 @length(ptr [[ARRB:%.*]])
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[LENGTHB]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[OFFB:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule i32 15, [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[OFFB]], [[LENGTHB]]
; CHECK-NEXT:    [[TMP4:%.*]] = and i1 [[TMP3]], [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = freeze i1 [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[MAX:%.*]], -1
; CHECK-NEXT:    [[TMP7:%.*]] = icmp ule i64 15, [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i64 0, [[MAX]]
; CHECK-NEXT:    [[TMP9:%.*]] = and i1 [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[TMP10:%.*]] = freeze i1 [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[LENGTHA]], -1
; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 [[TMP11]], [[OFFA:%.*]]
; CHECK-NEXT:    [[TMP13:%.*]] = icmp ule i32 15, [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ult i32 [[OFFA]], [[LENGTHA]]
; CHECK-NEXT:    [[TMP15:%.*]] = and i1 [[TMP14]], [[TMP13]]
; CHECK-NEXT:    [[TMP16:%.*]] = freeze i1 [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = and i1 [[TMP5]], [[TMP10]]
; CHECK-NEXT:    [[TMP18:%.*]] = and i1 [[TMP17]], [[TMP16]]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT:    [[INDEXA:%.*]] = add i32 [[IV_TRUNC]], [[OFFA]]
; CHECK-NEXT:    [[INDEXB:%.*]] = add i32 [[IV_TRUNC]], [[OFFB]]
; CHECK-NEXT:    [[RCA:%.*]] = icmp ult i32 [[INDEXA]], [[LENGTHA]]
; CHECK-NEXT:    [[RCIV:%.*]] = icmp ult i64 [[IV]], [[MAX]]
; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[RCA]], [[RCIV]]
; CHECK-NEXT:    [[RCB:%.*]] = icmp ult i32 [[INDEXB]], [[LENGTHB]]
; CHECK-NEXT:    [[WIDE_CHK_FINAL:%.*]] = and i1 [[WIDE_CHK]], [[RCB]]
; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP18]], i32 9) [ "deopt"() ]
; CHECK-NEXT:    call void @llvm.assume(i1 [[WIDE_CHK_FINAL]])
; CHECK-NEXT:    [[INDEXA_EXT:%.*]] = zext i32 [[INDEXA]] to i64
; CHECK-NEXT:    [[ADDRA:%.*]] = getelementptr inbounds i8, ptr [[ARRA]], i64 [[INDEXA_EXT]]
; CHECK-NEXT:    [[ELTA:%.*]] = load i8, ptr [[ADDRA]], align 1
; CHECK-NEXT:    [[INDEXB_EXT:%.*]] = zext i32 [[INDEXB]] to i64
; CHECK-NEXT:    [[ADDRB:%.*]] = getelementptr inbounds i8, ptr [[ARRB]], i64 [[INDEXB_EXT]]
; CHECK-NEXT:    [[ELTB:%.*]] = load i8, ptr [[ADDRB]], align 1
; CHECK-NEXT:    [[RESULT:%.*]] = xor i8 [[ELTA]], [[ELTB]]
; CHECK-NEXT:    store i8 [[RESULT]], ptr [[ADDRA]], align 1
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[LATCH_CHECK:%.*]] = icmp ult i64 [[IV]], 15
; CHECK-NEXT:    br i1 [[LATCH_CHECK]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], [[LOOP]] ]
; CHECK-NEXT:    ret i64 [[IV_LCSSA]]
;
{
entry:
  %lengthA = call i32 @length(ptr %arrA)
  %lengthB = call i32 @length(ptr %arrB)
  br label %loop

loop:
  %iv = phi i64 [0, %entry ], [ %iv.next, %loop ]
  %iv.trunc = trunc i64 %iv to i32
  %indexA = add i32 %iv.trunc, %offA
  %indexB = add i32 %iv.trunc, %offB
  ; Two i32 range checks on the truncated IV plus one i64 check on the IV
  ; itself are combined into the guarded condition.
  %rcA = icmp ult i32 %indexA, %lengthA
  %rcIV = icmp ult i64 %iv, %max
  %wide.chk = and i1 %rcA, %rcIV
  %rcB = icmp ult i32 %indexB, %lengthB
  %wide.chk.final = and i1 %wide.chk, %rcB
  call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk.final, i32 9) [ "deopt"() ]
  %indexA.ext = zext i32 %indexA to i64
  %addrA = getelementptr inbounds i8, ptr %arrA, i64 %indexA.ext
  %eltA = load i8, ptr %addrA
  %indexB.ext = zext i32 %indexB to i64
  %addrB = getelementptr inbounds i8, ptr %arrB, i64 %indexB.ext
  %eltB = load i8, ptr %addrB
  %result = xor i8 %eltA, %eltB
  store i8 %result, ptr %addrA
  %iv.next = add nuw nsw i64 %iv, 1
  %latch.check = icmp ult i64 %iv, 15
  br i1 %latch.check, label %loop, label %exit

exit:
  ret i64 %iv
}

; Negative test: the IV cannot be narrowed to the range-check type, because
; information would be lost.
;   for (i64 i = 5; i >= 2; i++)
; The latch is a signed comparison (`icmp sge`) and the increment carries no
; nsw/nuw flags, so the loop only exits once the increment wraps past the
; signed maximum 2^63 - 1. The IV's range therefore does not fit in i32.
; Expected result, per the assertions below: the guard stays inside the loop
; with its original condition and no new condition is computed in the entry
; block.
define i64 @iv_rc_different_type(i32 %offA, ptr %arrA) {
; CHECK-LABEL: @iv_rc_different_type(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LENGTHA:%.*]] = call i32 @length(ptr [[ARRA:%.*]])
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 5, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[IV_TRUNC_32:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT:    [[INDEXA:%.*]] = add i32 [[IV_TRUNC_32]], [[OFFA:%.*]]
; CHECK-NEXT:    [[RCA:%.*]] = icmp ult i32 [[INDEXA]], [[LENGTHA]]
; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[RCA]], i32 9) [ "deopt"() ]
; CHECK-NEXT:    [[INDEXA_EXT:%.*]] = zext i32 [[INDEXA]] to i64
; CHECK-NEXT:    [[ADDRA:%.*]] = getelementptr inbounds i8, ptr [[ARRA]], i64 [[INDEXA_EXT]]
; CHECK-NEXT:    [[ELTA:%.*]] = load i8, ptr [[ADDRA]], align 1
; CHECK-NEXT:    [[RES:%.*]] = add i8 [[ELTA]], 2
; CHECK-NEXT:    store i8 [[ELTA]], ptr [[ADDRA]], align 1
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[LATCH_CHECK:%.*]] = icmp sge i64 [[IV_NEXT]], 2
; CHECK-NEXT:    br i1 [[LATCH_CHECK]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], [[LOOP]] ]
; CHECK-NEXT:    ret i64 [[IV_LCSSA]]
;
entry:
  %lengthA = call i32 @length(ptr %arrA)
  br label %loop

loop:
  %iv = phi i64 [ 5, %entry ], [ %iv.next, %loop ]
  %iv.trunc.32 = trunc i64 %iv to i32
  %indexA = add i32 %iv.trunc.32, %offA
  %rcA = icmp ult i32 %indexA, %lengthA
  call void (i1, ...) @llvm.experimental.guard(i1 %rcA, i32 9) [ "deopt"() ]
  %indexA.ext = zext i32 %indexA to i64
  %addrA = getelementptr inbounds i8, ptr %arrA, i64 %indexA.ext
  %eltA = load i8, ptr %addrA
  ; NOTE(review): %res is computed but never used; the store below writes
  ; %eltA. The assertions match this form, so it is left as is -- confirm
  ; whether storing %res was intended.
  %res = add i8 %eltA, 2
  store i8 %eltA, ptr %addrA
  %iv.next = add i64 %iv, 1
  %latch.check = icmp sge i64 %iv.next, 2
  br i1 %latch.check, label %loop, label %exit

exit:
  ret i64 %iv
}