; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-versioning -S < %s | FileCheck %s -check-prefix=LV

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; For this loop:
;   unsigned index = 0;
;   for (int i = 0; i < n; i++) {
;     A[2 * index] = A[2 * index] + B[i];
;     index++;
;   }
;
; SCEV is unable to prove that the index expression 2 * index does not overflow.
;
; Analyzing the IR does not help us because the GEPs are not
; affine AddRecExprs. However, we can turn them into AddRecExprs
; using SCEV Predicates.
;
; Once we have an affine expression we need to add an additional NUSW
; check to verify that the pointers don't wrap, since the GEPs are not
; inbounds.

; The expression for %mul_ext as analyzed by SCEV is
;     (zext i32 {0,+,2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the following SCEV:
;     i64 {0,+,2}<%for.body>
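;
; To illustrate the wrap the predicate rules out, here is a hypothetical C
; sketch (an illustration only, not part of the test; the helper name is made
; up): once the 32-bit product 2 * index wraps, the zero-extended offset jumps
; back towards 0 and A[2 * index] would alias elements visited earlier:
;
;   #include <stdint.h>
;
;   uint64_t f1_offset(uint32_t index) {
;     uint32_t mul = 2 * index;   // wraps modulo 2^32
;     return (uint64_t)mul;       // index = 0x80000000 -> offset 0, not 2^32
;   }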
define void @f1(i16* noalias %a,
; LV-LABEL: @f1(
; LV-NEXT:  for.body.lver.check:
; LV-NEXT:    [[A5:%.*]] = bitcast i16* [[A:%.*]] to i8*
; LV-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -1
; LV-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; LV-NEXT:    [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
; LV-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT:    [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
; LV-NEXT:    [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
; LV-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0
; LV-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
; LV-NEXT:    [[TMP6:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
; LV-NEXT:    [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]]
; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT:    [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT3]]
; LV-NEXT:    [[TMP10:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
; LV-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP9]]
; LV-NEXT:    [[TMP12:%.*]] = icmp ugt i8* [[TMP11]], [[A5]]
; LV-NEXT:    [[TMP13:%.*]] = icmp ult i8* [[TMP10]], [[A5]]
; LV-NEXT:    [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW4]]
; LV-NEXT:    [[TMP15:%.*]] = or i1 [[TMP8]], [[TMP14]]
; LV-NEXT:    br i1 [[TMP15]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV:       for.body.ph.lver.orig:
; LV-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
; LV:       for.body.lver.orig:
; LV-NEXT:    [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT:    [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT:    [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
; LV-NEXT:    [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64
; LV-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, i16* [[A]], i64 [[MUL_EXT_LVER_ORIG]]
; LV-NEXT:    [[LOADA_LVER_ORIG:%.*]] = load i16, i16* [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT:    [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr i16, i16* [[B:%.*]], i64 [[IND_LVER_ORIG]]
; LV-NEXT:    [[LOADB_LVER_ORIG:%.*]] = load i16, i16* [[ARRAYIDXB_LVER_ORIG]], align 2
; LV-NEXT:    [[ADD_LVER_ORIG:%.*]] = mul i16 [[LOADA_LVER_ORIG]], [[LOADB_LVER_ORIG]]
; LV-NEXT:    store i16 [[ADD_LVER_ORIG]], i16* [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT:    [[INC_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1
; LV-NEXT:    [[INC1_LVER_ORIG]] = add i32 [[IND1_LVER_ORIG]], 1
; LV-NEXT:    [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[N]]
; LV-NEXT:    br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]]
; LV:       for.body.ph:
; LV-NEXT:    br label [[FOR_BODY:%.*]]
; LV:       for.body:
; LV-NEXT:    [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; LV-NEXT:    [[IND1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
; LV-NEXT:    [[MUL:%.*]] = mul i32 [[IND1]], 2
; LV-NEXT:    [[MUL_EXT:%.*]] = zext i32 [[MUL]] to i64
; LV-NEXT:    [[ARRAYIDXA:%.*]] = getelementptr i16, i16* [[A]], i64 [[MUL_EXT]]
; LV-NEXT:    [[LOADA:%.*]] = load i16, i16* [[ARRAYIDXA]], align 2
; LV-NEXT:    [[ARRAYIDXB:%.*]] = getelementptr i16, i16* [[B]], i64 [[IND]]
; LV-NEXT:    [[LOADB:%.*]] = load i16, i16* [[ARRAYIDXB]], align 2
; LV-NEXT:    [[ADD:%.*]] = mul i16 [[LOADA]], [[LOADB]]
; LV-NEXT:    store i16 [[ADD]], i16* [[ARRAYIDXA]], align 2
; LV-NEXT:    [[INC]] = add nuw nsw i64 [[IND]], 1
; LV-NEXT:    [[INC1]] = add i32 [[IND1]], 1
; LV-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
; LV:       for.end.loopexit:
; LV-NEXT:    br label [[FOR_END:%.*]]
; LV:       for.end.loopexit6:
; LV-NEXT:    br label [[FOR_END]]
; LV:       for.end:
; LV-NEXT:    ret void
;
                i16* noalias %b, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; For this loop:
;   unsigned index = n;
;   for (int i = 0; i < n; i++) {
;     A[2 * index] = A[2 * index] + B[i];
;     index--;
;   }
;
; the SCEV expression for 2 * index is not an AddRecExpr
; (and implicitly not affine). However, we are able to make assumptions
; that will turn the expression into an affine one and continue the
; analysis.
;
; Once we have an affine expression we need to add an additional NUSW
; check to verify that the pointers don't wrap, since the GEPs are not
; inbounds.
;
; This loop has a negative stride for A, and the nusw flag is required in
; order to properly extend the increment from i32 -4 to i64 -4.

; The expression for %mul_ext as analyzed by SCEV is
;     (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the following SCEV:
;     i64 {zext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>
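;
; A hypothetical C sketch of the same hazard for this decreasing index (an
; illustration only, not part of the test; the helper name is made up): one
; -2 step below zero in i32 zero-extends to a huge i64 offset instead of a
; small negative one:
;
;   #include <stdint.h>
;
;   uint64_t f2_offset(uint32_t index) {
;     uint32_t mul = 2 * index - 2;   // one -2 step of the recurrence
;     return (uint64_t)mul;           // index = 0 -> 0xFFFFFFFE, not -2
;   }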
define void @f2(i16* noalias %a,
; LV-LABEL: @f2(
; LV-NEXT:  for.body.lver.check:
; LV-NEXT:    [[TRUNCN:%.*]] = trunc i64 [[N:%.*]] to i32
; LV-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
; LV-NEXT:    [[TMP1:%.*]] = shl i32 [[TRUNCN]], 1
; LV-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
; LV-NEXT:    [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
; LV-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT:    [[TMP5:%.*]] = icmp ugt i32 [[TMP4]], [[TMP1]]
; LV-NEXT:    [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP1]]
; LV-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
; LV-NEXT:    [[TMP10:%.*]] = trunc i64 [[N]] to i31
; LV-NEXT:    [[TMP11:%.*]] = zext i31 [[TMP10]] to i64
; LV-NEXT:    [[TMP12:%.*]] = shl nuw nsw i64 [[TMP11]], 1
; LV-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]]
; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT:    [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
; LV-NEXT:    [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT3]]
; LV-NEXT:    [[TMP14:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
; LV-NEXT:    [[TMP15:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP13]]
; LV-NEXT:    [[TMP16:%.*]] = icmp ugt i8* [[TMP15]], [[SCEVGEP5]]
; LV-NEXT:    [[TMP17:%.*]] = icmp ult i8* [[TMP14]], [[SCEVGEP5]]
; LV-NEXT:    [[TMP18:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
; LV-NEXT:    [[TMP19:%.*]] = or i1 [[TMP9]], [[TMP18]]
; LV-NEXT:    br i1 [[TMP19]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV:       for.body.ph.lver.orig:
; LV-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
; LV:       for.body.lver.orig:
; LV-NEXT:    [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT:    [[IND1_LVER_ORIG:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH_LVER_ORIG]] ], [ [[DEC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT:    [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
; LV-NEXT:    [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64
; LV-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, i16* [[A]], i64 [[MUL_EXT_LVER_ORIG]]
; LV-NEXT:    [[LOADA_LVER_ORIG:%.*]] = load i16, i16* [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT:    [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr i16, i16* [[B:%.*]], i64 [[IND_LVER_ORIG]]
; LV-NEXT:    [[LOADB_LVER_ORIG:%.*]] = load i16, i16* [[ARRAYIDXB_LVER_ORIG]], align 2
; LV-NEXT:    [[ADD_LVER_ORIG:%.*]] = mul i16 [[LOADA_LVER_ORIG]], [[LOADB_LVER_ORIG]]
; LV-NEXT:    store i16 [[ADD_LVER_ORIG]], i16* [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT:    [[INC_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1
; LV-NEXT:    [[DEC_LVER_ORIG]] = sub i32 [[IND1_LVER_ORIG]], 1
; LV-NEXT:    [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[N]]
; LV-NEXT:    br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]]
; LV:       for.body.ph:
; LV-NEXT:    br label [[FOR_BODY:%.*]]
; LV:       for.body:
; LV-NEXT:    [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; LV-NEXT:    [[IND1:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH]] ], [ [[DEC:%.*]], [[FOR_BODY]] ]
; LV-NEXT:    [[MUL:%.*]] = mul i32 [[IND1]], 2
; LV-NEXT:    [[MUL_EXT:%.*]] = zext i32 [[MUL]] to i64
; LV-NEXT:    [[ARRAYIDXA:%.*]] = getelementptr i16, i16* [[A]], i64 [[MUL_EXT]]
; LV-NEXT:    [[LOADA:%.*]] = load i16, i16* [[ARRAYIDXA]], align 2
; LV-NEXT:    [[ARRAYIDXB:%.*]] = getelementptr i16, i16* [[B]], i64 [[IND]]
; LV-NEXT:    [[LOADB:%.*]] = load i16, i16* [[ARRAYIDXB]], align 2
; LV-NEXT:    [[ADD:%.*]] = mul i16 [[LOADA]], [[LOADB]]
; LV-NEXT:    store i16 [[ADD]], i16* [[ARRAYIDXA]], align 2
; LV-NEXT:    [[INC]] = add nuw nsw i64 [[IND]], 1
; LV-NEXT:    [[DEC]] = sub i32 [[IND1]], 1
; LV-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
; LV:       for.end.loopexit:
; LV-NEXT:    br label [[FOR_END:%.*]]
; LV:       for.end.loopexit6:
; LV-NEXT:    br label [[FOR_END]]
; LV:       for.end:
; LV-NEXT:    ret void
;
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; We replicate the tests above, but this time sign extend 2 * index instead
; of zero extending it.

; The expression for %mul_ext as analyzed by SCEV is
;     i64 (sext i32 {0,+,2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;     i64 {0,+,2}<%for.body>
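;
; A hypothetical C sketch of why nssw (rather than nusw) is the relevant
; assumption here (an illustration only, not part of the test; the helper
; name is made up): sign extension keeps a negative 32-bit product negative
; in 64 bits, but the i32 multiply itself can still wrap past INT32_MAX:
;
;   #include <stdint.h>
;
;   int64_t f3_offset(int32_t index) {
;     // 2 * index with 32-bit wraparound (unsigned math avoids signed-overflow UB).
;     int32_t mul = (int32_t)(2u * (uint32_t)index);
;     return (int64_t)mul;   // index = 2^30 -> INT32_MIN on typical targets, not 2^31
;   }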
define void @f3(i16* noalias %a,
; LV-LABEL: @f3(
; LV-NEXT:  for.body.lver.check:
; LV-NEXT:    [[A5:%.*]] = bitcast i16* [[A:%.*]] to i8*
; LV-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -1
; LV-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; LV-NEXT:    [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
; LV-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT:    [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
; LV-NEXT:    [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
; LV-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0
; LV-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0
; LV-NEXT:    [[TMP6:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
; LV-NEXT:    [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]]
; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT:    [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT3]]
; LV-NEXT:    [[TMP10:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
; LV-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP9]]
; LV-NEXT:    [[TMP12:%.*]] = icmp ugt i8* [[TMP11]], [[A5]]
; LV-NEXT:    [[TMP13:%.*]] = icmp ult i8* [[TMP10]], [[A5]]
; LV-NEXT:    [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW4]]
; LV-NEXT:    [[TMP15:%.*]] = or i1 [[TMP8]], [[TMP14]]
; LV-NEXT:    br i1 [[TMP15]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV:       for.body.ph.lver.orig:
; LV-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
; LV:       for.body.lver.orig:
; LV-NEXT:    [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT:    [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT:    [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
; LV-NEXT:    [[MUL_EXT_LVER_ORIG:%.*]] = sext i32 [[MUL_LVER_ORIG]] to i64
; LV-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, i16* [[A]], i64 [[MUL_EXT_LVER_ORIG]]
; LV-NEXT:    [[LOADA_LVER_ORIG:%.*]] = load i16, i16* [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT:    [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr i16, i16* [[B:%.*]], i64 [[IND_LVER_ORIG]]
; LV-NEXT:    [[LOADB_LVER_ORIG:%.*]] = load i16, i16* [[ARRAYIDXB_LVER_ORIG]], align 2
; LV-NEXT:    [[ADD_LVER_ORIG:%.*]] = mul i16 [[LOADA_LVER_ORIG]], [[LOADB_LVER_ORIG]]
; LV-NEXT:    store i16 [[ADD_LVER_ORIG]], i16* [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT:    [[INC_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1
; LV-NEXT:    [[INC1_LVER_ORIG]] = add i32 [[IND1_LVER_ORIG]], 1
; LV-NEXT:    [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[N]]
; LV-NEXT:    br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]]
; LV:       for.body.ph:
; LV-NEXT:    br label [[FOR_BODY:%.*]]
; LV:       for.body:
; LV-NEXT:    [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; LV-NEXT:    [[IND1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
; LV-NEXT:    [[MUL:%.*]] = mul i32 [[IND1]], 2
; LV-NEXT:    [[MUL_EXT:%.*]] = sext i32 [[MUL]] to i64
; LV-NEXT:    [[ARRAYIDXA:%.*]] = getelementptr i16, i16* [[A]], i64 [[MUL_EXT]]
; LV-NEXT:    [[LOADA:%.*]] = load i16, i16* [[ARRAYIDXA]], align 2
; LV-NEXT:    [[ARRAYIDXB:%.*]] = getelementptr i16, i16* [[B]], i64 [[IND]]
; LV-NEXT:    [[LOADB:%.*]] = load i16, i16* [[ARRAYIDXB]], align 2
; LV-NEXT:    [[ADD:%.*]] = mul i16 [[LOADA]], [[LOADB]]
; LV-NEXT:    store i16 [[ADD]], i16* [[ARRAYIDXA]], align 2
; LV-NEXT:    [[INC]] = add nuw nsw i64 [[IND]], 1
; LV-NEXT:    [[INC1]] = add i32 [[IND1]], 1
; LV-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
; LV:       for.end.loopexit:
; LV-NEXT:    br label [[FOR_END:%.*]]
; LV:       for.end.loopexit6:
; LV-NEXT:    br label [[FOR_END]]
; LV:       for.end:
; LV-NEXT:    ret void
;
                i16* noalias %b, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
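; @f4 mirrors @f2 (decreasing index, negative stride for A), but sign extends
; 2 * index instead of zero extending it. By analogy with @f2 and @f3, the
; expression for %mul_ext as analyzed by SCEV is presumably
;     (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; and the added nssw flag turns it into the affine SCEV
;     i64 {sext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>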
define void @f4(i16* noalias %a,
; LV-LABEL: @f4(
; LV-NEXT:  for.body.lver.check:
; LV-NEXT:    [[TRUNCN:%.*]] = trunc i64 [[N:%.*]] to i32
; LV-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
; LV-NEXT:    [[TMP1:%.*]] = shl i32 [[TRUNCN]], 1
; LV-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
; LV-NEXT:    [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
; LV-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP1]]
; LV-NEXT:    [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP1]]
; LV-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
; LV-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP1]] to i64
; LV-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP10]]
; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT:    [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
; LV-NEXT:    [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
; LV-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
; LV-NEXT:    [[TMP13:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP11]]
; LV-NEXT:    [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[SCEVGEP5]]
; LV-NEXT:    [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[SCEVGEP5]]
; LV-NEXT:    [[TMP16:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW4]]
; LV-NEXT:    [[TMP17:%.*]] = or i1 [[TMP9]], [[TMP16]]
; LV-NEXT:    br i1 [[TMP17]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV:       for.body.ph.lver.orig:
; LV-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
; LV:       for.body.lver.orig:
; LV-NEXT:    [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT:    [[IND1_LVER_ORIG:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH_LVER_ORIG]] ], [ [[DEC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT:    [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
; LV-NEXT:    [[MUL_EXT_LVER_ORIG:%.*]] = sext i32 [[MUL_LVER_ORIG]] to i64
; LV-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, i16* [[A]], i64 [[MUL_EXT_LVER_ORIG]]
; LV-NEXT:    [[LOADA_LVER_ORIG:%.*]] = load i16, i16* [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT:    [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr i16, i16* [[B:%.*]], i64 [[IND_LVER_ORIG]]
; LV-NEXT:    [[LOADB_LVER_ORIG:%.*]] = load i16, i16* [[ARRAYIDXB_LVER_ORIG]], align 2
; LV-NEXT:    [[ADD_LVER_ORIG:%.*]] = mul i16 [[LOADA_LVER_ORIG]], [[LOADB_LVER_ORIG]]
; LV-NEXT:    store i16 [[ADD_LVER_ORIG]], i16* [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT:    [[INC_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1
; LV-NEXT:    [[DEC_LVER_ORIG]] = sub i32 [[IND1_LVER_ORIG]], 1
; LV-NEXT:    [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[N]]
; LV-NEXT:    br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]]
; LV:       for.body.ph:
; LV-NEXT:    br label [[FOR_BODY:%.*]]
; LV:       for.body:
; LV-NEXT:    [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; LV-NEXT:    [[IND1:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH]] ], [ [[DEC:%.*]], [[FOR_BODY]] ]
; LV-NEXT:    [[MUL:%.*]] = mul i32 [[IND1]], 2
; LV-NEXT:    [[MUL_EXT:%.*]] = sext i32 [[MUL]] to i64
; LV-NEXT:    [[ARRAYIDXA:%.*]] = getelementptr i16, i16* [[A]], i64 [[MUL_EXT]]
; LV-NEXT:    [[LOADA:%.*]] = load i16, i16* [[ARRAYIDXA]], align 2
; LV-NEXT:    [[ARRAYIDXB:%.*]] = getelementptr i16, i16* [[B]], i64 [[IND]]
; LV-NEXT:    [[LOADB:%.*]] = load i16, i16* [[ARRAYIDXB]], align 2
; LV-NEXT:    [[ADD:%.*]] = mul i16 [[LOADA]], [[LOADB]]
; LV-NEXT:    store i16 [[ADD]], i16* [[ARRAYIDXA]], align 2
; LV-NEXT:    [[INC]] = add nuw nsw i64 [[IND]], 1
; LV-NEXT:    [[DEC]] = sub i32 [[IND1]], 1
; LV-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
; LV:       for.end.loopexit:
; LV-NEXT:    br label [[FOR_END:%.*]]
; LV:       for.end.loopexit6:
; LV-NEXT:    br label [[FOR_END]]
; LV:       for.end:
; LV-NEXT:    ret void
;
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; The following function is similar to the one above, but marks the GEP
; into the pointer %a as inbounds. The index %mul doesn't have the nsw flag.
; This means that the SCEV expression for %mul can wrap and we need
; a SCEV predicate to continue analysis.
;
; We can still analyze this by adding the required no-wrap SCEV predicates.
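;
; A hypothetical C analogue of the access in @f5 (an illustration only, not
; part of the test; the helper name is made up): the 32-bit product is used
; directly as the GEP index, so a wrap in 2 * index changes which element is
; addressed even though the GEP itself is inbounds:
;
;   #include <stdint.h>
;
;   int16_t *f5_elem(int16_t *A, int32_t index) {
;     int32_t mul = (int32_t)(2u * (uint32_t)index);   // may wrap in 32 bits
;     return &A[mul];   // corresponds to the inbounds GEP with an i32 index
;   }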
define void @f5(i16* noalias %a,
; LV-LABEL: @f5(
; LV-NEXT:  for.body.lver.check:
; LV-NEXT:    [[TRUNCN:%.*]] = trunc i64 [[N:%.*]] to i32
; LV-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
; LV-NEXT:    [[TMP1:%.*]] = shl i32 [[TRUNCN]], 1
; LV-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
; LV-NEXT:    [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
; LV-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP1]]
; LV-NEXT:    [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP1]]
; LV-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
; LV-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP1]] to i64
; LV-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP10]]
; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT:    [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
; LV-NEXT:    [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
; LV-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
; LV-NEXT:    [[TMP13:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP11]]
; LV-NEXT:    [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[SCEVGEP5]]
; LV-NEXT:    [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[SCEVGEP5]]
; LV-NEXT:    [[TMP16:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW4]]
; LV-NEXT:    [[TMP17:%.*]] = or i1 [[TMP9]], [[TMP16]]
; LV-NEXT:    br i1 [[TMP17]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV:       for.body.ph.lver.orig:
; LV-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
; LV:       for.body.lver.orig:
; LV-NEXT:    [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT:    [[IND1_LVER_ORIG:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH_LVER_ORIG]] ], [ [[DEC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; LV-NEXT:    [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
; LV-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i16, i16* [[A]], i32 [[MUL_LVER_ORIG]]
; LV-NEXT:    [[LOADA_LVER_ORIG:%.*]] = load i16, i16* [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT:    [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr inbounds i16, i16* [[B:%.*]], i64 [[IND_LVER_ORIG]]
; LV-NEXT:    [[LOADB_LVER_ORIG:%.*]] = load i16, i16* [[ARRAYIDXB_LVER_ORIG]], align 2
; LV-NEXT:    [[ADD_LVER_ORIG:%.*]] = mul i16 [[LOADA_LVER_ORIG]], [[LOADB_LVER_ORIG]]
; LV-NEXT:    store i16 [[ADD_LVER_ORIG]], i16* [[ARRAYIDXA_LVER_ORIG]], align 2
; LV-NEXT:    [[INC_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1
; LV-NEXT:    [[DEC_LVER_ORIG]] = sub i32 [[IND1_LVER_ORIG]], 1
; LV-NEXT:    [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[N]]
; LV-NEXT:    br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]]
; LV:       for.body.ph:
; LV-NEXT:    br label [[FOR_BODY:%.*]]
; LV:       for.body:
; LV-NEXT:    [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; LV-NEXT:    [[IND1:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH]] ], [ [[DEC:%.*]], [[FOR_BODY]] ]
; LV-NEXT:    [[MUL:%.*]] = mul i32 [[IND1]], 2
; LV-NEXT:    [[ARRAYIDXA:%.*]] = getelementptr inbounds i16, i16* [[A]], i32 [[MUL]]
; LV-NEXT:    [[LOADA:%.*]] = load i16, i16* [[ARRAYIDXA]], align 2
; LV-NEXT:    [[ARRAYIDXB:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 [[IND]]
; LV-NEXT:    [[LOADB:%.*]] = load i16, i16* [[ARRAYIDXB]], align 2
; LV-NEXT:    [[ADD:%.*]] = mul i16 [[LOADA]], [[LOADB]]
; LV-NEXT:    store i16 [[ADD]], i16* [[ARRAYIDXA]], align 2
; LV-NEXT:    [[INC]] = add nuw nsw i64 [[IND]], 1
; LV-NEXT:    [[DEC]] = sub i32 [[IND1]], 1
; LV-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
; LV:       for.end.loopexit:
; LV-NEXT:    br label [[FOR_END:%.*]]
; LV:       for.end.loopexit6:
; LV-NEXT:    br label [[FOR_END]]
; LV:       for.end:
; LV-NEXT:    ret void
;
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2

  %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}