10c09e5bdSPhilip Reames; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2e7f4ad13SNikita Popov; RUN: opt -passes=loop-versioning -S < %s | FileCheck %s -check-prefix=LV 30c09e5bdSPhilip Reames 40c09e5bdSPhilip Reamestarget datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" 50c09e5bdSPhilip Reames 60c09e5bdSPhilip Reames; For this loop: 70c09e5bdSPhilip Reames; unsigned index = 0; 80c09e5bdSPhilip Reames; for (int i = 0; i < n; i++) { 90c09e5bdSPhilip Reames; A[2 * index] = A[2 * index] + B[i]; 100c09e5bdSPhilip Reames; index++; 110c09e5bdSPhilip Reames; } 120c09e5bdSPhilip Reames; 130c09e5bdSPhilip Reames; SCEV is unable to prove that A[2 * i] does not overflow. 140c09e5bdSPhilip Reames; 150c09e5bdSPhilip Reames; Analyzing the IR does not help us because the GEPs are not 160c09e5bdSPhilip Reames; affine AddRecExprs. However, we can turn them into AddRecExprs 170c09e5bdSPhilip Reames; using SCEV Predicates. 180c09e5bdSPhilip Reames; 190c09e5bdSPhilip Reames; Once we have an affine expression we need to add an additional NUSW 200c09e5bdSPhilip Reames; to check that the pointers don't wrap since the GEPs are not 210c09e5bdSPhilip Reames; inbounds.
220c09e5bdSPhilip Reames 230c09e5bdSPhilip Reames; The expression for %mul_ext as analyzed by SCEV is 240c09e5bdSPhilip Reames; (zext i32 {0,+,2}<%for.body> to i64) 250c09e5bdSPhilip Reames; We have added the nusw flag to turn this expression into the SCEV expression: 260c09e5bdSPhilip Reames; i64 {0,+,2}<%for.body> 270c09e5bdSPhilip Reames 28e7f4ad13SNikita Popovdefine void @f1(ptr noalias %a, 290c09e5bdSPhilip Reames; LV-LABEL: @f1( 300c09e5bdSPhilip Reames; LV-NEXT: for.body.lver.check: 310c09e5bdSPhilip Reames; LV-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 32*f82bb3d4SFlorian Hahn; LV-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 33e7f4ad13SNikita Popov; LV-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]]) 34e7f4ad13SNikita Popov; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 35e7f4ad13SNikita Popov; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 36e7f4ad13SNikita Popov; LV-NEXT: [[TMP2:%.*]] = sub i64 0, [[MUL_RESULT]] 37e7f4ad13SNikita Popov; LV-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[MUL_RESULT]] 38e7f4ad13SNikita Popov; LV-NEXT: [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[A]] 39df8efbdbSFlorian Hahn; LV-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]] 40*f82bb3d4SFlorian Hahn; LV-NEXT: [[TMP7:%.*]] = or i1 [[TMP1]], [[TMP6]] 41*f82bb3d4SFlorian Hahn; LV-NEXT: br i1 [[TMP7]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] 420c09e5bdSPhilip Reames; LV: for.body.ph.lver.orig: 430c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] 440c09e5bdSPhilip Reames; LV: for.body.lver.orig: 450c09e5bdSPhilip Reames; LV-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] 460c09e5bdSPhilip Reames; LV-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] 470c09e5bdSPhilip Reames; LV-NEXT: 
[[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2 480c09e5bdSPhilip Reames; LV-NEXT: [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64 49e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]] 50e7f4ad13SNikita Popov; LV-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXA_LVER_ORIG]], align 2 51e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr i16, ptr [[B:%.*]], i64 [[IND_LVER_ORIG]] 52e7f4ad13SNikita Popov; LV-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXB_LVER_ORIG]], align 2 530c09e5bdSPhilip Reames; LV-NEXT: [[ADD_LVER_ORIG:%.*]] = mul i16 [[LOADA_LVER_ORIG]], [[LOADB_LVER_ORIG]] 54e7f4ad13SNikita Popov; LV-NEXT: store i16 [[ADD_LVER_ORIG]], ptr [[ARRAYIDXA_LVER_ORIG]], align 2 550c09e5bdSPhilip Reames; LV-NEXT: [[INC_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1 560c09e5bdSPhilip Reames; LV-NEXT: [[INC1_LVER_ORIG]] = add i32 [[IND1_LVER_ORIG]], 1 570c09e5bdSPhilip Reames; LV-NEXT: [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[N]] 580c09e5bdSPhilip Reames; LV-NEXT: br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]] 590c09e5bdSPhilip Reames; LV: for.body.ph: 600c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_BODY:%.*]] 610c09e5bdSPhilip Reames; LV: for.body: 620c09e5bdSPhilip Reames; LV-NEXT: [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] 630c09e5bdSPhilip Reames; LV-NEXT: [[IND1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] 640c09e5bdSPhilip Reames; LV-NEXT: [[MUL:%.*]] = mul i32 [[IND1]], 2 650c09e5bdSPhilip Reames; LV-NEXT: [[MUL_EXT:%.*]] = zext i32 [[MUL]] to i64 66e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXA:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT]] 67e7f4ad13SNikita Popov; LV-NEXT: [[LOADA:%.*]] = load i16, ptr [[ARRAYIDXA]], align 2 68e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXB:%.*]] = getelementptr i16, ptr 
[[B]], i64 [[IND]] 69e7f4ad13SNikita Popov; LV-NEXT: [[LOADB:%.*]] = load i16, ptr [[ARRAYIDXB]], align 2 700c09e5bdSPhilip Reames; LV-NEXT: [[ADD:%.*]] = mul i16 [[LOADA]], [[LOADB]] 71e7f4ad13SNikita Popov; LV-NEXT: store i16 [[ADD]], ptr [[ARRAYIDXA]], align 2 720c09e5bdSPhilip Reames; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1 730c09e5bdSPhilip Reames; LV-NEXT: [[INC1]] = add i32 [[IND1]], 1 740c09e5bdSPhilip Reames; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]] 75e7f4ad13SNikita Popov; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]] 760c09e5bdSPhilip Reames; LV: for.end.loopexit: 770c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_END:%.*]] 78e7f4ad13SNikita Popov; LV: for.end.loopexit2: 790c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_END]] 800c09e5bdSPhilip Reames; LV: for.end: 810c09e5bdSPhilip Reames; LV-NEXT: ret void 820c09e5bdSPhilip Reames; 83e7f4ad13SNikita Popov ptr noalias %b, i64 %N) { 840c09e5bdSPhilip Reamesentry: 850c09e5bdSPhilip Reames br label %for.body 860c09e5bdSPhilip Reames 870c09e5bdSPhilip Reamesfor.body: ; preds = %for.body, %entry 880c09e5bdSPhilip Reames %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ] 890c09e5bdSPhilip Reames %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ] 900c09e5bdSPhilip Reames 910c09e5bdSPhilip Reames %mul = mul i32 %ind1, 2 920c09e5bdSPhilip Reames %mul_ext = zext i32 %mul to i64 930c09e5bdSPhilip Reames 94e7f4ad13SNikita Popov %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext 95e7f4ad13SNikita Popov %loadA = load i16, ptr %arrayidxA, align 2 960c09e5bdSPhilip Reames 97e7f4ad13SNikita Popov %arrayidxB = getelementptr i16, ptr %b, i64 %ind 98e7f4ad13SNikita Popov %loadB = load i16, ptr %arrayidxB, align 2 990c09e5bdSPhilip Reames 1000c09e5bdSPhilip Reames %add = mul i16 %loadA, %loadB 1010c09e5bdSPhilip Reames 102e7f4ad13SNikita Popov store i16 %add, ptr %arrayidxA, align 2 1030c09e5bdSPhilip Reames 1040c09e5bdSPhilip Reames %inc = add nuw nsw i64 %ind, 1 
1050c09e5bdSPhilip Reames %inc1 = add i32 %ind1, 1 1060c09e5bdSPhilip Reames 1070c09e5bdSPhilip Reames %exitcond = icmp eq i64 %inc, %N 1080c09e5bdSPhilip Reames br i1 %exitcond, label %for.end, label %for.body 1090c09e5bdSPhilip Reames 1100c09e5bdSPhilip Reamesfor.end: ; preds = %for.body 1110c09e5bdSPhilip Reames ret void 1120c09e5bdSPhilip Reames} 1130c09e5bdSPhilip Reames 1140c09e5bdSPhilip Reames; For this loop: 1150c09e5bdSPhilip Reames; unsigned index = n; 1160c09e5bdSPhilip Reames; for (int i = 0; i < n; i++) { 1170c09e5bdSPhilip Reames; A[2 * index] = A[2 * index] + B[i]; 1180c09e5bdSPhilip Reames; index--; 1190c09e5bdSPhilip Reames; } 1200c09e5bdSPhilip Reames; 1210c09e5bdSPhilip Reames; the SCEV expression for 2 * index is not an AddRecExpr 1220c09e5bdSPhilip Reames; (and implicitly not affine). However, we are able to make assumptions 1230c09e5bdSPhilip Reames; that will turn the expression into an affine one and continue the 1240c09e5bdSPhilip Reames; analysis. 1250c09e5bdSPhilip Reames; 1260c09e5bdSPhilip Reames; Once we have an affine expression we need to add an additional NUSW 1270c09e5bdSPhilip Reames; to check that the pointers don't wrap since the GEPs are not 1280c09e5bdSPhilip Reames; inbounds. 1290c09e5bdSPhilip Reames; 1300c09e5bdSPhilip Reames; This loop has a negative stride for A, and the nusw flag is required in 1310c09e5bdSPhilip Reames; order to properly extend the increment from i32 -4 to i64 -4.
1320c09e5bdSPhilip Reames 1330c09e5bdSPhilip Reames; The expression for %mul_ext as analyzed by SCEV is 1340c09e5bdSPhilip Reames; (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64) 1350c09e5bdSPhilip Reames; We have added the nusw flag to turn this expression into the following SCEV: 1360c09e5bdSPhilip Reames; i64 {zext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body> 1370c09e5bdSPhilip Reames 138e7f4ad13SNikita Popovdefine void @f2(ptr noalias %a, 1390c09e5bdSPhilip Reames; LV-LABEL: @f2( 1400c09e5bdSPhilip Reames; LV-NEXT: for.body.lver.check: 1410c09e5bdSPhilip Reames; LV-NEXT: [[TRUNCN:%.*]] = trunc i64 [[N:%.*]] to i32 1420c09e5bdSPhilip Reames; LV-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 1430c09e5bdSPhilip Reames; LV-NEXT: [[TMP1:%.*]] = shl i32 [[TRUNCN]], 1 1440c09e5bdSPhilip Reames; LV-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32 1450c09e5bdSPhilip Reames; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]]) 1460c09e5bdSPhilip Reames; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 1470c09e5bdSPhilip Reames; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 148e7f4ad13SNikita Popov; LV-NEXT: [[TMP3:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]] 149e7f4ad13SNikita Popov; LV-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], [[TMP1]] 150e7f4ad13SNikita Popov; LV-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]] 151e7f4ad13SNikita Popov; LV-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 152e7f4ad13SNikita Popov; LV-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] 153e7f4ad13SNikita Popov; LV-NEXT: [[TMP8:%.*]] = trunc i64 [[N]] to i31 154e7f4ad13SNikita Popov; LV-NEXT: [[TMP9:%.*]] = zext i31 [[TMP8]] to i64 155e7f4ad13SNikita Popov; LV-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2 156e7f4ad13SNikita Popov; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP10]] 157e735f2bfSPhilip Reames; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } 
@llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]]) 158e735f2bfSPhilip Reames; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 159e735f2bfSPhilip Reames; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 160e7f4ad13SNikita Popov; LV-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]] 161e7f4ad13SNikita Popov; LV-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[TMP11]] 162e7f4ad13SNikita Popov; LV-NEXT: [[TMP13:%.*]] = icmp ugt ptr [[TMP12]], [[SCEVGEP]] 163e7f4ad13SNikita Popov; LV-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW4]] 164e7f4ad13SNikita Popov; LV-NEXT: [[TMP15:%.*]] = or i1 [[TMP7]], [[TMP14]] 165e7f4ad13SNikita Popov; LV-NEXT: br i1 [[TMP15]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] 1660c09e5bdSPhilip Reames; LV: for.body.ph.lver.orig: 1670c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] 1680c09e5bdSPhilip Reames; LV: for.body.lver.orig: 1690c09e5bdSPhilip Reames; LV-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] 1700c09e5bdSPhilip Reames; LV-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH_LVER_ORIG]] ], [ [[DEC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] 1710c09e5bdSPhilip Reames; LV-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2 1720c09e5bdSPhilip Reames; LV-NEXT: [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64 173e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]] 174e7f4ad13SNikita Popov; LV-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXA_LVER_ORIG]], align 2 175e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr i16, ptr [[B:%.*]], i64 [[IND_LVER_ORIG]] 176e7f4ad13SNikita Popov; LV-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXB_LVER_ORIG]], align 2 1770c09e5bdSPhilip Reames; LV-NEXT: [[ADD_LVER_ORIG:%.*]] = mul i16 
[[LOADA_LVER_ORIG]], [[LOADB_LVER_ORIG]] 178e7f4ad13SNikita Popov; LV-NEXT: store i16 [[ADD_LVER_ORIG]], ptr [[ARRAYIDXA_LVER_ORIG]], align 2 1790c09e5bdSPhilip Reames; LV-NEXT: [[INC_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1 1800c09e5bdSPhilip Reames; LV-NEXT: [[DEC_LVER_ORIG]] = sub i32 [[IND1_LVER_ORIG]], 1 1810c09e5bdSPhilip Reames; LV-NEXT: [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[N]] 1820c09e5bdSPhilip Reames; LV-NEXT: br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]] 1830c09e5bdSPhilip Reames; LV: for.body.ph: 1840c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_BODY:%.*]] 1850c09e5bdSPhilip Reames; LV: for.body: 1860c09e5bdSPhilip Reames; LV-NEXT: [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] 1870c09e5bdSPhilip Reames; LV-NEXT: [[IND1:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH]] ], [ [[DEC:%.*]], [[FOR_BODY]] ] 1880c09e5bdSPhilip Reames; LV-NEXT: [[MUL:%.*]] = mul i32 [[IND1]], 2 1890c09e5bdSPhilip Reames; LV-NEXT: [[MUL_EXT:%.*]] = zext i32 [[MUL]] to i64 190e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXA:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT]] 191e7f4ad13SNikita Popov; LV-NEXT: [[LOADA:%.*]] = load i16, ptr [[ARRAYIDXA]], align 2 192e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXB:%.*]] = getelementptr i16, ptr [[B]], i64 [[IND]] 193e7f4ad13SNikita Popov; LV-NEXT: [[LOADB:%.*]] = load i16, ptr [[ARRAYIDXB]], align 2 1940c09e5bdSPhilip Reames; LV-NEXT: [[ADD:%.*]] = mul i16 [[LOADA]], [[LOADB]] 195e7f4ad13SNikita Popov; LV-NEXT: store i16 [[ADD]], ptr [[ARRAYIDXA]], align 2 1960c09e5bdSPhilip Reames; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1 1970c09e5bdSPhilip Reames; LV-NEXT: [[DEC]] = sub i32 [[IND1]], 1 1980c09e5bdSPhilip Reames; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]] 199e7f4ad13SNikita Popov; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]] 2000c09e5bdSPhilip Reames; LV: for.end.loopexit: 
2010c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_END:%.*]] 202e7f4ad13SNikita Popov; LV: for.end.loopexit5: 2030c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_END]] 2040c09e5bdSPhilip Reames; LV: for.end: 2050c09e5bdSPhilip Reames; LV-NEXT: ret void 2060c09e5bdSPhilip Reames; 207e7f4ad13SNikita Popov ptr noalias %b, i64 %N) { 2080c09e5bdSPhilip Reamesentry: 2090c09e5bdSPhilip Reames %TruncN = trunc i64 %N to i32 2100c09e5bdSPhilip Reames br label %for.body 2110c09e5bdSPhilip Reames 2120c09e5bdSPhilip Reamesfor.body: ; preds = %for.body, %entry 2130c09e5bdSPhilip Reames %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ] 2140c09e5bdSPhilip Reames %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ] 2150c09e5bdSPhilip Reames 2160c09e5bdSPhilip Reames %mul = mul i32 %ind1, 2 2170c09e5bdSPhilip Reames %mul_ext = zext i32 %mul to i64 2180c09e5bdSPhilip Reames 219e7f4ad13SNikita Popov %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext 220e7f4ad13SNikita Popov %loadA = load i16, ptr %arrayidxA, align 2 2210c09e5bdSPhilip Reames 222e7f4ad13SNikita Popov %arrayidxB = getelementptr i16, ptr %b, i64 %ind 223e7f4ad13SNikita Popov %loadB = load i16, ptr %arrayidxB, align 2 2240c09e5bdSPhilip Reames 2250c09e5bdSPhilip Reames %add = mul i16 %loadA, %loadB 2260c09e5bdSPhilip Reames 227e7f4ad13SNikita Popov store i16 %add, ptr %arrayidxA, align 2 2280c09e5bdSPhilip Reames 2290c09e5bdSPhilip Reames %inc = add nuw nsw i64 %ind, 1 2300c09e5bdSPhilip Reames %dec = sub i32 %ind1, 1 2310c09e5bdSPhilip Reames 2320c09e5bdSPhilip Reames %exitcond = icmp eq i64 %inc, %N 2330c09e5bdSPhilip Reames br i1 %exitcond, label %for.end, label %for.body 2340c09e5bdSPhilip Reames 2350c09e5bdSPhilip Reamesfor.end: ; preds = %for.body 2360c09e5bdSPhilip Reames ret void 2370c09e5bdSPhilip Reames} 2380c09e5bdSPhilip Reames 2390c09e5bdSPhilip Reames; We replicate the tests above, but this time sign extend 2 * index instead 2400c09e5bdSPhilip Reames; of zero extending it. 
2410c09e5bdSPhilip Reames 2420c09e5bdSPhilip Reames; The expression for %mul_ext as analyzed by SCEV is 2430c09e5bdSPhilip Reames; i64 (sext i32 {0,+,2}<%for.body> to i64) 2440c09e5bdSPhilip Reames; We have added the nssw flag to turn this expression into the following SCEV: 2450c09e5bdSPhilip Reames; i64 {0,+,2}<%for.body> 2460c09e5bdSPhilip Reames 247e7f4ad13SNikita Popovdefine void @f3(ptr noalias %a, 2480c09e5bdSPhilip Reames; LV-LABEL: @f3( 2490c09e5bdSPhilip Reames; LV-NEXT: for.body.lver.check: 2500c09e5bdSPhilip Reames; LV-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 2510c09e5bdSPhilip Reames; LV-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 2520c09e5bdSPhilip Reames; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]]) 2530c09e5bdSPhilip Reames; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 2540c09e5bdSPhilip Reames; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 255e7f4ad13SNikita Popov; LV-NEXT: [[TMP2:%.*]] = icmp slt i32 [[MUL_RESULT]], 0 256e7f4ad13SNikita Popov; LV-NEXT: [[TMP3:%.*]] = or i1 [[TMP2]], [[MUL_OVERFLOW]] 257e7f4ad13SNikita Popov; LV-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 258e7f4ad13SNikita Popov; LV-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] 259e735f2bfSPhilip Reames; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]]) 260e735f2bfSPhilip Reames; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 261e735f2bfSPhilip Reames; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 262e7f4ad13SNikita Popov; LV-NEXT: [[TMP6:%.*]] = sub i64 0, [[MUL_RESULT3]] 263e7f4ad13SNikita Popov; LV-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[MUL_RESULT3]] 264e7f4ad13SNikita Popov; LV-NEXT: [[TMP8:%.*]] = icmp ult ptr [[TMP7]], [[A]] 265e7f4ad13SNikita Popov; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW4]] 266e7f4ad13SNikita Popov; LV-NEXT: [[TMP10:%.*]] = or i1 
[[TMP5]], [[TMP9]] 267e7f4ad13SNikita Popov; LV-NEXT: br i1 [[TMP10]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] 2680c09e5bdSPhilip Reames; LV: for.body.ph.lver.orig: 2690c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] 2700c09e5bdSPhilip Reames; LV: for.body.lver.orig: 2710c09e5bdSPhilip Reames; LV-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] 2720c09e5bdSPhilip Reames; LV-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] 2730c09e5bdSPhilip Reames; LV-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2 2740c09e5bdSPhilip Reames; LV-NEXT: [[MUL_EXT_LVER_ORIG:%.*]] = sext i32 [[MUL_LVER_ORIG]] to i64 275e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]] 276e7f4ad13SNikita Popov; LV-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXA_LVER_ORIG]], align 2 277e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr i16, ptr [[B:%.*]], i64 [[IND_LVER_ORIG]] 278e7f4ad13SNikita Popov; LV-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXB_LVER_ORIG]], align 2 2790c09e5bdSPhilip Reames; LV-NEXT: [[ADD_LVER_ORIG:%.*]] = mul i16 [[LOADA_LVER_ORIG]], [[LOADB_LVER_ORIG]] 280e7f4ad13SNikita Popov; LV-NEXT: store i16 [[ADD_LVER_ORIG]], ptr [[ARRAYIDXA_LVER_ORIG]], align 2 2810c09e5bdSPhilip Reames; LV-NEXT: [[INC_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1 2820c09e5bdSPhilip Reames; LV-NEXT: [[INC1_LVER_ORIG]] = add i32 [[IND1_LVER_ORIG]], 1 2830c09e5bdSPhilip Reames; LV-NEXT: [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[N]] 2840c09e5bdSPhilip Reames; LV-NEXT: br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]] 2850c09e5bdSPhilip Reames; LV: for.body.ph: 2860c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_BODY:%.*]] 
2870c09e5bdSPhilip Reames; LV: for.body: 2880c09e5bdSPhilip Reames; LV-NEXT: [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] 2890c09e5bdSPhilip Reames; LV-NEXT: [[IND1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] 2900c09e5bdSPhilip Reames; LV-NEXT: [[MUL:%.*]] = mul i32 [[IND1]], 2 2910c09e5bdSPhilip Reames; LV-NEXT: [[MUL_EXT:%.*]] = sext i32 [[MUL]] to i64 292e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXA:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT]] 293e7f4ad13SNikita Popov; LV-NEXT: [[LOADA:%.*]] = load i16, ptr [[ARRAYIDXA]], align 2 294e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXB:%.*]] = getelementptr i16, ptr [[B]], i64 [[IND]] 295e7f4ad13SNikita Popov; LV-NEXT: [[LOADB:%.*]] = load i16, ptr [[ARRAYIDXB]], align 2 2960c09e5bdSPhilip Reames; LV-NEXT: [[ADD:%.*]] = mul i16 [[LOADA]], [[LOADB]] 297e7f4ad13SNikita Popov; LV-NEXT: store i16 [[ADD]], ptr [[ARRAYIDXA]], align 2 2980c09e5bdSPhilip Reames; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1 2990c09e5bdSPhilip Reames; LV-NEXT: [[INC1]] = add i32 [[IND1]], 1 3000c09e5bdSPhilip Reames; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]] 301e7f4ad13SNikita Popov; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]] 3020c09e5bdSPhilip Reames; LV: for.end.loopexit: 3030c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_END:%.*]] 304e7f4ad13SNikita Popov; LV: for.end.loopexit5: 3050c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_END]] 3060c09e5bdSPhilip Reames; LV: for.end: 3070c09e5bdSPhilip Reames; LV-NEXT: ret void 3080c09e5bdSPhilip Reames; 309e7f4ad13SNikita Popov ptr noalias %b, i64 %N) { 3100c09e5bdSPhilip Reamesentry: 3110c09e5bdSPhilip Reames br label %for.body 3120c09e5bdSPhilip Reames 3130c09e5bdSPhilip Reamesfor.body: ; preds = %for.body, %entry 3140c09e5bdSPhilip Reames %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ] 3150c09e5bdSPhilip Reames %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ] 3160c09e5bdSPhilip 
Reames 3170c09e5bdSPhilip Reames %mul = mul i32 %ind1, 2 3180c09e5bdSPhilip Reames %mul_ext = sext i32 %mul to i64 3190c09e5bdSPhilip Reames 320e7f4ad13SNikita Popov %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext 321e7f4ad13SNikita Popov %loadA = load i16, ptr %arrayidxA, align 2 3220c09e5bdSPhilip Reames 323e7f4ad13SNikita Popov %arrayidxB = getelementptr i16, ptr %b, i64 %ind 324e7f4ad13SNikita Popov %loadB = load i16, ptr %arrayidxB, align 2 3250c09e5bdSPhilip Reames 3260c09e5bdSPhilip Reames %add = mul i16 %loadA, %loadB 3270c09e5bdSPhilip Reames 328e7f4ad13SNikita Popov store i16 %add, ptr %arrayidxA, align 2 3290c09e5bdSPhilip Reames 3300c09e5bdSPhilip Reames %inc = add nuw nsw i64 %ind, 1 3310c09e5bdSPhilip Reames %inc1 = add i32 %ind1, 1 3320c09e5bdSPhilip Reames 3330c09e5bdSPhilip Reames %exitcond = icmp eq i64 %inc, %N 3340c09e5bdSPhilip Reames br i1 %exitcond, label %for.end, label %for.body 3350c09e5bdSPhilip Reames 3360c09e5bdSPhilip Reamesfor.end: ; preds = %for.body 3370c09e5bdSPhilip Reames ret void 3380c09e5bdSPhilip Reames} 3390c09e5bdSPhilip Reames 340e7f4ad13SNikita Popovdefine void @f4(ptr noalias %a, 3410c09e5bdSPhilip Reames; LV-LABEL: @f4( 3420c09e5bdSPhilip Reames; LV-NEXT: for.body.lver.check: 3430c09e5bdSPhilip Reames; LV-NEXT: [[TRUNCN:%.*]] = trunc i64 [[N:%.*]] to i32 3440c09e5bdSPhilip Reames; LV-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 3450c09e5bdSPhilip Reames; LV-NEXT: [[TMP1:%.*]] = shl i32 [[TRUNCN]], 1 3460c09e5bdSPhilip Reames; LV-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32 3470c09e5bdSPhilip Reames; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]]) 3480c09e5bdSPhilip Reames; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 3490c09e5bdSPhilip Reames; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 350e7f4ad13SNikita Popov; LV-NEXT: [[TMP3:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]] 351e7f4ad13SNikita Popov; LV-NEXT: [[TMP4:%.*]] = icmp sgt i32 
[[TMP3]], [[TMP1]] 352e7f4ad13SNikita Popov; LV-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]] 353e7f4ad13SNikita Popov; LV-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 354e7f4ad13SNikita Popov; LV-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] 355e7f4ad13SNikita Popov; LV-NEXT: [[TMP8:%.*]] = sext i32 [[TMP1]] to i64 356e7f4ad13SNikita Popov; LV-NEXT: [[TMP9:%.*]] = shl nsw i64 [[TMP8]], 1 357e7f4ad13SNikita Popov; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP9]] 358e735f2bfSPhilip Reames; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]]) 359e735f2bfSPhilip Reames; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 360e735f2bfSPhilip Reames; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 361e7f4ad13SNikita Popov; LV-NEXT: [[TMP10:%.*]] = sub i64 0, [[MUL_RESULT3]] 362e7f4ad13SNikita Popov; LV-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[TMP10]] 363e7f4ad13SNikita Popov; LV-NEXT: [[TMP12:%.*]] = icmp ugt ptr [[TMP11]], [[SCEVGEP]] 364e7f4ad13SNikita Popov; LV-NEXT: [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW4]] 365e7f4ad13SNikita Popov; LV-NEXT: [[TMP14:%.*]] = or i1 [[TMP7]], [[TMP13]] 366e7f4ad13SNikita Popov; LV-NEXT: br i1 [[TMP14]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] 3670c09e5bdSPhilip Reames; LV: for.body.ph.lver.orig: 3680c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] 3690c09e5bdSPhilip Reames; LV: for.body.lver.orig: 3700c09e5bdSPhilip Reames; LV-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] 3710c09e5bdSPhilip Reames; LV-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH_LVER_ORIG]] ], [ [[DEC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] 3720c09e5bdSPhilip Reames; LV-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2 3730c09e5bdSPhilip Reames; LV-NEXT: 
[[MUL_EXT_LVER_ORIG:%.*]] = sext i32 [[MUL_LVER_ORIG]] to i64 374e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]] 375e7f4ad13SNikita Popov; LV-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXA_LVER_ORIG]], align 2 376e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr i16, ptr [[B:%.*]], i64 [[IND_LVER_ORIG]] 377e7f4ad13SNikita Popov; LV-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXB_LVER_ORIG]], align 2 3780c09e5bdSPhilip Reames; LV-NEXT: [[ADD_LVER_ORIG:%.*]] = mul i16 [[LOADA_LVER_ORIG]], [[LOADB_LVER_ORIG]] 379e7f4ad13SNikita Popov; LV-NEXT: store i16 [[ADD_LVER_ORIG]], ptr [[ARRAYIDXA_LVER_ORIG]], align 2 3800c09e5bdSPhilip Reames; LV-NEXT: [[INC_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1 3810c09e5bdSPhilip Reames; LV-NEXT: [[DEC_LVER_ORIG]] = sub i32 [[IND1_LVER_ORIG]], 1 3820c09e5bdSPhilip Reames; LV-NEXT: [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[N]] 3830c09e5bdSPhilip Reames; LV-NEXT: br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]] 3840c09e5bdSPhilip Reames; LV: for.body.ph: 3850c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_BODY:%.*]] 3860c09e5bdSPhilip Reames; LV: for.body: 3870c09e5bdSPhilip Reames; LV-NEXT: [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] 3880c09e5bdSPhilip Reames; LV-NEXT: [[IND1:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH]] ], [ [[DEC:%.*]], [[FOR_BODY]] ] 3890c09e5bdSPhilip Reames; LV-NEXT: [[MUL:%.*]] = mul i32 [[IND1]], 2 3900c09e5bdSPhilip Reames; LV-NEXT: [[MUL_EXT:%.*]] = sext i32 [[MUL]] to i64 391e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXA:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT]] 392e7f4ad13SNikita Popov; LV-NEXT: [[LOADA:%.*]] = load i16, ptr [[ARRAYIDXA]], align 2 393e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXB:%.*]] = getelementptr i16, ptr [[B]], i64 [[IND]] 394e7f4ad13SNikita Popov; LV-NEXT: 
[[LOADB:%.*]] = load i16, ptr [[ARRAYIDXB]], align 2 3950c09e5bdSPhilip Reames; LV-NEXT: [[ADD:%.*]] = mul i16 [[LOADA]], [[LOADB]] 396e7f4ad13SNikita Popov; LV-NEXT: store i16 [[ADD]], ptr [[ARRAYIDXA]], align 2 3970c09e5bdSPhilip Reames; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1 3980c09e5bdSPhilip Reames; LV-NEXT: [[DEC]] = sub i32 [[IND1]], 1 3990c09e5bdSPhilip Reames; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]] 400e7f4ad13SNikita Popov; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]] 4010c09e5bdSPhilip Reames; LV: for.end.loopexit: 4020c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_END:%.*]] 403e7f4ad13SNikita Popov; LV: for.end.loopexit5: 4040c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_END]] 4050c09e5bdSPhilip Reames; LV: for.end: 4060c09e5bdSPhilip Reames; LV-NEXT: ret void 4070c09e5bdSPhilip Reames; 408e7f4ad13SNikita Popov ptr noalias %b, i64 %N) { 4090c09e5bdSPhilip Reamesentry: 4100c09e5bdSPhilip Reames %TruncN = trunc i64 %N to i32 4110c09e5bdSPhilip Reames br label %for.body 4120c09e5bdSPhilip Reames 4130c09e5bdSPhilip Reamesfor.body: ; preds = %for.body, %entry 4140c09e5bdSPhilip Reames %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ] 4150c09e5bdSPhilip Reames %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ] 4160c09e5bdSPhilip Reames 4170c09e5bdSPhilip Reames %mul = mul i32 %ind1, 2 4180c09e5bdSPhilip Reames %mul_ext = sext i32 %mul to i64 4190c09e5bdSPhilip Reames 420e7f4ad13SNikita Popov %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext 421e7f4ad13SNikita Popov %loadA = load i16, ptr %arrayidxA, align 2 4220c09e5bdSPhilip Reames 423e7f4ad13SNikita Popov %arrayidxB = getelementptr i16, ptr %b, i64 %ind 424e7f4ad13SNikita Popov %loadB = load i16, ptr %arrayidxB, align 2 4250c09e5bdSPhilip Reames 4260c09e5bdSPhilip Reames %add = mul i16 %loadA, %loadB 4270c09e5bdSPhilip Reames 428e7f4ad13SNikita Popov store i16 %add, ptr %arrayidxA, align 2 4290c09e5bdSPhilip Reames 4300c09e5bdSPhilip Reames 
%inc = add nuw nsw i64 %ind, 1 4310c09e5bdSPhilip Reames %dec = sub i32 %ind1, 1 4320c09e5bdSPhilip Reames 4330c09e5bdSPhilip Reames %exitcond = icmp eq i64 %inc, %N 4340c09e5bdSPhilip Reames br i1 %exitcond, label %for.end, label %for.body 4350c09e5bdSPhilip Reames 4360c09e5bdSPhilip Reamesfor.end: ; preds = %for.body 4370c09e5bdSPhilip Reames ret void 4380c09e5bdSPhilip Reames} 4390c09e5bdSPhilip Reames 4400c09e5bdSPhilip Reames; The following function is similar to the one above, but has the GEP 4410c09e5bdSPhilip Reames; to pointer %A inbounds. The index %mul doesn't have the nsw flag. 4420c09e5bdSPhilip Reames; This means that the SCEV expression for %mul can wrap and we need 4430c09e5bdSPhilip Reames; a SCEV predicate to continue analysis. 4440c09e5bdSPhilip Reames; 4450c09e5bdSPhilip Reames; We can still analyze this by adding the required no wrap SCEV predicates. 4460c09e5bdSPhilip Reames 447e7f4ad13SNikita Popovdefine void @f5(ptr noalias %a, 4480c09e5bdSPhilip Reames; LV-LABEL: @f5( 4490c09e5bdSPhilip Reames; LV-NEXT: for.body.lver.check: 4500c09e5bdSPhilip Reames; LV-NEXT: [[TRUNCN:%.*]] = trunc i64 [[N:%.*]] to i32 4510c09e5bdSPhilip Reames; LV-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 4520c09e5bdSPhilip Reames; LV-NEXT: [[TMP1:%.*]] = shl i32 [[TRUNCN]], 1 4530c09e5bdSPhilip Reames; LV-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32 4540c09e5bdSPhilip Reames; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]]) 4550c09e5bdSPhilip Reames; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 4560c09e5bdSPhilip Reames; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 457e7f4ad13SNikita Popov; LV-NEXT: [[TMP3:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]] 458e7f4ad13SNikita Popov; LV-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP1]] 459e7f4ad13SNikita Popov; LV-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]] 460e7f4ad13SNikita Popov; LV-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP0]], 
4294967295 461a353e258SFlorian Hahn; LV-NEXT: [[TMP14:%.*]] = or i1 [[TMP5]], [[TMP6]] 462e7f4ad13SNikita Popov; LV-NEXT: br i1 [[TMP14]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] 4630c09e5bdSPhilip Reames; LV: for.body.ph.lver.orig: 4640c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] 4650c09e5bdSPhilip Reames; LV: for.body.lver.orig: 4660c09e5bdSPhilip Reames; LV-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] 4670c09e5bdSPhilip Reames; LV-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH_LVER_ORIG]] ], [ [[DEC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] 4680c09e5bdSPhilip Reames; LV-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2 469a353e258SFlorian Hahn; LV-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i16, ptr [[A:%.*]], i32 [[MUL_LVER_ORIG]] 470e7f4ad13SNikita Popov; LV-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXA_LVER_ORIG]], align 2 471e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 [[IND_LVER_ORIG]] 472e7f4ad13SNikita Popov; LV-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXB_LVER_ORIG]], align 2 4730c09e5bdSPhilip Reames; LV-NEXT: [[ADD_LVER_ORIG:%.*]] = mul i16 [[LOADA_LVER_ORIG]], [[LOADB_LVER_ORIG]] 474e7f4ad13SNikita Popov; LV-NEXT: store i16 [[ADD_LVER_ORIG]], ptr [[ARRAYIDXA_LVER_ORIG]], align 2 4750c09e5bdSPhilip Reames; LV-NEXT: [[INC_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1 4760c09e5bdSPhilip Reames; LV-NEXT: [[DEC_LVER_ORIG]] = sub i32 [[IND1_LVER_ORIG]], 1 4770c09e5bdSPhilip Reames; LV-NEXT: [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[INC_LVER_ORIG]], [[N]] 4780c09e5bdSPhilip Reames; LV-NEXT: br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]] 4790c09e5bdSPhilip Reames; LV: for.body.ph: 4800c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_BODY:%.*]] 
4810c09e5bdSPhilip Reames; LV: for.body: 4820c09e5bdSPhilip Reames; LV-NEXT: [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] 4830c09e5bdSPhilip Reames; LV-NEXT: [[IND1:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH]] ], [ [[DEC:%.*]], [[FOR_BODY]] ] 4840c09e5bdSPhilip Reames; LV-NEXT: [[MUL:%.*]] = mul i32 [[IND1]], 2 485e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[MUL]] 486e7f4ad13SNikita Popov; LV-NEXT: [[LOADA:%.*]] = load i16, ptr [[ARRAYIDXA]], align 2 487e7f4ad13SNikita Popov; LV-NEXT: [[ARRAYIDXB:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 [[IND]] 488e7f4ad13SNikita Popov; LV-NEXT: [[LOADB:%.*]] = load i16, ptr [[ARRAYIDXB]], align 2 4890c09e5bdSPhilip Reames; LV-NEXT: [[ADD:%.*]] = mul i16 [[LOADA]], [[LOADB]] 490e7f4ad13SNikita Popov; LV-NEXT: store i16 [[ADD]], ptr [[ARRAYIDXA]], align 2 4910c09e5bdSPhilip Reames; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1 4920c09e5bdSPhilip Reames; LV-NEXT: [[DEC]] = sub i32 [[IND1]], 1 4930c09e5bdSPhilip Reames; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]] 494e7f4ad13SNikita Popov; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]] 4950c09e5bdSPhilip Reames; LV: for.end.loopexit: 4960c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_END:%.*]] 497a353e258SFlorian Hahn; LV: for.end.loopexit2: 4980c09e5bdSPhilip Reames; LV-NEXT: br label [[FOR_END]] 4990c09e5bdSPhilip Reames; LV: for.end: 5000c09e5bdSPhilip Reames; LV-NEXT: ret void 5010c09e5bdSPhilip Reames; 502e7f4ad13SNikita Popov ptr noalias %b, i64 %N) { 5030c09e5bdSPhilip Reamesentry: 5040c09e5bdSPhilip Reames %TruncN = trunc i64 %N to i32 5050c09e5bdSPhilip Reames br label %for.body 5060c09e5bdSPhilip Reames 5070c09e5bdSPhilip Reamesfor.body: ; preds = %for.body, %entry 5080c09e5bdSPhilip Reames %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ] 5090c09e5bdSPhilip Reames %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ] 5100c09e5bdSPhilip 
Reames 5110c09e5bdSPhilip Reames %mul = mul i32 %ind1, 2 5120c09e5bdSPhilip Reames 513e7f4ad13SNikita Popov %arrayidxA = getelementptr inbounds i16, ptr %a, i32 %mul 514e7f4ad13SNikita Popov %loadA = load i16, ptr %arrayidxA, align 2 5150c09e5bdSPhilip Reames 516e7f4ad13SNikita Popov %arrayidxB = getelementptr inbounds i16, ptr %b, i64 %ind 517e7f4ad13SNikita Popov %loadB = load i16, ptr %arrayidxB, align 2 5180c09e5bdSPhilip Reames 5190c09e5bdSPhilip Reames %add = mul i16 %loadA, %loadB 5200c09e5bdSPhilip Reames 521e7f4ad13SNikita Popov store i16 %add, ptr %arrayidxA, align 2 5220c09e5bdSPhilip Reames 5230c09e5bdSPhilip Reames %inc = add nuw nsw i64 %ind, 1 5240c09e5bdSPhilip Reames %dec = sub i32 %ind1, 1 5250c09e5bdSPhilip Reames 5260c09e5bdSPhilip Reames %exitcond = icmp eq i64 %inc, %N 5270c09e5bdSPhilip Reames br i1 %exitcond, label %for.end, label %for.body 5280c09e5bdSPhilip Reames 5290c09e5bdSPhilip Reamesfor.end: ; preds = %for.body 5300c09e5bdSPhilip Reames ret void 5310c09e5bdSPhilip Reames} 532