; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes="default<O3>" -S %s | FileCheck %s

target triple = "arm64-apple-darwin"

; Make sure we can vectorize a loop that uses a function to clamp a double to
; be between a given minimum and maximum value.
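;
; The IR below roughly corresponds to C source along the following lines. This
; is an illustrative sketch reconstructed from the IR, not the verbatim
; original source:
;
;   static double clamp(double v) {
;     if (v < 0.0)
;       return 0.0;
;     if (v > 6.0)
;       return 6.0;
;     return v;
;   }
;
;   void loop(double *X, double *Y) {
;     for (unsigned i = 0; i < 20000; i++)
;       X[i] = clamp(Y[i]);
;   }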

define internal double @clamp(double %v) {
entry:
  %retval = alloca double, align 8
  %v.addr = alloca double, align 8
  store double %v, ptr %v.addr, align 8
  %0 = load double, ptr %v.addr, align 8
  %cmp = fcmp olt double %0, 0.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  store double 0.000000e+00, ptr %retval, align 8
  br label %return

if.end:                                           ; preds = %entry
  %1 = load double, ptr %v.addr, align 8
  %cmp1 = fcmp ogt double %1, 6.000000e+00
  br i1 %cmp1, label %if.then2, label %if.end3

if.then2:                                         ; preds = %if.end
  store double 6.000000e+00, ptr %retval, align 8
  br label %return

if.end3:                                          ; preds = %if.end
  %2 = load double, ptr %v.addr, align 8
  store double %2, ptr %retval, align 8
  br label %return

return:                                           ; preds = %if.end3, %if.then2, %if.then
  %3 = load double, ptr %retval, align 8
  ret double %3
}

define void @loop(ptr %X, ptr %Y) {
; CHECK-LABEL: @loop(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X6:%.*]] = ptrtoint ptr [[X:%.*]] to i64
; CHECK-NEXT:    [[Y7:%.*]] = ptrtoint ptr [[Y:%.*]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[X6]], [[Y7]]
; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT:    br i1 [[DIFF_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <2 x double>, ptr [[TMP2]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD8]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD]], splat (double 6.000000e+00)
; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD8]], splat (double 6.000000e+00)
; CHECK-NEXT:    [[TMP7:%.*]] = select <2 x i1> [[TMP5]], <2 x double> splat (double 6.000000e+00), <2 x double> [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP6]], <2 x double> splat (double 6.000000e+00), <2 x double> [[WIDE_LOAD8]]
; CHECK-NEXT:    [[TMP9:%.*]] = select <2 x i1> [[TMP3]], <2 x double> zeroinitializer, <2 x double> [[TMP7]]
; CHECK-NEXT:    [[TMP10:%.*]] = select <2 x i1> [[TMP4]], <2 x double> zeroinitializer, <2 x double> [[TMP8]]
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP11]], i64 16
; CHECK-NEXT:    store <2 x double> [[TMP9]], ptr [[TMP11]], align 8
; CHECK-NEXT:    store <2 x double> [[TMP10]], ptr [[TMP12]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
; CHECK-NEXT:    br i1 [[TMP13]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP14:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt double [[TMP14]], 0.000000e+00
; CHECK-NEXT:    [[CMP1_I:%.*]] = fcmp ogt double [[TMP14]], 6.000000e+00
; CHECK-NEXT:    [[DOTV_I:%.*]] = select i1 [[CMP1_I]], double 6.000000e+00, double [[TMP14]]
; CHECK-NEXT:    [[RETVAL_0_I:%.*]] = select i1 [[CMP_I]], double 0.000000e+00, double [[DOTV_I]]
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    store double [[RETVAL_0_I]], ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 20000
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
;
entry:
  %X.addr = alloca ptr, align 8
  %Y.addr = alloca ptr, align 8
  %i = alloca i32, align 4
  store ptr %X, ptr %X.addr, align 8
  store ptr %Y, ptr %Y.addr, align 8
  call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2
  store i32 0, ptr %i, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc, %entry
  %0 = load i32, ptr %i, align 4
  %cmp = icmp ult i32 %0, 20000
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond
  call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2
  br label %for.end

for.body:                                         ; preds = %for.cond
  %1 = load ptr, ptr %Y.addr, align 8
  %2 = load i32, ptr %i, align 4
  %idxprom = zext i32 %2 to i64
  %arrayidx = getelementptr inbounds double, ptr %1, i64 %idxprom
  %3 = load double, ptr %arrayidx, align 8
  %call = call double @clamp(double %3)
  %4 = load ptr, ptr %X.addr, align 8
  %5 = load i32, ptr %i, align 4
  %idxprom1 = zext i32 %5 to i64
  %arrayidx2 = getelementptr inbounds double, ptr %4, i64 %idxprom1
  store double %call, ptr %arrayidx2, align 8
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %6 = load i32, ptr %i, align 4
  %inc = add i32 %6, 1
  store i32 %inc, ptr %i, align 4
  br label %for.cond

for.end:                                          ; preds = %for.cond.cleanup
  ret void
}

; Test that requires sinking/hoisting of instructions for vectorization.
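;
; A rough C equivalent of @loop2, reconstructed from the IR as a sketch (the
; exact original source is an assumption):
;
;   void loop2(float *A, float *B, int *C, float x) {
;     for (long i = 0; i < 10000; i++) {
;       if (C[i] == 20)
;         B[i] = A[i] * x;
;       else
;         B[i] = A[i] * x + B[i];
;     }
;   }
;
; Both branches compute A[i] * x and store to B[i]; the CHECK lines below show
; the common load and multiply hoisted into loop.body and the store sunk into
; loop.latch, which is what allows the loop to be vectorized.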

define void @loop2(ptr %A, ptr %B, ptr %C, float %x) {
; CHECK-LABEL: @loop2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 40000
; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 40000
; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 40000
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP2]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[C]], [[SCEVGEP]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    [[BOUND04:%.*]] = icmp ult ptr [[B]], [[SCEVGEP3]]
; CHECK-NEXT:    [[BOUND15:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
; CHECK-NEXT:    [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]]
; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]]
; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label [[LOOP_BODY:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[C]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope [[META4:![0-9]+]]
; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META4]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 20)
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD7]], splat (i32 20)
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope [[META7:![0-9]+]]
; CHECK-NEXT:    [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !alias.scope [[META7]]
; CHECK-NEXT:    [[TMP6:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD8]]
; CHECK-NEXT:    [[TMP7:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD9]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META11:![0-9]+]]
; CHECK-NEXT:    [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !alias.scope [[META9]], !noalias [[META11]]
; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x float> [[TMP6]], [[WIDE_LOAD10]]
; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[TMP7]], [[WIDE_LOAD11]]
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x float> [[TMP6]], <4 x float> [[TMP10]]
; CHECK-NEXT:    [[PREDPHI12:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP7]], <4 x float> [[TMP11]]
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 16
; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope [[META9]], !noalias [[META11]]
; CHECK-NEXT:    store <4 x float> [[PREDPHI12]], ptr [[TMP12]], align 4, !alias.scope [[META9]], !noalias [[META11]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; CHECK-NEXT:    br i1 [[TMP13]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK:       loop.body:
; CHECK-NEXT:    [[IV1:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[C_GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[C]], i64 [[IV1]]
; CHECK-NEXT:    [[C_LV:%.*]] = load i32, ptr [[C_GEP]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C_LV]], 20
; CHECK-NEXT:    [[A_GEP_0:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV1]]
; CHECK-NEXT:    [[A_LV_0:%.*]] = load float, ptr [[A_GEP_0]], align 4
; CHECK-NEXT:    [[MUL2_I81_I:%.*]] = fmul float [[X]], [[A_LV_0]]
; CHECK-NEXT:    [[B_GEP_0:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[IV1]]
; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_LATCH]], label [[ELSE:%.*]]
; CHECK:       else:
; CHECK-NEXT:    [[B_LV:%.*]] = load float, ptr [[B_GEP_0]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[MUL2_I81_I]], [[B_LV]]
; CHECK-NEXT:    br label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[ADD_SINK:%.*]] = phi float [ [[ADD]], [[ELSE]] ], [ [[MUL2_I81_I]], [[LOOP_BODY]] ]
; CHECK-NEXT:    store float [[ADD_SINK]], ptr [[B_GEP_0]], align 4
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 10000
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
  %cmp.0 = icmp ult i64 %iv, 10000
  br i1 %cmp.0, label %loop.body, label %exit

loop.body:
  %C.gep = getelementptr inbounds i32, ptr %C, i64 %iv
  %C.lv = load i32, ptr %C.gep
  %cmp = icmp eq i32 %C.lv, 20
  br i1 %cmp, label %then, label %else

then:
  %A.gep.0 = getelementptr inbounds float, ptr %A, i64 %iv
  %A.lv.0 = load float, ptr %A.gep.0, align 4
  %mul2.i81.i = fmul float %A.lv.0, %x
  %B.gep.0 = getelementptr inbounds float, ptr %B, i64 %iv
  store float %mul2.i81.i, ptr %B.gep.0, align 4
  br label %loop.latch

else:
  %A.gep.1 = getelementptr inbounds float, ptr %A, i64 %iv
  %A.lv.1 = load float, ptr %A.gep.1, align 4
  %mul2 = fmul float %A.lv.1, %x
  %B.gep.1 = getelementptr inbounds float, ptr %B, i64 %iv
  %B.lv = load float, ptr %B.gep.1, align 4
  %add = fadd float %mul2, %B.lv
  store float %add, ptr %B.gep.1, align 4
  br label %loop.latch

loop.latch:
  %iv.next = add nuw nsw i64 %iv, 1
  br label %loop.header

exit:
  ret void
}

declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)

declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)