; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s
; REQUIRES: asserts

; Pins the loop-vectorizer cost-model debug output for three compare-heavy
; loops on a thumbv8.1m.main target with the +mve feature (attributes #0 at
; the end of the file). Each function is preceded by the costs expected in
; that output: per instruction for the scalar loop (VF 1), then per recipe for
; every vector factor listed, followed by the total for that factor.

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-arm-none-eabi"

; expensive_icmp: for i in [0, n), load s[i] as i16 and, when sext(s[i]) is
; signed-greater than zext(m), store s[i] + trunc(n) to d[i]. The compare is
; performed on i32 values. The expectations give the widened compare a cost of
; 2 at VF 4 but 36 at VF 8, and VF 4 is the factor that ends up selected.

; CHECK-LABEL: LV: Checking a loop in 'expensive_icmp'
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %1 = load i16, ptr %arrayidx, align 2
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = sext i16 %1 to i32
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: br i1 %cmp2, label %if.then, label %for.inc
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv6 = add i16 %1, %0
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %conv6, ptr %arrayidx7, align 2
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: br label %for.inc
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %inc = add nuw nsw i32 %i.016, 1
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %exitcond.not = icmp eq i32 %inc, %n
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
; CHECK: LV: Scalar loop costs: 5.
; CHECK: Cost of 1 for VF 2: induction instruction %inc = add nuw nsw i32 %i.016, 1
; CHECK: Cost of 0 for VF 2: induction instruction %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond.not = icmp eq i32 %inc, %n
; CHECK: Cost of 0 for VF 2: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost of 0 for VF 2: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
; CHECK: Cost of 0 for VF 2: CLONE ir<%arrayidx> = getelementptr inbounds ir<%s>, vp<%3>
; CHECK: Cost of 0 for VF 2: vp<%4> = vector-pointer ir<%arrayidx>
; CHECK: Cost of 18 for VF 2: WIDEN ir<%1> = load vp<%4>
; CHECK: Cost of 4 for VF 2: WIDEN-CAST ir<%conv> = sext ir<%1> to i32
; CHECK: Cost of 20 for VF 2: WIDEN ir<%cmp2> = icmp sgt ir<%conv>, ir<%conv1>
; CHECK: Cost of 26 for VF 2: WIDEN ir<%conv6> = add ir<%1>, ir<%0>
; CHECK: Cost of 0 for VF 2: CLONE ir<%arrayidx7> = getelementptr ir<%d>, vp<%3>
; CHECK: Cost of 0 for VF 2: vp<%5> = vector-pointer ir<%arrayidx7>
; CHECK: Cost of 16 for VF 2: WIDEN store vp<%5>, ir<%conv6>, ir<%cmp2>
; CHECK: Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<%2>, vp<%0>
; CHECK: Cost of 0 for VF 2: EMIT branch-on-count vp<%index.next>, vp<%1>
; CHECK: Cost for VF 2: 86 (Estimated cost per lane: 43.
; CHECK: Cost of 1 for VF 4: induction instruction %inc = add nuw nsw i32 %i.016, 1
; CHECK: Cost of 0 for VF 4: induction instruction %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond.not = icmp eq i32 %inc, %n
; CHECK: Cost of 0 for VF 4: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost of 0 for VF 4: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
; CHECK: Cost of 0 for VF 4: CLONE ir<%arrayidx> = getelementptr inbounds ir<%s>, vp<%3>
; CHECK: Cost of 0 for VF 4: vp<%4> = vector-pointer ir<%arrayidx>
; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load vp<%4>
; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv> = sext ir<%1> to i32
; CHECK: Cost of 2 for VF 4: WIDEN ir<%cmp2> = icmp sgt ir<%conv>, ir<%conv1>
; CHECK: Cost of 2 for VF 4: WIDEN ir<%conv6> = add ir<%1>, ir<%0>
; CHECK: Cost of 0 for VF 4: CLONE ir<%arrayidx7> = getelementptr ir<%d>, vp<%3>
; CHECK: Cost of 0 for VF 4: vp<%5> = vector-pointer ir<%arrayidx7>
; CHECK: Cost of 2 for VF 4: WIDEN store vp<%5>, ir<%conv6>, ir<%cmp2>
; CHECK: Cost of 0 for VF 4: EMIT vp<%index.next> = add nuw vp<%2>, vp<%0>
; CHECK: Cost of 0 for VF 4: EMIT branch-on-count vp<%index.next>, vp<%1>
; CHECK: Cost for VF 4: 10 (Estimated cost per lane: 2.
; CHECK: Cost of 1 for VF 8: induction instruction %inc = add nuw nsw i32 %i.016, 1
; CHECK: Cost of 0 for VF 8: induction instruction %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
; CHECK: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i32 %inc, %n
; CHECK: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost of 0 for VF 8: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
; CHECK: Cost of 0 for VF 8: CLONE ir<%arrayidx> = getelementptr inbounds ir<%s>, vp<%3>
; CHECK: Cost of 0 for VF 8: vp<%4> = vector-pointer ir<%arrayidx>
; CHECK: Cost of 2 for VF 8: WIDEN ir<%1> = load vp<%4>
; CHECK: Cost of 2 for VF 8: WIDEN-CAST ir<%conv> = sext ir<%1> to i32
; CHECK: Cost of 36 for VF 8: WIDEN ir<%cmp2> = icmp sgt ir<%conv>, ir<%conv1>
; CHECK: Cost of 2 for VF 8: WIDEN ir<%conv6> = add ir<%1>, ir<%0>
; CHECK: Cost of 0 for VF 8: CLONE ir<%arrayidx7> = getelementptr ir<%d>, vp<%3>
; CHECK: Cost of 0 for VF 8: vp<%5> = vector-pointer ir<%arrayidx7>
; CHECK: Cost of 2 for VF 8: WIDEN store vp<%5>, ir<%conv6>, ir<%cmp2>
; CHECK: Cost of 0 for VF 8: EMIT vp<%index.next> = add nuw vp<%2>, vp<%0>
; CHECK: Cost of 0 for VF 8: EMIT branch-on-count vp<%index.next>, vp<%1>
; CHECK: Cost for VF 8: 46 (Estimated cost per lane: 5.
; CHECK: LV: Selecting VF: 4.
define void @expensive_icmp(ptr noalias nocapture %d, ptr nocapture readonly %s, i32 %n, i16 zeroext %m) #0 {
entry:
  ; Skip the loop entirely when n <= 0.
  %cmp15 = icmp sgt i32 %n, 0
  br i1 %cmp15, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph:                                   ; preds = %entry
  ; Loop-invariant operands: the i32 threshold and the i16 addend.
  %conv1 = zext i16 %m to i32
  %0 = trunc i32 %n to i16
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.inc, %entry
  ret void

for.body:                                         ; preds = %for.body.lr.ph, %for.inc
  %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
  %arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016
  %1 = load i16, ptr %arrayidx, align 2
  ; The i16 element is widened before the compare, so the compare is on i32.
  %conv = sext i16 %1 to i32
  %cmp2 = icmp sgt i32 %conv, %conv1
  br i1 %cmp2, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  ; Conditional store: only executed for elements that passed the compare.
  %conv6 = add i16 %1, %0
  %arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016
  store i16 %conv6, ptr %arrayidx7, align 2
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then
  %inc = add nuw nsw i32 %i.016, 1
  %exitcond.not = icmp eq i32 %inc, %n
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

; cheap_icmp: counts blkCnt down from blockSize to 0 while advancing three
; byte pointers by one each iteration. Every iteration stores
; trunc(min(ashr(sext(*pSrcB) * sext(*pSrcA), 7), 127)) to *pDst, where the
; signed min is expressed as icmp slt + select. The expectations give the
; widened icmp a cost of 0 at every vector factor, and VF 16 is selected.

; CHECK-LABEL: LV: Checking a loop in 'cheap_icmp'
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i8, ptr %pSrcA.addr.011, align 1
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv1 = sext i8 %0 to i32
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %1 = load i8, ptr %pSrcB.addr.09, align 1
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv3 = sext i8 %1 to i32
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nsw i32 %conv3, %conv1
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %shr = ashr i32 %mul, 7
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %2 = icmp slt i32 %shr, 127
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv4 = trunc i32 %spec.select.i to i8
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %conv4, ptr %pDst.addr.010, align 1
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %dec = add i32 %blkCnt.012, -1
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %cmp.not = icmp eq i32 %dec, 0
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: br i1 %cmp.not, label %while.end.loopexit, label %while.body
; CHECK: LV: Scalar loop costs: 9.
; CHECK: Cost of 1 for VF 2: induction instruction %dec = add i32 %blkCnt.012, -1
; CHECK: Cost of 0 for VF 2: induction instruction %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
; CHECK: Cost of 0 for VF 2: induction instruction %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
; CHECK: Cost of 0 for VF 2: induction instruction %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
; CHECK: Cost of 0 for VF 2: induction instruction %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
; CHECK: Cost of 0 for VF 2: induction instruction %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
; CHECK: Cost of 0 for VF 2: induction instruction %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
; CHECK: Cost of 0 for VF 2: induction instruction %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
; CHECK: Cost of 1 for VF 2: exit condition instruction %cmp.not = icmp eq i32 %dec, 0
; CHECK: Cost of 0 for VF 2: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost of 0 for VF 2: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK: Cost of 0 for VF 2: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<[[STEPS1]]>
; CHECK: Cost of 0 for VF 2: vp<[[STEPS2:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK: Cost of 0 for VF 2: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<[[STEPS2]]>
; CHECK: Cost of 0 for VF 2: vp<[[STEPS3:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK: Cost of 0 for VF 2: EMIT vp<%next.gep>.2 = ptradd ir<%pSrcB>, vp<[[STEPS3]]>
; CHECK: Cost of 0 for VF 2: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep>
; CHECK: Cost of 18 for VF 2: WIDEN ir<%0> = load vp<[[VEC_PTR]]>
; CHECK: Cost of 4 for VF 2: WIDEN-CAST ir<%conv1> = sext ir<%0> to i32
; CHECK: Cost of 0 for VF 2: vp<[[VEC_PTR2:%.+]]> = vector-pointer vp<%next.gep>.2
; CHECK: Cost of 18 for VF 2: WIDEN ir<%1> = load vp<[[VEC_PTR2]]>
; CHECK: Cost of 4 for VF 2: WIDEN-CAST ir<%conv3> = sext ir<%1> to i32
; CHECK: Cost of 26 for VF 2: WIDEN ir<%mul> = mul nsw ir<%conv3>, ir<%conv1>
; CHECK: Cost of 18 for VF 2: WIDEN ir<%shr> = ashr ir<%mul>, ir<7>
; CHECK: Cost of 0 for VF 2: WIDEN ir<%2> = icmp slt ir<%shr>, ir<127>
; CHECK: Cost of 22 for VF 2: WIDEN-SELECT ir<%spec.select.i> = select ir<%2>, ir<%shr>, ir<127>
; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv4> = trunc ir<%spec.select.i> to i8
; CHECK: Cost of 0 for VF 2: vp<[[VEC_PTR3:%.+]]> = vector-pointer vp<%next.gep>.1
; CHECK: Cost of 18 for VF 2: WIDEN store vp<[[VEC_PTR3]]>, ir<%conv4>
; CHECK: Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<%0>
; CHECK: Cost of 0 for VF 2: EMIT branch-on-count vp<%index.next>, vp<{{.+}}>
; CHECK: Cost for VF 2: 130 (Estimated cost per lane: 65.
; CHECK: Cost of 1 for VF 4: induction instruction %dec = add i32 %blkCnt.012, -1
; CHECK: Cost of 0 for VF 4: induction instruction %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
; CHECK: Cost of 0 for VF 4: induction instruction %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
; CHECK: Cost of 0 for VF 4: induction instruction %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
; CHECK: Cost of 0 for VF 4: induction instruction %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
; CHECK: Cost of 0 for VF 4: induction instruction %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
; CHECK: Cost of 0 for VF 4: induction instruction %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
; CHECK: Cost of 0 for VF 4: induction instruction %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
; CHECK: Cost of 1 for VF 4: exit condition instruction %cmp.not = icmp eq i32 %dec, 0
; CHECK: Cost of 0 for VF 4: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost of 0 for VF 4: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK: Cost of 0 for VF 4: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<[[STEPS1]]>
; CHECK: Cost of 0 for VF 4: vp<[[STEPS2:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK: Cost of 0 for VF 4: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<[[STEPS2]]>
; CHECK: Cost of 0 for VF 4: vp<[[STEPS3:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK: Cost of 0 for VF 4: EMIT vp<%next.gep>.2 = ptradd ir<%pSrcB>, vp<[[STEPS3]]>
; CHECK: Cost of 0 for VF 4: vp<[[VEC_PTR1:%.+]]> = vector-pointer vp<%next.gep>
; CHECK: Cost of 2 for VF 4: WIDEN ir<%0> = load vp<[[VEC_PTR1]]>
; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv1> = sext ir<%0> to i32
; CHECK: Cost of 0 for VF 4: vp<[[VEC_PTR2:%.+]]> = vector-pointer vp<%next.gep>.2
; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load vp<[[VEC_PTR2]]>
; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv3> = sext ir<%1> to i32
; CHECK: Cost of 2 for VF 4: WIDEN ir<%mul> = mul nsw ir<%conv3>, ir<%conv1>
; CHECK: Cost of 2 for VF 4: WIDEN ir<%shr> = ashr ir<%mul>, ir<7>
; CHECK: Cost of 0 for VF 4: WIDEN ir<%2> = icmp slt ir<%shr>, ir<127>
; CHECK: Cost of 2 for VF 4: WIDEN-SELECT ir<%spec.select.i> = select ir<%2>, ir<%shr>, ir<127>
; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv4> = trunc ir<%spec.select.i> to i8
; CHECK: Cost of 0 for VF 4: vp<[[VEC_PTR3:%.+]]> = vector-pointer vp<%next.gep>.1
; CHECK: Cost of 2 for VF 4: WIDEN store vp<[[VEC_PTR3]]>, ir<%conv4>
; CHECK: Cost of 0 for VF 4: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<%0>
; CHECK: Cost of 0 for VF 4: EMIT branch-on-count vp<%index.next>, vp<{{.+}}>
; CHECK: Cost for VF 4: 14 (Estimated cost per lane: 3.
; CHECK: Cost of 1 for VF 8: induction instruction %dec = add i32 %blkCnt.012, -1
; CHECK: Cost of 0 for VF 8: induction instruction %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
; CHECK: Cost of 0 for VF 8: induction instruction %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
; CHECK: Cost of 0 for VF 8: induction instruction %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
; CHECK: Cost of 0 for VF 8: induction instruction %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
; CHECK: Cost of 0 for VF 8: induction instruction %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
; CHECK: Cost of 0 for VF 8: induction instruction %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
; CHECK: Cost of 0 for VF 8: induction instruction %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
; CHECK: Cost of 1 for VF 8: exit condition instruction %cmp.not = icmp eq i32 %dec, 0
; CHECK: Cost of 0 for VF 8: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost of 0 for VF 8: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK: Cost of 0 for VF 8: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<[[STEPS1]]>
; CHECK: Cost of 0 for VF 8: vp<[[STEPS2:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK: Cost of 0 for VF 8: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<[[STEPS2]]>
; CHECK: Cost of 0 for VF 8: vp<[[STEPS3:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK: Cost of 0 for VF 8: EMIT vp<%next.gep>.2 = ptradd ir<%pSrcB>, vp<[[STEPS3]]>
; CHECK: Cost of 0 for VF 8: vp<[[VEC_PTR1:%.+]]> = vector-pointer vp<%next.gep>
; CHECK: Cost of 2 for VF 8: WIDEN ir<%0> = load vp<[[VEC_PTR1]]>
; CHECK: Cost of 2 for VF 8: WIDEN-CAST ir<%conv1> = sext ir<%0> to i32
; CHECK: Cost of 0 for VF 8: vp<[[VEC_PTR2:%.+]]> = vector-pointer vp<%next.gep>.2
; CHECK: Cost of 2 for VF 8: WIDEN ir<%1> = load vp<[[VEC_PTR2]]>
; CHECK: Cost of 2 for VF 8: WIDEN-CAST ir<%conv3> = sext ir<%1> to i32
; CHECK: Cost of 4 for VF 8: WIDEN ir<%mul> = mul nsw ir<%conv3>, ir<%conv1>
; CHECK: Cost of 4 for VF 8: WIDEN ir<%shr> = ashr ir<%mul>, ir<7>
; CHECK: Cost of 0 for VF 8: WIDEN ir<%2> = icmp slt ir<%shr>, ir<127>
; CHECK: Cost of 4 for VF 8: WIDEN-SELECT ir<%spec.select.i> = select ir<%2>, ir<%shr>, ir<127>
; CHECK: Cost of 2 for VF 8: WIDEN-CAST ir<%conv4> = trunc ir<%spec.select.i> to i8
; CHECK: Cost of 0 for VF 8: vp<[[VEC_PTR3:%.+]]> = vector-pointer vp<%next.gep>.1
; CHECK: Cost of 2 for VF 8: WIDEN store vp<[[VEC_PTR3]]>, ir<%conv4>
; CHECK: Cost of 0 for VF 8: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<{{.+}}>
; CHECK: Cost of 0 for VF 8: EMIT branch-on-count vp<%index.next>, vp<{{.+}}>
; CHECK: Cost for VF 8: 26 (Estimated cost per lane: 3.
; CHECK: Cost of 1 for VF 16: induction instruction %dec = add i32 %blkCnt.012, -1
; CHECK: Cost of 0 for VF 16: induction instruction %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
; CHECK: Cost of 0 for VF 16: induction instruction %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
; CHECK: Cost of 0 for VF 16: induction instruction %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
; CHECK: Cost of 0 for VF 16: induction instruction %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
; CHECK: Cost of 0 for VF 16: induction instruction %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
; CHECK: Cost of 0 for VF 16: induction instruction %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
; CHECK: Cost of 0 for VF 16: induction instruction %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
; CHECK: Cost of 1 for VF 16: exit condition instruction %cmp.not = icmp eq i32 %dec, 0
; CHECK: Cost of 0 for VF 16: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost of 0 for VF 16: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK: Cost of 0 for VF 16: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<[[STEPS1]]>
; CHECK: Cost of 0 for VF 16: vp<[[STEPS2:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK: Cost of 0 for VF 16: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<[[STEPS2]]>
; CHECK: Cost of 0 for VF 16: vp<[[STEPS3:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK: Cost of 0 for VF 16: EMIT vp<%next.gep>.2 = ptradd ir<%pSrcB>, vp<[[STEPS3]]>
; CHECK: Cost of 0 for VF 16: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep>
; CHECK: Cost of 2 for VF 16: WIDEN ir<%0> = load vp<[[VEC_PTR]]>
; CHECK: Cost of 6 for VF 16: WIDEN-CAST ir<%conv1> = sext ir<%0> to i32
; CHECK: Cost of 0 for VF 16: vp<[[VEC_PTR1:%.+]]> = vector-pointer vp<%next.gep>.2
; CHECK: Cost of 2 for VF 16: WIDEN ir<%1> = load vp<[[VEC_PTR1]]>
; CHECK: Cost of 6 for VF 16: WIDEN-CAST ir<%conv3> = sext ir<%1> to i32
; CHECK: Cost of 8 for VF 16: WIDEN ir<%mul> = mul nsw ir<%conv3>, ir<%conv1>
; CHECK: Cost of 8 for VF 16: WIDEN ir<%shr> = ashr ir<%mul>, ir<7>
; CHECK: Cost of 0 for VF 16: WIDEN ir<%2> = icmp slt ir<%shr>, ir<127>
; CHECK: Cost of 8 for VF 16: WIDEN-SELECT ir<%spec.select.i> = select ir<%2>, ir<%shr>, ir<127>
; CHECK: Cost of 6 for VF 16: WIDEN-CAST ir<%conv4> = trunc ir<%spec.select.i> to i8
; CHECK: Cost of 0 for VF 16: vp<[[VEC_PTR2:%.+]]> = vector-pointer vp<%next.gep>.1
; CHECK: Cost of 2 for VF 16: WIDEN store vp<[[VEC_PTR2]]>, ir<%conv4>
; CHECK: Cost of 0 for VF 16: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<{{.+}}>
; CHECK: Cost of 0 for VF 16: EMIT branch-on-count vp<%index.next>, vp<{{.+}}>
; CHECK: Cost for VF 16: 50
; CHECK: LV: Selecting VF: 16.
define void @cheap_icmp(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, ptr nocapture %pDst, i32 %blockSize) #0 {
entry:
  ; Nothing to do for an empty block.
  %cmp.not8 = icmp eq i32 %blockSize, 0
  br i1 %cmp.not8, label %while.end, label %while.body.preheader

while.body.preheader:                             ; preds = %entry
  br label %while.body

while.body:                                       ; preds = %while.body.preheader, %while.body
  %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
  %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
  %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
  %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
  %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
  %0 = load i8, ptr %pSrcA.addr.011, align 1
  %conv1 = sext i8 %0 to i32
  %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
  %1 = load i8, ptr %pSrcB.addr.09, align 1
  %conv3 = sext i8 %1 to i32
  %mul = mul nsw i32 %conv3, %conv1
  %shr = ashr i32 %mul, 7
  ; icmp + select together form a signed min(%shr, 127).
  %2 = icmp slt i32 %shr, 127
  %spec.select.i = select i1 %2, i32 %shr, i32 127
  %conv4 = trunc i32 %spec.select.i to i8
  %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
  store i8 %conv4, ptr %pDst.addr.010, align 1
  %dec = add i32 %blkCnt.012, -1
  %cmp.not = icmp eq i32 %dec, 0
  br i1 %cmp.not, label %while.end.loopexit, label %while.body

while.end.loopexit:                               ; preds = %while.body
  br label %while.end

while.end:                                        ; preds = %while.end.loopexit, %entry
  ret void
}

; floatcmp: for each of blockSize floats, replace values that compare
; ordered-less-than 0.0 with 10.0, convert to i32 with fptosi and store. Only
; the fcmp cost is pinned: 1 for the scalar loop, 12 at VF 2 and 24 at VF 4.

; CHECK-LABEL: LV: Checking a loop in 'floatcmp'
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %cmp1 = fcmp
; CHECK: Cost of 12 for VF 2: WIDEN ir<%cmp1> = fcmp olt ir<%0>, ir<0.000000e+00>
; CHECK: Cost of 24 for VF 4: WIDEN ir<%cmp1> = fcmp olt ir<%0>, ir<0.000000e+00>
define void @floatcmp(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) #0 {
entry:
  ; Nothing to do for an empty block.
  %cmp.not7 = icmp eq i32 %blockSize, 0
  br i1 %cmp.not7, label %while.end, label %while.body

while.body:                                       ; preds = %entry, %while.body
  %pSrc.addr.010 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrc, %entry ]
  %blockSize.addr.09 = phi i32 [ %dec, %while.body ], [ %blockSize, %entry ]
  %pDst.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %pDst, %entry ]
  %0 = load float, ptr %pSrc.addr.010, align 4
  ; The floating-point compare whose cost is under test.
  %cmp1 = fcmp nnan ninf nsz olt float %0, 0.000000e+00
  %cond = select nnan ninf nsz i1 %cmp1, float 1.000000e+01, float %0
  %conv = fptosi float %cond to i32
  %incdec.ptr = getelementptr inbounds i32, ptr %pDst.addr.08, i32 1
  store i32 %conv, ptr %pDst.addr.08, align 4
  %incdec.ptr2 = getelementptr inbounds float, ptr %pSrc.addr.010, i32 1
  %dec = add i32 %blockSize.addr.09, -1
  %cmp.not = icmp eq i32 %dec, 0
  br i1 %cmp.not, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %entry
  ret void
}

attributes #0 = { "target-features"="+mve" }