; Regression test for the loop vectorizer's register-usage estimation and the
; resulting VF/UF selection on PowerPC. Each function below holds one loop;
; the expectations match the vectorizer's debug output (the chosen plan, or the
; per-register-class usage report) for pwr8 and pwr9.
;
; The pipeline is run twice, once per CPU. Debug output goes to stderr, so it
; is merged into stdout (2>&1) before being piped to FileCheck.
; RUN: opt < %s -debug-only=loop-vectorize -passes='function(loop-vectorize),default<O2>' -vectorizer-maximize-bandwidth -mtriple=powerpc64-unknown-linux -S -mcpu=pwr8 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8
; RUN: opt < %s -debug-only=loop-vectorize -passes='function(loop-vectorize),default<O2>' -vectorizer-maximize-bandwidth -mtriple=powerpc64le-unknown-linux -S -mcpu=pwr9 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9
; The -debug-only output is only produced by builds with assertions enabled.
; REQUIRES: asserts

@a = global [1024 x i8] zeroinitializer, align 16
@b = global [1024 x i8] zeroinitializer, align 16

; Sum of absolute differences: accumulates |a[i] - b[i]| into an i32 over the
; 1024 bytes of @a and @b. The absolute value is spelled as a compare against
; -1 plus a select between %sub and its negation. The expected plan differs
; between the two CPUs.
define i32 @foo() {
; CHECK-LABEL: foo

; CHECK-PWR8: Executing best plan with VF=16, UF=4

; CHECK-PWR9: Executing best plan with VF=8, UF=8


entry:
  br label %for.body

for.cond.cleanup:
  %add.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %add.lcssa

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %indvars.iv
  %0 = load i8, ptr %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %arrayidx2 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %indvars.iv
  %1 = load i8, ptr %arrayidx2, align 1
  %conv3 = zext i8 %1 to i32
  %sub = sub nsw i32 %conv, %conv3
  %ispos = icmp sgt i32 %sub, -1
  %neg = sub nsw i32 0, %sub
  %2 = select i1 %ispos, i32 %sub, i32 %neg
  %add = add nsw i32 %2, %s.015
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Same reduction as @foo, except that the array indices are derived from the
; induction variable through an extra add (%indvars.iv + 3 for @a and
; %indvars.iv + 2 for @b).
define i32 @goo() {
; When indvars.iv is used in a computation chain that only feeds getelementptr
; or cmp, it will not have a vector version, so the vector register usage will
; not exceed the number of available vector registers.

; CHECK-LABEL: goo

; CHECK: Executing best plan with VF=16, UF=4

entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  %add.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %add.lcssa

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %tmp1 = add nsw i64 %indvars.iv, 3
  %arrayidx = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %tmp1
  %tmp = load i8, ptr %arrayidx, align 1
  %conv = zext i8 %tmp to i32
  %tmp2 = add nsw i64 %indvars.iv, 2
  %arrayidx2 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %tmp2
  %tmp3 = load i8, ptr %arrayidx2, align 1
  %conv3 = zext i8 %tmp3 to i32
  %sub = sub nsw i32 %conv, %conv3
  %ispos = icmp sgt i32 %sub, -1
  %neg = sub nsw i32 0, %sub
  %tmp4 = select i1 %ispos, i32 %sub, i32 %neg
  %add = add nsw i32 %tmp4, %s.015
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; For each of 1024 i64 elements: a[i] += i, with the updated values also summed
; into the returned accumulator. Here the induction variable is used as a data
; operand (%add), not only for addressing.
define i64 @bar(ptr nocapture %a) {
; CHECK-LABEL: bar

; CHECK: Executing best plan with VF=2, UF=8

entry:
  br label %for.body

for.cond.cleanup:
  %add2.lcssa = phi i64 [ %add2, %for.body ]
  ret i64 %add2.lcssa

for.body:
  %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %i.012
  %0 = load i64, ptr %arrayidx, align 8
  %add = add nsw i64 %0, %i.012
  store i64 %add, ptr %arrayidx, align 8
  %add2 = add nsw i64 %add, %s.011
  %inc = add nuw nsw i64 %i.012, 1
  %exitcond = icmp eq i64 %inc, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

@d = external global [0 x i64], align 8
@e = external global [0 x i32], align 4
@c = external global [0 x i32], align 4

; Indirect copy over 10000 iterations: c[i] = e[d[i]]. The index into @e is a
; value loaded from @d, so that load's address is not a function of the
; induction variable alone. The expected plan stays scalar (VF=1) and is only
; interleaved.
; NOTE(review): %n is not referenced anywhere in the body; the trip count is
; the constant 10000.
define void @hoo(i32 %n) {
; CHECK-LABEL: hoo
; CHECK: Executing best plan with VF=1, UF=8

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds [0 x i64], ptr @d, i64 0, i64 %indvars.iv
  %tmp = load i64, ptr %arrayidx, align 8
  %arrayidx1 = getelementptr inbounds [0 x i32], ptr @e, i64 0, i64 %tmp
  %tmp1 = load i32, ptr %arrayidx1, align 4
  %arrayidx3 = getelementptr inbounds [0 x i32], ptr @c, i64 0, i64 %indvars.iv
  store i32 %tmp1, ptr %arrayidx3, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Fast-math float reduction: s += a[i] + b[i], with i advancing in steps of 32
; while it stays below sext(%n). The loop is skipped entirely when %n <= 0.
; The expectations pin the register-usage report for VF = 1.
define float @float_(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) {
;CHECK-LABEL: float_
;CHECK: LV(REG): VF = 1
;CHECK: LV(REG): Found max usage: 2 item
;CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 3 registers
;CHECK: LV(REG): Found invariant usage: 1 item
;CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers

entry:
  %cmp = icmp sgt i32 %n, 0
  br i1 %cmp, label %preheader, label %for.end

preheader:
  %t0 = sext i32 %n to i64
  br label %for

for:
  %indvars.iv = phi i64 [ 0, %preheader ], [ %indvars.iv.next, %for ]
  %s.02 = phi float [ 0.0, %preheader ], [ %add4, %for ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
  %t1 = load float, ptr %arrayidx, align 4
  %arrayidx3 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
  %t2 = load float, ptr %arrayidx3, align 4
  %add = fadd fast float %t1, %s.02
  %add4 = fadd fast float %add, %t2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 32
  %cmp1 = icmp slt i64 %indvars.iv.next, %t0
  br i1 %cmp1, label %for, label %loopexit

loopexit:
  %add4.lcssa = phi float [ %add4, %for ]
  br label %for.end

for.end:
  %s.0.lcssa = phi float [ 0.0, %entry ], [ %add4.lcssa, %loopexit ]
  ret float %s.0.lcssa
}


; In-place update of A[i] through a long chain of double-precision arithmetic
; in which several intermediate values (%4, %5, %6, %8, %13) are reused later
; in the chain. The index starts at sext(%n) and is decremented by 1; the loop
; exits once the truncated pre-decrement index equals 0. The reported VF
; differs per CPU, so the expectations use the CPU-specific prefixes.
; Blocks and values are unnamed, so the implicit numbering (%0 is the entry
; block) must stay sequential.
define void @double_(ptr nocapture %A, i32 %n) nounwind uwtable ssp {
;CHECK-LABEL: double_
;CHECK-PWR8: LV(REG): VF = 2
;CHECK-PWR8: LV(REG): Found max usage: 2 item
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers
;CHECK-PWR8: LV(REG): Found invariant usage: 1 item
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers

;CHECK-PWR9: LV(REG): VF = 1
;CHECK-PWR9: LV(REG): Found max usage: 2 item
;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers
;CHECK-PWR9: LV(REG): Found invariant usage: 1 item
;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers

  %1 = sext i32 %n to i64
  br label %2

; <label>:2                                       ; preds = %2, %0
  %indvars.iv = phi i64 [ %indvars.iv.next, %2 ], [ %1, %0 ]
  %3 = getelementptr inbounds double, ptr %A, i64 %indvars.iv
  %4 = load double, ptr %3, align 8
  %5 = fadd double %4, 3.000000e+00
  %6 = fmul double %4, 2.000000e+00
  %7 = fadd double %5, %6
  %8 = fadd double %7, 2.000000e+00
  %9 = fmul double %8, 5.000000e-01
  %10 = fadd double %6, %9
  %11 = fsub double %10, %5
  %12 = fadd double %4, %11
  %13 = fdiv double %8, %12
  %14 = fmul double %13, %8
  %15 = fmul double %6, %14
  %16 = fmul double %5, %15
  %17 = fadd double %16, -3.000000e+00
  %18 = fsub double %4, %5
  %19 = fadd double %6, %18
  %20 = fadd double %13, %19
  %21 = fadd double %20, %17
  %22 = fadd double %21, 3.000000e+00
  %23 = fmul double %4, %22
  store double %23, ptr %3, align 8
  %indvars.iv.next = add i64 %indvars.iv, -1
  %24 = trunc i64 %indvars.iv to i32
  %25 = icmp eq i32 %24, 0
  br i1 %25, label %26, label %2

; <label>:26                                      ; preds = %2
  ret void
}

; ppc_fp128 reduction: starting from %d, subtracts each of the 2048 elements
; n[i] from the running value and returns the result. The expectations report
; the ppc_fp128 values under the PPC::VRRC register class.
define ppc_fp128 @fp128_(ptr nocapture %n, ppc_fp128 %d) nounwind readonly {
;CHECK-LABEL: fp128_
;CHECK: LV(REG): VF = 1
;CHECK: LV(REG): Found max usage: 2 item
;CHECK: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK: LV(REG): RegisterClass: PPC::VRRC, 2 registers
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %x.05 = phi ppc_fp128 [ %d, %entry ], [ %sub, %for.body ]
  %arrayidx = getelementptr inbounds ppc_fp128, ptr %n, i32 %i.06
  %0 = load ppc_fp128, ptr %arrayidx, align 8
  %sub = fsub fast ppc_fp128 %x.05, %0
  %inc = add nsw i32 %i.06, 1
  %exitcond = icmp eq i32 %inc, 2048
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret ppc_fp128 %sub
}


; Half-precision scaling: the scale factor is the low 16 bits of %scale.coerce
; reinterpreted as a half. Each iteration multiplies two consecutive halves
; from %pIn by it, stores them to %pOut, and advances both pointers by two
; elements. The trip count is (%numCols * %numRows) >> 2, and the loop is
; skipped when that is 0. The loop is driven by pointer phis and a down-counter
; instead of an index.
; NOTE(review): attribute group #0 has no definition in the visible part of
; this file -- confirm whether one is expected.
define void @fp16_(ptr nocapture readonly %pIn, ptr nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 {
;CHECK-LABEL: fp16_
;CHECK: LV(REG): VF = 1
;CHECK: LV(REG): Found max usage: 2 item
;CHECK: LV(REG): RegisterClass: PPC::GPRRC, 4 registers
;CHECK: LV(REG): RegisterClass: PPC::VSXRC, 2 registers
entry:
  %tmp.0.extract.trunc = trunc i32 %scale.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %mul = mul i32 %numCols, %numRows
  %shr = lshr i32 %mul, 2
  %cmp26 = icmp eq i32 %shr, 0
  br i1 %cmp26, label %while.end, label %while.body

while.body:                                       ; preds = %entry, %while.body
  %pIn.addr.029 = phi ptr [ %add.ptr, %while.body ], [ %pIn, %entry ]
  %pOut.addr.028 = phi ptr [ %add.ptr7, %while.body ], [ %pOut, %entry ]
  %blkCnt.027 = phi i32 [ %dec, %while.body ], [ %shr, %entry ]
  %1 = load half, ptr %pIn.addr.029, align 2
  %arrayidx2 = getelementptr inbounds half, ptr %pIn.addr.029, i32 1
  %2 = load half, ptr %arrayidx2, align 2
  %mul3 = fmul half %1, %0
  %mul4 = fmul half %2, %0
  store half %mul3, ptr %pOut.addr.028, align 2
  %arrayidx6 = getelementptr inbounds half, ptr %pOut.addr.028, i32 1
  store half %mul4, ptr %arrayidx6, align 2
  %add.ptr = getelementptr inbounds half, ptr %pIn.addr.029, i32 2
  %add.ptr7 = getelementptr inbounds half, ptr %pOut.addr.028, i32 2
  %dec = add nsw i32 %blkCnt.027, -1
  %cmp = icmp eq i32 %dec, 0
  br i1 %cmp, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %entry
  ret void
}