; RUN: opt -S -passes=loop-vectorize,instcombine -force-vector-interleave=1 -force-vector-width=4 -force-target-supports-scalable-vectors=true -scalable-vectorization=on < %s | FileCheck %s --check-prefix=CHECKUF1
; RUN: opt -S -passes=loop-vectorize,instcombine -force-vector-interleave=2 -force-vector-width=4 -force-target-supports-scalable-vectors=true -scalable-vectorization=on < %s | FileCheck %s --check-prefix=CHECKUF2

; Checks the IR produced by loop-vectorize followed by instcombine for a simple
; "a[i] = b[i] + 1.0" loop over doubles when scalable vectorization is forced
; with a vector width of 4, once with an interleave factor of 1 and once with
; an interleave factor of 2.
;
; The loop-metadata id on the vector loop back-edge is matched with a pattern
; rather than a literal number, so unrelated metadata renumbering does not
; break the test.

; With an interleave factor of 1, one vector iteration covers vscale * 4
; elements, so vscale is shifted left by 2 wherever the step is computed.

; CHECKUF1: for.body.preheader:
; CHECKUF1-DAG: %wide.trip.count = zext nneg i32 %N to i64
; CHECKUF1-DAG: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF1-DAG: %[[VSCALEX4:.*]] = shl i64 %[[VSCALE]], 2
; CHECKUF1-DAG: %min.iters.check = icmp ugt i64 %[[VSCALEX4]], %wide.trip.count

; CHECKUF1: vector.ph:
; CHECKUF1-DAG: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF1-DAG: %[[VSCALEX4:.*]] = shl i64 %[[VSCALE]], 2
; CHECKUF1-DAG: %n.mod.vf = urem i64 %wide.trip.count, %[[VSCALEX4]]
; CHECKUF1: %n.vec = sub nsw i64 %wide.trip.count, %n.mod.vf
; CHECKUF1: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF1: %[[VSCALEX4:.*]] = shl i64 %[[VSCALE]], 2

; CHECKUF1: vector.body:
; CHECKUF1: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECKUF1: %[[IDXB:.*]] = getelementptr inbounds double, ptr %b, i64 %index
; CHECKUF1: %wide.load = load <vscale x 4 x double>, ptr %[[IDXB]], align 8
; CHECKUF1: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, splat (double 1.000000e+00)
; CHECKUF1: %[[IDXA:.*]] = getelementptr inbounds double, ptr %a, i64 %index
; CHECKUF1: store <vscale x 4 x double> %[[FADD]], ptr %[[IDXA]], align 8
; CHECKUF1: %index.next = add nuw i64 %index, %[[VSCALEX4]]
; CHECKUF1: %[[CMP:.*]] = icmp eq i64 %index.next, %n.vec
; CHECKUF1: br i1 %[[CMP]], label %middle.block, label %vector.body, !llvm.loop !{{[0-9]+}}


; For an interleave factor of 2, vscale is scaled by 8 instead of 4 (and thus
; shifted left by 3 instead of 2).
; Each vector iteration also accesses a second interleaved part. Instead of
; addressing IDXB directly, that part is addressed at IDXB + vscale * 4
; doubles; the checks expect this as an i8 getelementptr whose byte offset is
; vscale * 32 (vscale shifted left by 5). The same applies to IDXA.

; CHECKUF2: for.body.preheader:
; CHECKUF2-DAG: %wide.trip.count = zext nneg i32 %N to i64
; CHECKUF2-DAG: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2-DAG: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3
; CHECKUF2-DAG: %min.iters.check = icmp ugt i64 %[[VSCALEX8]], %wide.trip.count

; CHECKUF2: vector.ph:
; CHECKUF2-DAG: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2-DAG: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3
; CHECKUF2-DAG: %n.mod.vf = urem i64 %wide.trip.count, %[[VSCALEX8]]
; CHECKUF2: %n.vec = sub nsw i64 %wide.trip.count, %n.mod.vf
; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3

; CHECKUF2: vector.body:
; CHECKUF2: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECKUF2: %[[IDXB:.*]] = getelementptr inbounds double, ptr %b, i64 %index
; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2: %[[VSCALE2:.*]] = shl i64 %[[VSCALE]], 5
; CHECKUF2: %[[IDXB_NEXT:.*]] = getelementptr inbounds i8, ptr %[[IDXB]], i64 %[[VSCALE2]]
; CHECKUF2: %wide.load = load <vscale x 4 x double>, ptr %[[IDXB]], align 8
; CHECKUF2: %wide.load{{[0-9]+}} = load <vscale x 4 x double>, ptr %[[IDXB_NEXT]], align 8
; CHECKUF2: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, splat (double 1.000000e+00)
; CHECKUF2: %[[FADD_NEXT:.*]] = fadd <vscale x 4 x double> %wide.load{{[0-9]+}}, splat (double 1.000000e+00)
; CHECKUF2: %[[IDXA:.*]] = getelementptr inbounds double, ptr %a, i64 %index
; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2: %[[VSCALE2:.*]] = shl i64 %[[VSCALE]], 5
; CHECKUF2: %[[IDXA_NEXT:.*]] = getelementptr inbounds i8, ptr %[[IDXA]], i64 %[[VSCALE2]]
; CHECKUF2: store <vscale x 4 x double> %[[FADD]], ptr %[[IDXA]], align 8
; CHECKUF2: store <vscale x 4 x double> %[[FADD_NEXT]], ptr %[[IDXA_NEXT]], align 8
; CHECKUF2: %index.next = add nuw i64 %index, %[[VSCALEX8]]
; CHECKUF2: %[[CMP:.*]] = icmp eq i64 %index.next, %n.vec
; CHECKUF2: br i1 %[[CMP]], label %middle.block, label %vector.body, !llvm.loop !{{[0-9]+}}

; Scalar input loop: for i in [0, N), a[i] = b[i] + 1.0 on doubles.
; The names %N, %a, %b, %wide.trip.count and for.body.preheader are matched
; literally by the check lines above and must not be renamed.
define void @loop(i32 %N, ptr nocapture %a, ptr nocapture readonly %b) {
entry:
  ; Skip the loop entirely unless N is strictly positive.
  %cmp7 = icmp sgt i32 %N, 0
  br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  ; Trip count widened to i64; only reached when N > 0.
  %wide.trip.count = zext i32 %N to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  ; Load b[i], add 1.0, and store the result to a[i].
  %arrayidx = getelementptr inbounds double, ptr %b, i64 %indvars.iv
  %0 = load double, ptr %arrayidx, align 8
  %add = fadd double %0, 1.000000e+00
  %arrayidx2 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
  store double %add, ptr %arrayidx2, align 8
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
  ; The back-edge carries loop metadata !1, which sets the
  ; llvm.loop.vectorize.scalable.enable hint to true.
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
}

!1 = distinct !{!1, !2}
!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}