; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; REQUIRES: asserts
; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize < %s 2>%t | FileCheck %s
; RUN: cat %t | FileCheck %s --check-prefix=VPLANS

; These tests ensure that tail-folding is enabled when the predicate.enable
; loop attribute is set to true.
;
; The first invocation verifies the vectorized IR printed on stdout. Its
; stderr (the loop-vectorize debug trace) is captured in %t, and the second
; invocation verifies the VPlan dump found there.

target triple = "aarch64-unknown-linux-gnu"

; Expected VPlan for the loop below: the store is widened under an
; active-lane-mask phi, and the latch branch tests the negated next mask.
; VPLANS-LABEL: Checking a loop in 'simple_memset'
; VPLANS:      VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
; VPLANS-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
; VPLANS:      vp<[[TC:%[0-9]+]]> = original trip-count
; VPLANS-EMPTY:
; VPLANS-NEXT: ir-bb<entry>:
; VPLANS-NEXT:  EMIT vp<[[TC]]> = EXPAND SCEV (1 umax %n)
; VPLANS-NEXT: Successor(s): vector.ph
; VPLANS-EMPTY:
; VPLANS-NEXT: vector.ph:
; VPLANS-NEXT:   EMIT vp<[[NEWTC:%[0-9]+]]> = TC > VF ? TC - VF : 0 vp<[[TC]]>
; VPLANS-NEXT:   EMIT vp<[[VF:%.+]]> = VF * Part + ir<0>
; VPLANS-NEXT:   EMIT vp<[[LANEMASK_ENTRY:%.+]]> = active lane mask vp<[[VF]]>, vp<[[TC]]>
; VPLANS-NEXT: Successor(s): vector loop
; VPLANS-EMPTY:
; VPLANS-NEXT: <x1> vector loop: {
; VPLANS-NEXT:   vector.body:
; VPLANS-NEXT:     EMIT vp<[[INDV:%[0-9]+]]> = CANONICAL-INDUCTION
; VPLANS-NEXT:     ACTIVE-LANE-MASK-PHI vp<[[LANEMASK_PHI:%[0-9]+]]> = phi vp<[[LANEMASK_ENTRY]]>, vp<[[LANEMASK_LOOP:%.+]]>
; VPLANS-NEXT:     vp<[[STEP:%[0-9]+]]> = SCALAR-STEPS vp<[[INDV]]>, ir<1>
; VPLANS-NEXT:     CLONE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEP]]>
; VPLANS-NEXT:     vp<[[VEC_PTR:%[0-9]+]]> = vector-pointer ir<%gep>
; VPLANS-NEXT:     WIDEN store vp<[[VEC_PTR]]>, ir<%val>, vp<[[LANEMASK_PHI]]>
; VPLANS-NEXT:     EMIT vp<[[INDV_UPDATE:%.+]]> = add vp<[[INDV]]>, vp<[[VFxUF]]>
; VPLANS-NEXT:     EMIT vp<[[INC:%[0-9]+]]> = VF * Part + vp<[[INDV]]>
; VPLANS-NEXT:     EMIT vp<[[LANEMASK_LOOP]]> = active lane mask vp<[[INC]]>, vp<[[NEWTC]]>
; VPLANS-NEXT:     EMIT vp<[[NOT:%[0-9]+]]> = not vp<[[LANEMASK_LOOP]]>
; VPLANS-NEXT:     EMIT branch-on-cond vp<[[NOT]]>
; VPLANS-NEXT:   No successors
; VPLANS-NEXT: }

; Stores %val to %ptr[%index] for %index = 0, 1, 2, ... and keeps iterating
; while %index + 1 is unsigned-less-than %n. The store happens before the
; compare, so the body runs at least once even when %n is 0.
define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
; CHECK-LABEL: @simple_memset(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]]
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP13]], 4
; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 4
; CHECK-NEXT:    [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]]
; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
; CHECK-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]])
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX1]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0
; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT:    [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]]
; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]])
; CHECK-NEXT:    [[TMP15:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <vscale x 4 x i1> [[TMP15]], i32 0
; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
; CHECK:       while.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]]
; CHECK-NEXT:    store i32 [[VAL]], ptr [[GEP]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1
; CHECK-NEXT:    [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       while.end.loopexit:
; CHECK-NEXT:    ret void
;
entry:
  br label %while.body

while.body:                                       ; preds = %while.body, %entry
  %index = phi i64 [ %index.next, %while.body ], [ 0, %entry ]
  %gep = getelementptr i32, ptr %ptr, i64 %index
  store i32 %val, ptr %gep
  %index.next = add nsw i64 %index, 1
  %cmp10 = icmp ult i64 %index.next, %n
  ; The !llvm.loop metadata on the latch is what requests predication.
  br i1 %cmp10, label %while.body, label %while.end.loopexit, !llvm.loop !0

while.end.loopexit:                               ; preds = %while.body
  ret void
}


; Enable SVE for the function.
attributes #0 = { "target-features"="+sve" }

; Loop metadata: !0 is the self-referential loop ID, !1 sets
; llvm.loop.vectorize.predicate.enable to true.
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}