; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize -S -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

define void @inv_store_i16(ptr noalias %dst, ptr noalias readonly %src, i64 %N) #0 {
; CHECK-LABEL: @inv_store_i16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[SRC:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP8]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], 4
; CHECK-NEXT:    [[TMP11:%.*]] = sub i32 [[TMP10]], 1
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <vscale x 4 x i16> [[WIDE_LOAD]], i32 [[TMP11]]
; CHECK-NEXT:    store i16 [[TMP12]], ptr [[DST:%.*]], align 2
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_INC24:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY14:%.*]]
; CHECK:       for.body14:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY14]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[LD:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    store i16 [[LD]], ptr [[DST]], align 2
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_INC24]], label [[FOR_BODY14]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       for.inc24:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body14

for.body14:                                       ; preds = %for.body14, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body14 ]
  %arrayidx = getelementptr inbounds i16, ptr %src, i64 %indvars.iv
  %ld = load i16, ptr %arrayidx
  store i16 %ld, ptr %dst, align 2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
  br i1 %exitcond.not, label %for.inc24, label %for.body14, !llvm.loop !0

for.inc24:                                        ; preds = %for.body14
  ret void
}


define void @cond_inv_store_i32(ptr noalias %dst, ptr noalias readonly %src, i64 %N) #0 {
; CHECK-LABEL: @cond_inv_store_i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP9]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_09:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[I_09]]
; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP11]], 0
; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[TMP11]], ptr [[DST]], align 4
; CHECK-NEXT:    br label [[FOR_INC]]
; CHECK:       for.inc:
; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_09]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.inc
  %i.09 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %src, i64 %i.09
  %0 = load i32, ptr %arrayidx, align 4
  %cmp1 = icmp sgt i32 %0, 0
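  ; The guarded store in %if.then below is the pattern the assertions above
  ; expect to become a masked scatter: this sgt compare supplies the lane mask.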
  br i1 %cmp1, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  store i32 %0, ptr %dst, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then
  %inc = add nuw nsw i64 %i.09, 1
  %exitcond.not = icmp eq i64 %inc, %N
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:                                          ; preds = %for.inc
  ret void
}

attributes #0 = { "target-features"="+neon,+sve" vscale_range(1, 16) }

!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!4 = !{!"llvm.loop.vectorize.enable", i1 true}
!5 = !{!"llvm.loop.interleave.count", i32 1}
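; The !llvm.loop metadata above requests scalable vectorization for both loops
; (width 4, scalable enabled, interleave count 1), which is why the assertions
; check for <vscale x 4 x ...> types rather than fixed-width vectors.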