; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=CHECK

; Exercise tail folding on RISCV w/scalable vectors.

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"

define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-LABEL: @vector_add(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 2
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025)
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
; CHECK-NEXT:    [[TMP10:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP10]], ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
; CHECK-NEXT:    store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  %elem = load i64, ptr %arrayidx
  %add = add i64 %elem, %v
  store i64 %add, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1025
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}


; a[b[i]] = v, exercise scatter support
define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; CHECK-LABEL: @indexed_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 2
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025)
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_MASKED_LOAD]]
; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[TMP10]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; CHECK-NEXT:    [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; CHECK-NEXT:    [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; CHECK-NEXT:    store i64 [[V]], ptr [[AADDR]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  store i64 %v, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1025
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; sum += a[b[i]], exercise gather support
define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; CHECK-LABEL: @indexed_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 2
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025)
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_MASKED_LOAD]]
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP10]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
; CHECK-NEXT:    [[TMP11]] = add <vscale x 2 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
; CHECK-NEXT:    [[TMP12:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP12]])
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; CHECK-NEXT:    [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; CHECK-NEXT:    [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; CHECK-NEXT:    [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i64 [[SUM_NEXT_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum = phi i64 [0, %entry], [%sum.next, %for.body]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  %elem = load i64, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %sum.next = add i64 %sum, %elem
  %exitcond.not = icmp eq i64 %iv.next, 1025
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %sum.next
}

; a[i] = v, exercise storing a splatted loop-invariant value
define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-LABEL: @splat_int(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 2
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025)
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
; CHECK-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1025
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; *b = v and a[i] = v, exercise a store to a loop-invariant address
define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; CHECK-LABEL: @uniform_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 2
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025)
; CHECK-NEXT:    store i64 [[V]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
; CHECK-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    store i64 [[V]], ptr [[B]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  store i64 %v, ptr %b, align 8
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1025
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; a[i] = *b, exercise a load from a loop-invariant address whose value is also live out of the loop
define i64 @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) {
; CHECK-LABEL: @uniform_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[V:%.*]] = load i64, ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i64 [[V_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %v = load i64, ptr %b, align 8
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1025
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %v
}


; Same as @vector_add, but with a trip count of 1024 instead of 1025
define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-LABEL: @vector_add_trip1024(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 2
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1024)
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
; CHECK-NEXT:    [[TMP10:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP10]], ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
; CHECK-NEXT:    store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  %elem = load i64, ptr %arrayidx
  %add = add i64 %elem, %v
  store i64 %add, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}