; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize -riscv-v-vector-bits-min=128 -scalable-vectorization=on -force-target-instruction-cost=1 -S < %s | FileCheck %s
;
; Tests LoopVectorize decisions on riscv64 (+v) for loops with small constant
; trip counts (1, 3, 5, 8, 16, 24, 32).  Every loop has the same body,
; dst[i] += 2 * src[i] on i8 elements, so only the trip count varies.
; NOTE(review): do not hand-edit the autogenerated assertion lines below;
; regenerate them with utils/update_test_checks.py after any change.

target triple = "riscv64"

; Trip count 1: not profitable to vectorize; the scalar loop is kept as-is.
define void @trip1_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
; CHECK-LABEL: @trip1_i8(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[I_08]]
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP0]], 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[I_08]]
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP1]]
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 1
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %mul = shl i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08
  %1 = load i8, ptr %arrayidx1, align 1
  %add = add i8 %mul, %1
  store i8 %add, ptr %arrayidx1, align 1
  %inc = add nuw nsw i64 %i.08, 1
  %exitcond.not = icmp eq i64 %inc, 1
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Trip count 3: vectorized with a scalable VF (vscale x 2) plus an active lane
; mask covering the tail, so the vector body runs exactly once.
; NOTE(review): the autogenerated pattern captures [[DST]]/[[DST1]] in this
; function (and in @trip5_i8/@trip8_i8) are positional, not semantic — [[DST]]
; actually binds the pointer the IR names %src.  Harmless, but regenerating the
; assertions would be the clean fix; verify against fresh tool output.
define void @trip3_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
; CHECK-LABEL: @trip3_i8(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 3, [[TMP2]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 3)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP9]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison)
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 2 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 0
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP12]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison)
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 2 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
; CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP15]], 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1]], i64 [[I_08]]
; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]]
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 3
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %mul = shl i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08
  %1 = load i8, ptr %arrayidx1, align 1
  %add = add i8 %mul, %1
  store i8 %add, ptr %arrayidx1, align 1
  %inc = add nuw nsw i64 %i.08, 1
  %exitcond.not = icmp eq i64 %inc, 3
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Trip count 5: vectorized with a wider scalable VF (vscale x 4) and an active
; lane mask; again a single masked vector iteration.
define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
; CHECK-LABEL: @trip5_i8(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 5, [[TMP2]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 5)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP9]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 0
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP15]], 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1]], i64 [[I_08]]
; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]]
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 5
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %mul = shl i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08
  %1 = load i8, ptr %arrayidx1, align 1
  %add = add i8 %mul, %1
  store i8 %add, ptr %arrayidx1, align 1
  %inc = add nuw nsw i64 %i.08, 1
  %exitcond.not = icmp eq i64 %inc, 5
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Trip count 8: same shape as @trip5_i8 — scalable vscale x 4 with an active
; lane mask and a single masked vector iteration.
define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
; CHECK-LABEL: @trip8_i8(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP2]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 8)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP9]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 0
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP15]], 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1]], i64 [[I_08]]
; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]]
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 8
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %mul = shl i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08
  %1 = load i8, ptr %arrayidx1, align 1
  %add = add i8 %mul, %1
  store i8 %add, ptr %arrayidx1, align 1
  %inc = add nuw nsw i64 %i.08, 1
  %exitcond.not = icmp eq i64 %inc, 8
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Trip count 16: fits one fixed-width <16 x i8> vector exactly (min VLEN 128),
; so the vector body is a single unmasked, unpredicated iteration.
define void @trip16_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
; CHECK-LABEL: @trip16_i8(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i8> [[WIDE_LOAD]], splat (i8 1)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[TMP2]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP6]], align 1
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]]
; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP7]], 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP8]]
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %mul = shl i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08
  %1 = load i8, ptr %arrayidx1, align 1
  %add = add i8 %mul, %1
  store i8 %add, ptr %arrayidx1, align 1
  %inc = add nuw nsw i64 %i.08, 1
  %exitcond.not = icmp eq i64 %inc, 16
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}


; Trip count 32: a single unmasked <32 x i8> iteration.
define void @trip32_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
; CHECK-LABEL: @trip32_i8(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = shl <32 x i8> [[WIDE_LOAD]], splat (i8 1)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = add <32 x i8> [[TMP2]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
; CHECK-NEXT: store <32 x i8> [[TMP5]], ptr [[TMP6]], align 1
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]]
; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP7]], 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP8]]
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 32
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %mul = shl i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08
  %1 = load i8, ptr %arrayidx1, align 1
  %add = add i8 %mul, %1
  store i8 %add, ptr %arrayidx1, align 1
  %inc = add nuw nsw i64 %i.08, 1
  %exitcond.not = icmp eq i64 %inc, 32
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Trip count 24: the only case with a genuine counted vector loop — <8 x i8>
; with an induction phi, advancing by 8 per iteration until 24.
define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
; CHECK-LABEL: @trip24_i8(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i8> [[WIDE_LOAD]], splat (i8 1)
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = add <8 x i8> [[TMP3]], [[WIDE_LOAD1]]
; CHECK-NEXT: store <8 x i8> [[TMP6]], ptr [[TMP5]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]]
; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP8]], 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP9]]
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 24
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %mul = shl i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08
  %1 = load i8, ptr %arrayidx1, align 1
  %add = add i8 %mul, %1
  store i8 %add, ptr %arrayidx1, align 1
  %inc = add nuw nsw i64 %i.08, 1
  %exitcond.not = icmp eq i64 %inc, 24
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; +v enables the RVV vector extension; vscale_range(2, 1024) bounds vscale so
; scalable vectorization is legal (min vscale 2 matches the RUN line's
; -riscv-v-vector-bits-min=128 — TODO confirm against target docs).
attributes #0 = { "target-features"="+v,+d" vscale_range(2, 1024) }