; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S < %s -passes=loop-vectorize -force-vector-interleave=1 2>&1 | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

define void @add_a(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
; CHECK-LABEL: define void @add_a
; CHECK-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i32 [[LEN:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[LEN]], 0
; CHECK-NEXT: br i1 [[CMP8]], label [[ITER_CHECK:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: iter.check:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i8> [[WIDE_LOAD]], splat (i8 2)
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4
; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]]
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX4]], 0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i8> [[WIDE_LOAD5]], splat (i8 2)
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0
; CHECK-NEXT: store <4 x i8> [[TMP11]], ptr [[TMP13]], align 1
; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP15]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2
; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 [[CONV1]], ptr [[ARRAYIDX3]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
;
entry:
  %cmp8 = icmp sgt i32 %len, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.body, %entry
  ret void

for.body: ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
  %0 = load i8, ptr %arrayidx
  %conv = zext i8 %0 to i32
  %add = add nuw nsw i32 %conv, 2
  %conv1 = trunc i32 %add to i8
  %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
  store i8 %conv1, ptr %arrayidx3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Ensure that we preserve nuw/nsw if we're not shrinking the values we're
; working with.
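; Unlike @add_a above, the add in @add_a1 is already an i8 operation in the
; input IR, so no narrowing is needed and the nuw/nsw flags carry over onto
; the <16 x i8> and <4 x i8> vector adds checked below.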
define void @add_a1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
; CHECK-LABEL: define void @add_a1
; CHECK-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i32 [[LEN:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[LEN]], 0
; CHECK-NEXT: br i1 [[CMP8]], label [[ITER_CHECK:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: iter.check:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <16 x i8> [[WIDE_LOAD]], splat (i8 2)
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4
; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]]
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX4]], 0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw <4 x i8> [[WIDE_LOAD5]], splat (i8 2)
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0
; CHECK-NEXT: store <4 x i8> [[TMP11]], ptr [[TMP13]], align 1
; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i8 [[TMP15]], 2
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX3]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
;
entry:
  %cmp8 = icmp sgt i32 %len, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.body, %entry
  ret void

for.body: ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
  %0 = load i8, ptr %arrayidx
  %add = add nuw nsw i8 %0, 2
  %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
  store i8 %add, ptr %arrayidx3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

define void @add_b(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
; CHECK-LABEL: define void @add_b
; CHECK-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i32 [[LEN:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[LEN]], 0
; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2
; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[WIDE_LOAD]], splat (i16 2)
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[TMP6]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT: [[CONV8:%.*]] = zext i16 [[TMP8]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV8]], 2
; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i16 [[CONV1]], ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
;
entry:
  %cmp9 = icmp sgt i32 %len, 0
  br i1 %cmp9, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.body, %entry
  ret void

for.body: ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i16, ptr %p, i64 %indvars.iv
  %0 = load i16, ptr %arrayidx
  %conv8 = zext i16 %0 to i32
  %add = add nuw nsw i32 %conv8, 2
  %conv1 = trunc i32 %add to i16
  %arrayidx3 = getelementptr inbounds i16, ptr %q, i64 %indvars.iv
  store i16 %conv1, ptr %arrayidx3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

define void @add_c(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
; CHECK-LABEL: define void @add_c
; CHECK-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i32 [[LEN:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[LEN]], 0
; CHECK-NEXT: br i1 [[CMP8]], label [[ITER_CHECK:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: iter.check:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i16> [[TMP4]], splat (i16 2)
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP7]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4
; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]]
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX4]], 0
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1
; CHECK-NEXT: [[TMP12:%.*]] = zext <4 x i8> [[WIDE_LOAD5]] to <4 x i16>
; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i16> [[TMP12]], splat (i16 2)
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[TMP14]], i32 0
; CHECK-NEXT: store <4 x i16> [[TMP13]], ptr [[TMP15]], align 2
; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP17]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2
; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i16 [[CONV1]], ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
;
entry:
  %cmp8 = icmp sgt i32 %len, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.body, %entry
  ret void

for.body: ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
  %0 = load i8, ptr %arrayidx
  %conv = zext i8 %0 to i32
  %add = add nuw nsw i32 %conv, 2
  %conv1 = trunc i32 %add to i16
  %arrayidx3 = getelementptr inbounds i16, ptr %q, i64 %indvars.iv
  store i16 %conv1, ptr %arrayidx3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

define void @add_d(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
; CHECK-LABEL: define void @add_d
; CHECK-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i32 [[LEN:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[LEN]], 0
; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2
; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[TMP4]], splat (i32 2)
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP7]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
;
entry:
  %cmp7 = icmp sgt i32 %len, 0
  br i1 %cmp7, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.body, %entry
  ret void

for.body: ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i16, ptr %p, i64 %indvars.iv
  %0 = load i16, ptr %arrayidx
  %conv = sext i16 %0 to i32
  %add = add nsw i32 %conv, 2
  %arrayidx2 = getelementptr inbounds i32, ptr %q, i64 %indvars.iv
  store i32 %add, ptr %arrayidx2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

define void @add_e(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
; CHECK-LABEL: define void @add_e
; CHECK-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i8 [[ARG1:%.*]], i8 [[ARG2:%.*]], i32 [[LEN:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP_32:%.*]] = icmp sgt i32 [[LEN]], 0
; CHECK-NEXT: br i1 [[CMP_32]], label [[ITER_CHECK:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: iter.check:
; CHECK-NEXT: [[CONV11:%.*]] = zext i8 [[ARG2]] to i32
; CHECK-NEXT: [[CONV13:%.*]] = zext i8 [[ARG1]] to i32
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV13]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = trunc <16 x i32> [[BROADCAST_SPLAT]] to <16 x i8>
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i32> poison, i32 [[CONV11]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT2]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i32> [[BROADCAST_SPLAT3]] to <16 x i8>
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i8> [[WIDE_LOAD]], splat (i8 4)
; CHECK-NEXT: [[TMP7:%.*]] = add <16 x i8> [[TMP6]], splat (i8 32)
; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i8> [[WIDE_LOAD]], splat (i8 51)
; CHECK-NEXT: [[TMP9:%.*]] = mul <16 x i8> [[TMP8]], splat (i8 60)
; CHECK-NEXT: [[TMP10:%.*]] = and <16 x i8> [[TMP7]], [[TMP1]]
; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i8> [[TMP9]], splat (i8 -4)
; CHECK-NEXT: [[TMP12:%.*]] = xor <16 x i8> [[TMP11]], [[TMP2]]
; CHECK-NEXT: [[TMP13:%.*]] = mul <16 x i8> [[TMP12]], [[TMP10]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 0
; CHECK-NEXT: store <16 x i8> [[TMP13]], ptr [[TMP15]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 4
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[CONV13]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT7]] to <4 x i8>
; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[CONV11]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT9]] to <4 x i8>
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX10]], 0
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP19]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i32 0
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP21]], align 1
; CHECK-NEXT: [[TMP22:%.*]] = shl <4 x i8> [[WIDE_LOAD11]], splat (i8 4)
; CHECK-NEXT: [[TMP23:%.*]] = add <4 x i8> [[TMP22]], splat (i8 32)
; CHECK-NEXT: [[TMP24:%.*]] = or <4 x i8> [[WIDE_LOAD11]], splat (i8 51)
; CHECK-NEXT: [[TMP25:%.*]] = mul <4 x i8> [[TMP24]], splat (i8 60)
; CHECK-NEXT: [[TMP26:%.*]] = and <4 x i8> [[TMP23]], [[TMP17]]
; CHECK-NEXT: [[TMP27:%.*]] = and <4 x i8> [[TMP25]], splat (i8 -4)
; CHECK-NEXT: [[TMP28:%.*]] = xor <4 x i8> [[TMP27]], [[TMP18]]
; CHECK-NEXT: [[TMP29:%.*]] = mul <4 x i8> [[TMP28]], [[TMP26]]
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP19]]
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr [[TMP30]], i32 0
; CHECK-NEXT: store <4 x i8> [[TMP29]], ptr [[TMP31]], align 1
; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], 4
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[TMP32]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[CMP_N13]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP33]] to i32
; CHECK-NEXT: [[ADD:%.*]] = shl i32 [[CONV]], 4
; CHECK-NEXT: [[CONV2:%.*]] = add nuw nsw i32 [[ADD]], 32
; CHECK-NEXT: [[OR:%.*]] = or i32 [[CONV]], 51
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[OR]], 60
; CHECK-NEXT: [[AND:%.*]] = and i32 [[CONV2]], [[CONV13]]
; CHECK-NEXT: [[MUL_MASKED:%.*]] = and i32 [[MUL]], 252
; CHECK-NEXT: [[CONV17:%.*]] = xor i32 [[MUL_MASKED]], [[CONV11]]
; CHECK-NEXT: [[MUL18:%.*]] = mul nuw nsw i32 [[CONV17]], [[AND]]
; CHECK-NEXT: [[CONV19:%.*]] = trunc i32 [[MUL18]] to i8
; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 [[CONV19]], ptr [[ARRAYIDX21]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
;
entry:
  %cmp.32 = icmp sgt i32 %len, 0
  br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph: ; preds = %entry
  %conv11 = zext i8 %arg2 to i32
  %conv13 = zext i8 %arg1 to i32
  br label %for.body

for.cond.cleanup: ; preds = %for.body, %entry
  ret void

for.body: ; preds = %for.body, %for.body.lr.ph
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
  %0 = load i8, ptr %arrayidx
  %conv = zext i8 %0 to i32
  %add = shl i32 %conv, 4
  %conv2 = add nuw nsw i32 %add, 32
  %or = or i32 %conv, 51
  %mul = mul nuw nsw i32 %or, 60
  %and = and i32 %conv2, %conv13
  %mul.masked = and i32 %mul, 252
  %conv17 = xor i32 %mul.masked, %conv11
  %mul18 = mul nuw nsw i32 %conv17, %and
  %conv19 = trunc i32 %mul18 to i8
  %arrayidx21 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
  store i8 %conv19, ptr %arrayidx21
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
; CHECK-LABEL: define void @add_f
; CHECK-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i8 [[ARG1:%.*]], i8 [[ARG2:%.*]], i32 [[LEN:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP_32:%.*]] = icmp sgt i32 [[LEN]], 0
; CHECK-NEXT: br i1 [[CMP_32]], label [[ITER_CHECK:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: iter.check:
; CHECK-NEXT: [[CONV11:%.*]] = zext i8 [[ARG2]] to i32
; CHECK-NEXT: [[CONV13:%.*]] = zext i8 [[ARG1]] to i32
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV13]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = trunc <16 x i32> [[BROADCAST_SPLAT]] to <16 x i8>
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i32> poison, i32 [[CONV11]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT2]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i32> [[BROADCAST_SPLAT3]] to <16 x i8>
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i16>, ptr [[TMP5]], align 2
; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i16> [[WIDE_LOAD]] to <16 x i8>
; CHECK-NEXT: [[TMP7:%.*]] = shl <16 x i8> [[TMP6]], splat (i8 4)
; CHECK-NEXT: [[TMP8:%.*]] = add <16 x i8> [[TMP7]], splat (i8 32)
; CHECK-NEXT: [[TMP9:%.*]] = and <16 x i8> [[TMP6]], splat (i8 -52)
; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP9]], splat (i8 51)
; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i8> [[TMP10]], splat (i8 60)
; CHECK-NEXT: [[TMP12:%.*]] = and <16 x i8> [[TMP8]], [[TMP1]]
; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i8> [[TMP11]], splat (i8 -4)
; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i8> [[TMP13]], [[TMP2]]
; CHECK-NEXT: [[TMP15:%.*]] = mul <16 x i8> [[TMP14]], [[TMP12]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; CHECK-NEXT: store <16 x i8> [[TMP15]], ptr [[TMP17]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 4
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[CONV13]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT7]] to <4 x i8>
; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[CONV11]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT9]] to <4 x i8>
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX10]], 0
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[TMP22]], i32 0
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i16>, ptr [[TMP23]], align 2
; CHECK-NEXT: [[TMP24:%.*]] = trunc <4 x i16> [[WIDE_LOAD11]] to <4 x i8>
; CHECK-NEXT: [[TMP25:%.*]] = shl <4 x i8> [[TMP24]], splat (i8 4)
; CHECK-NEXT: [[TMP26:%.*]] = add <4 x i8> [[TMP25]], splat (i8 32)
; CHECK-NEXT: [[TMP27:%.*]] = and <4 x i8> [[TMP24]], splat (i8 -52)
; CHECK-NEXT: [[TMP28:%.*]] = or <4 x i8> [[TMP27]], splat (i8 51)
; CHECK-NEXT: [[TMP29:%.*]] = mul <4 x i8> [[TMP28]], splat (i8 60)
; CHECK-NEXT: [[TMP30:%.*]] = and <4 x i8> [[TMP26]], [[TMP19]]
; CHECK-NEXT: [[TMP31:%.*]] = and <4 x i8> [[TMP29]], splat (i8 -4)
; CHECK-NEXT: [[TMP32:%.*]] = xor <4 x i8> [[TMP31]], [[TMP20]]
; CHECK-NEXT: [[TMP33:%.*]] = mul <4 x i8> [[TMP32]], [[TMP30]]
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i32 0
; CHECK-NEXT: store <4 x i8> [[TMP33]], ptr [[TMP35]], align 1
; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], 4
; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[TMP36]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[CMP_N13]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP37:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP37]] to i32
; CHECK-NEXT: [[ADD:%.*]] = shl i32 [[CONV]], 4
; CHECK-NEXT: [[CONV2:%.*]] = add nsw i32 [[ADD]], 32
; CHECK-NEXT: [[OR:%.*]] = and i32 [[CONV]], 204
; CHECK-NEXT: [[CONV8:%.*]] = or i32 [[OR]], 51
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV8]], 60
; CHECK-NEXT: [[AND:%.*]] = and i32 [[CONV2]], [[CONV13]]
; CHECK-NEXT: [[MUL_MASKED:%.*]] = and i32 [[MUL]], 252
; CHECK-NEXT: [[CONV17:%.*]] = xor i32 [[MUL_MASKED]], [[CONV11]]
; CHECK-NEXT: [[MUL18:%.*]] = mul nuw nsw i32 [[CONV17]], [[AND]]
; CHECK-NEXT: [[CONV19:%.*]] = trunc i32 [[MUL18]] to i8
; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 [[CONV19]], ptr [[ARRAYIDX21]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
;
entry:
  %cmp.32 = icmp sgt i32 %len, 0
  br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph: ; preds = %entry
  %conv11 = zext i8 %arg2 to i32
  %conv13 = zext i8 %arg1 to i32
  br label %for.body

for.cond.cleanup: ; preds = %for.body, %entry
  ret void

for.body: ; preds = %for.body, %for.body.lr.ph
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i16, ptr %p, i64 %indvars.iv
  %0 = load i16, ptr %arrayidx
  %conv = sext i16 %0 to i32
  %add = shl i32 %conv, 4
  %conv2 = add nsw i32 %add, 32
  %or = and i32 %conv, 204
  %conv8 = or i32 %or, 51
  %mul = mul nuw nsw i32 %conv8, 60
  %and = and i32 %conv2, %conv13
  %mul.masked = and i32 %mul, 252
  %conv17 = xor i32 %mul.masked, %conv11
  %mul18 = mul nuw nsw i32 %conv17, %and
  %conv19 = trunc i32 %mul18 to i8
  %arrayidx21 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
  store i8 %conv19, ptr %arrayidx21
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
; CHECK-LABEL: define void @add_phifail
; CHECK-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i32 [[LEN:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[LEN]], 0
; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <16 x i32> [[TMP4]], splat (i32 2)
; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i32> [[TMP5]] to <16 x i8>
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0
; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP4]], i32 15
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[A_PHI:%.*]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CONV]] = zext i8 [[TMP10]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2
; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 [[CONV1]], ptr [[ARRAYIDX3]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
;
entry:
  %cmp8 = icmp sgt i32 %len, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.body, %entry
  ret void

for.body: ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %a_phi = phi i32 [ %conv, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
  %0 = load i8, ptr %arrayidx
  %conv = zext i8 %0 to i32
  %add = add nuw nsw i32 %conv, 2
  %conv1 = trunc i32 %add to i8
  %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
  store i8 %conv1, ptr %arrayidx3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; When we vectorize this loop, we generate correct code even when %len exactly
; divides VF (since we extract from the second-to-last index and pass this to
; the for.cond.cleanup block). The vectorized loop returns the correct value
; a_phi = p[len - 2].
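; In the checks below this shows up as two extracts from the recurrence vector
; in middle.block: lane 15 seeds the scalar-loop resume phi, while lane 14
; (the second-to-last lane) feeds the a_phi lcssa phi that reaches
; for.cond.cleanup.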
define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
; CHECK-LABEL: define i8 @add_phifail2
; CHECK-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i32 [[LEN:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], -1
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP6]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw <16 x i32> [[TMP6]], splat (i32 2)
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP7]] to <16 x i8>
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP10]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[A_PHI_LCSSA:%.*]] = phi i32 [ [[A_PHI:%.*]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[A_PHI_LCSSA]] to i8
; CHECK-NEXT: ret i8 [[RET]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[A_PHI]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CONV]] = zext i8 [[TMP12]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2
; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 [[CONV1]], ptr [[ARRAYIDX3]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
;
entry:
  br label %for.body

for.cond.cleanup: ; preds = %for.body, %entry
  %ret = trunc i32 %a_phi to i8
  ret i8 %ret

for.body: ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %a_phi = phi i32 [ %conv, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv
  %0 = load i8, ptr %arrayidx
  %conv = zext i8 %0 to i32
  %add = add nuw nsw i32 %conv, 2
  %conv1 = trunc i32 %add to i8
  %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv
  store i8 %conv1, ptr %arrayidx3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %len
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}