1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -S < %s -passes=loop-vectorize -force-vector-interleave=4 2>&1 | FileCheck %s 3 4target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" 5target triple = "aarch64" 6 7define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture noundef readonly %B, ptr nocapture noundef readonly %C, i64 noundef %Iterations) { 8; CHECK-LABEL: @add_i8( 9; CHECK-NEXT: iter.check: 10; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ITERATIONS:%.*]], 8 11; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] 12; CHECK: vector.main.loop.iter.check: 13; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[ITERATIONS]], 64 14; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] 15; CHECK: vector.ph: 16; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ITERATIONS]], 64 17; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ITERATIONS]], [[N_MOD_VF]] 18; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 19; CHECK: vector.body: 20; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 21; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 22; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[TMP0]] 23; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 24; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 25; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 32 26; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 48 27; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 28; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 29; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 30; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 31; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C:%.*]], i64 [[TMP0]] 32; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0 33; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 16 34; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 32 35; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 48 36; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 37; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 38; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i8>, ptr [[TMP9]], align 1 39; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 40; CHECK-NEXT: [[TMP11:%.*]] = add <16 x i8> [[WIDE_LOAD5]], [[WIDE_LOAD]] 41; CHECK-NEXT: [[TMP12:%.*]] = add <16 x i8> [[WIDE_LOAD6]], [[WIDE_LOAD2]] 42; CHECK-NEXT: [[TMP13:%.*]] = add <16 x i8> [[WIDE_LOAD7]], [[WIDE_LOAD3]] 43; CHECK-NEXT: [[TMP14:%.*]] = add <16 x i8> [[WIDE_LOAD8]], [[WIDE_LOAD4]] 44; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] 45; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i32 0 46; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i32 16 47; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i32 32 48; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i32 48 49; CHECK-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP16]], align 1 50; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP17]], align 1 51; CHECK-NEXT: store <16 x i8> [[TMP13]], ptr [[TMP18]], align 1 52; CHECK-NEXT: store <16 x i8> [[TMP14]], ptr [[TMP19]], align 1 53; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 54; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 55; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 56; CHECK: middle.block: 57; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC]] 58; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 59; CHECK: vec.epilog.iter.check: 60; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[ITERATIONS]], [[N_VEC]] 61; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 62; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] 63; CHECK: vec.epilog.ph: 64; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] 65; CHECK-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[ITERATIONS]], 8 66; CHECK-NEXT: [[N_VEC10:%.*]] = sub i64 [[ITERATIONS]], [[N_MOD_VF9]] 67; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] 68; CHECK: vec.epilog.vector.body: 69; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] 70; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX11]], 0 71; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP21]] 72; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP22]], i32 0 73; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i8>, ptr [[TMP23]], align 1 74; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[TMP21]] 75; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP24]], i32 0 76; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <8 x i8>, ptr [[TMP25]], align 1 77; CHECK-NEXT: [[TMP26:%.*]] = add <8 x i8> [[WIDE_LOAD13]], [[WIDE_LOAD12]] 78; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP21]] 79; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i32 0 80; CHECK-NEXT: store <8 x i8> [[TMP26]], ptr [[TMP28]], align 1 81; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 8 82; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC10]] 83; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 84; CHECK: vec.epilog.middle.block: 85; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]] 86; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] 87; CHECK: vec.epilog.scalar.ph: 88; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] 89; CHECK-NEXT: br label [[FOR_BODY:%.*]] 90; CHECK: for.body: 91; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 92; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]] 93; CHECK-NEXT: [[TMP30:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 94; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[IV]] 95; CHECK-NEXT: [[TMP31:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 96; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP31]], [[TMP30]] 97; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] 98; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX6]], align 1 99; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 100; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[ITERATIONS]] 101; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 102; CHECK: exit: 103; CHECK-NEXT: ret void 104; 105entry: 106 br label %for.body 107 108for.body: 109 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 110 %arrayidx = getelementptr inbounds i8, ptr %B, i64 %iv 111 %0 = load i8, ptr %arrayidx, align 1 112 %arrayidx2 = getelementptr inbounds i8, ptr %C, i64 %iv 113 %1 = load i8, ptr %arrayidx2, align 1 114 %add = add i8 %1, %0 115 %arrayidx6 = getelementptr inbounds i8, ptr %A, i64 %iv 116 store i8 %add, ptr %arrayidx6, align 1 117 %iv.next = add nuw nsw i64 %iv, 1 118 %exitcond.not = icmp eq i64 %iv.next, %Iterations 119 br i1 %exitcond.not, label %exit, label %for.body 120 121exit: 122 ret void 123} 124 125define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture noundef readonly %B, ptr nocapture noundef readonly %C, i64 noundef %Iterations) { 126; CHECK-LABEL: @add_i16( 127; CHECK-NEXT: iter.check: 128; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ITERATIONS:%.*]], 4 129; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] 130; CHECK: vector.main.loop.iter.check: 131; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[ITERATIONS]], 32 132; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] 133; CHECK: vector.ph: 134; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ITERATIONS]], 32 135; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ITERATIONS]], [[N_MOD_VF]] 136; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 137; CHECK: vector.body: 138; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 139; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 140; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 [[TMP0]] 141; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 142; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 8 143; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 16 144; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 24 145; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP2]], align 1 146; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 1 147; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i16>, ptr [[TMP4]], align 1 148; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP5]], align 1 149; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[C:%.*]], i64 [[TMP0]] 150; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0 151; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 8 152; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 16 153; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 24 154; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i16>, ptr [[TMP7]], align 1 155; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i16>, ptr [[TMP8]], align 1 156; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i16>, ptr [[TMP9]], align 1 157; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i16>, ptr [[TMP10]], align 1 158; CHECK-NEXT: [[TMP11:%.*]] = add <8 x i16> [[WIDE_LOAD5]], [[WIDE_LOAD]] 159; CHECK-NEXT: [[TMP12:%.*]] = add <8 x i16> [[WIDE_LOAD6]], [[WIDE_LOAD2]] 160; CHECK-NEXT: [[TMP13:%.*]] = add <8 x i16> [[WIDE_LOAD7]], [[WIDE_LOAD3]] 161; CHECK-NEXT: [[TMP14:%.*]] = add <8 x i16> [[WIDE_LOAD8]], [[WIDE_LOAD4]] 162; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[A:%.*]], i64 [[TMP0]] 163; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 0 164; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 8 165; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 16 166; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 24 167; CHECK-NEXT: store <8 x i16> [[TMP11]], ptr [[TMP16]], align 1 168; CHECK-NEXT: store <8 x i16> [[TMP12]], ptr [[TMP17]], align 1 169; CHECK-NEXT: store <8 x i16> [[TMP13]], ptr [[TMP18]], align 1 170; CHECK-NEXT: store <8 x i16> [[TMP14]], ptr [[TMP19]], align 1 171; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 172; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 173; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 174; CHECK: middle.block: 175; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC]] 176; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 177; CHECK: vec.epilog.iter.check: 178; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[ITERATIONS]], [[N_VEC]] 179; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 180; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] 181; CHECK: vec.epilog.ph: 182; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] 183; CHECK-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[ITERATIONS]], 4 184; CHECK-NEXT: [[N_VEC10:%.*]] = sub i64 [[ITERATIONS]], [[N_MOD_VF9]] 185; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] 186; CHECK: vec.epilog.vector.body: 187; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] 188; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX11]], 0 189; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 [[TMP21]] 190; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[TMP22]], i32 0 191; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i16>, ptr [[TMP23]], align 1 192; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[C]], i64 [[TMP21]] 193; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i16, ptr [[TMP24]], i32 0 194; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i16>, ptr [[TMP25]], align 1 195; CHECK-NEXT: [[TMP26:%.*]] = add <4 x i16> [[WIDE_LOAD13]], [[WIDE_LOAD12]] 196; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP21]] 197; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i16, ptr [[TMP27]], i32 0 198; CHECK-NEXT: store <4 x i16> [[TMP26]], ptr [[TMP28]], align 1 199; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 4 200; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC10]] 201; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 202; CHECK: vec.epilog.middle.block: 203; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]] 204; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] 205; CHECK: vec.epilog.scalar.ph: 206; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] 207; CHECK-NEXT: br label [[FOR_BODY:%.*]] 208; CHECK: for.body: 209; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 210; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 [[IV]] 211; CHECK-NEXT: [[TMP30:%.*]] = load i16, ptr [[ARRAYIDX]], align 1 212; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[C]], i64 [[IV]] 213; CHECK-NEXT: [[TMP31:%.*]] = load i16, ptr [[ARRAYIDX2]], align 1 214; CHECK-NEXT: [[ADD:%.*]] = add i16 [[TMP31]], [[TMP30]] 215; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[IV]] 216; CHECK-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX6]], align 1 217; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 218; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[ITERATIONS]] 219; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 220; CHECK: exit: 221; CHECK-NEXT: ret void 222; 223entry: 224 br label %for.body 225 226for.body: 227 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 228 %arrayidx = getelementptr inbounds i16, ptr %B, i64 %iv 229 %0 = load i16, ptr %arrayidx, align 1 230 %arrayidx2 = getelementptr inbounds i16, ptr %C, i64 %iv 231 %1 = load i16, ptr %arrayidx2, align 1 232 %add = add i16 %1, %0 233 %arrayidx6 = getelementptr inbounds i16, ptr %A, i64 %iv 234 store i16 %add, ptr %arrayidx6, align 1 235 %iv.next = add nuw nsw i64 %iv, 1 236 %exitcond.not = icmp eq i64 %iv.next, %Iterations 237 br i1 %exitcond.not, label %exit, label %for.body 238 239exit: 240 ret void 241} 242 243define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture noundef readonly %B, ptr nocapture noundef readonly %C, i64 noundef %Iterations) { 244; CHECK-LABEL: @add_i32( 245; CHECK-NEXT: iter.check: 246; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ITERATIONS:%.*]], 4 247; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] 248; CHECK: vector.main.loop.iter.check: 249; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[ITERATIONS]], 16 250; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] 251; CHECK: vector.ph: 252; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ITERATIONS]], 16 253; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ITERATIONS]], [[N_MOD_VF]] 254; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 255; CHECK: vector.body: 256; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 257; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 258; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] 259; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 260; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 261; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 262; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 263; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 1 264; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP3]], align 1 265; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP4]], align 1 266; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP5]], align 1 267; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]] 268; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 269; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 4 270; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 8 271; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 12 272; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP7]], align 1 273; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP8]], align 1 274; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP9]], align 1 275; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP10]], align 1 276; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> [[WIDE_LOAD5]], [[WIDE_LOAD]] 277; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i32> [[WIDE_LOAD6]], [[WIDE_LOAD2]] 278; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD3]] 279; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD8]], [[WIDE_LOAD4]] 280; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] 281; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0 282; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 4 283; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 8 284; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 12 285; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP16]], align 1 286; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP17]], align 1 287; CHECK-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP18]], align 1 288; CHECK-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP19]], align 1 289; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 290; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 291; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 292; CHECK: middle.block: 293; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC]] 294; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 295; CHECK: vec.epilog.iter.check: 296; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[ITERATIONS]], [[N_VEC]] 297; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 298; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] 299; CHECK: vec.epilog.ph: 300; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] 301; CHECK-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[ITERATIONS]], 4 302; CHECK-NEXT: [[N_VEC10:%.*]] = sub i64 [[ITERATIONS]], [[N_MOD_VF9]] 303; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] 304; CHECK: vec.epilog.vector.body: 305; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] 306; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX11]], 0 307; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]] 308; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 0 309; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP23]], align 1 310; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP21]] 311; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 0 312; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i32>, ptr [[TMP25]], align 1 313; CHECK-NEXT: [[TMP26:%.*]] = add <4 x i32> [[WIDE_LOAD13]], [[WIDE_LOAD12]] 314; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]] 315; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 0 316; CHECK-NEXT: store <4 x i32> [[TMP26]], ptr [[TMP28]], align 1 317; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 4 318; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC10]] 319; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 320; CHECK: vec.epilog.middle.block: 321; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]] 322; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] 323; CHECK: vec.epilog.scalar.ph: 324; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] 325; CHECK-NEXT: br label [[FOR_BODY:%.*]] 326; CHECK: for.body: 327; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 328; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] 329; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 330; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] 331; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX2]], align 1 332; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP31]], [[TMP30]] 333; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 334; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX6]], align 1 335; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 336; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[ITERATIONS]] 337; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 338; CHECK: exit: 339; CHECK-NEXT: ret void 340; 341entry: 342 br label %for.body 343 344for.body: 345 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 346 %arrayidx = getelementptr inbounds i32, ptr %B, i64 %iv 347 %0 = load i32, ptr %arrayidx, align 1 348 %arrayidx2 = getelementptr inbounds i32, ptr %C, i64 %iv 349 %1 = load i32, ptr %arrayidx2, align 1 350 %add = add i32 %1, %0 351 %arrayidx6 = getelementptr inbounds i32, ptr %A, i64 %iv 352 store i32 %add, ptr %arrayidx6, align 1 353 %iv.next = add nuw nsw i64 %iv, 1 354 %exitcond.not = icmp eq i64 %iv.next, %Iterations 355 br i1 %exitcond.not, label %exit, label %for.body 356 357exit: 358 ret void 359} 360