1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt < %s -passes=loop-vectorize -S -o - | FileCheck %s 3; RUN: opt < %s -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -o - | FileCheck --check-prefix=MAX-BW %s 4 5target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 6target triple = "x86_64-unknown-linux-gnu" 7 8; This test checks that the given loop still beneficial for vecotization 9; even if it contains scalarized load (gather on AVX2) 10 11; Function Attrs: norecurse nounwind readonly uwtable 12define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_unnamed_addr #0 { 13; CHECK-LABEL: @matrix_row_col( 14; CHECK-NEXT: iter.check: 15; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[I:%.*]] to i64 16; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[J:%.*]] to i64 17; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 18; CHECK: vector.main.loop.iter.check: 19; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]] 20; CHECK: vector.ph: 21; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 22; CHECK: vector.body: 23; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 24; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP144:%.*]], [[VECTOR_BODY]] ] 25; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP145:%.*]], [[VECTOR_BODY]] ] 26; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP146:%.*]], [[VECTOR_BODY]] ] 27; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP147:%.*]], [[VECTOR_BODY]] ] 28; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 29; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 30; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 31; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 32; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 33; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 34; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 35; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 36; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8 37; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9 38; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10 39; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11 40; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12 41; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 42; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 43; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 44; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 16 45; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 17 46; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 18 47; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 19 48; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 20 49; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 21 50; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 22 51; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 23 52; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], 24 53; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], 25 54; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[INDEX]], 26 55; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 27 56; CHECK-NEXT: [[TMP28:%.*]] = add i64 [[INDEX]], 28 57; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29 58; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 59; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 60; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA:%.*]], i64 [[IDXPROM]], i64 [[TMP0]] 61; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 0 62; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 8 63; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 16 64; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 24 65; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP36]], align 4, !tbaa [[TBAA1:![0-9]+]] 66; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[TBAA1]] 67; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[TBAA1]] 68; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[TBAA1]] 69; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]] 70; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]] 71; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]] 72; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP3]], i64 [[IDXPROM5]] 73; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP4]], i64 [[IDXPROM5]] 74; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP5]], i64 [[IDXPROM5]] 75; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP6]], i64 [[IDXPROM5]] 76; CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP7]], i64 [[IDXPROM5]] 77; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP8]], i64 [[IDXPROM5]] 78; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP9]], i64 [[IDXPROM5]] 79; CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP10]], i64 [[IDXPROM5]] 80; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP11]], i64 [[IDXPROM5]] 81; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP12]], i64 [[IDXPROM5]] 82; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP13]], i64 [[IDXPROM5]] 83; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP14]], i64 [[IDXPROM5]] 84; CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP15]], i64 [[IDXPROM5]] 85; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP16]], i64 [[IDXPROM5]] 86; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP17]], i64 [[IDXPROM5]] 87; CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP18]], i64 [[IDXPROM5]] 88; CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP19]], i64 [[IDXPROM5]] 89; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP20]], i64 [[IDXPROM5]] 90; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP21]], i64 [[IDXPROM5]] 91; CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP22]], i64 [[IDXPROM5]] 92; CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP23]], i64 [[IDXPROM5]] 93; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP24]], i64 [[IDXPROM5]] 94; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP25]], i64 [[IDXPROM5]] 95; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP26]], i64 [[IDXPROM5]] 96; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP27]], i64 [[IDXPROM5]] 97; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP28]], i64 [[IDXPROM5]] 98; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP29]], i64 [[IDXPROM5]] 99; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP30]], i64 [[IDXPROM5]] 100; CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP31]], i64 [[IDXPROM5]] 101; CHECK-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP40]], align 4, !tbaa [[TBAA1]] 102; CHECK-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP41]], align 4, !tbaa [[TBAA1]] 103; CHECK-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP42]], align 4, !tbaa [[TBAA1]] 104; CHECK-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP43]], align 4, !tbaa [[TBAA1]] 105; CHECK-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP44]], align 4, !tbaa [[TBAA1]] 106; CHECK-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP45]], align 4, !tbaa [[TBAA1]] 107; CHECK-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP46]], align 4, !tbaa [[TBAA1]] 108; CHECK-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP47]], align 4, !tbaa [[TBAA1]] 109; CHECK-NEXT: [[TMP80:%.*]] = insertelement <8 x i32> poison, i32 [[TMP72]], i32 0 110; CHECK-NEXT: [[TMP81:%.*]] = insertelement <8 x i32> [[TMP80]], i32 [[TMP73]], i32 1 111; CHECK-NEXT: [[TMP82:%.*]] = insertelement <8 x i32> [[TMP81]], i32 [[TMP74]], i32 2 112; CHECK-NEXT: [[TMP83:%.*]] = insertelement <8 x i32> [[TMP82]], i32 [[TMP75]], i32 3 113; CHECK-NEXT: [[TMP84:%.*]] = insertelement <8 x i32> [[TMP83]], i32 [[TMP76]], i32 4 114; CHECK-NEXT: [[TMP85:%.*]] = insertelement <8 x i32> [[TMP84]], i32 [[TMP77]], i32 5 115; CHECK-NEXT: [[TMP86:%.*]] = insertelement <8 x i32> [[TMP85]], i32 [[TMP78]], i32 6 116; CHECK-NEXT: [[TMP87:%.*]] = insertelement <8 x i32> [[TMP86]], i32 [[TMP79]], i32 7 117; CHECK-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP48]], align 4, !tbaa [[TBAA1]] 118; CHECK-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP49]], align 4, !tbaa [[TBAA1]] 119; CHECK-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP50]], align 4, !tbaa [[TBAA1]] 120; CHECK-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP51]], align 4, !tbaa [[TBAA1]] 121; CHECK-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP52]], align 4, !tbaa [[TBAA1]] 122; CHECK-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP53]], align 4, !tbaa [[TBAA1]] 123; CHECK-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP54]], align 4, !tbaa [[TBAA1]] 124; CHECK-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP55]], align 4, !tbaa [[TBAA1]] 125; CHECK-NEXT: [[TMP96:%.*]] = insertelement <8 x i32> poison, i32 [[TMP88]], i32 0 126; CHECK-NEXT: [[TMP97:%.*]] = insertelement <8 x i32> [[TMP96]], i32 [[TMP89]], i32 1 127; CHECK-NEXT: [[TMP98:%.*]] = insertelement <8 x i32> [[TMP97]], i32 [[TMP90]], i32 2 128; CHECK-NEXT: [[TMP99:%.*]] = insertelement <8 x i32> [[TMP98]], i32 [[TMP91]], i32 3 129; CHECK-NEXT: [[TMP100:%.*]] = insertelement <8 x i32> [[TMP99]], i32 [[TMP92]], i32 4 130; CHECK-NEXT: [[TMP101:%.*]] = insertelement <8 x i32> [[TMP100]], i32 [[TMP93]], i32 5 131; CHECK-NEXT: [[TMP102:%.*]] = insertelement <8 x i32> [[TMP101]], i32 [[TMP94]], i32 6 132; CHECK-NEXT: [[TMP103:%.*]] = insertelement <8 x i32> [[TMP102]], i32 [[TMP95]], i32 7 133; CHECK-NEXT: [[TMP104:%.*]] = load i32, ptr [[TMP56]], align 4, !tbaa [[TBAA1]] 134; CHECK-NEXT: [[TMP105:%.*]] = load i32, ptr [[TMP57]], align 4, !tbaa [[TBAA1]] 135; CHECK-NEXT: [[TMP106:%.*]] = load i32, ptr [[TMP58]], align 4, !tbaa [[TBAA1]] 136; CHECK-NEXT: [[TMP107:%.*]] = load i32, ptr [[TMP59]], align 4, !tbaa [[TBAA1]] 137; CHECK-NEXT: [[TMP108:%.*]] = load i32, ptr [[TMP60]], align 4, !tbaa [[TBAA1]] 138; CHECK-NEXT: [[TMP109:%.*]] = load i32, ptr [[TMP61]], align 4, !tbaa [[TBAA1]] 139; CHECK-NEXT: [[TMP110:%.*]] = load i32, ptr [[TMP62]], align 4, !tbaa [[TBAA1]] 140; CHECK-NEXT: [[TMP111:%.*]] = load i32, ptr [[TMP63]], align 4, !tbaa [[TBAA1]] 141; CHECK-NEXT: [[TMP112:%.*]] = insertelement <8 x i32> poison, i32 [[TMP104]], i32 0 142; CHECK-NEXT: [[TMP113:%.*]] = insertelement <8 x i32> [[TMP112]], i32 [[TMP105]], i32 1 143; CHECK-NEXT: [[TMP114:%.*]] = insertelement <8 x i32> [[TMP113]], i32 [[TMP106]], i32 2 144; CHECK-NEXT: [[TMP115:%.*]] = insertelement <8 x i32> [[TMP114]], i32 [[TMP107]], i32 3 145; CHECK-NEXT: [[TMP116:%.*]] = insertelement <8 x i32> [[TMP115]], i32 [[TMP108]], i32 4 146; CHECK-NEXT: [[TMP117:%.*]] = insertelement <8 x i32> [[TMP116]], i32 [[TMP109]], i32 5 147; CHECK-NEXT: [[TMP118:%.*]] = insertelement <8 x i32> [[TMP117]], i32 [[TMP110]], i32 6 148; CHECK-NEXT: [[TMP119:%.*]] = insertelement <8 x i32> [[TMP118]], i32 [[TMP111]], i32 7 149; CHECK-NEXT: [[TMP120:%.*]] = load i32, ptr [[TMP64]], align 4, !tbaa [[TBAA1]] 150; CHECK-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP65]], align 4, !tbaa [[TBAA1]] 151; CHECK-NEXT: [[TMP122:%.*]] = load i32, ptr [[TMP66]], align 4, !tbaa [[TBAA1]] 152; CHECK-NEXT: [[TMP123:%.*]] = load i32, ptr [[TMP67]], align 4, !tbaa [[TBAA1]] 153; CHECK-NEXT: [[TMP124:%.*]] = load i32, ptr [[TMP68]], align 4, !tbaa [[TBAA1]] 154; CHECK-NEXT: [[TMP125:%.*]] = load i32, ptr [[TMP69]], align 4, !tbaa [[TBAA1]] 155; CHECK-NEXT: [[TMP126:%.*]] = load i32, ptr [[TMP70]], align 4, !tbaa [[TBAA1]] 156; CHECK-NEXT: [[TMP127:%.*]] = load i32, ptr [[TMP71]], align 4, !tbaa [[TBAA1]] 157; CHECK-NEXT: [[TMP128:%.*]] = insertelement <8 x i32> poison, i32 [[TMP120]], i32 0 158; CHECK-NEXT: [[TMP129:%.*]] = insertelement <8 x i32> [[TMP128]], i32 [[TMP121]], i32 1 159; CHECK-NEXT: [[TMP130:%.*]] = insertelement <8 x i32> [[TMP129]], i32 [[TMP122]], i32 2 160; CHECK-NEXT: [[TMP131:%.*]] = insertelement <8 x i32> [[TMP130]], i32 [[TMP123]], i32 3 161; CHECK-NEXT: [[TMP132:%.*]] = insertelement <8 x i32> [[TMP131]], i32 [[TMP124]], i32 4 162; CHECK-NEXT: [[TMP133:%.*]] = insertelement <8 x i32> [[TMP132]], i32 [[TMP125]], i32 5 163; CHECK-NEXT: [[TMP134:%.*]] = insertelement <8 x i32> [[TMP133]], i32 [[TMP126]], i32 6 164; CHECK-NEXT: [[TMP135:%.*]] = insertelement <8 x i32> [[TMP134]], i32 [[TMP127]], i32 7 165; CHECK-NEXT: [[TMP136:%.*]] = mul nsw <8 x i32> [[TMP87]], [[WIDE_LOAD]] 166; CHECK-NEXT: [[TMP137:%.*]] = mul nsw <8 x i32> [[TMP103]], [[WIDE_LOAD4]] 167; CHECK-NEXT: [[TMP138:%.*]] = mul nsw <8 x i32> [[TMP119]], [[WIDE_LOAD5]] 168; CHECK-NEXT: [[TMP139:%.*]] = mul nsw <8 x i32> [[TMP135]], [[WIDE_LOAD6]] 169; CHECK-NEXT: [[TMP140:%.*]] = add <8 x i32> [[VEC_PHI]], splat (i32 4) 170; CHECK-NEXT: [[TMP141:%.*]] = add <8 x i32> [[VEC_PHI1]], splat (i32 4) 171; CHECK-NEXT: [[TMP142:%.*]] = add <8 x i32> [[VEC_PHI2]], splat (i32 4) 172; CHECK-NEXT: [[TMP143:%.*]] = add <8 x i32> [[VEC_PHI3]], splat (i32 4) 173; CHECK-NEXT: [[TMP144]] = add <8 x i32> [[TMP140]], [[TMP136]] 174; CHECK-NEXT: [[TMP145]] = add <8 x i32> [[TMP141]], [[TMP137]] 175; CHECK-NEXT: [[TMP146]] = add <8 x i32> [[TMP142]], [[TMP138]] 176; CHECK-NEXT: [[TMP147]] = add <8 x i32> [[TMP143]], [[TMP139]] 177; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 178; CHECK-NEXT: [[TMP148:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 179; CHECK-NEXT: br i1 [[TMP148]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 180; CHECK: middle.block: 181; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP145]], [[TMP144]] 182; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <8 x i32> [[TMP146]], [[BIN_RDX]] 183; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <8 x i32> [[TMP147]], [[BIN_RDX7]] 184; CHECK-NEXT: [[TMP149:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[BIN_RDX8]]) 185; CHECK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 186; CHECK: vec.epilog.iter.check: 187; CHECK-NEXT: br i1 false, label [[SCALAR_PH]], label [[VEC_EPILOG_PH]] 188; CHECK: vec.epilog.ph: 189; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ] 190; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP149]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ] 191; CHECK-NEXT: [[TMP171:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 192; CHECK-NEXT: br label [[FOR_BODY:%.*]] 193; CHECK: vec.epilog.vector.body: 194; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[FOR_BODY]] ] 195; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], [[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], [[FOR_BODY]] ] 196; CHECK-NEXT: [[TMP172:%.*]] = add i64 [[INDEX9]], 0 197; CHECK-NEXT: [[TMP173:%.*]] = add i64 [[INDEX9]], 1 198; CHECK-NEXT: [[TMP174:%.*]] = add i64 [[INDEX9]], 2 199; CHECK-NEXT: [[TMP175:%.*]] = add i64 [[INDEX9]], 3 200; CHECK-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP172]] 201; CHECK-NEXT: [[TMP153:%.*]] = getelementptr inbounds i32, ptr [[TMP152]], i32 0 202; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP153]], align 4, !tbaa [[TBAA1]] 203; CHECK-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP172]], i64 [[IDXPROM5]] 204; CHECK-NEXT: [[TMP155:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP173]], i64 [[IDXPROM5]] 205; CHECK-NEXT: [[TMP156:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP174]], i64 [[IDXPROM5]] 206; CHECK-NEXT: [[TMP157:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP175]], i64 [[IDXPROM5]] 207; CHECK-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP154]], align 4, !tbaa [[TBAA1]] 208; CHECK-NEXT: [[TMP159:%.*]] = load i32, ptr [[TMP155]], align 4, !tbaa [[TBAA1]] 209; CHECK-NEXT: [[TMP160:%.*]] = load i32, ptr [[TMP156]], align 4, !tbaa [[TBAA1]] 210; CHECK-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP157]], align 4, !tbaa [[TBAA1]] 211; CHECK-NEXT: [[TMP162:%.*]] = insertelement <4 x i32> poison, i32 [[TMP158]], i32 0 212; CHECK-NEXT: [[TMP163:%.*]] = insertelement <4 x i32> [[TMP162]], i32 [[TMP159]], i32 1 213; CHECK-NEXT: [[TMP164:%.*]] = insertelement <4 x i32> [[TMP163]], i32 [[TMP160]], i32 2 214; CHECK-NEXT: [[TMP165:%.*]] = insertelement <4 x i32> [[TMP164]], i32 [[TMP161]], i32 3 215; CHECK-NEXT: [[TMP166:%.*]] = mul nsw <4 x i32> [[TMP165]], [[WIDE_LOAD11]] 216; CHECK-NEXT: [[TMP167:%.*]] = add <4 x i32> [[VEC_PHI10]], splat (i32 4) 217; CHECK-NEXT: [[TMP168]] = add <4 x i32> [[TMP167]], [[TMP166]] 218; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX9]], 4 219; CHECK-NEXT: [[TMP169:%.*]] = icmp eq i64 [[INDEX_NEXT12]], 100 220; CHECK-NEXT: br i1 [[TMP169]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 221; CHECK: vec.epilog.middle.block: 222; CHECK-NEXT: [[TMP170:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP168]]) 223; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]] 224; CHECK: vec.epilog.scalar.ph: 225; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ] 226; CHECK-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[TMP149]], [[VEC_EPILOG_ITER_CHECK]] ] 227; CHECK-NEXT: br label [[FOR_BODY1:%.*]] 228; CHECK: for.cond.cleanup: 229; CHECK-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], [[FOR_BODY1]] ], [ [[TMP149]], [[MIDDLE_BLOCK]] ], [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] 230; CHECK-NEXT: ret i32 [[ADD7_LCSSA]] 231; CHECK: for.body: 232; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY1]] ] 233; CHECK-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], [[SCALAR_PH]] ], [ [[ADD7]], [[FOR_BODY1]] ] 234; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]] 235; CHECK-NEXT: [[TMP150:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[TBAA1]] 236; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]] 237; CHECK-NEXT: [[TMP151:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !tbaa [[TBAA1]] 238; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP151]], [[TMP150]] 239; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUM_015]], 4 240; CHECK-NEXT: [[ADD7]] = add i32 [[ADD]], [[MUL]] 241; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 242; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100 243; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]] 244; 245; MAX-BW-LABEL: @matrix_row_col( 246; MAX-BW-NEXT: iter.check: 247; MAX-BW-NEXT: [[IDXPROM:%.*]] = sext i32 [[I:%.*]] to i64 248; MAX-BW-NEXT: [[IDXPROM5:%.*]] = sext i32 [[J:%.*]] to i64 249; MAX-BW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 250; MAX-BW: vector.main.loop.iter.check: 251; MAX-BW-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]] 252; MAX-BW: vector.ph: 253; MAX-BW-NEXT: br label [[VECTOR_BODY:%.*]] 254; MAX-BW: vector.body: 255; MAX-BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 256; MAX-BW-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP144:%.*]], [[VECTOR_BODY]] ] 257; MAX-BW-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP145:%.*]], [[VECTOR_BODY]] ] 258; MAX-BW-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP146:%.*]], [[VECTOR_BODY]] ] 259; MAX-BW-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP147:%.*]], [[VECTOR_BODY]] ] 260; MAX-BW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 261; MAX-BW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 262; MAX-BW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 263; MAX-BW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 264; MAX-BW-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 265; MAX-BW-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 266; MAX-BW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 267; MAX-BW-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 268; MAX-BW-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8 269; MAX-BW-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9 270; MAX-BW-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10 271; MAX-BW-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11 272; MAX-BW-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12 273; MAX-BW-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 274; MAX-BW-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 275; MAX-BW-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 276; MAX-BW-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 16 277; MAX-BW-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 17 278; MAX-BW-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 18 279; MAX-BW-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 19 280; MAX-BW-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 20 281; MAX-BW-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 21 282; MAX-BW-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 22 283; MAX-BW-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 23 284; MAX-BW-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], 24 285; MAX-BW-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], 25 286; MAX-BW-NEXT: [[TMP26:%.*]] = add i64 [[INDEX]], 26 287; MAX-BW-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 27 288; MAX-BW-NEXT: [[TMP28:%.*]] = add i64 [[INDEX]], 28 289; MAX-BW-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29 290; MAX-BW-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 291; MAX-BW-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 292; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA:%.*]], i64 [[IDXPROM]], i64 [[TMP0]] 293; MAX-BW-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 0 294; MAX-BW-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 8 295; MAX-BW-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 16 296; MAX-BW-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 24 297; MAX-BW-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP36]], align 4, !tbaa [[TBAA1:![0-9]+]] 298; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[TBAA1]] 299; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[TBAA1]] 300; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[TBAA1]] 301; MAX-BW-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]] 302; MAX-BW-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]] 303; MAX-BW-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]] 304; MAX-BW-NEXT: [[TMP43:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP3]], i64 [[IDXPROM5]] 305; MAX-BW-NEXT: [[TMP44:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP4]], i64 [[IDXPROM5]] 306; MAX-BW-NEXT: [[TMP45:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP5]], i64 [[IDXPROM5]] 307; MAX-BW-NEXT: [[TMP46:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP6]], i64 [[IDXPROM5]] 308; MAX-BW-NEXT: [[TMP47:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP7]], i64 [[IDXPROM5]] 309; MAX-BW-NEXT: [[TMP48:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP8]], i64 [[IDXPROM5]] 310; MAX-BW-NEXT: [[TMP49:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP9]], i64 [[IDXPROM5]] 311; MAX-BW-NEXT: [[TMP50:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP10]], i64 [[IDXPROM5]] 312; MAX-BW-NEXT: [[TMP51:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP11]], i64 [[IDXPROM5]] 313; MAX-BW-NEXT: [[TMP52:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP12]], i64 [[IDXPROM5]] 314; MAX-BW-NEXT: [[TMP53:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP13]], i64 [[IDXPROM5]] 315; MAX-BW-NEXT: [[TMP54:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP14]], i64 [[IDXPROM5]] 316; MAX-BW-NEXT: [[TMP55:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP15]], i64 [[IDXPROM5]] 317; MAX-BW-NEXT: [[TMP56:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP16]], i64 [[IDXPROM5]] 318; MAX-BW-NEXT: [[TMP57:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP17]], i64 [[IDXPROM5]] 319; MAX-BW-NEXT: [[TMP58:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP18]], i64 [[IDXPROM5]] 320; MAX-BW-NEXT: [[TMP59:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP19]], i64 [[IDXPROM5]] 321; MAX-BW-NEXT: [[TMP60:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP20]], i64 [[IDXPROM5]] 322; MAX-BW-NEXT: [[TMP61:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP21]], i64 [[IDXPROM5]] 323; MAX-BW-NEXT: [[TMP62:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP22]], i64 [[IDXPROM5]] 324; MAX-BW-NEXT: [[TMP63:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP23]], i64 [[IDXPROM5]] 325; MAX-BW-NEXT: [[TMP64:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP24]], i64 [[IDXPROM5]] 326; MAX-BW-NEXT: [[TMP65:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP25]], i64 [[IDXPROM5]] 327; MAX-BW-NEXT: [[TMP66:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP26]], i64 [[IDXPROM5]] 328; MAX-BW-NEXT: [[TMP67:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP27]], i64 [[IDXPROM5]] 329; MAX-BW-NEXT: [[TMP68:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP28]], i64 [[IDXPROM5]] 330; MAX-BW-NEXT: [[TMP69:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP29]], i64 [[IDXPROM5]] 331; MAX-BW-NEXT: [[TMP70:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP30]], i64 [[IDXPROM5]] 332; MAX-BW-NEXT: [[TMP71:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP31]], i64 [[IDXPROM5]] 333; MAX-BW-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP40]], align 4, !tbaa [[TBAA1]] 334; MAX-BW-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP41]], align 4, !tbaa [[TBAA1]] 335; MAX-BW-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP42]], align 4, !tbaa [[TBAA1]] 336; MAX-BW-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP43]], align 4, !tbaa [[TBAA1]] 337; MAX-BW-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP44]], align 4, !tbaa [[TBAA1]] 338; MAX-BW-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP45]], align 4, !tbaa [[TBAA1]] 339; MAX-BW-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP46]], align 4, !tbaa [[TBAA1]] 340; MAX-BW-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP47]], align 4, !tbaa [[TBAA1]] 341; MAX-BW-NEXT: [[TMP80:%.*]] = insertelement <8 x i32> poison, i32 [[TMP72]], i32 0 342; MAX-BW-NEXT: [[TMP81:%.*]] = insertelement <8 x i32> [[TMP80]], i32 [[TMP73]], i32 1 343; MAX-BW-NEXT: [[TMP82:%.*]] = insertelement <8 x i32> [[TMP81]], i32 [[TMP74]], i32 2 344; MAX-BW-NEXT: [[TMP83:%.*]] = insertelement <8 x i32> [[TMP82]], i32 [[TMP75]], i32 3 345; MAX-BW-NEXT: [[TMP84:%.*]] = insertelement <8 x i32> [[TMP83]], i32 [[TMP76]], i32 4 346; MAX-BW-NEXT: [[TMP85:%.*]] = insertelement <8 x i32> [[TMP84]], i32 [[TMP77]], i32 5 347; MAX-BW-NEXT: [[TMP86:%.*]] = insertelement <8 x i32> [[TMP85]], i32 [[TMP78]], i32 6 348; MAX-BW-NEXT: [[TMP87:%.*]] = insertelement <8 x i32> [[TMP86]], i32 [[TMP79]], i32 7 349; MAX-BW-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP48]], align 4, !tbaa [[TBAA1]] 350; MAX-BW-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP49]], align 4, !tbaa [[TBAA1]] 351; MAX-BW-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP50]], align 4, !tbaa [[TBAA1]] 352; MAX-BW-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP51]], align 4, !tbaa [[TBAA1]] 353; MAX-BW-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP52]], align 4, !tbaa [[TBAA1]] 354; MAX-BW-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP53]], align 4, !tbaa [[TBAA1]] 355; MAX-BW-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP54]], align 4, !tbaa [[TBAA1]] 356; MAX-BW-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP55]], align 4, !tbaa [[TBAA1]] 357; MAX-BW-NEXT: [[TMP96:%.*]] = insertelement <8 x i32> poison, i32 [[TMP88]], i32 0 358; MAX-BW-NEXT: [[TMP97:%.*]] = insertelement <8 x i32> [[TMP96]], i32 [[TMP89]], i32 1 359; MAX-BW-NEXT: [[TMP98:%.*]] = insertelement <8 x i32> [[TMP97]], i32 [[TMP90]], i32 2 360; MAX-BW-NEXT: [[TMP99:%.*]] = insertelement <8 x i32> [[TMP98]], i32 [[TMP91]], i32 3 361; MAX-BW-NEXT: [[TMP100:%.*]] = insertelement <8 x i32> [[TMP99]], i32 [[TMP92]], i32 4 362; MAX-BW-NEXT: [[TMP101:%.*]] = insertelement <8 x i32> [[TMP100]], i32 [[TMP93]], i32 5 363; MAX-BW-NEXT: [[TMP102:%.*]] = insertelement <8 x i32> [[TMP101]], i32 [[TMP94]], i32 6 364; MAX-BW-NEXT: [[TMP103:%.*]] = insertelement <8 x i32> [[TMP102]], i32 [[TMP95]], i32 7 365; MAX-BW-NEXT: [[TMP104:%.*]] = load i32, ptr [[TMP56]], align 4, !tbaa [[TBAA1]] 366; MAX-BW-NEXT: [[TMP105:%.*]] = load i32, ptr [[TMP57]], align 4, !tbaa [[TBAA1]] 367; MAX-BW-NEXT: [[TMP106:%.*]] = load i32, ptr [[TMP58]], align 4, !tbaa [[TBAA1]] 368; MAX-BW-NEXT: [[TMP107:%.*]] = load i32, ptr [[TMP59]], align 4, !tbaa [[TBAA1]] 369; MAX-BW-NEXT: [[TMP108:%.*]] = load i32, ptr [[TMP60]], align 4, !tbaa [[TBAA1]] 370; MAX-BW-NEXT: [[TMP109:%.*]] = load i32, ptr [[TMP61]], align 4, !tbaa [[TBAA1]] 371; MAX-BW-NEXT: [[TMP110:%.*]] = load i32, ptr [[TMP62]], align 4, !tbaa [[TBAA1]] 372; MAX-BW-NEXT: [[TMP111:%.*]] = load i32, ptr [[TMP63]], align 4, !tbaa [[TBAA1]] 373; MAX-BW-NEXT: [[TMP112:%.*]] = insertelement <8 x i32> poison, i32 [[TMP104]], i32 0 374; MAX-BW-NEXT: [[TMP113:%.*]] = insertelement <8 x i32> [[TMP112]], i32 [[TMP105]], i32 1 375; MAX-BW-NEXT: [[TMP114:%.*]] = insertelement <8 x i32> [[TMP113]], i32 [[TMP106]], i32 2 376; MAX-BW-NEXT: [[TMP115:%.*]] = insertelement <8 x i32> [[TMP114]], i32 [[TMP107]], i32 3 377; MAX-BW-NEXT: [[TMP116:%.*]] = insertelement <8 x i32> [[TMP115]], i32 [[TMP108]], i32 4 378; MAX-BW-NEXT: [[TMP117:%.*]] = insertelement <8 x i32> [[TMP116]], i32 [[TMP109]], i32 5 379; MAX-BW-NEXT: [[TMP118:%.*]] = insertelement <8 x i32> [[TMP117]], i32 [[TMP110]], i32 6 380; MAX-BW-NEXT: [[TMP119:%.*]] = insertelement <8 x i32> [[TMP118]], i32 [[TMP111]], i32 7 381; MAX-BW-NEXT: [[TMP120:%.*]] = load i32, ptr [[TMP64]], align 4, !tbaa [[TBAA1]] 382; MAX-BW-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP65]], align 4, !tbaa [[TBAA1]] 383; MAX-BW-NEXT: [[TMP122:%.*]] = load i32, ptr [[TMP66]], align 4, !tbaa [[TBAA1]] 384; MAX-BW-NEXT: [[TMP123:%.*]] = load i32, ptr [[TMP67]], align 4, !tbaa [[TBAA1]] 385; MAX-BW-NEXT: [[TMP124:%.*]] = load i32, ptr [[TMP68]], align 4, !tbaa [[TBAA1]] 386; MAX-BW-NEXT: [[TMP125:%.*]] = load i32, ptr [[TMP69]], align 4, !tbaa [[TBAA1]] 387; MAX-BW-NEXT: [[TMP126:%.*]] = load i32, ptr [[TMP70]], align 4, !tbaa [[TBAA1]] 388; MAX-BW-NEXT: [[TMP127:%.*]] = load i32, ptr [[TMP71]], align 4, !tbaa [[TBAA1]] 389; MAX-BW-NEXT: [[TMP128:%.*]] = insertelement <8 x i32> poison, i32 [[TMP120]], i32 0 390; MAX-BW-NEXT: [[TMP129:%.*]] = insertelement <8 x i32> [[TMP128]], i32 [[TMP121]], i32 1 391; MAX-BW-NEXT: [[TMP130:%.*]] = insertelement <8 x i32> [[TMP129]], i32 [[TMP122]], i32 2 392; MAX-BW-NEXT: [[TMP131:%.*]] = insertelement <8 x i32> [[TMP130]], i32 [[TMP123]], i32 3 393; MAX-BW-NEXT: [[TMP132:%.*]] = insertelement <8 x i32> [[TMP131]], i32 [[TMP124]], i32 4 394; MAX-BW-NEXT: [[TMP133:%.*]] = insertelement <8 x i32> [[TMP132]], i32 [[TMP125]], i32 5 395; MAX-BW-NEXT: [[TMP134:%.*]] = insertelement <8 x i32> [[TMP133]], i32 [[TMP126]], i32 6 396; MAX-BW-NEXT: [[TMP135:%.*]] = insertelement <8 x i32> [[TMP134]], i32 [[TMP127]], i32 7 397; MAX-BW-NEXT: [[TMP136:%.*]] = mul nsw <8 x i32> [[TMP87]], [[WIDE_LOAD]] 398; MAX-BW-NEXT: [[TMP137:%.*]] = mul nsw <8 x i32> [[TMP103]], [[WIDE_LOAD4]] 399; MAX-BW-NEXT: [[TMP138:%.*]] = mul nsw <8 x i32> [[TMP119]], [[WIDE_LOAD5]] 400; MAX-BW-NEXT: [[TMP139:%.*]] = mul nsw <8 x i32> [[TMP135]], [[WIDE_LOAD6]] 401; MAX-BW-NEXT: [[TMP140:%.*]] = add <8 x i32> [[VEC_PHI]], splat (i32 4) 402; MAX-BW-NEXT: [[TMP141:%.*]] = add <8 x i32> [[VEC_PHI1]], splat (i32 4) 403; MAX-BW-NEXT: [[TMP142:%.*]] = add <8 x i32> [[VEC_PHI2]], splat (i32 4) 404; MAX-BW-NEXT: [[TMP143:%.*]] = add <8 x i32> [[VEC_PHI3]], splat (i32 4) 405; MAX-BW-NEXT: [[TMP144]] = add <8 x i32> [[TMP140]], [[TMP136]] 406; MAX-BW-NEXT: [[TMP145]] = add <8 x i32> [[TMP141]], [[TMP137]] 407; MAX-BW-NEXT: [[TMP146]] = add <8 x i32> [[TMP142]], [[TMP138]] 408; MAX-BW-NEXT: [[TMP147]] = add <8 x i32> [[TMP143]], [[TMP139]] 409; MAX-BW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 410; MAX-BW-NEXT: [[TMP148:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 411; MAX-BW-NEXT: br i1 [[TMP148]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 412; MAX-BW: middle.block: 413; MAX-BW-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP145]], [[TMP144]] 414; MAX-BW-NEXT: [[BIN_RDX7:%.*]] = add <8 x i32> [[TMP146]], [[BIN_RDX]] 415; MAX-BW-NEXT: [[BIN_RDX8:%.*]] = add <8 x i32> [[TMP147]], [[BIN_RDX7]] 416; MAX-BW-NEXT: [[TMP149:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[BIN_RDX8]]) 417; MAX-BW-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 418; MAX-BW: vec.epilog.iter.check: 419; MAX-BW-NEXT: br i1 false, label [[SCALAR_PH]], label [[VEC_EPILOG_PH]] 420; MAX-BW: vec.epilog.ph: 421; MAX-BW-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ] 422; MAX-BW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP149]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ] 423; MAX-BW-NEXT: [[TMP171:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 424; MAX-BW-NEXT: br label [[FOR_BODY:%.*]] 425; MAX-BW: vec.epilog.vector.body: 426; MAX-BW-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[FOR_BODY]] ] 427; MAX-BW-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], [[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], [[FOR_BODY]] ] 428; MAX-BW-NEXT: [[TMP172:%.*]] = add i64 [[INDEX9]], 0 429; MAX-BW-NEXT: [[TMP173:%.*]] = add i64 [[INDEX9]], 1 430; MAX-BW-NEXT: [[TMP174:%.*]] = add i64 [[INDEX9]], 2 431; MAX-BW-NEXT: [[TMP175:%.*]] = add i64 [[INDEX9]], 3 432; MAX-BW-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP172]] 433; MAX-BW-NEXT: [[TMP153:%.*]] = getelementptr inbounds i32, ptr [[TMP152]], i32 0 434; MAX-BW-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP153]], align 4, !tbaa [[TBAA1]] 435; MAX-BW-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP172]], i64 [[IDXPROM5]] 436; MAX-BW-NEXT: [[TMP155:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP173]], i64 [[IDXPROM5]] 437; MAX-BW-NEXT: [[TMP156:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP174]], i64 [[IDXPROM5]] 438; MAX-BW-NEXT: [[TMP157:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP175]], i64 [[IDXPROM5]] 439; MAX-BW-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP154]], align 4, !tbaa [[TBAA1]] 440; MAX-BW-NEXT: [[TMP159:%.*]] = load i32, ptr [[TMP155]], align 4, !tbaa [[TBAA1]] 441; MAX-BW-NEXT: [[TMP160:%.*]] = load i32, ptr [[TMP156]], align 4, !tbaa [[TBAA1]] 442; MAX-BW-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP157]], align 4, !tbaa [[TBAA1]] 443; MAX-BW-NEXT: [[TMP162:%.*]] = insertelement <4 x i32> poison, i32 [[TMP158]], i32 0 444; MAX-BW-NEXT: [[TMP163:%.*]] = insertelement <4 x i32> [[TMP162]], i32 [[TMP159]], i32 1 445; MAX-BW-NEXT: [[TMP164:%.*]] = insertelement <4 x i32> [[TMP163]], i32 [[TMP160]], i32 2 446; MAX-BW-NEXT: [[TMP165:%.*]] = insertelement <4 x i32> [[TMP164]], i32 [[TMP161]], i32 3 447; MAX-BW-NEXT: [[TMP166:%.*]] = mul nsw <4 x i32> [[TMP165]], [[WIDE_LOAD11]] 448; MAX-BW-NEXT: [[TMP167:%.*]] = add <4 x i32> [[VEC_PHI10]], splat (i32 4) 449; MAX-BW-NEXT: [[TMP168]] = add <4 x i32> [[TMP167]], [[TMP166]] 450; MAX-BW-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX9]], 4 451; MAX-BW-NEXT: [[TMP169:%.*]] = icmp eq i64 [[INDEX_NEXT12]], 100 452; MAX-BW-NEXT: br i1 [[TMP169]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 453; MAX-BW: vec.epilog.middle.block: 454; MAX-BW-NEXT: [[TMP170:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP168]]) 455; MAX-BW-NEXT: br i1 true, label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]] 456; MAX-BW: vec.epilog.scalar.ph: 457; MAX-BW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ] 458; MAX-BW-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[TMP149]], [[VEC_EPILOG_ITER_CHECK]] ] 459; MAX-BW-NEXT: br label [[FOR_BODY1:%.*]] 460; MAX-BW: for.cond.cleanup: 461; MAX-BW-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], [[FOR_BODY1]] ], [ [[TMP149]], [[MIDDLE_BLOCK]] ], [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] 462; MAX-BW-NEXT: ret i32 [[ADD7_LCSSA]] 463; MAX-BW: for.body: 464; MAX-BW-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY1]] ] 465; MAX-BW-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], [[SCALAR_PH]] ], [ [[ADD7]], [[FOR_BODY1]] ] 466; MAX-BW-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]] 467; MAX-BW-NEXT: [[TMP150:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[TBAA1]] 468; MAX-BW-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]] 469; MAX-BW-NEXT: [[TMP151:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !tbaa [[TBAA1]] 470; MAX-BW-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP151]], [[TMP150]] 471; MAX-BW-NEXT: [[ADD:%.*]] = add i32 [[SUM_015]], 4 472; MAX-BW-NEXT: [[ADD7]] = add i32 [[ADD]], [[MUL]] 473; MAX-BW-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 474; MAX-BW-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100 475; MAX-BW-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]] 476; 477entry: 478 %idxprom = sext i32 %i to i64 479 %idxprom5 = sext i32 %j to i64 480 br label %for.body 481 482 for.cond.cleanup: ; preds = %for.body 483 ret i32 %add7 484 485 for.body: ; preds = %for.body, %entry 486 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 487 %sum.015 = phi i32 [ 0, %entry ], [ %add7, %for.body ] 488 ; first consecutive load as vector load 489 %arrayidx2 = getelementptr inbounds [100 x i32], ptr %data, i64 %idxprom, i64 %indvars.iv 490 %0 = load i32, ptr %arrayidx2, align 4, !tbaa !1 491 ; second strided load scalarized 492 %arrayidx6 = getelementptr inbounds [100 x i32], ptr %data, i64 %indvars.iv, i64 %idxprom5 493 %1 = load i32, ptr %arrayidx6, align 4, !tbaa !1 494 %mul = mul nsw i32 %1, %0 495 %add = add i32 %sum.015, 4 496 %add7 = add i32 %add, %mul 497 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 498 %exitcond = icmp eq i64 %indvars.iv.next, 100 499 br i1 %exitcond, label %for.cond.cleanup, label %for.body 500} 501 502define void @test(ptr %A, ptr noalias %B) #0 { 503; CHECK-LABEL: @test( 504; CHECK-NEXT: entry: 505; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 506; CHECK: vector.ph: 507; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 508; CHECK: vector.body: 509; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 510; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 511; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 512; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 513; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 514; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 515; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8 516; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10 517; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12 518; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14 519; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP0]], 0 520; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP1]], 0 521; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP2]], 0 522; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw i64 [[TMP3]], 0 523; CHECK-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[TMP4]], 0 524; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP5]], 0 525; CHECK-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[TMP6]], 0 526; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[TMP7]], 0 527; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A:%.*]], i64 0, i64 [[TMP8]] 528; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP16]], align 4 529; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 530; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 531; CHECK-NEXT: [[TMP18:%.*]] = add <8 x i32> [[STRIDED_VEC]], [[STRIDED_VEC1]] 532; CHECK-NEXT: [[TMP19:%.*]] = trunc <8 x i32> [[TMP18]] to <8 x i8> 533; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B:%.*]], i64 0, i64 [[TMP8]] 534; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP9]] 535; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP10]] 536; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP11]] 537; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP12]] 538; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP13]] 539; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP14]] 540; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP15]] 541; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i8> [[TMP19]], i32 0 542; CHECK-NEXT: store i8 [[TMP28]], ptr [[TMP20]], align 1 543; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i8> [[TMP19]], i32 1 544; CHECK-NEXT: store i8 [[TMP29]], ptr [[TMP21]], align 1 545; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i8> [[TMP19]], i32 2 546; CHECK-NEXT: store i8 [[TMP30]], ptr [[TMP22]], align 1 547; CHECK-NEXT: [[TMP31:%.*]] = extractelement <8 x i8> [[TMP19]], i32 3 548; CHECK-NEXT: store i8 [[TMP31]], ptr [[TMP23]], align 1 549; CHECK-NEXT: [[TMP32:%.*]] = extractelement <8 x i8> [[TMP19]], i32 4 550; CHECK-NEXT: store i8 [[TMP32]], ptr [[TMP24]], align 1 551; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i8> [[TMP19]], i32 5 552; CHECK-NEXT: store i8 [[TMP33]], ptr [[TMP25]], align 1 553; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i8> [[TMP19]], i32 6 554; CHECK-NEXT: store i8 [[TMP34]], ptr [[TMP26]], align 1 555; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i8> [[TMP19]], i32 7 556; CHECK-NEXT: store i8 [[TMP35]], ptr [[TMP27]], align 1 557; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 558; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 559; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 560; CHECK: middle.block: 561; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] 562; CHECK: scalar.ph: 563; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 564; CHECK-NEXT: br label [[FOR_BODY:%.*]] 565; CHECK: for.body: 566; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 567; CHECK-NEXT: [[IV_0:%.*]] = add nuw nsw i64 [[IV]], 0 568; CHECK-NEXT: [[IV_1:%.*]] = add nuw nsw i64 [[IV]], 1 569; CHECK-NEXT: [[IN0:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[IV_0]] 570; CHECK-NEXT: [[IN1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[IV_1]] 571; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[IN0]], align 4 572; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[IN1]], align 4 573; CHECK-NEXT: [[REDUCE_ADD_0:%.*]] = add i32 [[V0]], [[V1]] 574; CHECK-NEXT: [[REDUCE_ADD_0_NARROW:%.*]] = trunc i32 [[REDUCE_ADD_0]] to i8 575; CHECK-NEXT: [[OUT:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[IV_0]] 576; CHECK-NEXT: store i8 [[REDUCE_ADD_0_NARROW]], ptr [[OUT]], align 1 577; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV_0]], 2 578; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 1024 579; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]] 580; CHECK: for.cond.cleanup: 581; CHECK-NEXT: ret void 582; 583; MAX-BW-LABEL: @test( 584; MAX-BW-NEXT: entry: 585; MAX-BW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 586; MAX-BW: vector.ph: 587; MAX-BW-NEXT: br label [[VECTOR_BODY:%.*]] 588; MAX-BW: vector.body: 589; MAX-BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 590; MAX-BW-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 591; MAX-BW-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 592; MAX-BW-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 593; MAX-BW-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 594; MAX-BW-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 595; MAX-BW-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8 596; MAX-BW-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10 597; MAX-BW-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12 598; MAX-BW-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14 599; MAX-BW-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 16 600; MAX-BW-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 18 601; MAX-BW-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 20 602; MAX-BW-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 22 603; MAX-BW-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 24 604; MAX-BW-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 605; MAX-BW-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 606; MAX-BW-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 607; MAX-BW-NEXT: [[TMP16:%.*]] = add nuw nsw i64 [[TMP0]], 0 608; MAX-BW-NEXT: [[TMP17:%.*]] = add nuw nsw i64 [[TMP1]], 0 609; MAX-BW-NEXT: [[TMP18:%.*]] = add nuw nsw i64 [[TMP2]], 0 610; MAX-BW-NEXT: [[TMP19:%.*]] = add nuw nsw i64 [[TMP3]], 0 611; MAX-BW-NEXT: [[TMP20:%.*]] = add nuw nsw i64 [[TMP4]], 0 612; MAX-BW-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[TMP5]], 0 613; MAX-BW-NEXT: [[TMP22:%.*]] = add nuw nsw i64 [[TMP6]], 0 614; MAX-BW-NEXT: [[TMP23:%.*]] = add nuw nsw i64 [[TMP7]], 0 615; MAX-BW-NEXT: [[TMP24:%.*]] = add nuw nsw i64 [[TMP8]], 0 616; MAX-BW-NEXT: [[TMP25:%.*]] = add nuw nsw i64 [[TMP9]], 0 617; MAX-BW-NEXT: [[TMP26:%.*]] = add nuw nsw i64 [[TMP10]], 0 618; MAX-BW-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[TMP11]], 0 619; MAX-BW-NEXT: [[TMP28:%.*]] = add nuw nsw i64 [[TMP12]], 0 620; MAX-BW-NEXT: [[TMP29:%.*]] = add nuw nsw i64 [[TMP13]], 0 621; MAX-BW-NEXT: [[TMP30:%.*]] = add nuw nsw i64 [[TMP14]], 0 622; MAX-BW-NEXT: [[TMP31:%.*]] = add nuw nsw i64 [[TMP15]], 0 623; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A:%.*]], i64 0, i64 [[TMP16]] 624; MAX-BW-NEXT: [[WIDE_VEC:%.*]] = load <32 x i32>, ptr [[TMP32]], align 4 625; MAX-BW-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i32> [[WIDE_VEC]], <32 x i32> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 626; MAX-BW-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <32 x i32> [[WIDE_VEC]], <32 x i32> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> 627; MAX-BW-NEXT: [[TMP34:%.*]] = add <16 x i32> [[STRIDED_VEC]], [[STRIDED_VEC1]] 628; MAX-BW-NEXT: [[TMP35:%.*]] = trunc <16 x i32> [[TMP34]] to <16 x i8> 629; MAX-BW-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B:%.*]], i64 0, i64 [[TMP16]] 630; MAX-BW-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP17]] 631; MAX-BW-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP18]] 632; MAX-BW-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP19]] 633; MAX-BW-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP20]] 634; MAX-BW-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP21]] 635; MAX-BW-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP22]] 636; MAX-BW-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP23]] 637; MAX-BW-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP24]] 638; MAX-BW-NEXT: [[TMP45:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP25]] 639; MAX-BW-NEXT: [[TMP46:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP26]] 640; MAX-BW-NEXT: [[TMP47:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP27]] 641; MAX-BW-NEXT: [[TMP48:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP28]] 642; MAX-BW-NEXT: [[TMP49:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP29]] 643; MAX-BW-NEXT: [[TMP50:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP30]] 644; MAX-BW-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP31]] 645; MAX-BW-NEXT: [[TMP52:%.*]] = extractelement <16 x i8> [[TMP35]], i32 0 646; MAX-BW-NEXT: store i8 [[TMP52]], ptr [[TMP36]], align 1 647; MAX-BW-NEXT: [[TMP53:%.*]] = extractelement <16 x i8> [[TMP35]], i32 1 648; MAX-BW-NEXT: store i8 [[TMP53]], ptr [[TMP37]], align 1 649; MAX-BW-NEXT: [[TMP54:%.*]] = extractelement <16 x i8> [[TMP35]], i32 2 650; MAX-BW-NEXT: store i8 [[TMP54]], ptr [[TMP38]], align 1 651; MAX-BW-NEXT: [[TMP55:%.*]] = extractelement <16 x i8> [[TMP35]], i32 3 652; MAX-BW-NEXT: store i8 [[TMP55]], ptr [[TMP39]], align 1 653; MAX-BW-NEXT: [[TMP56:%.*]] = extractelement <16 x i8> [[TMP35]], i32 4 654; MAX-BW-NEXT: store i8 [[TMP56]], ptr [[TMP40]], align 1 655; MAX-BW-NEXT: [[TMP57:%.*]] = extractelement <16 x i8> [[TMP35]], i32 5 656; MAX-BW-NEXT: store i8 [[TMP57]], ptr [[TMP41]], align 1 657; MAX-BW-NEXT: [[TMP58:%.*]] = extractelement <16 x i8> [[TMP35]], i32 6 658; MAX-BW-NEXT: store i8 [[TMP58]], ptr [[TMP42]], align 1 659; MAX-BW-NEXT: [[TMP59:%.*]] = extractelement <16 x i8> [[TMP35]], i32 7 660; MAX-BW-NEXT: store i8 [[TMP59]], ptr [[TMP43]], align 1 661; MAX-BW-NEXT: [[TMP60:%.*]] = extractelement <16 x i8> [[TMP35]], i32 8 662; MAX-BW-NEXT: store i8 [[TMP60]], ptr [[TMP44]], align 1 663; MAX-BW-NEXT: [[TMP61:%.*]] = extractelement <16 x i8> [[TMP35]], i32 9 664; MAX-BW-NEXT: store i8 [[TMP61]], ptr [[TMP45]], align 1 665; MAX-BW-NEXT: [[TMP62:%.*]] = extractelement <16 x i8> [[TMP35]], i32 10 666; MAX-BW-NEXT: store i8 [[TMP62]], ptr [[TMP46]], align 1 667; MAX-BW-NEXT: [[TMP63:%.*]] = extractelement <16 x i8> [[TMP35]], i32 11 668; MAX-BW-NEXT: store i8 [[TMP63]], ptr [[TMP47]], align 1 669; MAX-BW-NEXT: [[TMP64:%.*]] = extractelement <16 x i8> [[TMP35]], i32 12 670; MAX-BW-NEXT: store i8 [[TMP64]], ptr [[TMP48]], align 1 671; MAX-BW-NEXT: [[TMP65:%.*]] = extractelement <16 x i8> [[TMP35]], i32 13 672; MAX-BW-NEXT: store i8 [[TMP65]], ptr [[TMP49]], align 1 673; MAX-BW-NEXT: [[TMP66:%.*]] = extractelement <16 x i8> [[TMP35]], i32 14 674; MAX-BW-NEXT: store i8 [[TMP66]], ptr [[TMP50]], align 1 675; MAX-BW-NEXT: [[TMP67:%.*]] = extractelement <16 x i8> [[TMP35]], i32 15 676; MAX-BW-NEXT: store i8 [[TMP67]], ptr [[TMP51]], align 1 677; MAX-BW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 678; MAX-BW-NEXT: [[TMP68:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 679; MAX-BW-NEXT: br i1 [[TMP68]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 680; MAX-BW: middle.block: 681; MAX-BW-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] 682; MAX-BW: scalar.ph: 683; MAX-BW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 684; MAX-BW-NEXT: br label [[FOR_BODY:%.*]] 685; MAX-BW: for.body: 686; MAX-BW-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 687; MAX-BW-NEXT: [[IV_0:%.*]] = add nuw nsw i64 [[IV]], 0 688; MAX-BW-NEXT: [[IV_1:%.*]] = add nuw nsw i64 [[IV]], 1 689; MAX-BW-NEXT: [[IN0:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[IV_0]] 690; MAX-BW-NEXT: [[IN1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[IV_1]] 691; MAX-BW-NEXT: [[V0:%.*]] = load i32, ptr [[IN0]], align 4 692; MAX-BW-NEXT: [[V1:%.*]] = load i32, ptr [[IN1]], align 4 693; MAX-BW-NEXT: [[REDUCE_ADD_0:%.*]] = add i32 [[V0]], [[V1]] 694; MAX-BW-NEXT: [[REDUCE_ADD_0_NARROW:%.*]] = trunc i32 [[REDUCE_ADD_0]] to i8 695; MAX-BW-NEXT: [[OUT:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[IV_0]] 696; MAX-BW-NEXT: store i8 [[REDUCE_ADD_0_NARROW]], ptr [[OUT]], align 1 697; MAX-BW-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV_0]], 2 698; MAX-BW-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 1024 699; MAX-BW-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]] 700; MAX-BW: for.cond.cleanup: 701; MAX-BW-NEXT: ret void 702; 703entry: 704 br label %for.body 705 706for.body: 707 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 708 709 %iv.0 = add nuw nsw i64 %iv, 0 710 %iv.1 = add nuw nsw i64 %iv, 1 711 712 %in0 = getelementptr inbounds [1024 x i32], ptr %A, i64 0, i64 %iv.0 713 %in1 = getelementptr inbounds [1024 x i32], ptr %A, i64 0, i64 %iv.1 714 715 %v0 = load i32, ptr %in0 716 %v1 = load i32, ptr %in1 717 718 %reduce.add.0 = add i32 %v0, %v1 719 720 %reduce.add.0.narrow = trunc i32 %reduce.add.0 to i8 721 722 %out = getelementptr inbounds [1024 x i8], ptr %B, i64 0, i64 %iv.0 723 store i8 %reduce.add.0.narrow, ptr %out 724 725 %iv.next = add nuw nsw i64 %iv.0, 2 726 %cmp = icmp ult i64 %iv.next, 1024 727 br i1 %cmp, label %for.body, label %for.cond.cleanup 728 729for.cond.cleanup: 730 ret void 731} 732 733attributes #0 = { "target-cpu"="core-avx2" "target-features"="+avx,+avx2,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3" } 734 735!llvm.ident = !{!0} 736 737!0 = !{!"clang version 4.0.0 (cfe/trunk 284570)"} 738!1 = !{!2, !2, i64 0} 739!2 = !{!"int", !3, i64 0} 740!3 = !{!"omnipotent char", !4, i64 0} 741!4 = !{!"Simple C/C++ TBAA"} 742