1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 2; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s | FileCheck %s --check-prefixes=CHECK-INTERLEAVE1 3; RUN: opt -passes=loop-vectorize -enable-epilogue-vectorization=false -S < %s | FileCheck %s --check-prefixes=CHECK-INTERLEAVED 4; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -vectorizer-maximize-bandwidth -enable-epilogue-vectorization=false -S < %s | FileCheck %s --check-prefixes=CHECK-MAXBW 5 6target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" 7target triple = "aarch64-none-unknown-elf" 8 9define i32 @dotp(ptr %a, ptr %b) #0 { 10; CHECK-INTERLEAVE1-LABEL: define i32 @dotp( 11; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { 12; CHECK-INTERLEAVE1-NEXT: entry: 13; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 14; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 15; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]] 16; CHECK-INTERLEAVE1: vector.ph: 17; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 18; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 19; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 20; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 21; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 22; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 23; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY1:%.*]] 24; CHECK-INTERLEAVE1: vector.body: 25; CHECK-INTERLEAVE1-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT1:%.*]], [[VECTOR_BODY1]] ] 26; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP14:%.*]], [[VECTOR_BODY1]] ] 27; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], 0 28; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP12]] 29; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP16]], i32 0 30; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP17]], align 1 31; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32> 32; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]] 33; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i32 0 34; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP21]], align 1 35; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32> 36; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = mul <vscale x 4 x i32> [[TMP18]], [[TMP9]] 37; CHECK-INTERLEAVE1-NEXT: [[TMP14]] = add <vscale x 4 x i32> [[TMP13]], [[VEC_PHI]] 38; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT1]] = add nuw i64 [[INDEX1]], [[TMP5]] 39; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT1]], [[N_VEC]] 40; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_ITER_CHECK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]] 41; CHECK-INTERLEAVE1: middle.block: 42; CHECK-INTERLEAVE1-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP14]]) 43; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 44; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[VEC_EPILOG_PH]] 45; CHECK-INTERLEAVE1: scalar.ph: 46; CHECK-INTERLEAVE1-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH:%.*]] ] 47; CHECK-INTERLEAVE1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP27]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ] 48; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] 49; CHECK-INTERLEAVE1: for.body: 50; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 51; CHECK-INTERLEAVE1-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[ADD:%.*]], [[VECTOR_BODY]] ] 52; CHECK-INTERLEAVE1-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] 53; CHECK-INTERLEAVE1-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 54; CHECK-INTERLEAVE1-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32 55; CHECK-INTERLEAVE1-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] 56; CHECK-INTERLEAVE1-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 57; CHECK-INTERLEAVE1-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32 58; CHECK-INTERLEAVE1-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]] 59; CHECK-INTERLEAVE1-NEXT: [[ADD]] = add i32 [[MUL]], [[ACCUM]] 60; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 61; CHECK-INTERLEAVE1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 62; CHECK-INTERLEAVE1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 63; CHECK-INTERLEAVE1: for.exit: 64; CHECK-INTERLEAVE1-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[VECTOR_BODY]] ], [ [[TMP27]], [[VEC_EPILOG_ITER_CHECK]] ] 65; CHECK-INTERLEAVE1-NEXT: ret i32 [[ADD_LCSSA]] 66; 67; CHECK-INTERLEAVED-LABEL: define i32 @dotp( 68; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { 69; CHECK-INTERLEAVED-NEXT: entry: 70; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 71; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 72; CHECK-INTERLEAVED-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]] 73; CHECK-INTERLEAVED: vector.ph: 74; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 75; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 76; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 77; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 78; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 79; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 80; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY1:%.*]] 81; CHECK-INTERLEAVED: vector.body: 82; CHECK-INTERLEAVED-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT1:%.*]], [[VECTOR_BODY1]] ] 83; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP23:%.*]], [[VECTOR_BODY1]] ] 84; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH1]] ], [ [[TMP24:%.*]], [[VECTOR_BODY1]] ] 85; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = add i64 [[INDEX1]], 0 86; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP17]] 87; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i32 0 88; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() 89; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP14]], 4 90; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP20]], i64 [[TMP10]] 91; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP21]], align 1 92; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x i8>, ptr [[TMP11]], align 1 93; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32> 94; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD2]] to <vscale x 4 x i32> 95; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP17]] 96; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP28]], i32 0 97; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() 98; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 4 99; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP28]], i64 [[TMP27]] 100; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x i8>, ptr [[TMP9]], align 1 101; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x i8>, ptr [[TMP18]], align 1 102; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD3]] to <vscale x 4 x i32> 103; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD4]] to <vscale x 4 x i32> 104; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = mul <vscale x 4 x i32> [[TMP19]], [[TMP12]] 105; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = mul <vscale x 4 x i32> [[TMP29]], [[TMP13]] 106; CHECK-INTERLEAVED-NEXT: [[TMP23]] = add <vscale x 4 x i32> [[TMP30]], [[VEC_PHI]] 107; CHECK-INTERLEAVED-NEXT: [[TMP24]] = add <vscale x 4 x i32> [[TMP22]], [[VEC_PHI1]] 108; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT1]] = add nuw i64 [[INDEX1]], [[TMP5]] 109; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT1]], [[N_VEC]] 110; CHECK-INTERLEAVED-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_ITER_CHECK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]] 111; CHECK-INTERLEAVED: middle.block: 112; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <vscale x 4 x i32> [[TMP24]], [[TMP23]] 113; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[BIN_RDX]]) 114; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 115; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[VEC_EPILOG_PH]] 116; CHECK-INTERLEAVED: scalar.ph: 117; CHECK-INTERLEAVED-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH:%.*]] ] 118; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP16]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ] 119; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] 120; CHECK-INTERLEAVED: for.body: 121; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 122; CHECK-INTERLEAVED-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[ADD:%.*]], [[VECTOR_BODY]] ] 123; CHECK-INTERLEAVED-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] 124; CHECK-INTERLEAVED-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 125; CHECK-INTERLEAVED-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32 126; CHECK-INTERLEAVED-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] 127; CHECK-INTERLEAVED-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 128; CHECK-INTERLEAVED-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32 129; CHECK-INTERLEAVED-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]] 130; CHECK-INTERLEAVED-NEXT: [[ADD]] = add i32 [[MUL]], [[ACCUM]] 131; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 132; CHECK-INTERLEAVED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 133; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 134; CHECK-INTERLEAVED: for.exit: 135; CHECK-INTERLEAVED-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[VECTOR_BODY]] ], [ [[TMP16]], [[VEC_EPILOG_ITER_CHECK]] ] 136; CHECK-INTERLEAVED-NEXT: ret i32 [[ADD_LCSSA]] 137; 138; CHECK-MAXBW-LABEL: define i32 @dotp( 139; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { 140; CHECK-MAXBW-NEXT: entry: 141; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 142; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 143; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 144; CHECK-MAXBW: vector.ph: 145; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 146; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 147; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 148; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 149; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 150; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 151; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] 152; CHECK-MAXBW: vector.body: 153; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 154; CHECK-MAXBW-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] 155; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 156; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]] 157; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i32 0 158; CHECK-MAXBW-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr [[TMP8]], align 1 159; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32> 160; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] 161; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP14]], i32 0 162; CHECK-MAXBW-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x i8>, ptr [[TMP15]], align 1 163; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD4]] to <vscale x 8 x i32> 164; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = mul <vscale x 8 x i32> [[TMP20]], [[TMP13]] 165; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE5]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI1]], <vscale x 8 x i32> [[TMP22]]) 166; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 167; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 168; CHECK-MAXBW-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 169; CHECK-MAXBW: middle.block: 170; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[PARTIAL_REDUCE5]]) 171; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 172; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] 173; CHECK-MAXBW: scalar.ph: 174; CHECK-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 175; CHECK-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 176; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]] 177; CHECK-MAXBW: for.body: 178; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 179; CHECK-MAXBW-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 180; CHECK-MAXBW-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] 181; CHECK-MAXBW-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 182; CHECK-MAXBW-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32 183; CHECK-MAXBW-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]] 184; CHECK-MAXBW-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 185; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32 186; CHECK-MAXBW-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]] 187; CHECK-MAXBW-NEXT: [[ADD]] = add i32 [[MUL]], [[ACCUM]] 188; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 189; CHECK-MAXBW-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 190; CHECK-MAXBW-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 191; CHECK-MAXBW: for.exit: 192; CHECK-MAXBW-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] 193; CHECK-MAXBW-NEXT: ret i32 [[ADD_LCSSA]] 194; 195entry: 196 br label %for.body 197 198for.body: ; preds = %for.body, %entry 199 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 200 %accum = phi i32 [ 0, %entry ], [ %add, %for.body ] 201 %gep.a = getelementptr i8, ptr %a, i64 %iv 202 %load.a = load i8, ptr %gep.a, align 1 203 %ext.a = zext i8 %load.a to i32 204 %gep.b = getelementptr i8, ptr %b, i64 %iv 205 %load.b = load i8, ptr %gep.b, align 1 206 %ext.b = zext i8 %load.b to i32 207 %mul = mul i32 %ext.b, %ext.a 208 %add = add i32 %mul, %accum 209 %iv.next = add i64 %iv, 1 210 %exitcond.not = icmp eq i64 %iv.next, 1024 211 br i1 %exitcond.not, label %for.exit, label %for.body 212 213for.exit: ; preds = %for.body 214 ret i32 %add 215} 216 217define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { 218; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_different_types( 219; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 220; CHECK-INTERLEAVE1-NEXT: entry: 221; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 222; CHECK-INTERLEAVE1: vector.ph: 223; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] 224; CHECK-INTERLEAVE1: vector.body: 225; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 226; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP69:%.*]], [[VECTOR_BODY]] ] 227; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 228; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 229; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 230; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 231; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 232; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 233; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 234; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 235; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8 236; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9 237; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10 238; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11 239; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12 240; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 241; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 242; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 243; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] 244; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP16]], i32 0 245; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP17]], align 1 246; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> 247; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] 248; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] 249; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] 250; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] 251; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]] 252; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] 253; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] 254; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] 255; CHECK-INTERLEAVE1-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] 256; CHECK-INTERLEAVE1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] 257; CHECK-INTERLEAVE1-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]] 258; CHECK-INTERLEAVE1-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] 259; CHECK-INTERLEAVE1-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]] 260; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]] 261; CHECK-INTERLEAVE1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]] 262; CHECK-INTERLEAVE1-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]] 263; CHECK-INTERLEAVE1-NEXT: [[TMP35:%.*]] = load i16, ptr [[TMP19]], align 2 264; CHECK-INTERLEAVE1-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP20]], align 2 265; CHECK-INTERLEAVE1-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP21]], align 2 266; CHECK-INTERLEAVE1-NEXT: [[TMP38:%.*]] = load i16, ptr [[TMP22]], align 2 267; CHECK-INTERLEAVE1-NEXT: [[TMP41:%.*]] = load i16, ptr [[TMP23]], align 2 268; CHECK-INTERLEAVE1-NEXT: [[TMP42:%.*]] = load i16, ptr [[TMP24]], align 2 269; CHECK-INTERLEAVE1-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP25]], align 2 270; CHECK-INTERLEAVE1-NEXT: [[TMP44:%.*]] = load i16, ptr [[TMP26]], align 2 271; CHECK-INTERLEAVE1-NEXT: [[TMP45:%.*]] = load i16, ptr [[TMP27]], align 2 272; CHECK-INTERLEAVE1-NEXT: [[TMP46:%.*]] = load i16, ptr [[TMP28]], align 2 273; CHECK-INTERLEAVE1-NEXT: [[TMP55:%.*]] = load i16, ptr [[TMP29]], align 2 274; CHECK-INTERLEAVE1-NEXT: [[TMP56:%.*]] = load i16, ptr [[TMP30]], align 2 275; CHECK-INTERLEAVE1-NEXT: [[TMP47:%.*]] = load i16, ptr [[TMP31]], align 2 276; CHECK-INTERLEAVE1-NEXT: [[TMP48:%.*]] = load i16, ptr [[TMP32]], align 2 277; CHECK-INTERLEAVE1-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP33]], align 2 278; CHECK-INTERLEAVE1-NEXT: [[TMP50:%.*]] = load i16, ptr [[TMP34]], align 2 279; CHECK-INTERLEAVE1-NEXT: [[TMP51:%.*]] = insertelement <16 x i16> poison, i16 [[TMP35]], i32 0 280; CHECK-INTERLEAVE1-NEXT: [[TMP52:%.*]] = insertelement <16 x i16> [[TMP51]], i16 [[TMP36]], i32 1 281; CHECK-INTERLEAVE1-NEXT: [[TMP53:%.*]] = insertelement <16 x i16> [[TMP52]], i16 [[TMP37]], i32 2 282; CHECK-INTERLEAVE1-NEXT: [[TMP54:%.*]] = insertelement <16 x i16> [[TMP53]], i16 [[TMP38]], i32 3 283; CHECK-INTERLEAVE1-NEXT: [[TMP57:%.*]] = insertelement <16 x i16> [[TMP54]], i16 [[TMP41]], i32 4 284; CHECK-INTERLEAVE1-NEXT: [[TMP58:%.*]] = insertelement <16 x i16> [[TMP57]], i16 [[TMP42]], i32 5 285; CHECK-INTERLEAVE1-NEXT: [[TMP59:%.*]] = insertelement <16 x i16> [[TMP58]], i16 [[TMP43]], i32 6 286; CHECK-INTERLEAVE1-NEXT: [[TMP60:%.*]] = insertelement <16 x i16> [[TMP59]], i16 [[TMP44]], i32 7 287; CHECK-INTERLEAVE1-NEXT: [[TMP61:%.*]] = insertelement <16 x i16> [[TMP60]], i16 [[TMP45]], i32 8 288; CHECK-INTERLEAVE1-NEXT: [[TMP96:%.*]] = insertelement <16 x i16> [[TMP61]], i16 [[TMP46]], i32 9 289; CHECK-INTERLEAVE1-NEXT: [[TMP97:%.*]] = insertelement <16 x i16> [[TMP96]], i16 [[TMP55]], i32 10 290; CHECK-INTERLEAVE1-NEXT: [[TMP62:%.*]] = insertelement <16 x i16> [[TMP97]], i16 [[TMP56]], i32 11 291; CHECK-INTERLEAVE1-NEXT: [[TMP63:%.*]] = insertelement <16 x i16> [[TMP62]], i16 [[TMP47]], i32 12 292; CHECK-INTERLEAVE1-NEXT: [[TMP64:%.*]] = insertelement <16 x i16> [[TMP63]], i16 [[TMP48]], i32 13 293; CHECK-INTERLEAVE1-NEXT: [[TMP65:%.*]] = insertelement <16 x i16> [[TMP64]], i16 [[TMP49]], i32 14 294; CHECK-INTERLEAVE1-NEXT: [[TMP66:%.*]] = insertelement <16 x i16> [[TMP65]], i16 [[TMP50]], i32 15 295; CHECK-INTERLEAVE1-NEXT: [[TMP67:%.*]] = zext <16 x i16> [[TMP66]] to <16 x i32> 296; CHECK-INTERLEAVE1-NEXT: [[TMP68:%.*]] = mul <16 x i32> [[TMP67]], [[TMP18]] 297; CHECK-INTERLEAVE1-NEXT: [[TMP69]] = add <16 x i32> [[TMP68]], [[VEC_PHI]] 298; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 299; CHECK-INTERLEAVE1-NEXT: [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 300; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP70]], label [[VEC_EPILOG_ITER_CHECK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 301; CHECK-INTERLEAVE1: middle.block: 302; CHECK-INTERLEAVE1-NEXT: [[TMP71:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]]) 303; CHECK-INTERLEAVE1-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] 304; CHECK-INTERLEAVE1: scalar.ph: 305; CHECK-INTERLEAVE1-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ] 306; CHECK-INTERLEAVE1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP71]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] 307; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]] 308; CHECK-INTERLEAVE1: for.body: 309; CHECK-INTERLEAVE1-NEXT: [[IV:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 310; CHECK-INTERLEAVE1-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 311; CHECK-INTERLEAVE1-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] 312; CHECK-INTERLEAVE1-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 313; CHECK-INTERLEAVE1-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32 314; CHECK-INTERLEAVE1-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]] 315; CHECK-INTERLEAVE1-NEXT: [[LOAD_B:%.*]] = load i16, ptr [[GEP_B]], align 2 316; CHECK-INTERLEAVE1-NEXT: [[EXT_B:%.*]] = zext i16 [[LOAD_B]] to i32 317; CHECK-INTERLEAVE1-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]] 318; CHECK-INTERLEAVE1-NEXT: [[ADD]] = add i32 [[MUL]], [[ACCUM]] 319; CHECK-INTERLEAVE1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 320; CHECK-INTERLEAVE1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 321; CHECK-INTERLEAVE1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 322; CHECK-INTERLEAVE1: for.exit: 323; CHECK-INTERLEAVE1-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP71]], [[VEC_EPILOG_ITER_CHECK]] ] 324; CHECK-INTERLEAVE1-NEXT: ret i32 [[ADD_LCSSA]] 325; 326; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_different_types( 327; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 328; CHECK-INTERLEAVED-NEXT: entry: 329; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 330; CHECK-INTERLEAVED: vector.ph: 331; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] 332; CHECK-INTERLEAVED: vector.body: 333; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 334; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP137:%.*]], [[VECTOR_BODY]] ] 335; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP138:%.*]], [[VECTOR_BODY]] ] 336; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 337; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 338; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 339; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 340; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 341; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 342; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 343; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 344; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8 345; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9 346; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10 347; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11 348; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12 349; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 350; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 351; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 352; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 16 353; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 17 354; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 18 355; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 19 356; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 20 357; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 21 358; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 22 359; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 23 360; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], 24 361; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], 25 362; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = add i64 [[INDEX]], 26 363; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 27 364; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = add i64 [[INDEX]], 28 365; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29 366; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 367; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 368; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] 369; CHECK-INTERLEAVED-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[TMP32]], i32 0 370; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i32 16 371; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP33]], align 1 372; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP34]], align 1 373; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> 374; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> 375; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] 376; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] 377; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] 378; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] 379; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]] 380; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] 381; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] 382; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] 383; CHECK-INTERLEAVED-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] 384; CHECK-INTERLEAVED-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] 385; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]] 386; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] 387; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]] 388; CHECK-INTERLEAVED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]] 389; CHECK-INTERLEAVED-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]] 390; CHECK-INTERLEAVED-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]] 391; CHECK-INTERLEAVED-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP16]] 392; CHECK-INTERLEAVED-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP17]] 393; CHECK-INTERLEAVED-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP18]] 394; CHECK-INTERLEAVED-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP19]] 395; CHECK-INTERLEAVED-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP20]] 396; CHECK-INTERLEAVED-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP21]] 397; CHECK-INTERLEAVED-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP22]] 398; CHECK-INTERLEAVED-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP23]] 399; CHECK-INTERLEAVED-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP24]] 400; CHECK-INTERLEAVED-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP25]] 401; CHECK-INTERLEAVED-NEXT: [[TMP65:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP26]] 402; CHECK-INTERLEAVED-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP27]] 403; CHECK-INTERLEAVED-NEXT: [[TMP67:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP28]] 404; CHECK-INTERLEAVED-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP29]] 405; CHECK-INTERLEAVED-NEXT: [[TMP139:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP30]] 406; CHECK-INTERLEAVED-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP31]] 407; CHECK-INTERLEAVED-NEXT: [[TMP69:%.*]] = load i16, ptr [[TMP39]], align 2 408; CHECK-INTERLEAVED-NEXT: [[TMP70:%.*]] = load i16, ptr [[TMP40]], align 2 409; CHECK-INTERLEAVED-NEXT: [[TMP71:%.*]] = load i16, ptr [[TMP41]], align 2 410; CHECK-INTERLEAVED-NEXT: [[TMP72:%.*]] = load i16, ptr [[TMP42]], align 2 411; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = load i16, ptr [[TMP43]], align 2 412; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = load i16, ptr [[TMP44]], align 2 413; CHECK-INTERLEAVED-NEXT: [[TMP75:%.*]] = load i16, ptr [[TMP45]], align 2 414; CHECK-INTERLEAVED-NEXT: [[TMP76:%.*]] = load i16, ptr [[TMP46]], align 2 415; CHECK-INTERLEAVED-NEXT: [[TMP77:%.*]] = load i16, ptr [[TMP47]], align 2 416; CHECK-INTERLEAVED-NEXT: [[TMP78:%.*]] = load i16, ptr [[TMP48]], align 2 417; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = load i16, ptr [[TMP49]], align 2 418; CHECK-INTERLEAVED-NEXT: [[TMP80:%.*]] = load i16, ptr [[TMP50]], align 2 419; CHECK-INTERLEAVED-NEXT: [[TMP81:%.*]] = load i16, ptr [[TMP51]], align 2 420; CHECK-INTERLEAVED-NEXT: [[TMP82:%.*]] = load i16, ptr [[TMP52]], align 2 421; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = load i16, ptr [[TMP53]], align 2 422; CHECK-INTERLEAVED-NEXT: [[TMP84:%.*]] = load i16, ptr [[TMP54]], align 2 423; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = insertelement <16 x i16> poison, i16 [[TMP69]], i32 0 424; CHECK-INTERLEAVED-NEXT: [[TMP86:%.*]] = insertelement <16 x i16> [[TMP85]], i16 [[TMP70]], i32 1 425; CHECK-INTERLEAVED-NEXT: [[TMP87:%.*]] = insertelement <16 x i16> [[TMP86]], i16 [[TMP71]], i32 2 426; CHECK-INTERLEAVED-NEXT: [[TMP88:%.*]] = insertelement <16 x i16> [[TMP87]], i16 [[TMP72]], i32 3 427; CHECK-INTERLEAVED-NEXT: [[TMP89:%.*]] = insertelement <16 x i16> [[TMP88]], i16 [[TMP73]], i32 4 428; CHECK-INTERLEAVED-NEXT: [[TMP90:%.*]] = insertelement <16 x i16> [[TMP89]], i16 [[TMP74]], i32 5 429; CHECK-INTERLEAVED-NEXT: [[TMP91:%.*]] = insertelement <16 x i16> [[TMP90]], i16 [[TMP75]], i32 6 430; CHECK-INTERLEAVED-NEXT: [[TMP92:%.*]] = insertelement <16 x i16> [[TMP91]], i16 [[TMP76]], i32 7 431; CHECK-INTERLEAVED-NEXT: [[TMP93:%.*]] = insertelement <16 x i16> [[TMP92]], i16 [[TMP77]], i32 8 432; CHECK-INTERLEAVED-NEXT: [[TMP94:%.*]] = insertelement <16 x i16> [[TMP93]], i16 [[TMP78]], i32 9 433; CHECK-INTERLEAVED-NEXT: [[TMP95:%.*]] = insertelement <16 x i16> [[TMP94]], i16 [[TMP79]], i32 10 434; CHECK-INTERLEAVED-NEXT: [[TMP96:%.*]] = insertelement <16 x i16> [[TMP95]], i16 [[TMP80]], i32 11 435; CHECK-INTERLEAVED-NEXT: [[TMP97:%.*]] = insertelement <16 x i16> [[TMP96]], i16 [[TMP81]], i32 12 436; CHECK-INTERLEAVED-NEXT: [[TMP98:%.*]] = insertelement <16 x i16> [[TMP97]], i16 [[TMP82]], i32 13 437; CHECK-INTERLEAVED-NEXT: [[TMP99:%.*]] = insertelement <16 x i16> [[TMP98]], i16 [[TMP83]], i32 14 438; CHECK-INTERLEAVED-NEXT: [[TMP100:%.*]] = insertelement <16 x i16> [[TMP99]], i16 [[TMP84]], i32 15 439; CHECK-INTERLEAVED-NEXT: [[TMP101:%.*]] = load i16, ptr [[TMP55]], align 2 440; CHECK-INTERLEAVED-NEXT: [[TMP102:%.*]] = load i16, ptr [[TMP56]], align 2 441; CHECK-INTERLEAVED-NEXT: [[TMP103:%.*]] = load i16, ptr [[TMP57]], align 2 442; CHECK-INTERLEAVED-NEXT: [[TMP104:%.*]] = load i16, ptr [[TMP58]], align 2 443; CHECK-INTERLEAVED-NEXT: [[TMP105:%.*]] = load i16, ptr [[TMP59]], align 2 444; CHECK-INTERLEAVED-NEXT: [[TMP106:%.*]] = load i16, ptr [[TMP60]], align 2 445; CHECK-INTERLEAVED-NEXT: [[TMP107:%.*]] = load i16, ptr [[TMP61]], align 2 446; CHECK-INTERLEAVED-NEXT: [[TMP108:%.*]] = load i16, ptr [[TMP62]], align 2 447; CHECK-INTERLEAVED-NEXT: [[TMP109:%.*]] = load i16, ptr [[TMP63]], align 2 448; CHECK-INTERLEAVED-NEXT: [[TMP110:%.*]] = load i16, ptr [[TMP64]], align 2 449; CHECK-INTERLEAVED-NEXT: [[TMP111:%.*]] = load i16, ptr [[TMP65]], align 2 450; CHECK-INTERLEAVED-NEXT: [[TMP112:%.*]] = load i16, ptr [[TMP66]], align 2 451; CHECK-INTERLEAVED-NEXT: [[TMP113:%.*]] = load i16, ptr [[TMP67]], align 2 452; CHECK-INTERLEAVED-NEXT: [[TMP114:%.*]] = load i16, ptr [[TMP68]], align 2 453; CHECK-INTERLEAVED-NEXT: [[TMP115:%.*]] = load i16, ptr [[TMP139]], align 2 454; CHECK-INTERLEAVED-NEXT: [[TMP116:%.*]] = load i16, ptr [[TMP140]], align 2 455; CHECK-INTERLEAVED-NEXT: [[TMP117:%.*]] = insertelement <16 x i16> poison, i16 [[TMP101]], i32 0 456; CHECK-INTERLEAVED-NEXT: [[TMP118:%.*]] = insertelement <16 x i16> [[TMP117]], i16 [[TMP102]], i32 1 457; CHECK-INTERLEAVED-NEXT: [[TMP119:%.*]] = insertelement <16 x i16> [[TMP118]], i16 [[TMP103]], i32 2 458; CHECK-INTERLEAVED-NEXT: [[TMP120:%.*]] = insertelement <16 x i16> [[TMP119]], i16 [[TMP104]], i32 3 459; CHECK-INTERLEAVED-NEXT: [[TMP121:%.*]] = insertelement <16 x i16> [[TMP120]], i16 [[TMP105]], i32 4 460; CHECK-INTERLEAVED-NEXT: [[TMP122:%.*]] = insertelement <16 x i16> [[TMP121]], i16 [[TMP106]], i32 5 461; CHECK-INTERLEAVED-NEXT: [[TMP123:%.*]] = insertelement <16 x i16> [[TMP122]], i16 [[TMP107]], i32 6 462; CHECK-INTERLEAVED-NEXT: [[TMP124:%.*]] = insertelement <16 x i16> [[TMP123]], i16 [[TMP108]], i32 7 463; CHECK-INTERLEAVED-NEXT: [[TMP125:%.*]] = insertelement <16 x i16> [[TMP124]], i16 [[TMP109]], i32 8 464; CHECK-INTERLEAVED-NEXT: [[TMP126:%.*]] = insertelement <16 x i16> [[TMP125]], i16 [[TMP110]], i32 9 465; CHECK-INTERLEAVED-NEXT: [[TMP127:%.*]] = insertelement <16 x i16> [[TMP126]], i16 [[TMP111]], i32 10 466; CHECK-INTERLEAVED-NEXT: [[TMP128:%.*]] = insertelement <16 x i16> [[TMP127]], i16 [[TMP112]], i32 11 467; CHECK-INTERLEAVED-NEXT: [[TMP129:%.*]] = insertelement <16 x i16> [[TMP128]], i16 [[TMP113]], i32 12 468; CHECK-INTERLEAVED-NEXT: [[TMP130:%.*]] = insertelement <16 x i16> [[TMP129]], i16 [[TMP114]], i32 13 469; CHECK-INTERLEAVED-NEXT: [[TMP131:%.*]] = insertelement <16 x i16> [[TMP130]], i16 [[TMP115]], i32 14 470; CHECK-INTERLEAVED-NEXT: [[TMP132:%.*]] = insertelement <16 x i16> [[TMP131]], i16 [[TMP116]], i32 15 471; CHECK-INTERLEAVED-NEXT: [[TMP133:%.*]] = zext <16 x i16> [[TMP100]] to <16 x i32> 472; CHECK-INTERLEAVED-NEXT: [[TMP134:%.*]] = zext <16 x i16> [[TMP132]] to <16 x i32> 473; CHECK-INTERLEAVED-NEXT: [[TMP135:%.*]] = mul <16 x i32> [[TMP133]], [[TMP35]] 474; CHECK-INTERLEAVED-NEXT: [[TMP136:%.*]] = mul <16 x i32> [[TMP134]], [[TMP36]] 475; CHECK-INTERLEAVED-NEXT: [[TMP137]] = add <16 x i32> [[TMP135]], [[VEC_PHI]] 476; CHECK-INTERLEAVED-NEXT: [[TMP138]] = add <16 x i32> [[TMP136]], [[VEC_PHI1]] 477; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 478; CHECK-INTERLEAVED-NEXT: [[TMP141:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 479; CHECK-INTERLEAVED-NEXT: br i1 [[TMP141]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 480; CHECK-INTERLEAVED: middle.block: 481; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP138]], [[TMP137]] 482; CHECK-INTERLEAVED-NEXT: [[TMP142:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]]) 483; CHECK-INTERLEAVED-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] 484; CHECK-INTERLEAVED: scalar.ph: 485; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 486; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP142]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 487; CHECK-INTERLEAVED-NEXT: br label [[FOR_BODY:%.*]] 488; CHECK-INTERLEAVED: for.body: 489; CHECK-INTERLEAVED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 490; CHECK-INTERLEAVED-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 491; CHECK-INTERLEAVED-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] 492; CHECK-INTERLEAVED-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 493; CHECK-INTERLEAVED-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32 494; CHECK-INTERLEAVED-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]] 495; CHECK-INTERLEAVED-NEXT: [[LOAD_B:%.*]] = load i16, ptr [[GEP_B]], align 2 496; CHECK-INTERLEAVED-NEXT: [[EXT_B:%.*]] = zext i16 [[LOAD_B]] to i32 497; CHECK-INTERLEAVED-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]] 498; CHECK-INTERLEAVED-NEXT: [[ADD]] = add i32 [[MUL]], [[ACCUM]] 499; CHECK-INTERLEAVED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 500; CHECK-INTERLEAVED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 501; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 502; CHECK-INTERLEAVED: for.exit: 503; CHECK-INTERLEAVED-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP142]], [[MIDDLE_BLOCK]] ] 504; CHECK-INTERLEAVED-NEXT: ret i32 [[ADD_LCSSA]] 505; 506; CHECK-MAXBW-LABEL: define i32 @not_dotp_different_types( 507; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 508; CHECK-MAXBW-NEXT: entry: 509; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 510; CHECK-MAXBW: vector.ph: 511; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] 512; CHECK-MAXBW: vector.body: 513; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 514; CHECK-MAXBW-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP138:%.*]], [[VECTOR_BODY]] ] 515; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 516; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 517; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 518; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 519; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 520; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 521; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 522; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 523; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8 524; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9 525; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10 526; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11 527; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12 528; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 529; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 530; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 531; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] 532; CHECK-MAXBW-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[TMP32]], i32 0 533; CHECK-MAXBW-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP33]], align 1 534; CHECK-MAXBW-NEXT: [[TMP36:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> 535; CHECK-MAXBW-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] 536; CHECK-MAXBW-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] 537; CHECK-MAXBW-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] 538; CHECK-MAXBW-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] 539; CHECK-MAXBW-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]] 540; CHECK-MAXBW-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] 541; CHECK-MAXBW-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] 542; CHECK-MAXBW-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] 543; CHECK-MAXBW-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] 544; CHECK-MAXBW-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] 545; CHECK-MAXBW-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]] 546; CHECK-MAXBW-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] 547; CHECK-MAXBW-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]] 548; CHECK-MAXBW-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]] 549; CHECK-MAXBW-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]] 550; CHECK-MAXBW-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]] 551; CHECK-MAXBW-NEXT: [[TMP101:%.*]] = load i16, ptr [[TMP37]], align 2 552; CHECK-MAXBW-NEXT: [[TMP102:%.*]] = load i16, ptr [[TMP38]], align 2 553; CHECK-MAXBW-NEXT: [[TMP103:%.*]] = load i16, ptr [[TMP39]], align 2 554; CHECK-MAXBW-NEXT: [[TMP104:%.*]] = load i16, ptr [[TMP40]], align 2 555; CHECK-MAXBW-NEXT: [[TMP105:%.*]] = load i16, ptr [[TMP41]], align 2 556; CHECK-MAXBW-NEXT: [[TMP106:%.*]] = load i16, ptr [[TMP42]], align 2 557; CHECK-MAXBW-NEXT: [[TMP107:%.*]] = load i16, ptr [[TMP43]], align 2 558; CHECK-MAXBW-NEXT: [[TMP108:%.*]] = load i16, ptr [[TMP44]], align 2 559; CHECK-MAXBW-NEXT: [[TMP109:%.*]] = load i16, ptr [[TMP45]], align 2 560; CHECK-MAXBW-NEXT: [[TMP110:%.*]] = load i16, ptr [[TMP46]], align 2 561; CHECK-MAXBW-NEXT: [[TMP111:%.*]] = load i16, ptr [[TMP47]], align 2 562; CHECK-MAXBW-NEXT: [[TMP112:%.*]] = load i16, ptr [[TMP48]], align 2 563; CHECK-MAXBW-NEXT: [[TMP113:%.*]] = load i16, ptr [[TMP49]], align 2 564; CHECK-MAXBW-NEXT: [[TMP114:%.*]] = load i16, ptr [[TMP50]], align 2 565; CHECK-MAXBW-NEXT: [[TMP115:%.*]] = load i16, ptr [[TMP51]], align 2 566; CHECK-MAXBW-NEXT: [[TMP116:%.*]] = load i16, ptr [[TMP52]], align 2 567; CHECK-MAXBW-NEXT: [[TMP117:%.*]] = insertelement <16 x i16> poison, i16 [[TMP101]], i32 0 568; CHECK-MAXBW-NEXT: [[TMP118:%.*]] = insertelement <16 x i16> [[TMP117]], i16 [[TMP102]], i32 1 569; CHECK-MAXBW-NEXT: [[TMP119:%.*]] = insertelement <16 x i16> [[TMP118]], i16 [[TMP103]], i32 2 570; CHECK-MAXBW-NEXT: [[TMP120:%.*]] = insertelement <16 x i16> [[TMP119]], i16 [[TMP104]], i32 3 571; CHECK-MAXBW-NEXT: [[TMP121:%.*]] = insertelement <16 x i16> [[TMP120]], i16 [[TMP105]], i32 4 572; CHECK-MAXBW-NEXT: [[TMP122:%.*]] = insertelement <16 x i16> [[TMP121]], i16 [[TMP106]], i32 5 573; CHECK-MAXBW-NEXT: [[TMP123:%.*]] = insertelement <16 x i16> [[TMP122]], i16 [[TMP107]], i32 6 574; CHECK-MAXBW-NEXT: [[TMP124:%.*]] = insertelement <16 x i16> [[TMP123]], i16 [[TMP108]], i32 7 575; CHECK-MAXBW-NEXT: [[TMP125:%.*]] = insertelement <16 x i16> [[TMP124]], i16 [[TMP109]], i32 8 576; CHECK-MAXBW-NEXT: [[TMP126:%.*]] = insertelement <16 x i16> [[TMP125]], i16 [[TMP110]], i32 9 577; CHECK-MAXBW-NEXT: [[TMP127:%.*]] = insertelement <16 x i16> [[TMP126]], i16 [[TMP111]], i32 10 578; CHECK-MAXBW-NEXT: [[TMP128:%.*]] = insertelement <16 x i16> [[TMP127]], i16 [[TMP112]], i32 11 579; CHECK-MAXBW-NEXT: [[TMP129:%.*]] = insertelement <16 x i16> [[TMP128]], i16 [[TMP113]], i32 12 580; CHECK-MAXBW-NEXT: [[TMP130:%.*]] = insertelement <16 x i16> [[TMP129]], i16 [[TMP114]], i32 13 581; CHECK-MAXBW-NEXT: [[TMP131:%.*]] = insertelement <16 x i16> [[TMP130]], i16 [[TMP115]], i32 14 582; CHECK-MAXBW-NEXT: [[TMP132:%.*]] = insertelement <16 x i16> [[TMP131]], i16 [[TMP116]], i32 15 583; CHECK-MAXBW-NEXT: [[TMP134:%.*]] = zext <16 x i16> [[TMP132]] to <16 x i32> 584; CHECK-MAXBW-NEXT: [[TMP136:%.*]] = mul <16 x i32> [[TMP134]], [[TMP36]] 585; CHECK-MAXBW-NEXT: [[TMP138]] = add <16 x i32> [[TMP136]], [[VEC_PHI1]] 586; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 587; CHECK-MAXBW-NEXT: [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 588; CHECK-MAXBW-NEXT: br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 589; CHECK-MAXBW: middle.block: 590; CHECK-MAXBW-NEXT: [[TMP71:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP138]]) 591; CHECK-MAXBW-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] 592; CHECK-MAXBW: scalar.ph: 593; CHECK-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 594; CHECK-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP71]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 595; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]] 596; CHECK-MAXBW: for.body: 597; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 598; CHECK-MAXBW-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 599; CHECK-MAXBW-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] 600; CHECK-MAXBW-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 601; CHECK-MAXBW-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32 602; CHECK-MAXBW-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]] 603; CHECK-MAXBW-NEXT: [[LOAD_B:%.*]] = load i16, ptr [[GEP_B]], align 2 604; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = zext i16 [[LOAD_B]] to i32 605; CHECK-MAXBW-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]] 606; CHECK-MAXBW-NEXT: [[ADD]] = add i32 [[MUL]], [[ACCUM]] 607; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 608; CHECK-MAXBW-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 609; CHECK-MAXBW-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 610; CHECK-MAXBW: for.exit: 611; CHECK-MAXBW-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP71]], [[MIDDLE_BLOCK]] ] 612; CHECK-MAXBW-NEXT: ret i32 [[ADD_LCSSA]] 613; 614entry: 615 br label %for.body 616 617for.body: ; preds = %for.body, %entry 618 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 619 %accum = phi i32 [ 0, %entry ], [ %add, %for.body ] 620 %gep.a = getelementptr i8, ptr %a, i64 %iv 621 %load.a = load i8, ptr %gep.a, align 1 622 %ext.a = zext i8 %load.a to i32 623 %gep.b = getelementptr i8, ptr %b, i64 %iv 624 %load.b = load i16, ptr %gep.b, align 2 625 %ext.b = zext i16 %load.b to i32 626 %mul = mul i32 %ext.b, %ext.a 627 %add = add i32 %mul, %accum 628 %iv.next = add i64 %iv, 1 629 %exitcond.not = icmp eq i64 %iv.next, 1024 630 br i1 %exitcond.not, label %for.exit, label %for.body 631 632for.exit: ; preds = %for.body 633 ret i32 %add 634} 635 636define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 { 637; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_not_loop_carried( 638; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 639; CHECK-INTERLEAVE1-NEXT: entry: 640; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 641; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 642; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 643; CHECK-INTERLEAVE1: vector.ph: 644; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 645; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 646; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 647; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 648; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 649; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 650; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() 651; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 8 652; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 653; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 0, i32 [[TMP8]] 654; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] 655; CHECK-INTERLEAVE1: vector.body: 656; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 657; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] 658; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 659; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]] 660; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i32 0 661; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP11]], align 1 662; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32> 663; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] 664; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0 665; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP14]], align 1 666; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32> 667; CHECK-INTERLEAVE1-NEXT: [[TMP16]] = mul <vscale x 8 x i32> [[TMP15]], [[TMP12]] 668; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> [[VECTOR_RECUR]], <vscale x 8 x i32> [[TMP16]], i32 -1) 669; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = add <vscale x 8 x i32> [[TMP16]], [[TMP17]] 670; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 671; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 672; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 673; CHECK-INTERLEAVE1: middle.block: 674; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = call i32 @llvm.vscale.i32() 675; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = mul i32 [[TMP20]], 8 676; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = sub i32 [[TMP21]], 1 677; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = extractelement <vscale x 8 x i32> [[TMP18]], i32 [[TMP22]] 678; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = call i32 @llvm.vscale.i32() 679; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = mul i32 [[TMP24]], 8 680; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = sub i32 [[TMP25]], 1 681; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP16]], i32 [[TMP26]] 682; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 683; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] 684; CHECK-INTERLEAVE1: scalar.ph: 685; CHECK-INTERLEAVE1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 686; CHECK-INTERLEAVE1-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 687; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]] 688; CHECK-INTERLEAVE1: for.body: 689; CHECK-INTERLEAVE1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 690; CHECK-INTERLEAVE1-NEXT: [[ACCUM:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] 691; CHECK-INTERLEAVE1-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] 692; CHECK-INTERLEAVE1-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 693; CHECK-INTERLEAVE1-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32 694; CHECK-INTERLEAVE1-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]] 695; CHECK-INTERLEAVE1-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 696; CHECK-INTERLEAVE1-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32 697; CHECK-INTERLEAVE1-NEXT: [[MUL]] = mul i32 [[EXT_B]], [[EXT_A]] 698; CHECK-INTERLEAVE1-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[ACCUM]] 699; CHECK-INTERLEAVE1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 700; CHECK-INTERLEAVE1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 701; CHECK-INTERLEAVE1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 702; CHECK-INTERLEAVE1: for.exit: 703; CHECK-INTERLEAVE1-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ] 704; CHECK-INTERLEAVE1-NEXT: ret i32 [[ADD_LCSSA]] 705; 706; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_not_loop_carried( 707; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 708; CHECK-INTERLEAVED-NEXT: entry: 709; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 710; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 711; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 712; CHECK-INTERLEAVED: vector.ph: 713; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 714; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16 715; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 716; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 717; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 718; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16 719; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() 720; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 8 721; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 722; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 0, i32 [[TMP8]] 723; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] 724; CHECK-INTERLEAVED: vector.body: 725; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 726; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ] 727; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 728; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]] 729; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i32 0 730; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() 731; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 8 732; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP10]], i64 [[TMP13]] 733; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP11]], align 1 734; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP14]], align 1 735; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32> 736; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32> 737; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] 738; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i32 0 739; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() 740; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8 741; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP17]], i64 [[TMP20]] 742; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr [[TMP18]], align 1 743; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i8>, ptr [[TMP21]], align 1 744; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32> 745; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD3]] to <vscale x 8 x i32> 746; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = mul <vscale x 8 x i32> [[TMP22]], [[TMP15]] 747; CHECK-INTERLEAVED-NEXT: [[TMP25]] = mul <vscale x 8 x i32> [[TMP23]], [[TMP16]] 748; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> [[TMP24]], <vscale x 8 x i32> [[TMP25]], i32 -1) 749; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = add <vscale x 8 x i32> [[TMP25]], [[TMP26]] 750; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 751; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 752; CHECK-INTERLEAVED-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 753; CHECK-INTERLEAVED: middle.block: 754; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = call i32 @llvm.vscale.i32() 755; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 8 756; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = sub i32 [[TMP30]], 1 757; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = extractelement <vscale x 8 x i32> [[TMP27]], i32 [[TMP31]] 758; CHECK-INTERLEAVED-NEXT: [[TMP33:%.*]] = call i32 @llvm.vscale.i32() 759; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = mul i32 [[TMP33]], 8 760; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = sub i32 [[TMP34]], 1 761; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP25]], i32 [[TMP35]] 762; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 763; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] 764; CHECK-INTERLEAVED: scalar.ph: 765; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 766; CHECK-INTERLEAVED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 767; CHECK-INTERLEAVED-NEXT: br label [[FOR_BODY:%.*]] 768; CHECK-INTERLEAVED: for.body: 769; CHECK-INTERLEAVED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 770; CHECK-INTERLEAVED-NEXT: [[ACCUM:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] 771; CHECK-INTERLEAVED-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] 772; CHECK-INTERLEAVED-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 773; CHECK-INTERLEAVED-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32 774; CHECK-INTERLEAVED-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]] 775; CHECK-INTERLEAVED-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 776; CHECK-INTERLEAVED-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32 777; CHECK-INTERLEAVED-NEXT: [[MUL]] = mul i32 [[EXT_B]], [[EXT_A]] 778; CHECK-INTERLEAVED-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[ACCUM]] 779; CHECK-INTERLEAVED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 780; CHECK-INTERLEAVED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 781; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 782; CHECK-INTERLEAVED: for.exit: 783; CHECK-INTERLEAVED-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP32]], [[MIDDLE_BLOCK]] ] 784; CHECK-INTERLEAVED-NEXT: ret i32 [[ADD_LCSSA]] 785; 786; CHECK-MAXBW-LABEL: define i32 @not_dotp_not_loop_carried( 787; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 788; CHECK-MAXBW-NEXT: entry: 789; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 790; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 791; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 792; CHECK-MAXBW: vector.ph: 793; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 794; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 795; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 796; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 797; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 798; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 799; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() 800; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 8 801; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 802; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 0, i32 [[TMP8]] 803; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] 804; CHECK-MAXBW: vector.body: 805; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 806; CHECK-MAXBW-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ] 807; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 808; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]] 809; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i32 0 810; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP11]], align 1 811; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32> 812; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] 813; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i32 0 814; CHECK-MAXBW-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i8>, ptr [[TMP18]], align 1 815; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD3]] to <vscale x 8 x i32> 816; CHECK-MAXBW-NEXT: [[TMP25]] = mul <vscale x 8 x i32> [[TMP23]], [[TMP16]] 817; CHECK-MAXBW-NEXT: [[TMP26:%.*]] = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> [[VECTOR_RECUR]], <vscale x 8 x i32> [[TMP25]], i32 -1) 818; CHECK-MAXBW-NEXT: [[TMP27:%.*]] = add <vscale x 8 x i32> [[TMP25]], [[TMP26]] 819; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 820; CHECK-MAXBW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 821; CHECK-MAXBW-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 822; CHECK-MAXBW: middle.block: 823; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = call i32 @llvm.vscale.i32() 824; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = mul i32 [[TMP20]], 8 825; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = sub i32 [[TMP21]], 1 826; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = extractelement <vscale x 8 x i32> [[TMP27]], i32 [[TMP22]] 827; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = call i32 @llvm.vscale.i32() 828; CHECK-MAXBW-NEXT: [[TMP30:%.*]] = mul i32 [[TMP24]], 8 829; CHECK-MAXBW-NEXT: [[TMP31:%.*]] = sub i32 [[TMP30]], 1 830; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP25]], i32 [[TMP31]] 831; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 832; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] 833; CHECK-MAXBW: scalar.ph: 834; CHECK-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 835; CHECK-MAXBW-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 836; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]] 837; CHECK-MAXBW: for.body: 838; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 839; CHECK-MAXBW-NEXT: [[ACCUM:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] 840; CHECK-MAXBW-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] 841; CHECK-MAXBW-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 842; CHECK-MAXBW-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32 843; CHECK-MAXBW-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]] 844; CHECK-MAXBW-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 845; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32 846; CHECK-MAXBW-NEXT: [[MUL]] = mul i32 [[EXT_B]], [[EXT_A]] 847; CHECK-MAXBW-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[ACCUM]] 848; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 849; CHECK-MAXBW-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 850; CHECK-MAXBW-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 851; CHECK-MAXBW: for.exit: 852; CHECK-MAXBW-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ] 853; CHECK-MAXBW-NEXT: ret i32 [[ADD_LCSSA]] 854; 855entry: 856 br label %for.body 857 858for.body: ; preds = %for.body, %entry 859 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 860 %accum = phi i32 [ 0, %entry ], [ %mul, %for.body ] 861 %gep.a = getelementptr i8, ptr %a, i64 %iv 862 %load.a = load i8, ptr %gep.a, align 1 863 %ext.a = zext i8 %load.a to i32 864 %gep.b = getelementptr i8, ptr %b, i64 %iv 865 %load.b = load i8, ptr %gep.b, align 1 866 %ext.b = zext i8 %load.b to i32 867 %mul = mul i32 %ext.b, %ext.a 868 %add = add i32 %mul, %accum 869 %iv.next = add i64 %iv, 1 870 %exitcond.not = icmp eq i64 %iv.next, 1024 871 br i1 %exitcond.not, label %for.exit, label %for.body 872 873for.exit: ; preds = %for.body 874 ret i32 %add 875} 876 877define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { 878; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_not_phi( 879; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 880; CHECK-INTERLEAVE1-NEXT: entry: 881; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 882; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 883; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 884; CHECK-INTERLEAVE1: vector.ph: 885; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 886; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 887; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 888; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 889; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 890; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 891; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() 892; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 8 893; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 894; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 0, i32 [[TMP8]] 895; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] 896; CHECK-INTERLEAVE1: vector.body: 897; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 898; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] 899; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 900; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]] 901; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i32 0 902; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP11]], align 1 903; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32> 904; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] 905; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0 906; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP14]], align 1 907; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32> 908; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = mul <vscale x 8 x i32> [[TMP15]], [[TMP12]] 909; CHECK-INTERLEAVE1-NEXT: [[TMP17]] = add <vscale x 8 x i32> [[TMP16]], [[TMP15]] 910; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 911; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 912; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 913; CHECK-INTERLEAVE1: middle.block: 914; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32() 915; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = mul i32 [[TMP19]], 8 916; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = sub i32 [[TMP20]], 1 917; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = extractelement <vscale x 8 x i32> [[TMP17]], i32 [[TMP21]] 918; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = call i32 @llvm.vscale.i32() 919; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = mul i32 [[TMP23]], 8 920; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = sub i32 [[TMP24]], 1 921; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP17]], i32 [[TMP25]] 922; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 923; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] 924; CHECK-INTERLEAVE1: scalar.ph: 925; CHECK-INTERLEAVE1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 926; CHECK-INTERLEAVE1-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 927; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]] 928; CHECK-INTERLEAVE1: for.body: 929; CHECK-INTERLEAVE1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 930; CHECK-INTERLEAVE1-NEXT: [[ACCUM:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 931; CHECK-INTERLEAVE1-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] 932; CHECK-INTERLEAVE1-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 933; CHECK-INTERLEAVE1-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32 934; CHECK-INTERLEAVE1-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]] 935; CHECK-INTERLEAVE1-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 936; CHECK-INTERLEAVE1-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32 937; CHECK-INTERLEAVE1-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]] 938; CHECK-INTERLEAVE1-NEXT: [[ADD]] = add i32 [[MUL]], [[EXT_B]] 939; CHECK-INTERLEAVE1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 940; CHECK-INTERLEAVE1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 941; CHECK-INTERLEAVE1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 942; CHECK-INTERLEAVE1: for.exit: 943; CHECK-INTERLEAVE1-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] 944; CHECK-INTERLEAVE1-NEXT: ret i32 [[ADD_LCSSA]] 945; 946; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_not_phi( 947; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 948; CHECK-INTERLEAVED-NEXT: entry: 949; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 950; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 951; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 952; CHECK-INTERLEAVED: vector.ph: 953; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 954; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16 955; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 956; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 957; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 958; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16 959; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() 960; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 8 961; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 962; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 0, i32 [[TMP8]] 963; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] 964; CHECK-INTERLEAVED: vector.body: 965; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 966; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] 967; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 968; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]] 969; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() 970; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 8 971; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP10]], i64 [[TMP13]] 972; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP14]], align 1 973; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32> 974; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] 975; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() 976; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8 977; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP16]], i64 [[TMP19]] 978; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP20]], align 1 979; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32> 980; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = mul <vscale x 8 x i32> [[TMP22]], [[TMP15]] 981; CHECK-INTERLEAVED-NEXT: [[TMP21]] = add <vscale x 8 x i32> [[TMP30]], [[TMP22]] 982; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 983; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 984; CHECK-INTERLEAVED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 985; CHECK-INTERLEAVED: middle.block: 986; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = call i32 @llvm.vscale.i32() 987; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = mul i32 [[TMP23]], 8 988; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = sub i32 [[TMP31]], 1 989; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = extractelement <vscale x 8 x i32> [[TMP21]], i32 [[TMP25]] 990; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = call i32 @llvm.vscale.i32() 991; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = mul i32 [[TMP27]], 8 992; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = sub i32 [[TMP28]], 1 993; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP21]], i32 [[TMP29]] 994; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 995; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] 996; CHECK-INTERLEAVED: scalar.ph: 997; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 998; CHECK-INTERLEAVED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 999; CHECK-INTERLEAVED-NEXT: br label [[FOR_BODY:%.*]] 1000; CHECK-INTERLEAVED: for.body: 1001; CHECK-INTERLEAVED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1002; CHECK-INTERLEAVED-NEXT: [[ACCUM:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 1003; CHECK-INTERLEAVED-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] 1004; CHECK-INTERLEAVED-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 1005; CHECK-INTERLEAVED-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32 1006; CHECK-INTERLEAVED-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]] 1007; CHECK-INTERLEAVED-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 1008; CHECK-INTERLEAVED-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32 1009; CHECK-INTERLEAVED-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]] 1010; CHECK-INTERLEAVED-NEXT: [[ADD]] = add i32 [[MUL]], [[EXT_B]] 1011; CHECK-INTERLEAVED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 1012; CHECK-INTERLEAVED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 1013; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 1014; CHECK-INTERLEAVED: for.exit: 1015; CHECK-INTERLEAVED-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ] 1016; CHECK-INTERLEAVED-NEXT: ret i32 [[ADD_LCSSA]] 1017; 1018; CHECK-MAXBW-LABEL: define i32 @not_dotp_not_phi( 1019; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 1020; CHECK-MAXBW-NEXT: entry: 1021; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1022; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 1023; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1024; CHECK-MAXBW: vector.ph: 1025; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1026; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 1027; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 1028; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 1029; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1030; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 1031; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() 1032; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 8 1033; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 1034; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 0, i32 [[TMP8]] 1035; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] 1036; CHECK-MAXBW: vector.body: 1037; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1038; CHECK-MAXBW-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] 1039; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 1040; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]] 1041; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP10]], i32 0 1042; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP13]], align 1 1043; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32> 1044; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] 1045; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP15]], i32 0 1046; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP18]], align 1 1047; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32> 1048; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = mul <vscale x 8 x i32> [[TMP19]], [[TMP14]] 1049; CHECK-MAXBW-NEXT: [[TMP21]] = add <vscale x 8 x i32> [[TMP20]], [[TMP19]] 1050; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1051; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1052; CHECK-MAXBW-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 1053; CHECK-MAXBW: middle.block: 1054; CHECK-MAXBW-NEXT: [[TMP26:%.*]] = call i32 @llvm.vscale.i32() 1055; CHECK-MAXBW-NEXT: [[TMP27:%.*]] = mul i32 [[TMP26]], 8 1056; CHECK-MAXBW-NEXT: [[TMP28:%.*]] = sub i32 [[TMP27]], 1 1057; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = extractelement <vscale x 8 x i32> [[TMP21]], i32 [[TMP28]] 1058; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = call i32 @llvm.vscale.i32() 1059; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = mul i32 [[TMP23]], 8 1060; CHECK-MAXBW-NEXT: [[TMP25:%.*]] = sub i32 [[TMP24]], 1 1061; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> [[TMP21]], i32 [[TMP25]] 1062; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 1063; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] 1064; CHECK-MAXBW: scalar.ph: 1065; CHECK-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1066; CHECK-MAXBW-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1067; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]] 1068; CHECK-MAXBW: for.body: 1069; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1070; CHECK-MAXBW-NEXT: [[ACCUM:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 1071; CHECK-MAXBW-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] 1072; CHECK-MAXBW-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 1073; CHECK-MAXBW-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32 1074; CHECK-MAXBW-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]] 1075; CHECK-MAXBW-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 1076; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32 1077; CHECK-MAXBW-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]] 1078; CHECK-MAXBW-NEXT: [[ADD]] = add i32 [[MUL]], [[EXT_B]] 1079; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 1080; CHECK-MAXBW-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 1081; CHECK-MAXBW-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 1082; CHECK-MAXBW: for.exit: 1083; CHECK-MAXBW-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ] 1084; CHECK-MAXBW-NEXT: ret i32 [[ADD_LCSSA]] 1085; 1086entry: 1087 br label %for.body 1088 1089for.body: ; preds = %for.body, %entry 1090 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1091 %accum = phi i32 [ 0, %entry ], [ %add, %for.body ] 1092 %gep.a = getelementptr i8, ptr %a, i64 %iv 1093 %load.a = load i8, ptr %gep.a, align 1 1094 %ext.a = zext i8 %load.a to i32 1095 %gep.b = getelementptr i8, ptr %b, i64 %iv 1096 %load.b = load i8, ptr %gep.b, align 1 1097 %ext.b = zext i8 %load.b to i32 1098 %mul = mul i32 %ext.b, %ext.a 1099 %add = add i32 %mul, %ext.b 1100 %iv.next = add i64 %iv, 1 1101 %exitcond.not = icmp eq i64 %iv.next, 1024 1102 br i1 %exitcond.not, label %for.exit, label %for.body 1103 1104for.exit: ; preds = %for.body 1105 ret i32 %add 1106} 1107 1108define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { 1109; CHECK-INTERLEAVE1-LABEL: define i32 @dotp_unrolled( 1110; CHECK-INTERLEAVE1-SAME: i32 [[NUM_OUT:%.*]], i64 [[NUM_IN:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 1111; CHECK-INTERLEAVE1-NEXT: entry: 1112; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() 1113; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = mul i64 [[TMP13]], 4 1114; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM_IN]], [[TMP15]] 1115; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1116; CHECK-INTERLEAVE1: vector.ph: 1117; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() 1118; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = mul i64 [[TMP16]], 4 1119; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[NUM_IN]], [[TMP18]] 1120; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[NUM_IN]], [[N_MOD_VF]] 1121; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() 1122; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = mul i64 [[TMP20]], 4 1123; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] 1124; CHECK-INTERLEAVE1: vector.body: 1125; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1126; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP41:%.*]], [[VECTOR_BODY]] ] 1127; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ] 1128; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ] 1129; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] 1130; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1131; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]] 1132; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]] 1133; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1 1134; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]] 1135; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]] 1136; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = or disjoint i64 [[TMP0]], 2 1137; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]] 1138; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]] 1139; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = or disjoint i64 [[TMP0]], 3 1140; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]] 1141; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]] 1142; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 1143; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP12]], align 1 1144; CHECK-INTERLEAVE1-NEXT: [[TMP36:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32> 1145; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 1146; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x i8>, ptr [[TMP14]], align 1 1147; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD4]] to <vscale x 4 x i32> 1148; CHECK-INTERLEAVE1-NEXT: [[TMP38:%.*]] = mul nsw <vscale x 4 x i32> [[TMP21]], [[TMP36]] 1149; CHECK-INTERLEAVE1-NEXT: [[TMP23]] = add <vscale x 4 x i32> [[TMP38]], [[VEC_PHI3]] 1150; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 1151; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 4 x i8>, ptr [[TMP17]], align 1 1152; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD5]] to <vscale x 4 x i32> 1153; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 1154; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 4 x i8>, ptr [[TMP19]], align 1 1155; CHECK-INTERLEAVE1-NEXT: [[TMP42:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD6]] to <vscale x 4 x i32> 1156; CHECK-INTERLEAVE1-NEXT: [[TMP28:%.*]] = mul nsw <vscale x 4 x i32> [[TMP25]], [[TMP42]] 1157; CHECK-INTERLEAVE1-NEXT: [[TMP30]] = add <vscale x 4 x i32> [[TMP28]], [[VEC_PHI2]] 1158; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 1159; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 4 x i8>, ptr [[TMP22]], align 1 1160; CHECK-INTERLEAVE1-NEXT: [[TMP31:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD7]] to <vscale x 4 x i32> 1161; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 1162; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 4 x i8>, ptr [[TMP24]], align 1 1163; CHECK-INTERLEAVE1-NEXT: [[TMP33:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD8]] to <vscale x 4 x i32> 1164; CHECK-INTERLEAVE1-NEXT: [[TMP34:%.*]] = mul nsw <vscale x 4 x i32> [[TMP31]], [[TMP33]] 1165; CHECK-INTERLEAVE1-NEXT: [[TMP35]] = add <vscale x 4 x i32> [[TMP34]], [[VEC_PHI1]] 1166; CHECK-INTERLEAVE1-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 1167; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 4 x i8>, ptr [[TMP27]], align 1 1168; CHECK-INTERLEAVE1-NEXT: [[TMP37:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD9]] to <vscale x 4 x i32> 1169; CHECK-INTERLEAVE1-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 1170; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 4 x i8>, ptr [[TMP29]], align 1 1171; CHECK-INTERLEAVE1-NEXT: [[TMP39:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD10]] to <vscale x 4 x i32> 1172; CHECK-INTERLEAVE1-NEXT: [[TMP40:%.*]] = mul nsw <vscale x 4 x i32> [[TMP37]], [[TMP39]] 1173; CHECK-INTERLEAVE1-NEXT: [[TMP41]] = add <vscale x 4 x i32> [[TMP40]], [[VEC_PHI]] 1174; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP26]] 1175; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1176; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 1177; CHECK-INTERLEAVE1: middle.block: 1178; CHECK-INTERLEAVE1-NEXT: [[TMP43:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP41]]) 1179; CHECK-INTERLEAVE1-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP35]]) 1180; CHECK-INTERLEAVE1-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP30]]) 1181; CHECK-INTERLEAVE1-NEXT: [[TMP46:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP23]]) 1182; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[NUM_IN]], [[N_VEC]] 1183; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 1184; CHECK-INTERLEAVE1: scalar.ph: 1185; CHECK-INTERLEAVE1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1186; CHECK-INTERLEAVE1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP43]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1187; CHECK-INTERLEAVE1-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i32 [ [[TMP44]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1188; CHECK-INTERLEAVE1-NEXT: [[BC_MERGE_RDX12:%.*]] = phi i32 [ [[TMP45]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1189; CHECK-INTERLEAVE1-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP46]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1190; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]] 1191; CHECK-INTERLEAVE1: for.body: 1192; CHECK-INTERLEAVE1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1193; CHECK-INTERLEAVE1-NEXT: [[ACCUM3:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD_A3:%.*]], [[FOR_BODY]] ] 1194; CHECK-INTERLEAVE1-NEXT: [[ACCUM2:%.*]] = phi i32 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[ADD_A2:%.*]], [[FOR_BODY]] ] 1195; CHECK-INTERLEAVE1-NEXT: [[ACCUM1:%.*]] = phi i32 [ [[BC_MERGE_RDX12]], [[SCALAR_PH]] ], [ [[ADD_A1:%.*]], [[FOR_BODY]] ] 1196; CHECK-INTERLEAVE1-NEXT: [[ACCUM0:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], [[SCALAR_PH]] ], [ [[ADD_A0:%.*]], [[FOR_BODY]] ] 1197; CHECK-INTERLEAVE1-NEXT: [[GEP_A0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] 1198; CHECK-INTERLEAVE1-NEXT: [[GEP_B0:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]] 1199; CHECK-INTERLEAVE1-NEXT: [[OFFSET_1:%.*]] = or disjoint i64 [[IV]], 1 1200; CHECK-INTERLEAVE1-NEXT: [[GEP_A1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_1]] 1201; CHECK-INTERLEAVE1-NEXT: [[GEP_B1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_1]] 1202; CHECK-INTERLEAVE1-NEXT: [[OFFSET_2:%.*]] = or disjoint i64 [[IV]], 2 1203; CHECK-INTERLEAVE1-NEXT: [[GEP_A2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_2]] 1204; CHECK-INTERLEAVE1-NEXT: [[GEP_B2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_2]] 1205; CHECK-INTERLEAVE1-NEXT: [[OFFSET_3:%.*]] = or disjoint i64 [[IV]], 3 1206; CHECK-INTERLEAVE1-NEXT: [[GEP_A3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_3]] 1207; CHECK-INTERLEAVE1-NEXT: [[GEP_B3:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_3]] 1208; CHECK-INTERLEAVE1-NEXT: [[LOAD_A0:%.*]] = load i8, ptr [[GEP_A0]], align 1 1209; CHECK-INTERLEAVE1-NEXT: [[EXT_A0:%.*]] = sext i8 [[LOAD_A0]] to i32 1210; CHECK-INTERLEAVE1-NEXT: [[LOAD_B0:%.*]] = load i8, ptr [[GEP_B0]], align 1 1211; CHECK-INTERLEAVE1-NEXT: [[EXT_B0:%.*]] = sext i8 [[LOAD_B0]] to i32 1212; CHECK-INTERLEAVE1-NEXT: [[MUL_A0:%.*]] = mul nsw i32 [[EXT_B0]], [[EXT_A0]] 1213; CHECK-INTERLEAVE1-NEXT: [[ADD_A0]] = add nsw i32 [[MUL_A0]], [[ACCUM0]] 1214; CHECK-INTERLEAVE1-NEXT: [[LOAD_A1:%.*]] = load i8, ptr [[GEP_A1]], align 1 1215; CHECK-INTERLEAVE1-NEXT: [[EXT_A1:%.*]] = sext i8 [[LOAD_A1]] to i32 1216; CHECK-INTERLEAVE1-NEXT: [[LOAD_B1:%.*]] = load i8, ptr [[GEP_B1]], align 1 1217; CHECK-INTERLEAVE1-NEXT: [[EXT_B1:%.*]] = sext i8 [[LOAD_B1]] to i32 1218; CHECK-INTERLEAVE1-NEXT: [[MUL_A1:%.*]] = mul nsw i32 [[EXT_A1]], [[EXT_B1]] 1219; CHECK-INTERLEAVE1-NEXT: [[ADD_A1]] = add nsw i32 [[MUL_A1]], [[ACCUM1]] 1220; CHECK-INTERLEAVE1-NEXT: [[LOAD_A2:%.*]] = load i8, ptr [[GEP_A2]], align 1 1221; CHECK-INTERLEAVE1-NEXT: [[EXT_A2:%.*]] = sext i8 [[LOAD_A2]] to i32 1222; CHECK-INTERLEAVE1-NEXT: [[LOAD_B2:%.*]] = load i8, ptr [[GEP_B2]], align 1 1223; CHECK-INTERLEAVE1-NEXT: [[EXT_B2:%.*]] = sext i8 [[LOAD_B2]] to i32 1224; CHECK-INTERLEAVE1-NEXT: [[MUL_A2:%.*]] = mul nsw i32 [[EXT_A2]], [[EXT_B2]] 1225; CHECK-INTERLEAVE1-NEXT: [[ADD_A2]] = add nsw i32 [[MUL_A2]], [[ACCUM2]] 1226; CHECK-INTERLEAVE1-NEXT: [[LOAD_A3:%.*]] = load i8, ptr [[GEP_A3]], align 1 1227; CHECK-INTERLEAVE1-NEXT: [[EXT_A3:%.*]] = sext i8 [[LOAD_A3]] to i32 1228; CHECK-INTERLEAVE1-NEXT: [[LOAD_B3:%.*]] = load i8, ptr [[GEP_B3]], align 1 1229; CHECK-INTERLEAVE1-NEXT: [[EXT_B3:%.*]] = sext i8 [[LOAD_B3]] to i32 1230; CHECK-INTERLEAVE1-NEXT: [[MUL_A3:%.*]] = mul nsw i32 [[EXT_A3]], [[EXT_B3]] 1231; CHECK-INTERLEAVE1-NEXT: [[ADD_A3]] = add nsw i32 [[MUL_A3]], [[ACCUM3]] 1232; CHECK-INTERLEAVE1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1233; CHECK-INTERLEAVE1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[NUM_IN]] 1234; CHECK-INTERLEAVE1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 1235; CHECK-INTERLEAVE1: exit: 1236; CHECK-INTERLEAVE1-NEXT: [[ADD_A0_LCSSA:%.*]] = phi i32 [ [[ADD_A0]], [[FOR_BODY]] ], [ [[TMP46]], [[MIDDLE_BLOCK]] ] 1237; CHECK-INTERLEAVE1-NEXT: [[ADD_A1_LCSSA:%.*]] = phi i32 [ [[ADD_A1]], [[FOR_BODY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ] 1238; CHECK-INTERLEAVE1-NEXT: [[ADD_A2_LCSSA:%.*]] = phi i32 [ [[ADD_A2]], [[FOR_BODY]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ] 1239; CHECK-INTERLEAVE1-NEXT: [[ADD_A3_LCSSA:%.*]] = phi i32 [ [[ADD_A3]], [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ] 1240; CHECK-INTERLEAVE1-NEXT: [[RESULT0:%.*]] = add nsw i32 [[ADD_A0_LCSSA]], [[ADD_A1_LCSSA]] 1241; CHECK-INTERLEAVE1-NEXT: [[RESULT1:%.*]] = add nsw i32 [[ADD_A2_LCSSA]], [[ADD_A3_LCSSA]] 1242; CHECK-INTERLEAVE1-NEXT: [[RESULT:%.*]] = add nsw i32 [[RESULT0]], [[RESULT1]] 1243; CHECK-INTERLEAVE1-NEXT: ret i32 [[RESULT]] 1244; 1245; CHECK-INTERLEAVED-LABEL: define i32 @dotp_unrolled( 1246; CHECK-INTERLEAVED-SAME: i32 [[NUM_OUT:%.*]], i64 [[NUM_IN:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 1247; CHECK-INTERLEAVED-NEXT: entry: 1248; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() 1249; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP13]], 8 1250; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM_IN]], [[TMP15]] 1251; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1252; CHECK-INTERLEAVED: vector.ph: 1253; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() 1254; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP16]], 8 1255; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[NUM_IN]], [[TMP18]] 1256; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[NUM_IN]], [[N_MOD_VF]] 1257; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() 1258; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = mul i64 [[TMP34]], 8 1259; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] 1260; CHECK-INTERLEAVED: vector.body: 1261; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1262; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ] 1263; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ] 1264; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP64:%.*]], [[VECTOR_BODY]] ] 1265; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP65:%.*]], [[VECTOR_BODY]] ] 1266; CHECK-INTERLEAVED-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[VECTOR_BODY]] ] 1267; CHECK-INTERLEAVED-NEXT: [[VEC_PHI5:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[VECTOR_BODY]] ] 1268; CHECK-INTERLEAVED-NEXT: [[VEC_PHI6:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[VECTOR_BODY]] ] 1269; CHECK-INTERLEAVED-NEXT: [[VEC_PHI7:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP33:%.*]], [[VECTOR_BODY]] ] 1270; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1271; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]] 1272; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]] 1273; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1 1274; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]] 1275; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]] 1276; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = or disjoint i64 [[TMP0]], 2 1277; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]] 1278; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]] 1279; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = or disjoint i64 [[TMP0]], 3 1280; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]] 1281; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]] 1282; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 1283; CHECK-INTERLEAVED-NEXT: [[TMP56:%.*]] = call i64 @llvm.vscale.i64() 1284; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = mul i64 [[TMP56]], 4 1285; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[TMP20]] 1286; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP12]], align 1 1287; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 4 x i8>, ptr [[TMP21]], align 1 1288; CHECK-INTERLEAVED-NEXT: [[TMP66:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32> 1289; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD8]] to <vscale x 4 x i32> 1290; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 1291; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() 1292; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 4 1293; CHECK-INTERLEAVED-NEXT: [[TMP72:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 [[TMP26]] 1294; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 4 x i8>, ptr [[TMP14]], align 1 1295; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 4 x i8>, ptr [[TMP72]], align 1 1296; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD9]] to <vscale x 4 x i32> 1297; CHECK-INTERLEAVED-NEXT: [[TMP82:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD10]] to <vscale x 4 x i32> 1298; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = mul nsw <vscale x 4 x i32> [[TMP28]], [[TMP66]] 1299; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = mul nsw <vscale x 4 x i32> [[TMP82]], [[TMP23]] 1300; CHECK-INTERLEAVED-NEXT: [[TMP50]] = add <vscale x 4 x i32> [[TMP30]], [[VEC_PHI6]] 1301; CHECK-INTERLEAVED-NEXT: [[TMP33]] = add <vscale x 4 x i32> [[TMP31]], [[VEC_PHI7]] 1302; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 1303; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = call i64 @llvm.vscale.i64() 1304; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = mul i64 [[TMP35]], 4 1305; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP36]] 1306; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD11:%.*]] = load <vscale x 4 x i8>, ptr [[TMP17]], align 1 1307; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD12:%.*]] = load <vscale x 4 x i8>, ptr [[TMP37]], align 1 1308; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD11]] to <vscale x 4 x i32> 1309; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD12]] to <vscale x 4 x i32> 1310; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 1311; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64() 1312; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = mul i64 [[TMP41]], 4 1313; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 [[TMP42]] 1314; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD13:%.*]] = load <vscale x 4 x i8>, ptr [[TMP19]], align 1 1315; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD14:%.*]] = load <vscale x 4 x i8>, ptr [[TMP43]], align 1 1316; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD13]] to <vscale x 4 x i32> 1317; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD14]] to <vscale x 4 x i32> 1318; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = mul nsw <vscale x 4 x i32> [[TMP38]], [[TMP44]] 1319; CHECK-INTERLEAVED-NEXT: [[TMP47:%.*]] = mul nsw <vscale x 4 x i32> [[TMP39]], [[TMP45]] 1320; CHECK-INTERLEAVED-NEXT: [[TMP48]] = add <vscale x 4 x i32> [[TMP46]], [[VEC_PHI4]] 1321; CHECK-INTERLEAVED-NEXT: [[TMP49]] = add <vscale x 4 x i32> [[TMP47]], [[VEC_PHI5]] 1322; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 1323; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64() 1324; CHECK-INTERLEAVED-NEXT: [[TMP52:%.*]] = mul i64 [[TMP51]], 4 1325; CHECK-INTERLEAVED-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP52]] 1326; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD15:%.*]] = load <vscale x 4 x i8>, ptr [[TMP22]], align 1 1327; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD16:%.*]] = load <vscale x 4 x i8>, ptr [[TMP53]], align 1 1328; CHECK-INTERLEAVED-NEXT: [[TMP54:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD15]] to <vscale x 4 x i32> 1329; CHECK-INTERLEAVED-NEXT: [[TMP55:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD16]] to <vscale x 4 x i32> 1330; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 1331; CHECK-INTERLEAVED-NEXT: [[TMP57:%.*]] = call i64 @llvm.vscale.i64() 1332; CHECK-INTERLEAVED-NEXT: [[TMP58:%.*]] = mul i64 [[TMP57]], 4 1333; CHECK-INTERLEAVED-NEXT: [[TMP59:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP58]] 1334; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD17:%.*]] = load <vscale x 4 x i8>, ptr [[TMP24]], align 1 1335; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD18:%.*]] = load <vscale x 4 x i8>, ptr [[TMP59]], align 1 1336; CHECK-INTERLEAVED-NEXT: [[TMP60:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD17]] to <vscale x 4 x i32> 1337; CHECK-INTERLEAVED-NEXT: [[TMP61:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD18]] to <vscale x 4 x i32> 1338; CHECK-INTERLEAVED-NEXT: [[TMP62:%.*]] = mul nsw <vscale x 4 x i32> [[TMP54]], [[TMP60]] 1339; CHECK-INTERLEAVED-NEXT: [[TMP63:%.*]] = mul nsw <vscale x 4 x i32> [[TMP55]], [[TMP61]] 1340; CHECK-INTERLEAVED-NEXT: [[TMP64]] = add <vscale x 4 x i32> [[TMP62]], [[VEC_PHI2]] 1341; CHECK-INTERLEAVED-NEXT: [[TMP65]] = add <vscale x 4 x i32> [[TMP63]], [[VEC_PHI3]] 1342; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 1343; CHECK-INTERLEAVED-NEXT: [[TMP67:%.*]] = call i64 @llvm.vscale.i64() 1344; CHECK-INTERLEAVED-NEXT: [[TMP68:%.*]] = mul i64 [[TMP67]], 4 1345; CHECK-INTERLEAVED-NEXT: [[TMP69:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 [[TMP68]] 1346; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD19:%.*]] = load <vscale x 4 x i8>, ptr [[TMP27]], align 1 1347; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD20:%.*]] = load <vscale x 4 x i8>, ptr [[TMP69]], align 1 1348; CHECK-INTERLEAVED-NEXT: [[TMP70:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD19]] to <vscale x 4 x i32> 1349; CHECK-INTERLEAVED-NEXT: [[TMP71:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD20]] to <vscale x 4 x i32> 1350; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 1351; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = call i64 @llvm.vscale.i64() 1352; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = mul i64 [[TMP73]], 4 1353; CHECK-INTERLEAVED-NEXT: [[TMP75:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 [[TMP74]] 1354; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD21:%.*]] = load <vscale x 4 x i8>, ptr [[TMP29]], align 1 1355; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD22:%.*]] = load <vscale x 4 x i8>, ptr [[TMP75]], align 1 1356; CHECK-INTERLEAVED-NEXT: [[TMP76:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD21]] to <vscale x 4 x i32> 1357; CHECK-INTERLEAVED-NEXT: [[TMP77:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD22]] to <vscale x 4 x i32> 1358; CHECK-INTERLEAVED-NEXT: [[TMP78:%.*]] = mul nsw <vscale x 4 x i32> [[TMP70]], [[TMP76]] 1359; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = mul nsw <vscale x 4 x i32> [[TMP71]], [[TMP77]] 1360; CHECK-INTERLEAVED-NEXT: [[TMP80]] = add <vscale x 4 x i32> [[TMP78]], [[VEC_PHI]] 1361; CHECK-INTERLEAVED-NEXT: [[TMP81]] = add <vscale x 4 x i32> [[TMP79]], [[VEC_PHI1]] 1362; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP40]] 1363; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1364; CHECK-INTERLEAVED-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 1365; CHECK-INTERLEAVED: middle.block: 1366; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <vscale x 4 x i32> [[TMP81]], [[TMP80]] 1367; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[BIN_RDX]]) 1368; CHECK-INTERLEAVED-NEXT: [[BIN_RDX23:%.*]] = add <vscale x 4 x i32> [[TMP65]], [[TMP64]] 1369; CHECK-INTERLEAVED-NEXT: [[TMP84:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[BIN_RDX23]]) 1370; CHECK-INTERLEAVED-NEXT: [[BIN_RDX24:%.*]] = add <vscale x 4 x i32> [[TMP49]], [[TMP48]] 1371; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[BIN_RDX24]]) 1372; CHECK-INTERLEAVED-NEXT: [[BIN_RDX25:%.*]] = add <vscale x 4 x i32> [[TMP33]], [[TMP50]] 1373; CHECK-INTERLEAVED-NEXT: [[TMP86:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[BIN_RDX25]]) 1374; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[NUM_IN]], [[N_VEC]] 1375; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 1376; CHECK-INTERLEAVED: scalar.ph: 1377; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1378; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP83]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1379; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX26:%.*]] = phi i32 [ [[TMP84]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1380; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX27:%.*]] = phi i32 [ [[TMP85]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1381; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX28:%.*]] = phi i32 [ [[TMP86]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1382; CHECK-INTERLEAVED-NEXT: br label [[FOR_BODY:%.*]] 1383; CHECK-INTERLEAVED: for.body: 1384; CHECK-INTERLEAVED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1385; CHECK-INTERLEAVED-NEXT: [[ACCUM3:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD_A3:%.*]], [[FOR_BODY]] ] 1386; CHECK-INTERLEAVED-NEXT: [[ACCUM2:%.*]] = phi i32 [ [[BC_MERGE_RDX26]], [[SCALAR_PH]] ], [ [[ADD_A2:%.*]], [[FOR_BODY]] ] 1387; CHECK-INTERLEAVED-NEXT: [[ACCUM1:%.*]] = phi i32 [ [[BC_MERGE_RDX27]], [[SCALAR_PH]] ], [ [[ADD_A1:%.*]], [[FOR_BODY]] ] 1388; CHECK-INTERLEAVED-NEXT: [[ACCUM0:%.*]] = phi i32 [ [[BC_MERGE_RDX28]], [[SCALAR_PH]] ], [ [[ADD_A0:%.*]], [[FOR_BODY]] ] 1389; CHECK-INTERLEAVED-NEXT: [[GEP_A0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] 1390; CHECK-INTERLEAVED-NEXT: [[GEP_B0:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]] 1391; CHECK-INTERLEAVED-NEXT: [[OFFSET_1:%.*]] = or disjoint i64 [[IV]], 1 1392; CHECK-INTERLEAVED-NEXT: [[GEP_A1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_1]] 1393; CHECK-INTERLEAVED-NEXT: [[GEP_B1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_1]] 1394; CHECK-INTERLEAVED-NEXT: [[OFFSET_2:%.*]] = or disjoint i64 [[IV]], 2 1395; CHECK-INTERLEAVED-NEXT: [[GEP_A2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_2]] 1396; CHECK-INTERLEAVED-NEXT: [[GEP_B2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_2]] 1397; CHECK-INTERLEAVED-NEXT: [[OFFSET_3:%.*]] = or disjoint i64 [[IV]], 3 1398; CHECK-INTERLEAVED-NEXT: [[GEP_A3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_3]] 1399; CHECK-INTERLEAVED-NEXT: [[GEP_B3:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_3]] 1400; CHECK-INTERLEAVED-NEXT: [[LOAD_A0:%.*]] = load i8, ptr [[GEP_A0]], align 1 1401; CHECK-INTERLEAVED-NEXT: [[EXT_A0:%.*]] = sext i8 [[LOAD_A0]] to i32 1402; CHECK-INTERLEAVED-NEXT: [[LOAD_B0:%.*]] = load i8, ptr [[GEP_B0]], align 1 1403; CHECK-INTERLEAVED-NEXT: [[EXT_B0:%.*]] = sext i8 [[LOAD_B0]] to i32 1404; CHECK-INTERLEAVED-NEXT: [[MUL_A0:%.*]] = mul nsw i32 [[EXT_B0]], [[EXT_A0]] 1405; CHECK-INTERLEAVED-NEXT: [[ADD_A0]] = add nsw i32 [[MUL_A0]], [[ACCUM0]] 1406; CHECK-INTERLEAVED-NEXT: [[LOAD_A1:%.*]] = load i8, ptr [[GEP_A1]], align 1 1407; CHECK-INTERLEAVED-NEXT: [[EXT_A1:%.*]] = sext i8 [[LOAD_A1]] to i32 1408; CHECK-INTERLEAVED-NEXT: [[LOAD_B1:%.*]] = load i8, ptr [[GEP_B1]], align 1 1409; CHECK-INTERLEAVED-NEXT: [[EXT_B1:%.*]] = sext i8 [[LOAD_B1]] to i32 1410; CHECK-INTERLEAVED-NEXT: [[MUL_A1:%.*]] = mul nsw i32 [[EXT_A1]], [[EXT_B1]] 1411; CHECK-INTERLEAVED-NEXT: [[ADD_A1]] = add nsw i32 [[MUL_A1]], [[ACCUM1]] 1412; CHECK-INTERLEAVED-NEXT: [[LOAD_A2:%.*]] = load i8, ptr [[GEP_A2]], align 1 1413; CHECK-INTERLEAVED-NEXT: [[EXT_A2:%.*]] = sext i8 [[LOAD_A2]] to i32 1414; CHECK-INTERLEAVED-NEXT: [[LOAD_B2:%.*]] = load i8, ptr [[GEP_B2]], align 1 1415; CHECK-INTERLEAVED-NEXT: [[EXT_B2:%.*]] = sext i8 [[LOAD_B2]] to i32 1416; CHECK-INTERLEAVED-NEXT: [[MUL_A2:%.*]] = mul nsw i32 [[EXT_A2]], [[EXT_B2]] 1417; CHECK-INTERLEAVED-NEXT: [[ADD_A2]] = add nsw i32 [[MUL_A2]], [[ACCUM2]] 1418; CHECK-INTERLEAVED-NEXT: [[LOAD_A3:%.*]] = load i8, ptr [[GEP_A3]], align 1 1419; CHECK-INTERLEAVED-NEXT: [[EXT_A3:%.*]] = sext i8 [[LOAD_A3]] to i32 1420; CHECK-INTERLEAVED-NEXT: [[LOAD_B3:%.*]] = load i8, ptr [[GEP_B3]], align 1 1421; CHECK-INTERLEAVED-NEXT: [[EXT_B3:%.*]] = sext i8 [[LOAD_B3]] to i32 1422; CHECK-INTERLEAVED-NEXT: [[MUL_A3:%.*]] = mul nsw i32 [[EXT_A3]], [[EXT_B3]] 1423; CHECK-INTERLEAVED-NEXT: [[ADD_A3]] = add nsw i32 [[MUL_A3]], [[ACCUM3]] 1424; CHECK-INTERLEAVED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1425; CHECK-INTERLEAVED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[NUM_IN]] 1426; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 1427; CHECK-INTERLEAVED: exit: 1428; CHECK-INTERLEAVED-NEXT: [[ADD_A0_LCSSA:%.*]] = phi i32 [ [[ADD_A0]], [[FOR_BODY]] ], [ [[TMP86]], [[MIDDLE_BLOCK]] ] 1429; CHECK-INTERLEAVED-NEXT: [[ADD_A1_LCSSA:%.*]] = phi i32 [ [[ADD_A1]], [[FOR_BODY]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ] 1430; CHECK-INTERLEAVED-NEXT: [[ADD_A2_LCSSA:%.*]] = phi i32 [ [[ADD_A2]], [[FOR_BODY]] ], [ [[TMP84]], [[MIDDLE_BLOCK]] ] 1431; CHECK-INTERLEAVED-NEXT: [[ADD_A3_LCSSA:%.*]] = phi i32 [ [[ADD_A3]], [[FOR_BODY]] ], [ [[TMP83]], [[MIDDLE_BLOCK]] ] 1432; CHECK-INTERLEAVED-NEXT: [[RESULT0:%.*]] = add nsw i32 [[ADD_A0_LCSSA]], [[ADD_A1_LCSSA]] 1433; CHECK-INTERLEAVED-NEXT: [[RESULT1:%.*]] = add nsw i32 [[ADD_A2_LCSSA]], [[ADD_A3_LCSSA]] 1434; CHECK-INTERLEAVED-NEXT: [[RESULT:%.*]] = add nsw i32 [[RESULT0]], [[RESULT1]] 1435; CHECK-INTERLEAVED-NEXT: ret i32 [[RESULT]] 1436; 1437; CHECK-MAXBW-LABEL: define i32 @dotp_unrolled( 1438; CHECK-MAXBW-SAME: i32 [[NUM_OUT:%.*]], i64 [[NUM_IN:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 1439; CHECK-MAXBW-NEXT: entry: 1440; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1441; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 1442; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM_IN]], [[TMP1]] 1443; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1444; CHECK-MAXBW: vector.ph: 1445; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1446; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 1447; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[NUM_IN]], [[TMP3]] 1448; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[NUM_IN]], [[N_MOD_VF]] 1449; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1450; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 1451; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] 1452; CHECK-MAXBW: vector.body: 1453; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1454; CHECK-MAXBW-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE16:%.*]], [[VECTOR_BODY]] ] 1455; CHECK-MAXBW-NEXT: [[VEC_PHI5:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE17:%.*]], [[VECTOR_BODY]] ] 1456; CHECK-MAXBW-NEXT: [[VEC_PHI6:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] 1457; CHECK-MAXBW-NEXT: [[VEC_PHI7:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE11:%.*]], [[VECTOR_BODY]] ] 1458; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 1459; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]] 1460; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]] 1461; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = or disjoint i64 [[TMP6]], 1 1462; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]] 1463; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]] 1464; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = or disjoint i64 [[TMP6]], 2 1465; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] 1466; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] 1467; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = or disjoint i64 [[TMP6]], 3 1468; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP15]] 1469; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP15]] 1470; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 1471; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP18]], align 1 1472; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32> 1473; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 1474; CHECK-MAXBW-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 8 x i8>, ptr [[TMP24]], align 1 1475; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD9]] to <vscale x 8 x i32> 1476; CHECK-MAXBW-NEXT: [[TMP31:%.*]] = mul nsw <vscale x 8 x i32> [[TMP29]], [[TMP23]] 1477; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE11]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI7]], <vscale x 8 x i32> [[TMP31]]) 1478; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 1479; CHECK-MAXBW-NEXT: [[WIDE_LOAD12:%.*]] = load <vscale x 8 x i8>, ptr [[TMP32]], align 1 1480; CHECK-MAXBW-NEXT: [[TMP37:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD12]] to <vscale x 8 x i32> 1481; CHECK-MAXBW-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 1482; CHECK-MAXBW-NEXT: [[WIDE_LOAD14:%.*]] = load <vscale x 8 x i8>, ptr [[TMP38]], align 1 1483; CHECK-MAXBW-NEXT: [[TMP43:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD14]] to <vscale x 8 x i32> 1484; CHECK-MAXBW-NEXT: [[TMP45:%.*]] = mul nsw <vscale x 8 x i32> [[TMP37]], [[TMP43]] 1485; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI6]], <vscale x 8 x i32> [[TMP45]]) 1486; CHECK-MAXBW-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 1487; CHECK-MAXBW-NEXT: [[WIDE_LOAD18:%.*]] = load <vscale x 8 x i8>, ptr [[TMP46]], align 1 1488; CHECK-MAXBW-NEXT: [[TMP51:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD18]] to <vscale x 8 x i32> 1489; CHECK-MAXBW-NEXT: [[TMP52:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 0 1490; CHECK-MAXBW-NEXT: [[WIDE_LOAD20:%.*]] = load <vscale x 8 x i8>, ptr [[TMP52]], align 1 1491; CHECK-MAXBW-NEXT: [[TMP57:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD20]] to <vscale x 8 x i32> 1492; CHECK-MAXBW-NEXT: [[TMP59:%.*]] = mul nsw <vscale x 8 x i32> [[TMP51]], [[TMP57]] 1493; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE17]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI5]], <vscale x 8 x i32> [[TMP59]]) 1494; CHECK-MAXBW-NEXT: [[TMP60:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 1495; CHECK-MAXBW-NEXT: [[WIDE_LOAD24:%.*]] = load <vscale x 8 x i8>, ptr [[TMP60]], align 1 1496; CHECK-MAXBW-NEXT: [[TMP65:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD24]] to <vscale x 8 x i32> 1497; CHECK-MAXBW-NEXT: [[TMP66:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i32 0 1498; CHECK-MAXBW-NEXT: [[WIDE_LOAD26:%.*]] = load <vscale x 8 x i8>, ptr [[TMP66]], align 1 1499; CHECK-MAXBW-NEXT: [[TMP71:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD26]] to <vscale x 8 x i32> 1500; CHECK-MAXBW-NEXT: [[TMP73:%.*]] = mul nsw <vscale x 8 x i32> [[TMP65]], [[TMP71]] 1501; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE16]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI4]], <vscale x 8 x i32> [[TMP73]]) 1502; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1503; CHECK-MAXBW-NEXT: [[TMP74:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1504; CHECK-MAXBW-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 1505; CHECK-MAXBW: middle.block: 1506; CHECK-MAXBW-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[PARTIAL_REDUCE16]]) 1507; CHECK-MAXBW-NEXT: [[TMP40:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[PARTIAL_REDUCE17]]) 1508; CHECK-MAXBW-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[PARTIAL_REDUCE]]) 1509; CHECK-MAXBW-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[PARTIAL_REDUCE11]]) 1510; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[NUM_IN]], [[N_VEC]] 1511; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 1512; CHECK-MAXBW: scalar.ph: 1513; CHECK-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1514; CHECK-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP39]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1515; CHECK-MAXBW-NEXT: [[BC_MERGE_RDX14:%.*]] = phi i32 [ [[TMP40]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1516; CHECK-MAXBW-NEXT: [[BC_MERGE_RDX15:%.*]] = phi i32 [ [[TMP41]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1517; CHECK-MAXBW-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i32 [ [[TMP42]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1518; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]] 1519; CHECK-MAXBW: for.body: 1520; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1521; CHECK-MAXBW-NEXT: [[ACCUM3:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD_A3:%.*]], [[FOR_BODY]] ] 1522; CHECK-MAXBW-NEXT: [[ACCUM2:%.*]] = phi i32 [ [[BC_MERGE_RDX14]], [[SCALAR_PH]] ], [ [[ADD_A2:%.*]], [[FOR_BODY]] ] 1523; CHECK-MAXBW-NEXT: [[ACCUM1:%.*]] = phi i32 [ [[BC_MERGE_RDX15]], [[SCALAR_PH]] ], [ [[ADD_A1:%.*]], [[FOR_BODY]] ] 1524; CHECK-MAXBW-NEXT: [[ACCUM0:%.*]] = phi i32 [ [[BC_MERGE_RDX16]], [[SCALAR_PH]] ], [ [[ADD_A0:%.*]], [[FOR_BODY]] ] 1525; CHECK-MAXBW-NEXT: [[GEP_A0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] 1526; CHECK-MAXBW-NEXT: [[GEP_B0:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]] 1527; CHECK-MAXBW-NEXT: [[OFFSET_1:%.*]] = or disjoint i64 [[IV]], 1 1528; CHECK-MAXBW-NEXT: [[GEP_A1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_1]] 1529; CHECK-MAXBW-NEXT: [[GEP_B1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_1]] 1530; CHECK-MAXBW-NEXT: [[OFFSET_2:%.*]] = or disjoint i64 [[IV]], 2 1531; CHECK-MAXBW-NEXT: [[GEP_A2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_2]] 1532; CHECK-MAXBW-NEXT: [[GEP_B2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_2]] 1533; CHECK-MAXBW-NEXT: [[OFFSET_3:%.*]] = or disjoint i64 [[IV]], 3 1534; CHECK-MAXBW-NEXT: [[GEP_A3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_3]] 1535; CHECK-MAXBW-NEXT: [[GEP_B3:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_3]] 1536; CHECK-MAXBW-NEXT: [[LOAD_A0:%.*]] = load i8, ptr [[GEP_A0]], align 1 1537; CHECK-MAXBW-NEXT: [[EXT_A0:%.*]] = sext i8 [[LOAD_A0]] to i32 1538; CHECK-MAXBW-NEXT: [[LOAD_B0:%.*]] = load i8, ptr [[GEP_B0]], align 1 1539; CHECK-MAXBW-NEXT: [[EXT_B0:%.*]] = sext i8 [[LOAD_B0]] to i32 1540; CHECK-MAXBW-NEXT: [[MUL_A0:%.*]] = mul nsw i32 [[EXT_B0]], [[EXT_A0]] 1541; CHECK-MAXBW-NEXT: [[ADD_A0]] = add nsw i32 [[MUL_A0]], [[ACCUM0]] 1542; CHECK-MAXBW-NEXT: [[LOAD_A1:%.*]] = load i8, ptr [[GEP_A1]], align 1 1543; CHECK-MAXBW-NEXT: [[EXT_A1:%.*]] = sext i8 [[LOAD_A1]] to i32 1544; CHECK-MAXBW-NEXT: [[LOAD_B1:%.*]] = load i8, ptr [[GEP_B1]], align 1 1545; CHECK-MAXBW-NEXT: [[EXT_B1:%.*]] = sext i8 [[LOAD_B1]] to i32 1546; CHECK-MAXBW-NEXT: [[MUL_A1:%.*]] = mul nsw i32 [[EXT_A1]], [[EXT_B1]] 1547; CHECK-MAXBW-NEXT: [[ADD_A1]] = add nsw i32 [[MUL_A1]], [[ACCUM1]] 1548; CHECK-MAXBW-NEXT: [[LOAD_A2:%.*]] = load i8, ptr [[GEP_A2]], align 1 1549; CHECK-MAXBW-NEXT: [[EXT_A2:%.*]] = sext i8 [[LOAD_A2]] to i32 1550; CHECK-MAXBW-NEXT: [[LOAD_B2:%.*]] = load i8, ptr [[GEP_B2]], align 1 1551; CHECK-MAXBW-NEXT: [[EXT_B2:%.*]] = sext i8 [[LOAD_B2]] to i32 1552; CHECK-MAXBW-NEXT: [[MUL_A2:%.*]] = mul nsw i32 [[EXT_A2]], [[EXT_B2]] 1553; CHECK-MAXBW-NEXT: [[ADD_A2]] = add nsw i32 [[MUL_A2]], [[ACCUM2]] 1554; CHECK-MAXBW-NEXT: [[LOAD_A3:%.*]] = load i8, ptr [[GEP_A3]], align 1 1555; CHECK-MAXBW-NEXT: [[EXT_A3:%.*]] = sext i8 [[LOAD_A3]] to i32 1556; CHECK-MAXBW-NEXT: [[LOAD_B3:%.*]] = load i8, ptr [[GEP_B3]], align 1 1557; CHECK-MAXBW-NEXT: [[EXT_B3:%.*]] = sext i8 [[LOAD_B3]] to i32 1558; CHECK-MAXBW-NEXT: [[MUL_A3:%.*]] = mul nsw i32 [[EXT_A3]], [[EXT_B3]] 1559; CHECK-MAXBW-NEXT: [[ADD_A3]] = add nsw i32 [[MUL_A3]], [[ACCUM3]] 1560; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1561; CHECK-MAXBW-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[NUM_IN]] 1562; CHECK-MAXBW-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 1563; CHECK-MAXBW: exit: 1564; CHECK-MAXBW-NEXT: [[ADD_A0_LCSSA:%.*]] = phi i32 [ [[ADD_A0]], [[FOR_BODY]] ], [ [[TMP42]], [[MIDDLE_BLOCK]] ] 1565; CHECK-MAXBW-NEXT: [[ADD_A1_LCSSA:%.*]] = phi i32 [ [[ADD_A1]], [[FOR_BODY]] ], [ [[TMP41]], [[MIDDLE_BLOCK]] ] 1566; CHECK-MAXBW-NEXT: [[ADD_A2_LCSSA:%.*]] = phi i32 [ [[ADD_A2]], [[FOR_BODY]] ], [ [[TMP40]], [[MIDDLE_BLOCK]] ] 1567; CHECK-MAXBW-NEXT: [[ADD_A3_LCSSA:%.*]] = phi i32 [ [[ADD_A3]], [[FOR_BODY]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ] 1568; CHECK-MAXBW-NEXT: [[RESULT0:%.*]] = add nsw i32 [[ADD_A0_LCSSA]], [[ADD_A1_LCSSA]] 1569; CHECK-MAXBW-NEXT: [[RESULT1:%.*]] = add nsw i32 [[ADD_A2_LCSSA]], [[ADD_A3_LCSSA]] 1570; CHECK-MAXBW-NEXT: [[RESULT:%.*]] = add nsw i32 [[RESULT0]], [[RESULT1]] 1571; CHECK-MAXBW-NEXT: ret i32 [[RESULT]] 1572; 1573entry: 1574 br label %for.body 1575 1576for.body: ; preds = %entry, %for.body 1577 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1578 %accum3 = phi i32 [ 0, %entry ], [ %add.a3, %for.body ] 1579 %accum2 = phi i32 [ 0, %entry ], [ %add.a2, %for.body ] 1580 %accum1 = phi i32 [ 0, %entry ], [ %add.a1, %for.body ] 1581 %accum0 = phi i32 [ 0, %entry ], [ %add.a0, %for.body ] 1582 %gep.a0 = getelementptr inbounds i8, ptr %a, i64 %iv 1583 %gep.b0 = getelementptr inbounds i8, ptr %b, i64 %iv 1584 %offset.1 = or disjoint i64 %iv, 1 1585 %gep.a1 = getelementptr inbounds i8, ptr %a, i64 %offset.1 1586 %gep.b1 = getelementptr inbounds i8, ptr %b, i64 %offset.1 1587 %offset.2 = or disjoint i64 %iv, 2 1588 %gep.a2 = getelementptr inbounds i8, ptr %a, i64 %offset.2 1589 %gep.b2 = getelementptr inbounds i8, ptr %b, i64 %offset.2 1590 %offset.3 = or disjoint i64 %iv, 3 1591 %gep.a3 = getelementptr inbounds i8, ptr %a, i64 %offset.3 1592 %gep.b3 = getelementptr inbounds i8, ptr %b, i64 %offset.3 1593 %load.a0 = load i8, ptr %gep.a0, align 1 1594 %ext.a0 = sext i8 %load.a0 to i32 1595 %load.b0 = load i8, ptr %gep.b0, align 1 1596 %ext.b0 = sext i8 %load.b0 to i32 1597 %mul.a0 = mul nsw i32 %ext.b0, %ext.a0 1598 %add.a0 = add nsw i32 %mul.a0, %accum0 1599 %load.a1 = load i8, ptr %gep.a1, align 1 1600 %ext.a1 = sext i8 %load.a1 to i32 1601 %load.b1 = load i8, ptr %gep.b1, align 1 1602 %ext.b1 = sext i8 %load.b1 to i32 1603 %mul.a1 = mul nsw i32 %ext.a1, %ext.b1 1604 %add.a1 = add nsw i32 %mul.a1, %accum1 1605 %load.a2 = load i8, ptr %gep.a2, align 1 1606 %ext.a2 = sext i8 %load.a2 to i32 1607 %load.b2 = load i8, ptr %gep.b2, align 1 1608 %ext.b2 = sext i8 %load.b2 to i32 1609 %mul.a2 = mul nsw i32 %ext.a2, %ext.b2 1610 %add.a2 = add nsw i32 %mul.a2, %accum2 1611 %load.a3 = load i8, ptr %gep.a3, align 1 1612 %ext.a3 = sext i8 %load.a3 to i32 1613 %load.b3 = load i8, ptr %gep.b3, align 1 1614 %ext.b3 = sext i8 %load.b3 to i32 1615 %mul.a3 = mul nsw i32 %ext.a3, %ext.b3 1616 %add.a3 = add nsw i32 %mul.a3, %accum3 1617 %iv.next = add nuw nsw i64 %iv, 1 1618 %exitcond.not = icmp eq i64 %iv.next, %num_in 1619 br i1 %exitcond.not, label %exit, label %for.body 1620 1621exit: ; preds = %for.body 1622 %result0 = add nsw i32 %add.a0, %add.a1 1623 %result1 = add nsw i32 %add.a2, %add.a3 1624 %result = add nsw i32 %result0, %result1 1625 ret i32 %result 1626} 1627 1628define i32 @not_dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { 1629; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_predicated( 1630; CHECK-INTERLEAVE1-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 1631; CHECK-INTERLEAVE1-NEXT: entry: 1632; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() 1633; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 1634; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP7]] 1635; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] 1636; CHECK-INTERLEAVE1: vector.ph: 1637; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() 1638; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul i64 [[TMP10]], 4 1639; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 1640; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1641; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() 1642; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = mul i64 [[TMP11]], 4 1643; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] 1644; CHECK-INTERLEAVE1: vector.body: 1645; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1646; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] 1647; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1648; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]] 1649; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 1650; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP2]], align 1 1651; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32> 1652; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]] 1653; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 1654; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP5]], align 1 1655; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32> 1656; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = mul nsw <vscale x 4 x i32> [[TMP12]], [[TMP9]] 1657; CHECK-INTERLEAVE1-NEXT: [[TMP14]] = add <vscale x 4 x i32> [[TMP13]], [[VEC_PHI]] 1658; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] 1659; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1660; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 1661; CHECK-INTERLEAVE1: middle.block: 1662; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP14]]) 1663; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1664; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_PH]] 1665; CHECK-INTERLEAVE1: scalar.ph: 1666; CHECK-INTERLEAVE1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1667; CHECK-INTERLEAVE1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1668; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]] 1669; CHECK-INTERLEAVE1: for.body: 1670; CHECK-INTERLEAVE1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1671; CHECK-INTERLEAVE1-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 1672; CHECK-INTERLEAVE1-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] 1673; CHECK-INTERLEAVE1-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 1674; CHECK-INTERLEAVE1-NEXT: [[EXT_A:%.*]] = sext i8 [[LOAD_A]] to i32 1675; CHECK-INTERLEAVE1-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]] 1676; CHECK-INTERLEAVE1-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 1677; CHECK-INTERLEAVE1-NEXT: [[EXT_B:%.*]] = sext i8 [[LOAD_B]] to i32 1678; CHECK-INTERLEAVE1-NEXT: [[MUL:%.*]] = mul nsw i32 [[EXT_B]], [[EXT_A]] 1679; CHECK-INTERLEAVE1-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[ACCUM]] 1680; CHECK-INTERLEAVE1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1681; CHECK-INTERLEAVE1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1682; CHECK-INTERLEAVE1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 1683; CHECK-INTERLEAVE1: exit: 1684; CHECK-INTERLEAVE1-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] 1685; CHECK-INTERLEAVE1-NEXT: ret i32 [[ADD_LCSSA]] 1686; 1687; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_predicated( 1688; CHECK-INTERLEAVED-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 1689; CHECK-INTERLEAVED-NEXT: entry: 1690; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() 1691; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = mul i64 [[TMP8]], 8 1692; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP14]] 1693; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] 1694; CHECK-INTERLEAVED: vector.ph: 1695; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() 1696; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP15]], 8 1697; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 1698; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1699; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1700; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 1701; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] 1702; CHECK-INTERLEAVED: vector.body: 1703; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1704; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] 1705; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] 1706; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1707; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]] 1708; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 1709; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() 1710; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 1711; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[TMP10]] 1712; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP2]], align 1 1713; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x i8>, ptr [[TMP11]], align 1 1714; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32> 1715; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD2]] to <vscale x 4 x i32> 1716; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]] 1717; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0 1718; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() 1719; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 1720; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[TMP17]] 1721; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x i8>, ptr [[TMP7]], align 1 1722; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x i8>, ptr [[TMP18]], align 1 1723; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD3]] to <vscale x 4 x i32> 1724; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD4]] to <vscale x 4 x i32> 1725; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = mul nsw <vscale x 4 x i32> [[TMP19]], [[TMP12]] 1726; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = mul nsw <vscale x 4 x i32> [[TMP20]], [[TMP25]] 1727; CHECK-INTERLEAVED-NEXT: [[TMP23]] = add <vscale x 4 x i32> [[TMP21]], [[VEC_PHI]] 1728; CHECK-INTERLEAVED-NEXT: [[TMP24]] = add <vscale x 4 x i32> [[TMP22]], [[VEC_PHI1]] 1729; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1730; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1731; CHECK-INTERLEAVED-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 1732; CHECK-INTERLEAVED: middle.block: 1733; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <vscale x 4 x i32> [[TMP24]], [[TMP23]] 1734; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[BIN_RDX]]) 1735; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1736; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_PH]] 1737; CHECK-INTERLEAVED: scalar.ph: 1738; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1739; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP26]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1740; CHECK-INTERLEAVED-NEXT: br label [[FOR_BODY:%.*]] 1741; CHECK-INTERLEAVED: for.body: 1742; CHECK-INTERLEAVED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1743; CHECK-INTERLEAVED-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 1744; CHECK-INTERLEAVED-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] 1745; CHECK-INTERLEAVED-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 1746; CHECK-INTERLEAVED-NEXT: [[EXT_A:%.*]] = sext i8 [[LOAD_A]] to i32 1747; CHECK-INTERLEAVED-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]] 1748; CHECK-INTERLEAVED-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 1749; CHECK-INTERLEAVED-NEXT: [[EXT_B:%.*]] = sext i8 [[LOAD_B]] to i32 1750; CHECK-INTERLEAVED-NEXT: [[MUL:%.*]] = mul nsw i32 [[EXT_B]], [[EXT_A]] 1751; CHECK-INTERLEAVED-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[ACCUM]] 1752; CHECK-INTERLEAVED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1753; CHECK-INTERLEAVED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1754; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 1755; CHECK-INTERLEAVED: exit: 1756; CHECK-INTERLEAVED-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ] 1757; CHECK-INTERLEAVED-NEXT: ret i32 [[ADD_LCSSA]] 1758; 1759; CHECK-MAXBW-LABEL: define i32 @not_dotp_predicated( 1760; CHECK-MAXBW-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 1761; CHECK-MAXBW-NEXT: entry: 1762; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1763; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 1764; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] 1765; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1766; CHECK-MAXBW: vector.ph: 1767; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 1768; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 1769; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 1770; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 1771; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 1772; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 1773; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] 1774; CHECK-MAXBW: vector.body: 1775; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1776; CHECK-MAXBW-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] 1777; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 1778; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]] 1779; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 1780; CHECK-MAXBW-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr [[TMP8]], align 1 1781; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32> 1782; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]] 1783; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 0 1784; CHECK-MAXBW-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x i8>, ptr [[TMP15]], align 1 1785; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD4]] to <vscale x 8 x i32> 1786; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = mul nsw <vscale x 8 x i32> [[TMP20]], [[TMP13]] 1787; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE5]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI1]], <vscale x 8 x i32> [[TMP22]]) 1788; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 1789; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1790; CHECK-MAXBW-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 1791; CHECK-MAXBW: middle.block: 1792; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[PARTIAL_REDUCE5]]) 1793; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 1794; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 1795; CHECK-MAXBW: scalar.ph: 1796; CHECK-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1797; CHECK-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1798; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]] 1799; CHECK-MAXBW: for.body: 1800; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1801; CHECK-MAXBW-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 1802; CHECK-MAXBW-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] 1803; CHECK-MAXBW-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 1804; CHECK-MAXBW-NEXT: [[EXT_A:%.*]] = sext i8 [[LOAD_A]] to i32 1805; CHECK-MAXBW-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]] 1806; CHECK-MAXBW-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 1807; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = sext i8 [[LOAD_B]] to i32 1808; CHECK-MAXBW-NEXT: [[MUL:%.*]] = mul nsw i32 [[EXT_B]], [[EXT_A]] 1809; CHECK-MAXBW-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[ACCUM]] 1810; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1811; CHECK-MAXBW-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1812; CHECK-MAXBW-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 1813; CHECK-MAXBW: exit: 1814; CHECK-MAXBW-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] 1815; CHECK-MAXBW-NEXT: ret i32 [[ADD_LCSSA]] 1816; 1817entry: 1818 br label %for.body 1819 1820for.body: ; preds = %entry, %for.body 1821 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 1822 %accum = phi i32 [ 0, %entry ], [ %add, %for.body ] 1823 %gep.a = getelementptr inbounds i8, ptr %a, i64 %iv 1824 %load.a = load i8, ptr %gep.a, align 1 1825 %ext.a = sext i8 %load.a to i32 1826 %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv 1827 %load.b = load i8, ptr %gep.b, align 1 1828 %ext.b = sext i8 %load.b to i32 1829 %mul = mul nsw i32 %ext.b, %ext.a 1830 %add = add nsw i32 %mul, %accum 1831 %iv.next = add nuw nsw i64 %iv, 1 1832 %exitcond.not = icmp eq i64 %iv.next, %N 1833 br i1 %exitcond.not, label %exit, label %for.body 1834 1835exit: ; preds = %for.body 1836 ret i32 %add 1837} 1838 1839define i32 @not_dotp_predicated_pragma(i64 %N, ptr %a, ptr %b) #0 { 1840; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_predicated_pragma( 1841; CHECK-INTERLEAVE1-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 1842; CHECK-INTERLEAVE1-NEXT: entry: 1843; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1844; CHECK-INTERLEAVE1: vector.ph: 1845; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() 1846; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 1847; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], 1 1848; CHECK-INTERLEAVE1-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP11]] 1849; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP10]] 1850; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 1851; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() 1852; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = mul i64 [[TMP12]], 4 1853; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() 1854; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = mul i64 [[TMP15]], 4 1855; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = sub i64 [[N]], [[TMP6]] 1856; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[N]], [[TMP6]] 1857; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[TMP0]], i64 0 1858; CHECK-INTERLEAVE1-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) 1859; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] 1860; CHECK-INTERLEAVE1: vector.body: 1861; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1862; CHECK-INTERLEAVE1-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] 1863; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] 1864; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 1865; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]] 1866; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 1867; CHECK-INTERLEAVE1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP5]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison) 1868; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = sext <vscale x 4 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 4 x i32> 1869; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]] 1870; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 1871; CHECK-INTERLEAVE1-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP8]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison) 1872; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = sext <vscale x 4 x i8> [[WIDE_MASKED_LOAD1]] to <vscale x 4 x i32> 1873; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = mul nsw <vscale x 4 x i32> [[TMP16]], [[TMP13]] 1874; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = add <vscale x 4 x i32> [[TMP17]], [[VEC_PHI]] 1875; CHECK-INTERLEAVE1-NEXT: [[TMP19]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[TMP18]], <vscale x 4 x i32> [[VEC_PHI]] 1876; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP14]] 1877; CHECK-INTERLEAVE1-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP2]]) 1878; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) 1879; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = extractelement <vscale x 4 x i1> [[TMP20]], i32 0 1880; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 1881; CHECK-INTERLEAVE1: middle.block: 1882; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]]) 1883; CHECK-INTERLEAVE1-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] 1884; CHECK-INTERLEAVE1: scalar.ph: 1885; CHECK-INTERLEAVE1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1886; CHECK-INTERLEAVE1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1887; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]] 1888; CHECK-INTERLEAVE1: for.body: 1889; CHECK-INTERLEAVE1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1890; CHECK-INTERLEAVE1-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 1891; CHECK-INTERLEAVE1-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]] 1892; CHECK-INTERLEAVE1-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 1893; CHECK-INTERLEAVE1-NEXT: [[EXT_A:%.*]] = sext i8 [[LOAD_A]] to i32 1894; CHECK-INTERLEAVE1-NEXT: [[GEP_A2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] 1895; CHECK-INTERLEAVE1-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_A2]], align 1 1896; CHECK-INTERLEAVE1-NEXT: [[EXT_B:%.*]] = sext i8 [[LOAD_B]] to i32 1897; CHECK-INTERLEAVE1-NEXT: [[MUL:%.*]] = mul nsw i32 [[EXT_B]], [[EXT_A]] 1898; CHECK-INTERLEAVE1-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[ACCUM]] 1899; CHECK-INTERLEAVE1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1900; CHECK-INTERLEAVE1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1901; CHECK-INTERLEAVE1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] 1902; CHECK-INTERLEAVE1: exit: 1903; CHECK-INTERLEAVE1-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] 1904; CHECK-INTERLEAVE1-NEXT: ret i32 [[ADD_LCSSA]] 1905; 1906; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_predicated_pragma( 1907; CHECK-INTERLEAVED-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 1908; CHECK-INTERLEAVED-NEXT: entry: 1909; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1910; CHECK-INTERLEAVED: vector.ph: 1911; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() 1912; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 1913; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], 1 1914; CHECK-INTERLEAVED-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP11]] 1915; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP10]] 1916; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 1917; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() 1918; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = mul i64 [[TMP12]], 4 1919; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() 1920; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP15]], 4 1921; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = sub i64 [[N]], [[TMP6]] 1922; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[N]], [[TMP6]] 1923; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[TMP0]], i64 0 1924; CHECK-INTERLEAVED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) 1925; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] 1926; CHECK-INTERLEAVED: vector.body: 1927; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1928; CHECK-INTERLEAVED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] 1929; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] 1930; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 1931; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]] 1932; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 1933; CHECK-INTERLEAVED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP5]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison) 1934; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = sext <vscale x 4 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 4 x i32> 1935; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]] 1936; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 1937; CHECK-INTERLEAVED-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP8]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison) 1938; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = sext <vscale x 4 x i8> [[WIDE_MASKED_LOAD1]] to <vscale x 4 x i32> 1939; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = mul nsw <vscale x 4 x i32> [[TMP16]], [[TMP13]] 1940; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = add <vscale x 4 x i32> [[TMP17]], [[VEC_PHI]] 1941; CHECK-INTERLEAVED-NEXT: [[TMP19]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[TMP18]], <vscale x 4 x i32> [[VEC_PHI]] 1942; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP14]] 1943; CHECK-INTERLEAVED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP2]]) 1944; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) 1945; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = extractelement <vscale x 4 x i1> [[TMP20]], i32 0 1946; CHECK-INTERLEAVED-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 1947; CHECK-INTERLEAVED: middle.block: 1948; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]]) 1949; CHECK-INTERLEAVED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] 1950; CHECK-INTERLEAVED: scalar.ph: 1951; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1952; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 1953; CHECK-INTERLEAVED-NEXT: br label [[FOR_BODY:%.*]] 1954; CHECK-INTERLEAVED: for.body: 1955; CHECK-INTERLEAVED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 1956; CHECK-INTERLEAVED-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 1957; CHECK-INTERLEAVED-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]] 1958; CHECK-INTERLEAVED-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 1959; CHECK-INTERLEAVED-NEXT: [[EXT_A:%.*]] = sext i8 [[LOAD_A]] to i32 1960; CHECK-INTERLEAVED-NEXT: [[GEP_A2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] 1961; CHECK-INTERLEAVED-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_A2]], align 1 1962; CHECK-INTERLEAVED-NEXT: [[EXT_B:%.*]] = sext i8 [[LOAD_B]] to i32 1963; CHECK-INTERLEAVED-NEXT: [[MUL:%.*]] = mul nsw i32 [[EXT_B]], [[EXT_A]] 1964; CHECK-INTERLEAVED-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[ACCUM]] 1965; CHECK-INTERLEAVED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1966; CHECK-INTERLEAVED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 1967; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] 1968; CHECK-INTERLEAVED: exit: 1969; CHECK-INTERLEAVED-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] 1970; CHECK-INTERLEAVED-NEXT: ret i32 [[ADD_LCSSA]] 1971; 1972; CHECK-MAXBW-LABEL: define i32 @not_dotp_predicated_pragma( 1973; CHECK-MAXBW-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 1974; CHECK-MAXBW-NEXT: entry: 1975; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1976; CHECK-MAXBW: vector.ph: 1977; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 1978; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 1979; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 1980; CHECK-MAXBW-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP2]] 1981; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] 1982; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] 1983; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() 1984; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 1985; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() 1986; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 1987; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] 1988; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] 1989; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 1990; CHECK-MAXBW-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) 1991; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] 1992; CHECK-MAXBW: vector.body: 1993; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1994; CHECK-MAXBW-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] 1995; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] 1996; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 1997; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP10]] 1998; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 1999; CHECK-MAXBW-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison) 2000; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = sext <vscale x 4 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 4 x i32> 2001; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP10]] 2002; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 0 2003; CHECK-MAXBW-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP15]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison) 2004; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = sext <vscale x 4 x i8> [[WIDE_MASKED_LOAD1]] to <vscale x 4 x i32> 2005; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = mul nsw <vscale x 4 x i32> [[TMP16]], [[TMP13]] 2006; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = add <vscale x 4 x i32> [[TMP17]], [[VEC_PHI]] 2007; CHECK-MAXBW-NEXT: [[TMP19]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[TMP18]], <vscale x 4 x i32> [[VEC_PHI]] 2008; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] 2009; CHECK-MAXBW-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) 2010; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) 2011; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = extractelement <vscale x 4 x i1> [[TMP20]], i32 0 2012; CHECK-MAXBW-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 2013; CHECK-MAXBW: middle.block: 2014; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]]) 2015; CHECK-MAXBW-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] 2016; CHECK-MAXBW: scalar.ph: 2017; CHECK-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2018; CHECK-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 2019; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]] 2020; CHECK-MAXBW: for.body: 2021; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 2022; CHECK-MAXBW-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 2023; CHECK-MAXBW-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]] 2024; CHECK-MAXBW-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 2025; CHECK-MAXBW-NEXT: [[EXT_A:%.*]] = sext i8 [[LOAD_A]] to i32 2026; CHECK-MAXBW-NEXT: [[GEP_A2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] 2027; CHECK-MAXBW-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_A2]], align 1 2028; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = sext i8 [[LOAD_B]] to i32 2029; CHECK-MAXBW-NEXT: [[MUL:%.*]] = mul nsw i32 [[EXT_B]], [[EXT_A]] 2030; CHECK-MAXBW-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[ACCUM]] 2031; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 2032; CHECK-MAXBW-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 2033; CHECK-MAXBW-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] 2034; CHECK-MAXBW: exit: 2035; CHECK-MAXBW-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] 2036; CHECK-MAXBW-NEXT: ret i32 [[ADD_LCSSA]] 2037; 2038entry: 2039 br label %for.body 2040 2041for.body: ; preds = %entry, %for.body 2042 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 2043 %accum = phi i32 [ 0, %entry ], [ %add, %for.body ] 2044 %gep.a = getelementptr inbounds i8, ptr %b, i64 %iv 2045 %load.a = load i8, ptr %gep.a, align 1 2046 %ext.a = sext i8 %load.a to i32 2047 %gep.a2 = getelementptr inbounds i8, ptr %a, i64 %iv 2048 %load.b = load i8, ptr %gep.a2, align 1 2049 %ext.b = sext i8 %load.b to i32 2050 %mul = mul nsw i32 %ext.b, %ext.a 2051 %add = add nsw i32 %mul, %accum 2052 %iv.next = add nuw nsw i64 %iv, 1 2053 %exitcond.not = icmp eq i64 %iv.next, %N 2054 br i1 %exitcond.not, label %exit, label %for.body, !llvm.loop !7 2055 2056exit: ; preds = %for.body 2057 ret i32 %add 2058} 2059 2060define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 { 2061; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_extend_user( 2062; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 2063; CHECK-INTERLEAVE1-NEXT: entry: 2064; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 2065; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 2066; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2067; CHECK-INTERLEAVE1: vector.ph: 2068; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() 2069; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = mul i64 [[TMP8]], 4 2070; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP10]] 2071; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 2072; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() 2073; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = mul i64 [[TMP11]], 4 2074; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] 2075; CHECK-INTERLEAVE1: vector.body: 2076; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2077; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] 2078; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 2079; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] 2080; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP3]], i32 0 2081; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP4]], align 1 2082; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32> 2083; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] 2084; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 0 2085; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP7]], align 1 2086; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32> 2087; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = mul <vscale x 4 x i32> [[TMP12]], [[TMP9]] 2088; CHECK-INTERLEAVE1-NEXT: [[TMP14]] = add <vscale x 4 x i32> [[TMP13]], [[VEC_PHI]] 2089; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 2090; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2091; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] 2092; CHECK-INTERLEAVE1: middle.block: 2093; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP14]]) 2094; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() 2095; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 4 2096; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 2097; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = extractelement <vscale x 4 x i32> [[TMP12]], i32 [[TMP19]] 2098; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 2099; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] 2100; CHECK-INTERLEAVE1: scalar.ph: 2101; CHECK-INTERLEAVE1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2102; CHECK-INTERLEAVE1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 2103; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]] 2104; CHECK-INTERLEAVE1: for.body: 2105; CHECK-INTERLEAVE1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 2106; CHECK-INTERLEAVE1-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 2107; CHECK-INTERLEAVE1-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] 2108; CHECK-INTERLEAVE1-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 2109; CHECK-INTERLEAVE1-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32 2110; CHECK-INTERLEAVE1-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]] 2111; CHECK-INTERLEAVE1-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 2112; CHECK-INTERLEAVE1-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32 2113; CHECK-INTERLEAVE1-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]] 2114; CHECK-INTERLEAVE1-NEXT: [[ADD]] = add i32 [[MUL]], [[ACCUM]] 2115; CHECK-INTERLEAVE1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 2116; CHECK-INTERLEAVE1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 2117; CHECK-INTERLEAVE1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] 2118; CHECK-INTERLEAVE1: for.exit: 2119; CHECK-INTERLEAVE1-NEXT: [[EXT_B_LCSSA:%.*]] = phi i32 [ [[EXT_B]], [[FOR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] 2120; CHECK-INTERLEAVE1-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] 2121; CHECK-INTERLEAVE1-NEXT: [[RESULT:%.*]] = add i32 [[ADD_LCSSA]], [[EXT_B_LCSSA]] 2122; CHECK-INTERLEAVE1-NEXT: ret i32 [[RESULT]] 2123; 2124; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_extend_user( 2125; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 2126; CHECK-INTERLEAVED-NEXT: entry: 2127; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 2128; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 2129; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2130; CHECK-INTERLEAVED: vector.ph: 2131; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() 2132; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8 2133; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP7]] 2134; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 2135; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() 2136; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP14]], 8 2137; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] 2138; CHECK-INTERLEAVED: vector.body: 2139; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2140; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] 2141; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] 2142; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 2143; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] 2144; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP3]], i32 0 2145; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() 2146; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP15]], 4 2147; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP10]] 2148; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP4]], align 1 2149; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x i8>, ptr [[TMP11]], align 1 2150; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32> 2151; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD2]] to <vscale x 4 x i32> 2152; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] 2153; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0 2154; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() 2155; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 2156; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP8]], i64 [[TMP17]] 2157; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x i8>, ptr [[TMP9]], align 1 2158; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x i8>, ptr [[TMP18]], align 1 2159; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD3]] to <vscale x 4 x i32> 2160; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD4]] to <vscale x 4 x i32> 2161; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = mul <vscale x 4 x i32> [[TMP19]], [[TMP12]] 2162; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = mul <vscale x 4 x i32> [[TMP20]], [[TMP13]] 2163; CHECK-INTERLEAVED-NEXT: [[TMP23]] = add <vscale x 4 x i32> [[TMP21]], [[VEC_PHI]] 2164; CHECK-INTERLEAVED-NEXT: [[TMP24]] = add <vscale x 4 x i32> [[TMP22]], [[VEC_PHI1]] 2165; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 2166; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2167; CHECK-INTERLEAVED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] 2168; CHECK-INTERLEAVED: middle.block: 2169; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <vscale x 4 x i32> [[TMP24]], [[TMP23]] 2170; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[BIN_RDX]]) 2171; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = call i32 @llvm.vscale.i32() 2172; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = mul i32 [[TMP27]], 4 2173; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = sub i32 [[TMP28]], 1 2174; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = extractelement <vscale x 4 x i32> [[TMP20]], i32 [[TMP29]] 2175; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 2176; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] 2177; CHECK-INTERLEAVED: scalar.ph: 2178; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2179; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP26]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 2180; CHECK-INTERLEAVED-NEXT: br label [[FOR_BODY:%.*]] 2181; CHECK-INTERLEAVED: for.body: 2182; CHECK-INTERLEAVED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 2183; CHECK-INTERLEAVED-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 2184; CHECK-INTERLEAVED-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] 2185; CHECK-INTERLEAVED-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 2186; CHECK-INTERLEAVED-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32 2187; CHECK-INTERLEAVED-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]] 2188; CHECK-INTERLEAVED-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 2189; CHECK-INTERLEAVED-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32 2190; CHECK-INTERLEAVED-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]] 2191; CHECK-INTERLEAVED-NEXT: [[ADD]] = add i32 [[MUL]], [[ACCUM]] 2192; CHECK-INTERLEAVED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 2193; CHECK-INTERLEAVED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 2194; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] 2195; CHECK-INTERLEAVED: for.exit: 2196; CHECK-INTERLEAVED-NEXT: [[EXT_B_LCSSA:%.*]] = phi i32 [ [[EXT_B]], [[FOR_BODY]] ], [ [[TMP30]], [[MIDDLE_BLOCK]] ] 2197; CHECK-INTERLEAVED-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ] 2198; CHECK-INTERLEAVED-NEXT: [[RESULT:%.*]] = add i32 [[ADD_LCSSA]], [[EXT_B_LCSSA]] 2199; CHECK-INTERLEAVED-NEXT: ret i32 [[RESULT]] 2200; 2201; CHECK-MAXBW-LABEL: define i32 @not_dotp_extend_user( 2202; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 2203; CHECK-MAXBW-NEXT: entry: 2204; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 2205; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 2206; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2207; CHECK-MAXBW: vector.ph: 2208; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 2209; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 2210; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] 2211; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] 2212; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 2213; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 2214; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] 2215; CHECK-MAXBW: vector.body: 2216; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2217; CHECK-MAXBW-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] 2218; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 2219; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]] 2220; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i32 0 2221; CHECK-MAXBW-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr [[TMP8]], align 1 2222; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32> 2223; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] 2224; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP14]], i32 0 2225; CHECK-MAXBW-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x i8>, ptr [[TMP15]], align 1 2226; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD4]] to <vscale x 8 x i32> 2227; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = mul <vscale x 8 x i32> [[TMP20]], [[TMP13]] 2228; CHECK-MAXBW-NEXT: [[TMP24]] = add <vscale x 8 x i32> [[TMP22]], [[VEC_PHI1]] 2229; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 2230; CHECK-MAXBW-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2231; CHECK-MAXBW-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] 2232; CHECK-MAXBW: middle.block: 2233; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[TMP24]]) 2234; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() 2235; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8 2236; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 2237; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = extractelement <vscale x 8 x i32> [[TMP20]], i32 [[TMP19]] 2238; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] 2239; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] 2240; CHECK-MAXBW: scalar.ph: 2241; CHECK-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2242; CHECK-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 2243; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]] 2244; CHECK-MAXBW: for.body: 2245; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 2246; CHECK-MAXBW-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 2247; CHECK-MAXBW-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] 2248; CHECK-MAXBW-NEXT: [[LOAD_A:%.*]] = load i8, ptr [[GEP_A]], align 1 2249; CHECK-MAXBW-NEXT: [[EXT_A:%.*]] = zext i8 [[LOAD_A]] to i32 2250; CHECK-MAXBW-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i64 [[IV]] 2251; CHECK-MAXBW-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 2252; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = zext i8 [[LOAD_B]] to i32 2253; CHECK-MAXBW-NEXT: [[MUL:%.*]] = mul i32 [[EXT_B]], [[EXT_A]] 2254; CHECK-MAXBW-NEXT: [[ADD]] = add i32 [[MUL]], [[ACCUM]] 2255; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 2256; CHECK-MAXBW-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 2257; CHECK-MAXBW-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] 2258; CHECK-MAXBW: for.exit: 2259; CHECK-MAXBW-NEXT: [[EXT_B_LCSSA:%.*]] = phi i32 [ [[EXT_B]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] 2260; CHECK-MAXBW-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] 2261; CHECK-MAXBW-NEXT: [[RESULT:%.*]] = add i32 [[ADD_LCSSA]], [[EXT_B_LCSSA]] 2262; CHECK-MAXBW-NEXT: ret i32 [[RESULT]] 2263; 2264entry: 2265 br label %for.body 2266 2267for.body: ; preds = %for.body, %entry 2268 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 2269 %accum = phi i32 [ 0, %entry ], [ %add, %for.body ] 2270 %gep.a = getelementptr i8, ptr %a, i64 %iv 2271 %load.a = load i8, ptr %gep.a, align 1 2272 %ext.a = zext i8 %load.a to i32 2273 %gep.b = getelementptr i8, ptr %b, i64 %iv 2274 %load.b = load i8, ptr %gep.b, align 1 2275 %ext.b = zext i8 %load.b to i32 2276 %mul = mul i32 %ext.b, %ext.a 2277 %add = add i32 %mul, %accum 2278 %iv.next = add i64 %iv, 1 2279 %exitcond.not = icmp eq i64 %iv.next, 1024 2280 br i1 %exitcond.not, label %for.exit, label %for.body 2281 2282for.exit: ; preds = %for.body 2283 %result = add i32 %add, %ext.b 2284 ret i32 %result 2285} 2286 2287define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { 2288; CHECK-INTERLEAVE1-LABEL: define i64 @dotp_cost_disagreement( 2289; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 2290; CHECK-INTERLEAVE1-NEXT: entry: 2291; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 2292; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 2293; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 16, [[TMP1]] 2294; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2295; CHECK-INTERLEAVE1: vector.ph: 2296; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 2297; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 2298; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 16, [[TMP3]] 2299; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 16, [[N_MOD_VF]] 2300; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 2301; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 2302; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] 2303; CHECK-INTERLEAVE1: vector.body: 2304; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2305; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] 2306; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 2307; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[TMP6]] 2308; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i32 0 2309; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP8]], align 1 2310; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = zext <vscale x 2 x i8> [[WIDE_LOAD]] to <vscale x 2 x i64> 2311; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 1 2312; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP10]] 2313; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP11]], i32 0 2314; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 2 x i8>, ptr [[TMP12]], align 1 2315; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = zext <vscale x 2 x i8> [[WIDE_LOAD1]] to <vscale x 2 x i64> 2316; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP13]], [[TMP9]] 2317; CHECK-INTERLEAVE1-NEXT: [[TMP15]] = add <vscale x 2 x i64> [[VEC_PHI]], [[TMP14]] 2318; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 2319; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2320; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] 2321; CHECK-INTERLEAVE1: middle.block: 2322; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP15]]) 2323; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 16, [[N_VEC]] 2324; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 2325; CHECK-INTERLEAVE1: scalar.ph: 2326; CHECK-INTERLEAVE1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2327; CHECK-INTERLEAVE1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 2328; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]] 2329; CHECK-INTERLEAVE1: for.body: 2330; CHECK-INTERLEAVE1-NEXT: [[I_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_IV_NEXT:%.*]], [[FOR_BODY]] ] 2331; CHECK-INTERLEAVE1-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 2332; CHECK-INTERLEAVE1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[I_IV]] 2333; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 2334; CHECK-INTERLEAVE1-NEXT: [[CONV:%.*]] = zext i8 [[TMP18]] to i64 2335; CHECK-INTERLEAVE1-NEXT: [[I_IV_NEXT]] = add nuw nsw i64 [[I_IV]], 1 2336; CHECK-INTERLEAVE1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[I_IV_NEXT]] 2337; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 2338; CHECK-INTERLEAVE1-NEXT: [[CONV3:%.*]] = zext i8 [[TMP19]] to i64 2339; CHECK-INTERLEAVE1-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[CONV3]], [[CONV]] 2340; CHECK-INTERLEAVE1-NEXT: [[ADD]] = add i64 [[SUM]], [[MUL]] 2341; CHECK-INTERLEAVE1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[I_IV_NEXT]], 16 2342; CHECK-INTERLEAVE1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] 2343; CHECK-INTERLEAVE1: exit: 2344; CHECK-INTERLEAVE1-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] 2345; CHECK-INTERLEAVE1-NEXT: ret i64 [[ADD_LCSSA]] 2346; 2347; CHECK-INTERLEAVED-LABEL: define i64 @dotp_cost_disagreement( 2348; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 2349; CHECK-INTERLEAVED-NEXT: entry: 2350; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 2351; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 2352; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 16, [[TMP1]] 2353; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2354; CHECK-INTERLEAVED: vector.ph: 2355; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 2356; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 2357; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 16, [[TMP3]] 2358; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 16, [[N_MOD_VF]] 2359; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 2360; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 2361; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] 2362; CHECK-INTERLEAVED: vector.body: 2363; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2364; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] 2365; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ] 2366; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 2367; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[TMP6]] 2368; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i32 0 2369; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() 2370; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 2371; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 [[TMP10]] 2372; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP8]], align 1 2373; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 2 x i8>, ptr [[TMP11]], align 1 2374; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = zext <vscale x 2 x i8> [[WIDE_LOAD]] to <vscale x 2 x i64> 2375; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = zext <vscale x 2 x i8> [[WIDE_LOAD2]] to <vscale x 2 x i64> 2376; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[TMP6]], 1 2377; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP14]] 2378; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP15]], i32 0 2379; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() 2380; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 2 2381; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP15]], i64 [[TMP18]] 2382; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 2 x i8>, ptr [[TMP16]], align 1 2383; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 2 x i8>, ptr [[TMP19]], align 1 2384; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = zext <vscale x 2 x i8> [[WIDE_LOAD3]] to <vscale x 2 x i64> 2385; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = zext <vscale x 2 x i8> [[WIDE_LOAD4]] to <vscale x 2 x i64> 2386; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP20]], [[TMP12]] 2387; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP21]], [[TMP13]] 2388; CHECK-INTERLEAVED-NEXT: [[TMP24]] = add <vscale x 2 x i64> [[VEC_PHI]], [[TMP22]] 2389; CHECK-INTERLEAVED-NEXT: [[TMP25]] = add <vscale x 2 x i64> [[VEC_PHI1]], [[TMP23]] 2390; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 2391; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2392; CHECK-INTERLEAVED-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] 2393; CHECK-INTERLEAVED: middle.block: 2394; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <vscale x 2 x i64> [[TMP25]], [[TMP24]] 2395; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[BIN_RDX]]) 2396; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 16, [[N_VEC]] 2397; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 2398; CHECK-INTERLEAVED: scalar.ph: 2399; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2400; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP27]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 2401; CHECK-INTERLEAVED-NEXT: br label [[FOR_BODY:%.*]] 2402; CHECK-INTERLEAVED: for.body: 2403; CHECK-INTERLEAVED-NEXT: [[I_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_IV_NEXT:%.*]], [[FOR_BODY]] ] 2404; CHECK-INTERLEAVED-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 2405; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[I_IV]] 2406; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 2407; CHECK-INTERLEAVED-NEXT: [[CONV:%.*]] = zext i8 [[TMP28]] to i64 2408; CHECK-INTERLEAVED-NEXT: [[I_IV_NEXT]] = add nuw nsw i64 [[I_IV]], 1 2409; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[I_IV_NEXT]] 2410; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 2411; CHECK-INTERLEAVED-NEXT: [[CONV3:%.*]] = zext i8 [[TMP29]] to i64 2412; CHECK-INTERLEAVED-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[CONV3]], [[CONV]] 2413; CHECK-INTERLEAVED-NEXT: [[ADD]] = add i64 [[SUM]], [[MUL]] 2414; CHECK-INTERLEAVED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[I_IV_NEXT]], 16 2415; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] 2416; CHECK-INTERLEAVED: exit: 2417; CHECK-INTERLEAVED-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ] 2418; CHECK-INTERLEAVED-NEXT: ret i64 [[ADD_LCSSA]] 2419; 2420; CHECK-MAXBW-LABEL: define i64 @dotp_cost_disagreement( 2421; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 2422; CHECK-MAXBW-NEXT: entry: 2423; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 2424; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 2425; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 16, [[TMP1]] 2426; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2427; CHECK-MAXBW: vector.ph: 2428; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 2429; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 2430; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 16, [[TMP3]] 2431; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 16, [[N_MOD_VF]] 2432; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 2433; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 2434; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] 2435; CHECK-MAXBW: vector.body: 2436; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2437; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] 2438; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 2439; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[TMP6]] 2440; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i32 0 2441; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP8]], align 1 2442; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i64> 2443; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 1 2444; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP10]] 2445; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP11]], i32 0 2446; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP12]], align 1 2447; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i64> 2448; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = mul nuw nsw <vscale x 8 x i64> [[TMP13]], [[TMP9]] 2449; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 1 x i64> @llvm.experimental.vector.partial.reduce.add.nxv1i64.nxv8i64(<vscale x 1 x i64> [[VEC_PHI]], <vscale x 8 x i64> [[TMP14]]) 2450; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 2451; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2452; CHECK-MAXBW-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] 2453; CHECK-MAXBW: middle.block: 2454; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> [[PARTIAL_REDUCE]]) 2455; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 16, [[N_VEC]] 2456; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 2457; CHECK-MAXBW: scalar.ph: 2458; CHECK-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2459; CHECK-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 2460; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]] 2461; CHECK-MAXBW: for.body: 2462; CHECK-MAXBW-NEXT: [[I_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_IV_NEXT:%.*]], [[FOR_BODY]] ] 2463; CHECK-MAXBW-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 2464; CHECK-MAXBW-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[I_IV]] 2465; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 2466; CHECK-MAXBW-NEXT: [[CONV:%.*]] = zext i8 [[TMP17]] to i64 2467; CHECK-MAXBW-NEXT: [[I_IV_NEXT]] = add nuw nsw i64 [[I_IV]], 1 2468; CHECK-MAXBW-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[I_IV_NEXT]] 2469; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 2470; CHECK-MAXBW-NEXT: [[CONV3:%.*]] = zext i8 [[TMP18]] to i64 2471; CHECK-MAXBW-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[CONV3]], [[CONV]] 2472; CHECK-MAXBW-NEXT: [[ADD]] = add i64 [[SUM]], [[MUL]] 2473; CHECK-MAXBW-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[I_IV_NEXT]], 16 2474; CHECK-MAXBW-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] 2475; CHECK-MAXBW: exit: 2476; CHECK-MAXBW-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] 2477; CHECK-MAXBW-NEXT: ret i64 [[ADD_LCSSA]] 2478; 2479entry: 2480 br label %for.body 2481 2482for.body: ; preds = %entry, %for.body 2483 %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] 2484 %sum = phi i64 [ 0, %entry ], [ %add, %for.body ] 2485 %arrayidx = getelementptr inbounds nuw i8, ptr %a, i64 %i.iv 2486 %0 = load i8, ptr %arrayidx, align 1 2487 %conv = zext i8 %0 to i64 2488 %i.iv.next = add nuw nsw i64 %i.iv, 1 2489 %arrayidx2 = getelementptr inbounds nuw i8, ptr %b, i64 %i.iv.next 2490 %1 = load i8, ptr %arrayidx2, align 1 2491 %conv3 = zext i8 %1 to i64 2492 %mul = mul nuw nsw i64 %conv3, %conv 2493 %add = add i64 %sum, %mul 2494 %exitcond.not = icmp eq i64 %i.iv.next, 16 2495 br i1 %exitcond.not, label %exit, label %for.body 2496 2497exit: ; preds = %for.body 2498 ret i64 %add 2499} 2500 2501define void @not_dotp_not_phi2(ptr %matrix, i32 %n) #0 { 2502; CHECK-INTERLEAVE1-LABEL: define void @not_dotp_not_phi2( 2503; CHECK-INTERLEAVE1-SAME: ptr [[MATRIX:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { 2504; CHECK-INTERLEAVE1-NEXT: entry: 2505; CHECK-INTERLEAVE1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N]], 0 2506; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP]], label [[FOR_PREHEADER:%.*]], label [[EXIT:%.*]] 2507; CHECK-INTERLEAVE1: for.preheader: 2508; CHECK-INTERLEAVE1-NEXT: [[LOAD_A:%.*]] = load i8, ptr null, align 1 2509; CHECK-INTERLEAVE1-NEXT: [[LOAD_A1:%.*]] = load i8, ptr inttoptr (i64 1 to ptr), align 1 2510; CHECK-INTERLEAVE1-NEXT: [[A_EXT:%.*]] = sext i8 [[LOAD_A]] to i32 2511; CHECK-INTERLEAVE1-NEXT: [[A_EXT1:%.*]] = sext i8 [[LOAD_A1]] to i32 2512; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]] 2513; CHECK-INTERLEAVE1: for.body: 2514; CHECK-INTERLEAVE1-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_PREHEADER]] ] 2515; CHECK-INTERLEAVE1-NEXT: [[PTR:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[FOR_BODY]] ], [ [[MATRIX]], [[FOR_PREHEADER]] ] 2516; CHECK-INTERLEAVE1-NEXT: [[ACCUM:%.*]] = phi i32 [ [[ADD_1:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_PREHEADER]] ] 2517; CHECK-INTERLEAVE1-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[PTR]], i64 1 2518; CHECK-INTERLEAVE1-NEXT: [[GEP_B1:%.*]] = getelementptr i8, ptr [[PTR]], i64 2 2519; CHECK-INTERLEAVE1-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 2520; CHECK-INTERLEAVE1-NEXT: [[B_EXT:%.*]] = sext i8 [[LOAD_B]] to i32 2521; CHECK-INTERLEAVE1-NEXT: [[MUL:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]] 2522; CHECK-INTERLEAVE1-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[ACCUM]] 2523; CHECK-INTERLEAVE1-NEXT: [[LOAD_B1:%.*]] = load i8, ptr [[GEP_B1]], align 1 2524; CHECK-INTERLEAVE1-NEXT: [[B_EXT1:%.*]] = sext i8 [[LOAD_B1]] to i32 2525; CHECK-INTERLEAVE1-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[A_EXT1]], [[B_EXT1]] 2526; CHECK-INTERLEAVE1-NEXT: [[ADD_1]] = add i32 [[MUL_1]], [[ADD]] 2527; CHECK-INTERLEAVE1-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[PTR]], i64 16 2528; CHECK-INTERLEAVE1-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 2529; CHECK-INTERLEAVE1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] 2530; CHECK-INTERLEAVE1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]] 2531; CHECK-INTERLEAVE1: for.exit: 2532; CHECK-INTERLEAVE1-NEXT: [[ADD_1_LCSSA:%.*]] = phi i32 [ [[ADD_1]], [[FOR_BODY]] ] 2533; CHECK-INTERLEAVE1-NEXT: [[ADD_FLOAT:%.*]] = sitofp i32 [[ADD_1_LCSSA]] to float 2534; CHECK-INTERLEAVE1-NEXT: br label [[EXIT]] 2535; CHECK-INTERLEAVE1: exit: 2536; CHECK-INTERLEAVE1-NEXT: [[RESULT:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD_FLOAT]], [[FOR_EXIT]] ] 2537; CHECK-INTERLEAVE1-NEXT: store float [[RESULT]], ptr [[MATRIX]], align 4 2538; CHECK-INTERLEAVE1-NEXT: ret void 2539; 2540; CHECK-INTERLEAVED-LABEL: define void @not_dotp_not_phi2( 2541; CHECK-INTERLEAVED-SAME: ptr [[MATRIX:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { 2542; CHECK-INTERLEAVED-NEXT: entry: 2543; CHECK-INTERLEAVED-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N]], 0 2544; CHECK-INTERLEAVED-NEXT: br i1 [[CMP]], label [[FOR_PREHEADER:%.*]], label [[EXIT:%.*]] 2545; CHECK-INTERLEAVED: for.preheader: 2546; CHECK-INTERLEAVED-NEXT: [[LOAD_A:%.*]] = load i8, ptr null, align 1 2547; CHECK-INTERLEAVED-NEXT: [[LOAD_A1:%.*]] = load i8, ptr inttoptr (i64 1 to ptr), align 1 2548; CHECK-INTERLEAVED-NEXT: [[A_EXT:%.*]] = sext i8 [[LOAD_A]] to i32 2549; CHECK-INTERLEAVED-NEXT: [[A_EXT1:%.*]] = sext i8 [[LOAD_A1]] to i32 2550; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 2551; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 2552; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2553; CHECK-INTERLEAVED: vector.ph: 2554; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 2555; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] 2556; CHECK-INTERLEAVED-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32 2557; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul i64 [[N_VEC]], 16 2558; CHECK-INTERLEAVED-NEXT: [[IND_END1:%.*]] = getelementptr i8, ptr [[MATRIX]], i64 [[TMP1]] 2559; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] 2560; CHECK-INTERLEAVED: vector.body: 2561; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2562; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] 2563; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] 2564; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 2565; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0 2566; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 16 2567; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[MATRIX]], i64 [[TMP2]] 2568; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[MATRIX]], i64 [[TMP3]] 2569; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 1 2570; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[NEXT_GEP3]], i64 1 2571; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2 2572; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[NEXT_GEP3]], i64 2 2573; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP4]], align 1 2574; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP5]], align 1 2575; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = sext i8 [[TMP8]] to i32 2576; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = sext i8 [[TMP9]] to i32 2577; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = mul nsw i32 [[A_EXT]], [[TMP10]] 2578; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = mul nsw i32 [[A_EXT]], [[TMP11]] 2579; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[VEC_PHI]] 2580; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = add i32 [[TMP13]], [[VEC_PHI2]] 2581; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = load i8, ptr [[TMP6]], align 1 2582; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP7]], align 1 2583; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = sext i8 [[TMP16]] to i32 2584; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = sext i8 [[TMP17]] to i32 2585; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = mul nsw i32 [[A_EXT1]], [[TMP18]] 2586; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = mul nsw i32 [[A_EXT1]], [[TMP19]] 2587; CHECK-INTERLEAVED-NEXT: [[TMP22]] = add i32 [[TMP20]], [[TMP14]] 2588; CHECK-INTERLEAVED-NEXT: [[TMP23]] = add i32 [[TMP21]], [[TMP15]] 2589; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 2590; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2591; CHECK-INTERLEAVED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] 2592; CHECK-INTERLEAVED: middle.block: 2593; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP23]], [[TMP22]] 2594; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] 2595; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] 2596; CHECK-INTERLEAVED: scalar.ph: 2597; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] 2598; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[MATRIX]], [[FOR_PREHEADER]] ] 2599; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] 2600; CHECK-INTERLEAVED-NEXT: br label [[FOR_BODY:%.*]] 2601; CHECK-INTERLEAVED: for.body: 2602; CHECK-INTERLEAVED-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 2603; CHECK-INTERLEAVED-NEXT: [[PTR:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ] 2604; CHECK-INTERLEAVED-NEXT: [[ACCUM:%.*]] = phi i32 [ [[ADD_1:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 2605; CHECK-INTERLEAVED-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[PTR]], i64 1 2606; CHECK-INTERLEAVED-NEXT: [[GEP_B1:%.*]] = getelementptr i8, ptr [[PTR]], i64 2 2607; CHECK-INTERLEAVED-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 2608; CHECK-INTERLEAVED-NEXT: [[B_EXT:%.*]] = sext i8 [[LOAD_B]] to i32 2609; CHECK-INTERLEAVED-NEXT: [[MUL:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]] 2610; CHECK-INTERLEAVED-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[ACCUM]] 2611; CHECK-INTERLEAVED-NEXT: [[LOAD_B1:%.*]] = load i8, ptr [[GEP_B1]], align 1 2612; CHECK-INTERLEAVED-NEXT: [[B_EXT1:%.*]] = sext i8 [[LOAD_B1]] to i32 2613; CHECK-INTERLEAVED-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[A_EXT1]], [[B_EXT1]] 2614; CHECK-INTERLEAVED-NEXT: [[ADD_1]] = add i32 [[MUL_1]], [[ADD]] 2615; CHECK-INTERLEAVED-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[PTR]], i64 16 2616; CHECK-INTERLEAVED-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 2617; CHECK-INTERLEAVED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] 2618; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] 2619; CHECK-INTERLEAVED: for.exit: 2620; CHECK-INTERLEAVED-NEXT: [[ADD_1_LCSSA:%.*]] = phi i32 [ [[ADD_1]], [[FOR_BODY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] 2621; CHECK-INTERLEAVED-NEXT: [[ADD_FLOAT:%.*]] = sitofp i32 [[ADD_1_LCSSA]] to float 2622; CHECK-INTERLEAVED-NEXT: br label [[EXIT]] 2623; CHECK-INTERLEAVED: exit: 2624; CHECK-INTERLEAVED-NEXT: [[RESULT:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD_FLOAT]], [[FOR_EXIT]] ] 2625; CHECK-INTERLEAVED-NEXT: store float [[RESULT]], ptr [[MATRIX]], align 4 2626; CHECK-INTERLEAVED-NEXT: ret void 2627; 2628; CHECK-MAXBW-LABEL: define void @not_dotp_not_phi2( 2629; CHECK-MAXBW-SAME: ptr [[MATRIX:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { 2630; CHECK-MAXBW-NEXT: entry: 2631; CHECK-MAXBW-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N]], 0 2632; CHECK-MAXBW-NEXT: br i1 [[CMP]], label [[FOR_PREHEADER:%.*]], label [[EXIT:%.*]] 2633; CHECK-MAXBW: for.preheader: 2634; CHECK-MAXBW-NEXT: [[LOAD_A:%.*]] = load i8, ptr null, align 1 2635; CHECK-MAXBW-NEXT: [[LOAD_A1:%.*]] = load i8, ptr inttoptr (i64 1 to ptr), align 1 2636; CHECK-MAXBW-NEXT: [[A_EXT:%.*]] = sext i8 [[LOAD_A]] to i32 2637; CHECK-MAXBW-NEXT: [[A_EXT1:%.*]] = sext i8 [[LOAD_A1]] to i32 2638; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]] 2639; CHECK-MAXBW: for.body: 2640; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_PREHEADER]] ] 2641; CHECK-MAXBW-NEXT: [[PTR:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[FOR_BODY]] ], [ [[MATRIX]], [[FOR_PREHEADER]] ] 2642; CHECK-MAXBW-NEXT: [[ACCUM:%.*]] = phi i32 [ [[ADD_1:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_PREHEADER]] ] 2643; CHECK-MAXBW-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[PTR]], i64 1 2644; CHECK-MAXBW-NEXT: [[GEP_B1:%.*]] = getelementptr i8, ptr [[PTR]], i64 2 2645; CHECK-MAXBW-NEXT: [[LOAD_B:%.*]] = load i8, ptr [[GEP_B]], align 1 2646; CHECK-MAXBW-NEXT: [[B_EXT:%.*]] = sext i8 [[LOAD_B]] to i32 2647; CHECK-MAXBW-NEXT: [[MUL:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]] 2648; CHECK-MAXBW-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[ACCUM]] 2649; CHECK-MAXBW-NEXT: [[LOAD_B1:%.*]] = load i8, ptr [[GEP_B1]], align 1 2650; CHECK-MAXBW-NEXT: [[B_EXT1:%.*]] = sext i8 [[LOAD_B1]] to i32 2651; CHECK-MAXBW-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[A_EXT1]], [[B_EXT1]] 2652; CHECK-MAXBW-NEXT: [[ADD_1]] = add i32 [[MUL_1]], [[ADD]] 2653; CHECK-MAXBW-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[PTR]], i64 16 2654; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 2655; CHECK-MAXBW-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] 2656; CHECK-MAXBW-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]] 2657; CHECK-MAXBW: for.exit: 2658; CHECK-MAXBW-NEXT: [[ADD_1_LCSSA:%.*]] = phi i32 [ [[ADD_1]], [[FOR_BODY]] ] 2659; CHECK-MAXBW-NEXT: [[ADD_FLOAT:%.*]] = sitofp i32 [[ADD_1_LCSSA]] to float 2660; CHECK-MAXBW-NEXT: br label [[EXIT]] 2661; CHECK-MAXBW: exit: 2662; CHECK-MAXBW-NEXT: [[RESULT:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD_FLOAT]], [[FOR_EXIT]] ] 2663; CHECK-MAXBW-NEXT: store float [[RESULT]], ptr [[MATRIX]], align 4 2664; CHECK-MAXBW-NEXT: ret void 2665; 2666entry: 2667 %cmp = icmp sgt i32 %n, 0 2668 br i1 %cmp, label %for.preheader, label %exit 2669 2670for.preheader: ; preds = %entry 2671 %load.a = load i8, ptr inttoptr (i64 0 to ptr), align 1 2672 %load.a1 = load i8, ptr inttoptr (i64 1 to ptr), align 1 2673 %a.ext = sext i8 %load.a to i32 2674 %a.ext1 = sext i8 %load.a1 to i32 2675 br label %for.body 2676 2677for.body: ; preds = %for.preheader, %for.body 2678 %iv = phi i32 [ %iv.next, %for.body ], [ 0, %for.preheader ] 2679 %ptr = phi ptr [ %scevgep, %for.body ], [ %matrix, %for.preheader ] 2680 %accum = phi i32 [ %add.1, %for.body ], [ 0, %for.preheader ] 2681 %gep.b = getelementptr i8, ptr %ptr, i64 1 2682 %gep.b1 = getelementptr i8, ptr %ptr, i64 2 2683 %load.b = load i8, ptr %gep.b, align 1 2684 %b.ext = sext i8 %load.b to i32 2685 %mul = mul nsw i32 %a.ext, %b.ext 2686 %add = add i32 %mul, %accum 2687 %load.b1 = load i8, ptr %gep.b1, align 1 2688 %b.ext1 = sext i8 %load.b1 to i32 2689 %mul.1 = mul nsw i32 %a.ext1, %b.ext1 2690 %add.1 = add i32 %mul.1, %add 2691 %scevgep = getelementptr i8, ptr %ptr, i64 16 2692 %iv.next = add nuw nsw i32 %iv, 1 2693 %exitcond.not = icmp eq i32 %iv.next, %n 2694 br i1 %exitcond.not, label %for.exit, label %for.body 2695 2696for.exit: ; preds = %for.body 2697 %add.1.lcssa = phi i32 [ %add.1, %for.body ] 2698 %add.float = sitofp i32 %add.1.lcssa to float 2699 br label %exit 2700 2701exit: ; preds = %for.exit, %entry 2702 %result = phi float [ 0.000000e+00, %entry ], [ %add.float, %for.exit ] 2703 store float %result, ptr %matrix, align 4 2704 ret void 2705} 2706 2707define i64 @not_dotp_ext_outside_plan(ptr %a, i16 %b, i64 %n) #0 { 2708; CHECK-INTERLEAVE1-LABEL: define i64 @not_dotp_ext_outside_plan( 2709; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], i16 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { 2710; CHECK-INTERLEAVE1-NEXT: entry: 2711; CHECK-INTERLEAVE1-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 0 2712; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_PH:%.*]] 2713; CHECK-INTERLEAVE1: for.ph: 2714; CHECK-INTERLEAVE1-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64 2715; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 2716; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2717; CHECK-INTERLEAVE1: vector.ph: 2718; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 2719; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 2720; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[EXT_B]], i64 0 2721; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer 2722; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] 2723; CHECK-INTERLEAVE1: vector.body: 2724; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2725; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 2726; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 2727; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[TMP0]] 2728; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 0 2729; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP2]], align 2 2730; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64> 2731; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul nuw nsw <8 x i64> [[TMP3]], [[BROADCAST_SPLAT]] 2732; CHECK-INTERLEAVE1-NEXT: [[TMP5]] = add <8 x i64> [[TMP4]], [[VEC_PHI]] 2733; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 2734; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2735; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] 2736; CHECK-INTERLEAVE1: middle.block: 2737; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) 2738; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 2739; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] 2740; CHECK-INTERLEAVE1: scalar.ph: 2741; CHECK-INTERLEAVE1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PH]] ] 2742; CHECK-INTERLEAVE1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PH]] ] 2743; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]] 2744; CHECK-INTERLEAVE1: for.body: 2745; CHECK-INTERLEAVE1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 2746; CHECK-INTERLEAVE1-NEXT: [[ACCUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 2747; CHECK-INTERLEAVE1-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[IV]] 2748; CHECK-INTERLEAVE1-NEXT: [[LOAD_A:%.*]] = load i16, ptr [[GEP_A]], align 2 2749; CHECK-INTERLEAVE1-NEXT: [[EXT_A:%.*]] = zext i16 [[LOAD_A]] to i64 2750; CHECK-INTERLEAVE1-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[EXT_A]], [[EXT_B]] 2751; CHECK-INTERLEAVE1-NEXT: [[ADD]] = add i64 [[MUL]], [[ACCUM]] 2752; CHECK-INTERLEAVE1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 2753; CHECK-INTERLEAVE1-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 2754; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_1]], label [[EXIT_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] 2755; CHECK-INTERLEAVE1: exit.loopexit: 2756; CHECK-INTERLEAVE1-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 2757; CHECK-INTERLEAVE1-NEXT: br label [[EXIT]] 2758; CHECK-INTERLEAVE1: exit: 2759; CHECK-INTERLEAVE1-NEXT: [[RESULT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[EXIT_LOOPEXIT]] ] 2760; CHECK-INTERLEAVE1-NEXT: ret i64 [[RESULT]] 2761; 2762; CHECK-INTERLEAVED-LABEL: define i64 @not_dotp_ext_outside_plan( 2763; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], i16 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { 2764; CHECK-INTERLEAVED-NEXT: entry: 2765; CHECK-INTERLEAVED-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 0 2766; CHECK-INTERLEAVED-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_PH:%.*]] 2767; CHECK-INTERLEAVED: for.ph: 2768; CHECK-INTERLEAVED-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64 2769; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 2770; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2771; CHECK-INTERLEAVED: vector.ph: 2772; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 2773; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 2774; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[EXT_B]], i64 0 2775; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer 2776; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] 2777; CHECK-INTERLEAVED: vector.body: 2778; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2779; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] 2780; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] 2781; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 2782; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[TMP0]] 2783; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 0 2784; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 8 2785; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP2]], align 2 2786; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2 2787; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64> 2788; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <8 x i16> [[WIDE_LOAD2]] to <8 x i64> 2789; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = mul nuw nsw <8 x i64> [[TMP4]], [[BROADCAST_SPLAT]] 2790; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = mul nuw nsw <8 x i64> [[TMP5]], [[BROADCAST_SPLAT]] 2791; CHECK-INTERLEAVED-NEXT: [[TMP8]] = add <8 x i64> [[TMP6]], [[VEC_PHI]] 2792; CHECK-INTERLEAVED-NEXT: [[TMP9]] = add <8 x i64> [[TMP7]], [[VEC_PHI1]] 2793; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 2794; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2795; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] 2796; CHECK-INTERLEAVED: middle.block: 2797; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <8 x i64> [[TMP9]], [[TMP8]] 2798; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[BIN_RDX]]) 2799; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 2800; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] 2801; CHECK-INTERLEAVED: scalar.ph: 2802; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PH]] ] 2803; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PH]] ] 2804; CHECK-INTERLEAVED-NEXT: br label [[FOR_BODY:%.*]] 2805; CHECK-INTERLEAVED: for.body: 2806; CHECK-INTERLEAVED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 2807; CHECK-INTERLEAVED-NEXT: [[ACCUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 2808; CHECK-INTERLEAVED-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[IV]] 2809; CHECK-INTERLEAVED-NEXT: [[LOAD_A:%.*]] = load i16, ptr [[GEP_A]], align 2 2810; CHECK-INTERLEAVED-NEXT: [[EXT_A:%.*]] = zext i16 [[LOAD_A]] to i64 2811; CHECK-INTERLEAVED-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[EXT_A]], [[EXT_B]] 2812; CHECK-INTERLEAVED-NEXT: [[ADD]] = add i64 [[MUL]], [[ACCUM]] 2813; CHECK-INTERLEAVED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 2814; CHECK-INTERLEAVED-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 2815; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_1]], label [[EXIT_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] 2816; CHECK-INTERLEAVED: exit.loopexit: 2817; CHECK-INTERLEAVED-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] 2818; CHECK-INTERLEAVED-NEXT: br label [[EXIT]] 2819; CHECK-INTERLEAVED: exit: 2820; CHECK-INTERLEAVED-NEXT: [[RESULT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[EXIT_LOOPEXIT]] ] 2821; CHECK-INTERLEAVED-NEXT: ret i64 [[RESULT]] 2822; 2823; CHECK-MAXBW-LABEL: define i64 @not_dotp_ext_outside_plan( 2824; CHECK-MAXBW-SAME: ptr [[A:%.*]], i16 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { 2825; CHECK-MAXBW-NEXT: entry: 2826; CHECK-MAXBW-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 0 2827; CHECK-MAXBW-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_PH:%.*]] 2828; CHECK-MAXBW: for.ph: 2829; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64 2830; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 2831; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 2832; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] 2833; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2834; CHECK-MAXBW: vector.ph: 2835; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 2836; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 2837; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 2838; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 2839; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 2840; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 2841; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[EXT_B]], i64 0 2842; CHECK-MAXBW-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer 2843; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] 2844; CHECK-MAXBW: vector.body: 2845; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2846; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] 2847; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 2848; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[TMP6]] 2849; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP7]], i32 0 2850; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP8]], align 2 2851; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i16> [[WIDE_LOAD]] to <vscale x 4 x i64> 2852; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = mul nuw nsw <vscale x 4 x i64> [[TMP9]], [[BROADCAST_SPLAT]] 2853; CHECK-MAXBW-NEXT: [[TMP11]] = add <vscale x 4 x i64> [[TMP10]], [[VEC_PHI]] 2854; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 2855; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2856; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] 2857; CHECK-MAXBW: middle.block: 2858; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> [[TMP11]]) 2859; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 2860; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] 2861; CHECK-MAXBW: scalar.ph: 2862; CHECK-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PH]] ] 2863; CHECK-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PH]] ] 2864; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]] 2865; CHECK-MAXBW: for.body: 2866; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 2867; CHECK-MAXBW-NEXT: [[ACCUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 2868; CHECK-MAXBW-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[IV]] 2869; CHECK-MAXBW-NEXT: [[LOAD_A:%.*]] = load i16, ptr [[GEP_A]], align 2 2870; CHECK-MAXBW-NEXT: [[EXT_A:%.*]] = zext i16 [[LOAD_A]] to i64 2871; CHECK-MAXBW-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[EXT_A]], [[EXT_B]] 2872; CHECK-MAXBW-NEXT: [[ADD]] = add i64 [[MUL]], [[ACCUM]] 2873; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 2874; CHECK-MAXBW-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 2875; CHECK-MAXBW-NEXT: br i1 [[CMP_1]], label [[EXIT_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] 2876; CHECK-MAXBW: exit.loopexit: 2877; CHECK-MAXBW-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] 2878; CHECK-MAXBW-NEXT: br label [[EXIT]] 2879; CHECK-MAXBW: exit: 2880; CHECK-MAXBW-NEXT: [[RESULT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[EXIT_LOOPEXIT]] ] 2881; CHECK-MAXBW-NEXT: ret i64 [[RESULT]] 2882; 2883entry: 2884 %cmp = icmp eq i64 %n, 0 2885 br i1 %cmp, label %exit, label %for.ph 2886 2887for.ph: ; preds = %entry 2888 %ext.b = zext i16 %b to i64 2889 br label %for.body 2890 2891for.body: ; preds = %for.body.lr.ph, %for.body 2892 %iv = phi i64 [ 0, %for.ph ], [ %iv.next, %for.body ] 2893 %accum = phi i64 [ 0, %for.ph ], [ %add, %for.body ] 2894 %gep.a = getelementptr inbounds nuw i16, ptr %a, i64 %iv 2895 %load.a = load i16, ptr %gep.a, align 2 2896 %ext.a = zext i16 %load.a to i64 2897 %mul = mul nuw nsw i64 %ext.a, %ext.b 2898 %add = add i64 %mul, %accum 2899 %iv.next = add nuw nsw i64 %iv, 1 2900 %cmp.1 = icmp eq i64 %iv.next, %n 2901 br i1 %cmp.1, label %exit, label %for.body 2902 2903exit: ; preds = %for.cond.cleanup.loopexit, %entry 2904 %result = phi i64 [ 0, %entry ], [ %add, %for.body ] 2905 ret i64 %result 2906} 2907 2908define i64 @not_dotp_ext_outside_plan2(ptr %a, i16 %b, i64 %n) #0 { 2909; CHECK-INTERLEAVE1-LABEL: define i64 @not_dotp_ext_outside_plan2( 2910; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], i16 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { 2911; CHECK-INTERLEAVE1-NEXT: entry: 2912; CHECK-INTERLEAVE1-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 0 2913; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_PH:%.*]] 2914; CHECK-INTERLEAVE1: for.ph: 2915; CHECK-INTERLEAVE1-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64 2916; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 2917; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2918; CHECK-INTERLEAVE1: vector.ph: 2919; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 2920; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 2921; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[EXT_B]], i64 0 2922; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer 2923; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] 2924; CHECK-INTERLEAVE1: vector.body: 2925; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2926; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 2927; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 2928; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[TMP0]] 2929; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 0 2930; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP2]], align 2 2931; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64> 2932; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul nuw nsw <8 x i64> [[BROADCAST_SPLAT]], [[TMP3]] 2933; CHECK-INTERLEAVE1-NEXT: [[TMP5]] = add <8 x i64> [[TMP4]], [[VEC_PHI]] 2934; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 2935; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2936; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] 2937; CHECK-INTERLEAVE1: middle.block: 2938; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) 2939; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 2940; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] 2941; CHECK-INTERLEAVE1: scalar.ph: 2942; CHECK-INTERLEAVE1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PH]] ] 2943; CHECK-INTERLEAVE1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PH]] ] 2944; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]] 2945; CHECK-INTERLEAVE1: for.body: 2946; CHECK-INTERLEAVE1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 2947; CHECK-INTERLEAVE1-NEXT: [[ACCUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 2948; CHECK-INTERLEAVE1-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[IV]] 2949; CHECK-INTERLEAVE1-NEXT: [[LOAD_A:%.*]] = load i16, ptr [[GEP_A]], align 2 2950; CHECK-INTERLEAVE1-NEXT: [[EXT_A:%.*]] = zext i16 [[LOAD_A]] to i64 2951; CHECK-INTERLEAVE1-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[EXT_B]], [[EXT_A]] 2952; CHECK-INTERLEAVE1-NEXT: [[ADD]] = add i64 [[MUL]], [[ACCUM]] 2953; CHECK-INTERLEAVE1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 2954; CHECK-INTERLEAVE1-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 2955; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_1]], label [[EXIT_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] 2956; CHECK-INTERLEAVE1: exit.loopexit: 2957; CHECK-INTERLEAVE1-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 2958; CHECK-INTERLEAVE1-NEXT: br label [[EXIT]] 2959; CHECK-INTERLEAVE1: exit: 2960; CHECK-INTERLEAVE1-NEXT: [[RESULT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[EXIT_LOOPEXIT]] ] 2961; CHECK-INTERLEAVE1-NEXT: ret i64 [[RESULT]] 2962; 2963; CHECK-INTERLEAVED-LABEL: define i64 @not_dotp_ext_outside_plan2( 2964; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], i16 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { 2965; CHECK-INTERLEAVED-NEXT: entry: 2966; CHECK-INTERLEAVED-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 0 2967; CHECK-INTERLEAVED-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_PH:%.*]] 2968; CHECK-INTERLEAVED: for.ph: 2969; CHECK-INTERLEAVED-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64 2970; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 2971; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2972; CHECK-INTERLEAVED: vector.ph: 2973; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 2974; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 2975; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[EXT_B]], i64 0 2976; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer 2977; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] 2978; CHECK-INTERLEAVED: vector.body: 2979; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2980; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] 2981; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] 2982; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 2983; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[TMP0]] 2984; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 0 2985; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 8 2986; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP2]], align 2 2987; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2 2988; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64> 2989; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <8 x i16> [[WIDE_LOAD2]] to <8 x i64> 2990; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = mul nuw nsw <8 x i64> [[BROADCAST_SPLAT]], [[TMP4]] 2991; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = mul nuw nsw <8 x i64> [[BROADCAST_SPLAT]], [[TMP5]] 2992; CHECK-INTERLEAVED-NEXT: [[TMP8]] = add <8 x i64> [[TMP6]], [[VEC_PHI]] 2993; CHECK-INTERLEAVED-NEXT: [[TMP9]] = add <8 x i64> [[TMP7]], [[VEC_PHI1]] 2994; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 2995; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2996; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] 2997; CHECK-INTERLEAVED: middle.block: 2998; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <8 x i64> [[TMP9]], [[TMP8]] 2999; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[BIN_RDX]]) 3000; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 3001; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] 3002; CHECK-INTERLEAVED: scalar.ph: 3003; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PH]] ] 3004; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PH]] ] 3005; CHECK-INTERLEAVED-NEXT: br label [[FOR_BODY:%.*]] 3006; CHECK-INTERLEAVED: for.body: 3007; CHECK-INTERLEAVED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 3008; CHECK-INTERLEAVED-NEXT: [[ACCUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 3009; CHECK-INTERLEAVED-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[IV]] 3010; CHECK-INTERLEAVED-NEXT: [[LOAD_A:%.*]] = load i16, ptr [[GEP_A]], align 2 3011; CHECK-INTERLEAVED-NEXT: [[EXT_A:%.*]] = zext i16 [[LOAD_A]] to i64 3012; CHECK-INTERLEAVED-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[EXT_B]], [[EXT_A]] 3013; CHECK-INTERLEAVED-NEXT: [[ADD]] = add i64 [[MUL]], [[ACCUM]] 3014; CHECK-INTERLEAVED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 3015; CHECK-INTERLEAVED-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 3016; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_1]], label [[EXIT_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] 3017; CHECK-INTERLEAVED: exit.loopexit: 3018; CHECK-INTERLEAVED-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] 3019; CHECK-INTERLEAVED-NEXT: br label [[EXIT]] 3020; CHECK-INTERLEAVED: exit: 3021; CHECK-INTERLEAVED-NEXT: [[RESULT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[EXIT_LOOPEXIT]] ] 3022; CHECK-INTERLEAVED-NEXT: ret i64 [[RESULT]] 3023; 3024; CHECK-MAXBW-LABEL: define i64 @not_dotp_ext_outside_plan2( 3025; CHECK-MAXBW-SAME: ptr [[A:%.*]], i16 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { 3026; CHECK-MAXBW-NEXT: entry: 3027; CHECK-MAXBW-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 0 3028; CHECK-MAXBW-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_PH:%.*]] 3029; CHECK-MAXBW: for.ph: 3030; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64 3031; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() 3032; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 3033; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] 3034; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 3035; CHECK-MAXBW: vector.ph: 3036; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() 3037; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 3038; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] 3039; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 3040; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() 3041; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 3042; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[EXT_B]], i64 0 3043; CHECK-MAXBW-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer 3044; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] 3045; CHECK-MAXBW: vector.body: 3046; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3047; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] 3048; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 3049; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[TMP6]] 3050; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP7]], i32 0 3051; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP8]], align 2 3052; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i16> [[WIDE_LOAD]] to <vscale x 4 x i64> 3053; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = mul nuw nsw <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP9]] 3054; CHECK-MAXBW-NEXT: [[TMP11]] = add <vscale x 4 x i64> [[TMP10]], [[VEC_PHI]] 3055; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] 3056; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 3057; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] 3058; CHECK-MAXBW: middle.block: 3059; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> [[TMP11]]) 3060; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 3061; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] 3062; CHECK-MAXBW: scalar.ph: 3063; CHECK-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PH]] ] 3064; CHECK-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PH]] ] 3065; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]] 3066; CHECK-MAXBW: for.body: 3067; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 3068; CHECK-MAXBW-NEXT: [[ACCUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 3069; CHECK-MAXBW-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[IV]] 3070; CHECK-MAXBW-NEXT: [[LOAD_A:%.*]] = load i16, ptr [[GEP_A]], align 2 3071; CHECK-MAXBW-NEXT: [[EXT_A:%.*]] = zext i16 [[LOAD_A]] to i64 3072; CHECK-MAXBW-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[EXT_B]], [[EXT_A]] 3073; CHECK-MAXBW-NEXT: [[ADD]] = add i64 [[MUL]], [[ACCUM]] 3074; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 3075; CHECK-MAXBW-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 3076; CHECK-MAXBW-NEXT: br i1 [[CMP_1]], label [[EXIT_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] 3077; CHECK-MAXBW: exit.loopexit: 3078; CHECK-MAXBW-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] 3079; CHECK-MAXBW-NEXT: br label [[EXIT]] 3080; CHECK-MAXBW: exit: 3081; CHECK-MAXBW-NEXT: [[RESULT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[EXIT_LOOPEXIT]] ] 3082; CHECK-MAXBW-NEXT: ret i64 [[RESULT]] 3083; 3084entry: 3085 %cmp = icmp eq i64 %n, 0 3086 br i1 %cmp, label %exit, label %for.ph 3087 3088for.ph: ; preds = %entry 3089 %ext.b = zext i16 %b to i64 3090 br label %for.body 3091 3092for.body: ; preds = %for.body.lr.ph, %for.body 3093 %iv = phi i64 [ 0, %for.ph ], [ %iv.next, %for.body ] 3094 %accum = phi i64 [ 0, %for.ph ], [ %add, %for.body ] 3095 %gep.a = getelementptr inbounds nuw i16, ptr %a, i64 %iv 3096 %load.a = load i16, ptr %gep.a, align 2 3097 %ext.a = zext i16 %load.a to i64 3098 %mul = mul nuw nsw i64 %ext.b, %ext.a 3099 %add = add i64 %mul, %accum 3100 %iv.next = add nuw nsw i64 %iv, 1 3101 %cmp.1 = icmp eq i64 %iv.next, %n 3102 br i1 %cmp.1, label %exit, label %for.body 3103 3104exit: ; preds = %for.cond.cleanup.loopexit, %entry 3105 %result = phi i64 [ 0, %entry ], [ %add, %for.body ] 3106 ret i64 %result 3107} 3108 3109!7 = distinct !{!7, !8, !9, !10} 3110!8 = !{!"llvm.loop.mustprogress"} 3111!9 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} 3112!10 = !{!"llvm.loop.vectorize.enable", i1 true} 3113attributes #0 = { vscale_range(1,16) "target-features"="+sve" } 3114;. 3115; CHECK-INTERLEAVE1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} 3116; CHECK-INTERLEAVE1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} 3117; CHECK-INTERLEAVE1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} 3118; CHECK-INTERLEAVE1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} 3119; CHECK-INTERLEAVE1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} 3120; CHECK-INTERLEAVE1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} 3121; CHECK-INTERLEAVE1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} 3122; CHECK-INTERLEAVE1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} 3123; CHECK-INTERLEAVE1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} 3124; CHECK-INTERLEAVE1: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} 3125; CHECK-INTERLEAVE1: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} 3126; CHECK-INTERLEAVE1: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} 3127; CHECK-INTERLEAVE1: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} 3128; CHECK-INTERLEAVE1: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} 3129; CHECK-INTERLEAVE1: [[LOOP14]] = distinct !{[[LOOP14]], [[META15:![0-9]+]], [[META1]], [[META2]]} 3130; CHECK-INTERLEAVE1: [[META15]] = !{!"llvm.loop.mustprogress"} 3131; CHECK-INTERLEAVE1: [[LOOP16]] = distinct !{[[LOOP16]], [[META15]], [[META2]], [[META1]]} 3132; CHECK-INTERLEAVE1: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]} 3133; CHECK-INTERLEAVE1: [[LOOP18]] = distinct !{[[LOOP18]], [[META2]], [[META1]]} 3134; CHECK-INTERLEAVE1: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]], [[META2]]} 3135; CHECK-INTERLEAVE1: [[LOOP20]] = distinct !{[[LOOP20]], [[META2]], [[META1]]} 3136; CHECK-INTERLEAVE1: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]} 3137; CHECK-INTERLEAVE1: [[LOOP22]] = distinct !{[[LOOP22]], [[META2]], [[META1]]} 3138; CHECK-INTERLEAVE1: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]], [[META2]]} 3139; CHECK-INTERLEAVE1: [[LOOP24]] = distinct !{[[LOOP24]], [[META2]], [[META1]]} 3140;. 3141; CHECK-INTERLEAVED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} 3142; CHECK-INTERLEAVED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} 3143; CHECK-INTERLEAVED: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} 3144; CHECK-INTERLEAVED: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} 3145; CHECK-INTERLEAVED: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} 3146; CHECK-INTERLEAVED: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} 3147; CHECK-INTERLEAVED: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} 3148; CHECK-INTERLEAVED: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} 3149; CHECK-INTERLEAVED: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} 3150; CHECK-INTERLEAVED: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} 3151; CHECK-INTERLEAVED: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} 3152; CHECK-INTERLEAVED: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} 3153; CHECK-INTERLEAVED: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} 3154; CHECK-INTERLEAVED: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} 3155; CHECK-INTERLEAVED: [[LOOP14]] = distinct !{[[LOOP14]], [[META15:![0-9]+]], [[META1]], [[META2]]} 3156; CHECK-INTERLEAVED: [[META15]] = !{!"llvm.loop.mustprogress"} 3157; CHECK-INTERLEAVED: [[LOOP16]] = distinct !{[[LOOP16]], [[META15]], [[META2]], [[META1]]} 3158; CHECK-INTERLEAVED: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]} 3159; CHECK-INTERLEAVED: [[LOOP18]] = distinct !{[[LOOP18]], [[META2]], [[META1]]} 3160; CHECK-INTERLEAVED: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]], [[META2]]} 3161; CHECK-INTERLEAVED: [[LOOP20]] = distinct !{[[LOOP20]], [[META2]], [[META1]]} 3162; CHECK-INTERLEAVED: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]} 3163; CHECK-INTERLEAVED: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]]} 3164; CHECK-INTERLEAVED: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]], [[META2]]} 3165; CHECK-INTERLEAVED: [[LOOP24]] = distinct !{[[LOOP24]], [[META2]], [[META1]]} 3166; CHECK-INTERLEAVED: [[LOOP25]] = distinct !{[[LOOP25]], [[META1]], [[META2]]} 3167; CHECK-INTERLEAVED: [[LOOP26]] = distinct !{[[LOOP26]], [[META2]], [[META1]]} 3168;. 3169; CHECK-MAXBW: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} 3170; CHECK-MAXBW: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} 3171; CHECK-MAXBW: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} 3172; CHECK-MAXBW: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} 3173; CHECK-MAXBW: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} 3174; CHECK-MAXBW: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} 3175; CHECK-MAXBW: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} 3176; CHECK-MAXBW: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} 3177; CHECK-MAXBW: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} 3178; CHECK-MAXBW: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} 3179; CHECK-MAXBW: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} 3180; CHECK-MAXBW: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} 3181; CHECK-MAXBW: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} 3182; CHECK-MAXBW: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} 3183; CHECK-MAXBW: [[LOOP14]] = distinct !{[[LOOP14]], [[META15:![0-9]+]], [[META1]], [[META2]]} 3184; CHECK-MAXBW: [[META15]] = !{!"llvm.loop.mustprogress"} 3185; CHECK-MAXBW: [[LOOP16]] = distinct !{[[LOOP16]], [[META15]], [[META2]], [[META1]]} 3186; CHECK-MAXBW: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]} 3187; CHECK-MAXBW: [[LOOP18]] = distinct !{[[LOOP18]], [[META2]], [[META1]]} 3188; CHECK-MAXBW: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]], [[META2]]} 3189; CHECK-MAXBW: [[LOOP20]] = distinct !{[[LOOP20]], [[META2]], [[META1]]} 3190; CHECK-MAXBW: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]} 3191; CHECK-MAXBW: [[LOOP22]] = distinct !{[[LOOP22]], [[META2]], [[META1]]} 3192; CHECK-MAXBW: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]], [[META2]]} 3193; CHECK-MAXBW: [[LOOP24]] = distinct !{[[LOOP24]], [[META2]], [[META1]]} 3194;. 3195