; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes="default<O3>" -enable-matrix -S %s | FileCheck %s

target triple = "arm64-apple-ios"

; Straight-line matrix extract/insert: the O3 pipeline should fold the
; bounds-checked extractelement/insertelement on <225 x double> into scalar
; loads/stores through GEPs, keeping the llvm.assume-based bounds checks.
define void @matrix_extract_insert_scalar(i32 %i, i32 %k, i32 %j, ptr nonnull align 8 dereferenceable(1800) %A, ptr nonnull align 8 dereferenceable(1800) %B) #0 {
; CHECK-LABEL: @matrix_extract_insert_scalar(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[K:%.*]] to i64
; CHECK-NEXT:    [[CONV1:%.*]] = zext i32 [[J:%.*]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = mul nuw nsw i64 [[CONV1]], 15
; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], [[CONV]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp samesign ult i64 [[TMP1]], 225
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP2]])
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[A:%.*]], i64 0, i64 [[TMP1]]
; CHECK-NEXT:    [[MATRIXEXT:%.*]] = load double, ptr [[TMP3]], align 8
; CHECK-NEXT:    [[CONV2:%.*]] = zext i32 [[I:%.*]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[TMP0]], [[CONV2]]
; CHECK-NEXT:    [[TMP5:%.*]] = icmp samesign ult i64 [[TMP4]], 225
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP5]])
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B:%.*]], i64 0, i64 [[TMP4]]
; CHECK-NEXT:    [[MATRIXEXT4:%.*]] = load double, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[MATRIXEXT]], [[MATRIXEXT4]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP1]]
; CHECK-NEXT:    [[MATRIXEXT7:%.*]] = load double, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[MATRIXEXT7]], [[MUL]]
; CHECK-NEXT:    store double [[SUB]], ptr [[TMP7]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %i.addr = alloca i32, align 4
  %k.addr = alloca i32, align 4
  %j.addr = alloca i32, align 4
  %A.addr = alloca ptr, align 8
  %B.addr = alloca ptr, align 8
  store i32 %i, ptr %i.addr, align 4
  store i32 %k, ptr %k.addr, align 4
  store i32 %j, ptr %j.addr, align 4
  store ptr %A, ptr %A.addr, align 8
  store ptr %B, ptr %B.addr, align 8
  %0 = load i32, ptr %k.addr, align 4
  %conv = zext i32 %0 to i64
  %1 = load i32, ptr %j.addr, align 4
  %conv1 = zext i32 %1 to i64
  %2 = mul i64 %conv1, 15
  %3 = add i64 %2, %conv
  %4 = icmp ult i64 %3, 225
  call void @llvm.assume(i1 %4)
  %5 = load ptr, ptr %A.addr, align 8
  %6 = load <225 x double>, ptr %5, align 8
  %matrixext = extractelement <225 x double> %6, i64 %3
  %7 = load i32, ptr %i.addr, align 4
  %conv2 = zext i32 %7 to i64
  %8 = load i32, ptr %j.addr, align 4
  %conv3 = zext i32 %8 to i64
  %9 = mul i64 %conv3, 15
  %10 = add i64 %9, %conv2
  %11 = icmp ult i64 %10, 225
  call void @llvm.assume(i1 %11)
  %12 = load ptr, ptr %B.addr, align 8
  %13 = load <225 x double>, ptr %12, align 8
  %matrixext4 = extractelement <225 x double> %13, i64 %10
  %mul = fmul double %matrixext, %matrixext4
  %14 = load ptr, ptr %B.addr, align 8
  %15 = load i32, ptr %k.addr, align 4
  %conv5 = zext i32 %15 to i64
  %16 = load i32, ptr %j.addr, align 4
  %conv6 = zext i32 %16 to i64
  %17 = mul i64 %conv6, 15
  %18 = add i64 %17, %conv5
  %19 = icmp ult i64 %18, 225
  call void @llvm.assume(i1 %19)
  %20 = load <225 x double>, ptr %14, align 8
  %matrixext7 = extractelement <225 x double> %20, i64 %18
  %sub = fsub double %matrixext7, %mul
  %21 = icmp ult i64 %18, 225
  call void @llvm.assume(i1 %21)
  %22 = load <225 x double>, ptr %14, align 8
  %matins = insertelement <225 x double> %22, double %sub, i64 %18
  store <225 x double> %matins, ptr %14, align 8
  ret void
}

; Same extract/insert pattern inside a 4x unrolled nested loop: checks that
; after scalarization the inner loop is vectorized (2 x <2 x double>) with
; runtime alias checks between %A and %B.
define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferenceable(1800) %A, ptr nonnull align 8 dereferenceable(1800) %B) {
; CHECK-LABEL: @matrix_extract_insert_loop(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP210_NOT:%.*]] = icmp eq i32 [[I:%.*]], 0
; CHECK-NEXT:    [[CONV6:%.*]] = zext i32 [[I]] to i64
; CHECK-NEXT:    br i1 [[CMP210_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK:       for.cond1.preheader.us.preheader:
; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[CONV6]], 3
; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 360
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP1]]
; CHECK-NEXT:    [[SCEVGEP20:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[I]], 225
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP2]])
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[CONV6]]
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp samesign ult i32 [[I]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_US_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP20]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[FOR_BODY4_US_PREHEADER]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[CONV6]], 252
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP6:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i64> [[TMP7]], i64 [[TMP4]], i64 1
; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP6]], i64 1
; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult <2 x i64> [[TMP8]], splat (i64 225)
; CHECK-NEXT:    [[TMP12:%.*]] = icmp ult <2 x i64> [[TMP10]], splat (i64 225)
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i64 0
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP13]])
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i64 1
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP14]])
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i1> [[TMP12]], i64 0
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP15]])
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i1> [[TMP12]], i64 1
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP16]])
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[A]], i64 0, i64 [[INDEX]]
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP17]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP17]], align 8, !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT:    [[WIDE_LOAD21:%.*]] = load <2 x double>, ptr [[TMP18]], align 8, !alias.scope [[META0]]
; CHECK-NEXT:    [[TMP19:%.*]] = load double, ptr [[TMP3]], align 8, !alias.scope [[META3:![0-9]+]]
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <2 x double> poison, double [[TMP19]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT23:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT22]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP20:%.*]] = fmul <2 x double> [[WIDE_LOAD]], [[BROADCAST_SPLAT23]]
; CHECK-NEXT:    [[TMP21:%.*]] = fmul <2 x double> [[WIDE_LOAD21]], [[BROADCAST_SPLAT23]]
; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[INDEX]]
; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP22]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD24:%.*]] = load <2 x double>, ptr [[TMP22]], align 8, !alias.scope [[META5:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT:    [[WIDE_LOAD25:%.*]] = load <2 x double>, ptr [[TMP23]], align 8, !alias.scope [[META5]], !noalias [[META0]]
; CHECK-NEXT:    [[TMP24:%.*]] = fsub <2 x double> [[WIDE_LOAD24]], [[TMP20]]
; CHECK-NEXT:    [[TMP25:%.*]] = fsub <2 x double> [[WIDE_LOAD25]], [[TMP21]]
; CHECK-NEXT:    store <2 x double> [[TMP24]], ptr [[TMP22]], align 8, !alias.scope [[META5]], !noalias [[META0]]
; CHECK-NEXT:    store <2 x double> [[TMP25]], ptr [[TMP23]], align 8, !alias.scope [[META5]], !noalias [[META0]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[CONV6]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[VECTOR_MEMCHECK_1:%.*]], label [[FOR_BODY4_US_PREHEADER]]
; CHECK:       for.body4.us.preheader:
; CHECK-NEXT:    [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label [[FOR_BODY4_US:%.*]]
; CHECK:       for.body4.us:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4_US]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY4_US_PREHEADER]] ]
; CHECK-NEXT:    [[TMP27:%.*]] = icmp samesign ult i64 [[INDVARS_IV]], 225
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP27]])
; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[A]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[MATRIXEXT_US:%.*]] = load double, ptr [[TMP28]], align 8
; CHECK-NEXT:    [[MATRIXEXT8_US:%.*]] = load double, ptr [[TMP3]], align 8
; CHECK-NEXT:    [[MUL_US:%.*]] = fmul double [[MATRIXEXT_US]], [[MATRIXEXT8_US]]
; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[MATRIXEXT11_US:%.*]] = load double, ptr [[TMP29]], align 8
; CHECK-NEXT:    [[SUB_US:%.*]] = fsub double [[MATRIXEXT11_US]], [[MUL_US]]
; CHECK-NEXT:    store double [[SUB_US]], ptr [[TMP29]], align 8
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[CONV6]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[VECTOR_MEMCHECK_1]], label [[FOR_BODY4_US]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK:       for.cond1.for.cond.cleanup3_crit_edge.us:
; CHECK-NEXT:    [[TMP30:%.*]] = add nuw nsw i64 [[CONV6]], 15
; CHECK-NEXT:    [[TMP31:%.*]] = icmp samesign ult i32 [[I]], 210
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP31]])
; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP30]]
; CHECK-NEXT:    [[MIN_ITERS_CHECK_1:%.*]] = icmp samesign ult i32 [[I]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK_1]], label [[FOR_BODY4_US_PREHEADER_1:%.*]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]]
; CHECK:       vector.memcheck.1:
; CHECK-NEXT:    [[BOUND0_1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP20]]
; CHECK-NEXT:    [[BOUND1_1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
; CHECK-NEXT:    [[FOUND_CONFLICT_1:%.*]] = and i1 [[BOUND0_1]], [[BOUND1_1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT_1]], label [[FOR_BODY4_US_PREHEADER_1]], label [[VECTOR_PH_1:%.*]]
; CHECK:       vector.ph.1:
; CHECK-NEXT:    [[N_VEC_1:%.*]] = and i64 [[CONV6]], 252
; CHECK-NEXT:    br label [[VECTOR_BODY_1:%.*]]
; CHECK:       vector.body.1:
; CHECK-NEXT:    [[INDEX_1:%.*]] = phi i64 [ 0, [[VECTOR_PH_1]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY_1]] ]
; CHECK-NEXT:    [[TMP33:%.*]] = add nuw nsw i64 [[INDEX_1]], 15
; CHECK-NEXT:    [[TMP34:%.*]] = add nuw nsw i64 [[INDEX_1]], 16
; CHECK-NEXT:    [[TMP35:%.*]] = insertelement <2 x i64> poison, i64 [[TMP33]], i64 0
; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <2 x i64> [[TMP35]], i64 [[TMP34]], i64 1
; CHECK-NEXT:    [[TMP37:%.*]] = add nuw nsw i64 [[INDEX_1]], 17
; CHECK-NEXT:    [[TMP38:%.*]] = add nuw nsw i64 [[INDEX_1]], 18
; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <2 x i64> poison, i64 [[TMP37]], i64 0
; CHECK-NEXT:    [[TMP40:%.*]] = insertelement <2 x i64> [[TMP39]], i64 [[TMP38]], i64 1
; CHECK-NEXT:    [[TMP41:%.*]] = icmp ult <2 x i64> [[TMP36]], splat (i64 225)
; CHECK-NEXT:    [[TMP42:%.*]] = icmp ult <2 x i64> [[TMP40]], splat (i64 225)
; CHECK-NEXT:    [[TMP43:%.*]] = extractelement <2 x i1> [[TMP41]], i64 0
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP43]])
; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <2 x i1> [[TMP41]], i64 1
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP44]])
; CHECK-NEXT:    [[TMP45:%.*]] = extractelement <2 x i1> [[TMP42]], i64 0
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP45]])
; CHECK-NEXT:    [[TMP46:%.*]] = extractelement <2 x i1> [[TMP42]], i64 1
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP46]])
; CHECK-NEXT:    [[TMP47:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[A]], i64 0, i64 [[TMP33]]
; CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP47]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD_1:%.*]] = load <2 x double>, ptr [[TMP47]], align 8, !alias.scope [[META0]]
; CHECK-NEXT:    [[WIDE_LOAD21_1:%.*]] = load <2 x double>, ptr [[TMP48]], align 8, !alias.scope [[META0]]
; CHECK-NEXT:    [[TMP49:%.*]] = load double, ptr [[TMP32]], align 8, !alias.scope [[META3]]
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT22_1:%.*]] = insertelement <2 x double> poison, double [[TMP49]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT23_1:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT22_1]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP50:%.*]] = fmul <2 x double> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT23_1]]
; CHECK-NEXT:    [[TMP51:%.*]] = fmul <2 x double> [[WIDE_LOAD21_1]], [[BROADCAST_SPLAT23_1]]
; CHECK-NEXT:    [[TMP52:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP33]]
; CHECK-NEXT:    [[TMP53:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP52]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD24_1:%.*]] = load <2 x double>, ptr [[TMP52]], align 8, !alias.scope [[META5]], !noalias [[META0]]
; CHECK-NEXT:    [[WIDE_LOAD25_1:%.*]] = load <2 x double>, ptr [[TMP53]], align 8, !alias.scope [[META5]], !noalias [[META0]]
; CHECK-NEXT:    [[TMP54:%.*]] = fsub <2 x double> [[WIDE_LOAD24_1]], [[TMP50]]
; CHECK-NEXT:    [[TMP55:%.*]] = fsub <2 x double> [[WIDE_LOAD25_1]], [[TMP51]]
; CHECK-NEXT:    store <2 x double> [[TMP54]], ptr [[TMP52]], align 8, !alias.scope [[META5]], !noalias [[META0]]
; CHECK-NEXT:    store <2 x double> [[TMP55]], ptr [[TMP53]], align 8, !alias.scope [[META5]], !noalias [[META0]]
; CHECK-NEXT:    [[INDEX_NEXT_1]] = add nuw i64 [[INDEX_1]], 4
; CHECK-NEXT:    [[TMP56:%.*]] = icmp eq i64 [[INDEX_NEXT_1]], [[N_VEC_1]]
; CHECK-NEXT:    br i1 [[TMP56]], label [[MIDDLE_BLOCK_1:%.*]], label [[VECTOR_BODY_1]], !llvm.loop [[LOOP7]]
; CHECK:       middle.block.1:
; CHECK-NEXT:    [[CMP_N_1:%.*]] = icmp eq i64 [[N_VEC_1]], [[CONV6]]
; CHECK-NEXT:    br i1 [[CMP_N_1]], label [[VECTOR_MEMCHECK_2:%.*]], label [[FOR_BODY4_US_PREHEADER_1]]
; CHECK:       for.body4.us.preheader.1:
; CHECK-NEXT:    [[INDVARS_IV_PH_1:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK_1]] ], [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ [[N_VEC_1]], [[MIDDLE_BLOCK_1]] ]
; CHECK-NEXT:    br label [[FOR_BODY4_US_1:%.*]]
; CHECK:       for.body4.us.1:
; CHECK-NEXT:    [[INDVARS_IV_1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_1:%.*]], [[FOR_BODY4_US_1]] ], [ [[INDVARS_IV_PH_1]], [[FOR_BODY4_US_PREHEADER_1]] ]
; CHECK-NEXT:    [[TMP57:%.*]] = add nuw nsw i64 [[INDVARS_IV_1]], 15
; CHECK-NEXT:    [[TMP58:%.*]] = icmp samesign ult i64 [[INDVARS_IV_1]], 210
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP58]])
; CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[A]], i64 0, i64 [[TMP57]]
; CHECK-NEXT:    [[MATRIXEXT_US_1:%.*]] = load double, ptr [[TMP59]], align 8
; CHECK-NEXT:    [[MATRIXEXT8_US_1:%.*]] = load double, ptr [[TMP32]], align 8
; CHECK-NEXT:    [[MUL_US_1:%.*]] = fmul double [[MATRIXEXT_US_1]], [[MATRIXEXT8_US_1]]
; CHECK-NEXT:    [[TMP60:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP57]]
; CHECK-NEXT:    [[MATRIXEXT11_US_1:%.*]] = load double, ptr [[TMP60]], align 8
; CHECK-NEXT:    [[SUB_US_1:%.*]] = fsub double [[MATRIXEXT11_US_1]], [[MUL_US_1]]
; CHECK-NEXT:    store double [[SUB_US_1]], ptr [[TMP60]], align 8
; CHECK-NEXT:    [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV_1]], 1
; CHECK-NEXT:    [[EXITCOND_NOT_1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_1]], [[CONV6]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT_1]], label [[VECTOR_MEMCHECK_2]], label [[FOR_BODY4_US_1]], !llvm.loop [[LOOP10]]
; CHECK:       for.cond1.for.cond.cleanup3_crit_edge.us.1:
; CHECK-NEXT:    [[TMP61:%.*]] = add nuw nsw i64 [[CONV6]], 30
; CHECK-NEXT:    [[TMP62:%.*]] = icmp samesign ult i32 [[I]], 195
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP62]])
; CHECK-NEXT:    [[TMP63:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP61]]
; CHECK-NEXT:    [[MIN_ITERS_CHECK_2:%.*]] = icmp samesign ult i32 [[I]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK_2]], label [[FOR_BODY4_US_PREHEADER_2:%.*]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1:%.*]]
; CHECK:       vector.memcheck.2:
; CHECK-NEXT:    [[BOUND0_2:%.*]] = icmp ult ptr [[B]], [[SCEVGEP20]]
; CHECK-NEXT:    [[BOUND1_2:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
; CHECK-NEXT:    [[FOUND_CONFLICT_2:%.*]] = and i1 [[BOUND0_2]], [[BOUND1_2]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT_2]], label [[FOR_BODY4_US_PREHEADER_2]], label [[VECTOR_PH_2:%.*]]
; CHECK:       vector.ph.2:
; CHECK-NEXT:    [[N_VEC_2:%.*]] = and i64 [[CONV6]], 252
; CHECK-NEXT:    br label [[VECTOR_BODY_2:%.*]]
; CHECK:       vector.body.2:
; CHECK-NEXT:    [[INDEX_2:%.*]] = phi i64 [ 0, [[VECTOR_PH_2]] ], [ [[INDEX_NEXT_2:%.*]], [[VECTOR_BODY_2]] ]
; CHECK-NEXT:    [[TMP64:%.*]] = add nuw nsw i64 [[INDEX_2]], 30
; CHECK-NEXT:    [[TMP65:%.*]] = add nuw nsw i64 [[INDEX_2]], 31
; CHECK-NEXT:    [[TMP66:%.*]] = insertelement <2 x i64> poison, i64 [[TMP64]], i64 0
; CHECK-NEXT:    [[TMP67:%.*]] = insertelement <2 x i64> [[TMP66]], i64 [[TMP65]], i64 1
; CHECK-NEXT:    [[TMP68:%.*]] = add nuw nsw i64 [[INDEX_2]], 32
; CHECK-NEXT:    [[TMP69:%.*]] = add nuw nsw i64 [[INDEX_2]], 33
; CHECK-NEXT:    [[TMP70:%.*]] = insertelement <2 x i64> poison, i64 [[TMP68]], i64 0
; CHECK-NEXT:    [[TMP71:%.*]] = insertelement <2 x i64> [[TMP70]], i64 [[TMP69]], i64 1
; CHECK-NEXT:    [[TMP72:%.*]] = icmp ult <2 x i64> [[TMP67]], splat (i64 225)
; CHECK-NEXT:    [[TMP73:%.*]] = icmp ult <2 x i64> [[TMP71]], splat (i64 225)
; CHECK-NEXT:    [[TMP74:%.*]] = extractelement <2 x i1> [[TMP72]], i64 0
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP74]])
; CHECK-NEXT:    [[TMP75:%.*]] = extractelement <2 x i1> [[TMP72]], i64 1
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP75]])
; CHECK-NEXT:    [[TMP76:%.*]] = extractelement <2 x i1> [[TMP73]], i64 0
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP76]])
; CHECK-NEXT:    [[TMP77:%.*]] = extractelement <2 x i1> [[TMP73]], i64 1
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP77]])
; CHECK-NEXT:    [[TMP78:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[A]], i64 0, i64 [[TMP64]]
; CHECK-NEXT:    [[TMP79:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP78]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD_2:%.*]] = load <2 x double>, ptr [[TMP78]], align 8, !alias.scope [[META0]]
; CHECK-NEXT:    [[WIDE_LOAD21_2:%.*]] = load <2 x double>, ptr [[TMP79]], align 8, !alias.scope [[META0]]
; CHECK-NEXT:    [[TMP80:%.*]] = load double, ptr [[TMP63]], align 8, !alias.scope [[META3]]
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT22_2:%.*]] = insertelement <2 x double> poison, double [[TMP80]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT23_2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT22_2]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP81:%.*]] = fmul <2 x double> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT23_2]]
; CHECK-NEXT:    [[TMP82:%.*]] = fmul <2 x double> [[WIDE_LOAD21_2]], [[BROADCAST_SPLAT23_2]]
; CHECK-NEXT:    [[TMP83:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP64]]
; CHECK-NEXT:    [[TMP84:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP83]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD24_2:%.*]] = load <2 x double>, ptr [[TMP83]], align 8, !alias.scope [[META5]], !noalias [[META0]]
; CHECK-NEXT:    [[WIDE_LOAD25_2:%.*]] = load <2 x double>, ptr [[TMP84]], align 8, !alias.scope [[META5]], !noalias [[META0]]
; CHECK-NEXT:    [[TMP85:%.*]] = fsub <2 x double> [[WIDE_LOAD24_2]], [[TMP81]]
; CHECK-NEXT:    [[TMP86:%.*]] = fsub <2 x double> [[WIDE_LOAD25_2]], [[TMP82]]
; CHECK-NEXT:    store <2 x double> [[TMP85]], ptr [[TMP83]], align 8, !alias.scope [[META5]], !noalias [[META0]]
; CHECK-NEXT:    store <2 x double> [[TMP86]], ptr [[TMP84]], align 8, !alias.scope [[META5]], !noalias [[META0]]
; CHECK-NEXT:    [[INDEX_NEXT_2]] = add nuw i64 [[INDEX_2]], 4
; CHECK-NEXT:    [[TMP87:%.*]] = icmp eq i64 [[INDEX_NEXT_2]], [[N_VEC_2]]
; CHECK-NEXT:    br i1 [[TMP87]], label [[MIDDLE_BLOCK_2:%.*]], label [[VECTOR_BODY_2]], !llvm.loop [[LOOP7]]
; CHECK:       middle.block.2:
; CHECK-NEXT:    [[CMP_N_2:%.*]] = icmp eq i64 [[N_VEC_2]], [[CONV6]]
; CHECK-NEXT:    br i1 [[CMP_N_2]], label [[VECTOR_MEMCHECK_3:%.*]], label [[FOR_BODY4_US_PREHEADER_2]]
; CHECK:       for.body4.us.preheader.2:
; CHECK-NEXT:    [[INDVARS_IV_PH_2:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK_2]] ], [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ [[N_VEC_2]], [[MIDDLE_BLOCK_2]] ]
; CHECK-NEXT:    br label [[FOR_BODY4_US_2:%.*]]
; CHECK:       for.body4.us.2:
; CHECK-NEXT:    [[INDVARS_IV_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_2:%.*]], [[FOR_BODY4_US_2]] ], [ [[INDVARS_IV_PH_2]], [[FOR_BODY4_US_PREHEADER_2]] ]
; CHECK-NEXT:    [[TMP88:%.*]] = add nuw nsw i64 [[INDVARS_IV_2]], 30
; CHECK-NEXT:    [[TMP89:%.*]] = icmp samesign ult i64 [[INDVARS_IV_2]], 195
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP89]])
; CHECK-NEXT:    [[TMP90:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[A]], i64 0, i64 [[TMP88]]
; CHECK-NEXT:    [[MATRIXEXT_US_2:%.*]] = load double, ptr [[TMP90]], align 8
; CHECK-NEXT:    [[MATRIXEXT8_US_2:%.*]] = load double, ptr [[TMP63]], align 8
; CHECK-NEXT:    [[MUL_US_2:%.*]] = fmul double [[MATRIXEXT_US_2]], [[MATRIXEXT8_US_2]]
; CHECK-NEXT:    [[TMP91:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP88]]
; CHECK-NEXT:    [[MATRIXEXT11_US_2:%.*]] = load double, ptr [[TMP91]], align 8
; CHECK-NEXT:    [[SUB_US_2:%.*]] = fsub double [[MATRIXEXT11_US_2]], [[MUL_US_2]]
; CHECK-NEXT:    store double [[SUB_US_2]], ptr [[TMP91]], align 8
; CHECK-NEXT:    [[INDVARS_IV_NEXT_2]] = add nuw nsw i64 [[INDVARS_IV_2]], 1
; CHECK-NEXT:    [[EXITCOND_NOT_2:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_2]], [[CONV6]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT_2]], label [[VECTOR_MEMCHECK_3]], label [[FOR_BODY4_US_2]], !llvm.loop [[LOOP10]]
; CHECK:       for.cond1.for.cond.cleanup3_crit_edge.us.2:
; CHECK-NEXT:    [[TMP92:%.*]] = add nuw nsw i64 [[CONV6]], 45
; CHECK-NEXT:    [[TMP93:%.*]] = icmp samesign ult i32 [[I]], 180
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP93]])
; CHECK-NEXT:    [[TMP94:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP92]]
; CHECK-NEXT:    [[MIN_ITERS_CHECK_3:%.*]] = icmp samesign ult i32 [[I]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK_3]], label [[FOR_BODY4_US_PREHEADER_3:%.*]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2:%.*]]
; CHECK:       vector.memcheck.3:
; CHECK-NEXT:    [[BOUND0_3:%.*]] = icmp ult ptr [[B]], [[SCEVGEP20]]
; CHECK-NEXT:    [[BOUND1_3:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
; CHECK-NEXT:    [[FOUND_CONFLICT_3:%.*]] = and i1 [[BOUND0_3]], [[BOUND1_3]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT_3]], label [[FOR_BODY4_US_PREHEADER_3]], label [[VECTOR_PH_3:%.*]]
; CHECK:       vector.ph.3:
; CHECK-NEXT:    [[N_VEC_3:%.*]] = and i64 [[CONV6]], 252
; CHECK-NEXT:    br label [[VECTOR_BODY_3:%.*]]
; CHECK:       vector.body.3:
; CHECK-NEXT:    [[INDEX_3:%.*]] = phi i64 [ 0, [[VECTOR_PH_3]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY_3]] ]
; CHECK-NEXT:    [[TMP95:%.*]] = add nuw nsw i64 [[INDEX_3]], 45
; CHECK-NEXT:    [[TMP96:%.*]] = add nuw nsw i64 [[INDEX_3]], 46
; CHECK-NEXT:    [[TMP97:%.*]] = insertelement <2 x i64> poison, i64 [[TMP95]], i64 0
; CHECK-NEXT:    [[TMP98:%.*]] = insertelement <2 x i64> [[TMP97]], i64 [[TMP96]], i64 1
; CHECK-NEXT:    [[TMP99:%.*]] = add nuw nsw i64 [[INDEX_3]], 47
; CHECK-NEXT:    [[TMP100:%.*]] = add nuw nsw i64 [[INDEX_3]], 48
; CHECK-NEXT:    [[TMP101:%.*]] = insertelement <2 x i64> poison, i64 [[TMP99]], i64 0
; CHECK-NEXT:    [[TMP102:%.*]] = insertelement <2 x i64> [[TMP101]], i64 [[TMP100]], i64 1
; CHECK-NEXT:    [[TMP103:%.*]] = icmp ult <2 x i64> [[TMP98]], splat (i64 225)
; CHECK-NEXT:    [[TMP104:%.*]] = icmp ult <2 x i64> [[TMP102]], splat (i64 225)
; CHECK-NEXT:    [[TMP105:%.*]] = extractelement <2 x i1> [[TMP103]], i64 0
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP105]])
; CHECK-NEXT:    [[TMP106:%.*]] = extractelement <2 x i1> [[TMP103]], i64 1
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP106]])
; CHECK-NEXT:    [[TMP107:%.*]] = extractelement <2 x i1> [[TMP104]], i64 0
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP107]])
; CHECK-NEXT:    [[TMP108:%.*]] = extractelement <2 x i1> [[TMP104]], i64 1
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP108]])
; CHECK-NEXT:    [[TMP109:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[A]], i64 0, i64 [[TMP95]]
; CHECK-NEXT:    [[TMP110:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP109]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD_3:%.*]] = load <2 x double>, ptr [[TMP109]], align 8, !alias.scope [[META0]]
; CHECK-NEXT:    [[WIDE_LOAD21_3:%.*]] = load <2 x double>, ptr [[TMP110]], align 8, !alias.scope [[META0]]
; CHECK-NEXT:    [[TMP111:%.*]] = load double, ptr [[TMP94]], align 8, !alias.scope [[META3]]
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT22_3:%.*]] = insertelement <2 x double> poison, double [[TMP111]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT23_3:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT22_3]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP112:%.*]] = fmul <2 x double> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT23_3]]
; CHECK-NEXT:    [[TMP113:%.*]] = fmul <2 x double> [[WIDE_LOAD21_3]], [[BROADCAST_SPLAT23_3]]
; CHECK-NEXT:    [[TMP114:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP95]]
; CHECK-NEXT:    [[TMP115:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP114]], i64 16
; CHECK-NEXT:    [[WIDE_LOAD24_3:%.*]] = load <2 x double>, ptr [[TMP114]], align 8, !alias.scope [[META5]], !noalias [[META0]]
; CHECK-NEXT:    [[WIDE_LOAD25_3:%.*]] = load <2 x double>, ptr [[TMP115]], align 8, !alias.scope [[META5]], !noalias [[META0]]
; CHECK-NEXT:    [[TMP116:%.*]] = fsub <2 x double> [[WIDE_LOAD24_3]], [[TMP112]]
; CHECK-NEXT:    [[TMP117:%.*]] = fsub <2 x double> [[WIDE_LOAD25_3]], [[TMP113]]
; CHECK-NEXT:    store <2 x double> [[TMP116]], ptr [[TMP114]], align 8, !alias.scope [[META5]], !noalias [[META0]]
; CHECK-NEXT:    store <2 x double> [[TMP117]], ptr [[TMP115]], align 8, !alias.scope [[META5]], !noalias [[META0]]
; CHECK-NEXT:    [[INDEX_NEXT_3]] = add nuw i64 [[INDEX_3]], 4
; CHECK-NEXT:    [[TMP118:%.*]] = icmp eq i64 [[INDEX_NEXT_3]], [[N_VEC_3]]
; CHECK-NEXT:    br i1 [[TMP118]], label [[MIDDLE_BLOCK_3:%.*]], label [[VECTOR_BODY_3]], !llvm.loop [[LOOP7]]
; CHECK:       middle.block.3:
; CHECK-NEXT:    [[CMP_N_3:%.*]] = icmp eq i64 [[N_VEC_3]], [[CONV6]]
; CHECK-NEXT:    br i1 [[CMP_N_3]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY4_US_PREHEADER_3]]
; CHECK:       for.body4.us.preheader.3:
; CHECK-NEXT:    [[INDVARS_IV_PH_3:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK_3]] ], [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]] ], [ [[N_VEC_3]], [[MIDDLE_BLOCK_3]] ]
; CHECK-NEXT:    br label [[FOR_BODY4_US_3:%.*]]
; CHECK:       for.body4.us.3:
; CHECK-NEXT:    [[INDVARS_IV_3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY4_US_3]] ], [ [[INDVARS_IV_PH_3]], [[FOR_BODY4_US_PREHEADER_3]] ]
; CHECK-NEXT:    [[TMP119:%.*]] = add nuw nsw i64 [[INDVARS_IV_3]], 45
; CHECK-NEXT:    [[TMP120:%.*]] = icmp samesign ult i64 [[INDVARS_IV_3]], 180
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP120]])
; CHECK-NEXT:    [[TMP121:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[A]], i64 0, i64 [[TMP119]]
; CHECK-NEXT:    [[MATRIXEXT_US_3:%.*]] = load double, ptr [[TMP121]], align 8
; CHECK-NEXT:    [[MATRIXEXT8_US_3:%.*]] = load double, ptr [[TMP94]], align 8
; CHECK-NEXT:    [[MUL_US_3:%.*]] = fmul double [[MATRIXEXT_US_3]], [[MATRIXEXT8_US_3]]
; CHECK-NEXT:    [[TMP122:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP119]]
; CHECK-NEXT:    [[MATRIXEXT11_US_3:%.*]] = load double, ptr [[TMP122]], align 8
; CHECK-NEXT:    [[SUB_US_3:%.*]] = fsub double [[MATRIXEXT11_US_3]], [[MUL_US_3]]
; CHECK-NEXT:    store double [[SUB_US_3]], ptr [[TMP122]], align 8
; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV_3]], 1
; CHECK-NEXT:    [[EXITCOND_NOT_3:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_3]], [[CONV6]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT_3]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY4_US_3]], !llvm.loop [[LOOP10]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
entry:
  %i.addr = alloca i32, align 4
  %A.addr = alloca ptr, align 8
  %B.addr = alloca ptr, align 8
  %j = alloca i32, align 4
  %cleanup.dest.slot = alloca i32, align 4
  %k = alloca i32, align 4
  store i32 %i, ptr %i.addr, align 4
  store ptr %A, ptr %A.addr, align 8
  store ptr %B, ptr %B.addr, align 8
  call void @llvm.lifetime.start.p0(i64 4, ptr %j) #3
  store i32 0, ptr %j, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc12, %entry
  %0 = load i32, ptr %j, align 4
  %cmp = icmp ult i32 %0, 4
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond
  store i32 2, ptr %cleanup.dest.slot, align 4
  call void @llvm.lifetime.end.p0(i64 4, ptr %j) #3
  br label %for.end14

for.body:                                         ; preds = %for.cond
  call void @llvm.lifetime.start.p0(i64 4, ptr %k) #3
  store i32 0, ptr %k, align 4
  br label %for.cond1

for.cond1:                                        ; preds = %for.inc, %for.body
  %1 = load i32, ptr %k, align 4
  %2 = load i32, ptr %i.addr, align 4
  %cmp2 = icmp ult i32 %1, %2
  br i1 %cmp2, label %for.body4, label %for.cond.cleanup3

for.cond.cleanup3:                                ; preds = %for.cond1
  store i32 5, ptr %cleanup.dest.slot, align 4
  call void @llvm.lifetime.end.p0(i64 4, ptr %k) #3
  br label %for.end

for.body4:                                        ; preds = %for.cond1
  %3 = load i32, ptr %k, align 4
  %conv = zext i32 %3 to i64
  %4 = load i32, ptr %j, align 4
  %conv5 = zext i32 %4 to i64
  %5 = mul i64 %conv5, 15
  %6 = add i64 %5, %conv
  %7 = icmp ult i64 %6, 225
  call void @llvm.assume(i1 %7)
  %8 = load ptr, ptr %A.addr, align 8
  %9 = load <225 x double>, ptr %8, align 8
  %matrixext = extractelement <225 x double> %9, i64 %6
  %10 = load i32, ptr %i.addr, align 4
  %conv6 = zext i32 %10 to i64
  %11 = load i32, ptr %j, align 4
  %conv7 = zext i32 %11 to i64
  %12 = mul i64 %conv7, 15
  %13 = add i64 %12, %conv6
  %14 = icmp ult i64 %13, 225
  call void @llvm.assume(i1 %14)
  %15 = load ptr, ptr %B.addr, align 8
  %16 = load <225 x double>, ptr %15, align 8
  %matrixext8 = extractelement <225 x double> %16, i64 %13
  %mul = fmul double %matrixext, %matrixext8
  %17 = load ptr, ptr %B.addr, align 8
  %18 = load i32, ptr %k, align 4
  %conv9 = zext i32 %18 to i64
  %19 = load i32, ptr %j, align 4
  %conv10 = zext i32 %19 to i64
  %20 = mul i64 %conv10, 15
  %21 = add i64 %20, %conv9
  %22 = icmp ult i64 %21, 225
  call void @llvm.assume(i1 %22)
  %23 = load <225 x double>, ptr %17, align 8
  %matrixext11 = extractelement <225 x double> %23, i64 %21
  %sub = fsub double %matrixext11, %mul
  %24 = icmp ult i64 %21, 225
  call void @llvm.assume(i1 %24)
  %25 = load <225 x double>, ptr %17, align 8
  %matins = insertelement <225 x double> %25, double %sub, i64 %21
  store <225 x double> %matins, ptr %17, align 8
  br label %for.inc

for.inc:                                          ; preds = %for.body4
  %26 = load i32, ptr %k, align 4
  %inc = add i32 %26, 1
  store i32 %inc, ptr %k, align 4
  br label %for.cond1

for.end:                                          ; preds = %for.cond.cleanup3
  br label %for.inc12

for.inc12:                                        ; preds = %for.end
  %27 = load i32, ptr %j, align 4
  %inc13 = add i32 %27, 1
  store i32 %inc13, ptr %j, align 4
  br label %for.cond

for.end14:                                        ; preds = %for.cond.cleanup
  ret void
}

; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1

; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn
declare void @llvm.assume(i1 noundef) #2

; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1

; Function Attrs: nounwind ssp uwtable mustprogress

; Scalar horizontal-add built element-by-element and then reversed: O3 (SLP)
; should collapse it into two shuffles plus one vector fadd.
define <4 x float> @reverse_hadd_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @reverse_hadd_v4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[A:%.*]], <4 x i32> <i32 2, i32 0, i32 6, i32 4>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> [[A]], <4 x i32> <i32 3, i32 1, i32 7, i32 5>
; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <4 x float> [[TMP3]]
;
  %vecext = extractelement <4 x float> %a, i32 0
  %vecext1 = extractelement <4 x float> %a, i32 1
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %add, i32 0
  %vecext2 = extractelement <4 x float> %a, i32 2
  %vecext3 = extractelement <4 x float> %a, i32 3
  %add4 = fadd float %vecext2, %vecext3
  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
  %vecext6 = extractelement <4 x float> %b, i32 0
  %vecext7 = extractelement <4 x float> %b, i32 1
  %add8 = fadd float %vecext6, %vecext7
  %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2
  %vecext10 = extractelement <4 x float> %b, i32 2
  %vecext11 = extractelement <4 x float> %b, i32 3
  %add12 = fadd float %vecext10, %vecext11
  %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3
  %shuffle = shufflevector <4 x float> %vecinit13, <4 x float> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %shuffle
}