; RUN: opt < %s -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s

target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"

; Regression input for the loop cache cost printer: a five-deep loop nest over
; three arrays whose extents are only known at run time.
;
; Pseudo-C equivalent of @mat_vec_mpy as encoded in the IR below. Indices are
; 1-based: the IR biases each base pointer by minus one stride per dimension,
; and every loop runs from 1 up to and including its bound.
;
; void mat_vec_mpy(double *__restrict y, const double *__restrict x,
;                  const double *__restrict b, const int *__restrict nb,
;                  const int *__restrict nx, const int *__restrict ny,
;                  const int *__restrict nz) {
;   for (int k = 1; k <= *nz; ++k)
;     for (int j = 1; j <= *ny; ++j)
;       for (int i = 1; i <= *nx; ++i)
;         for (int l = 1; l <= *nb; ++l)
;           for (int m = 1; m <= *nb; ++m)
;             y[k+1][j][i][l] = b[k][j][i][m][l] * x[k][j][i][m];
; }
;
; Note: the IR stores the product directly; it does not load y or accumulate.

; Expected analysis output. m_loop is listed ahead of l_loop even though l_loop
; encloses m_loop in the IR -- presumably the printer orders loops by cost;
; confirm against the LoopCacheAnalysis printer before relying on that.
; CHECK: Loop 'k_loop' has cost = 10200000000000000
; CHECK-NEXT: Loop 'j_loop' has cost = 102000000000000
; CHECK-NEXT: Loop 'i_loop' has cost = 1020000000000
; CHECK-NEXT: Loop 'm_loop' has cost = 10800000000
; CHECK-NEXT: Loop 'l_loop' has cost = 1500000000

; Not referenced by the function in this file.
%_elem_type_of_double = type <{ double }>

; Function Attrs: norecurse nounwind
define void @mat_vec_mpy(ptr noalias %y, ptr noalias readonly %x,
    ptr noalias readonly %b, ptr noalias readonly %nb, ptr noalias readonly %nx,
    ptr noalias readonly %ny, ptr noalias readonly %nz) {
mat_times_vec_entry:
  ; Load nb, nx, ny and widen each to i64 clamped below at 0 (max(n, 0)).
  %_ind_val = load i32, ptr %nb, align 4
  %_conv = sext i32 %_ind_val to i64
  %_grt_tmp.i = icmp sgt i64 %_conv, 0
  %a_b.i = select i1 %_grt_tmp.i, i64 %_conv, i64 0
  %_ind_val1 = load i32, ptr %nx, align 4
  %_conv2 = sext i32 %_ind_val1 to i64
  %_grt_tmp.i266 = icmp sgt i64 %_conv2, 0
  %a_b.i267 = select i1 %_grt_tmp.i266, i64 %_conv2, i64 0
  %_ind_val3 = load i32, ptr %ny, align 4
  %_conv4 = sext i32 %_ind_val3 to i64
  %_grt_tmp.i264 = icmp sgt i64 %_conv4, 0
  %a_b.i265 = select i1 %_grt_tmp.i264, i64 %_conv4, i64 0
  %_ind_val5 = load i32, ptr %nz, align 4
  ; Byte strides shared by y and x: 8*nb, 8*nb*nx, 8*nb*nx*ny.
  %_mult_tmp = shl nsw i64 %a_b.i, 3
  %_mult_tmp7 = mul i64 %_mult_tmp, %a_b.i267
  %_mult_tmp8 = mul i64 %_mult_tmp7, %a_b.i265
  ; Bias for y and x: -(8 + 8*nb + 8*nb*nx + 8*nb*nx*ny), i.e. minus one
  ; stride per dimension, so that index 1 everywhere lands on the base pointer.
  %_sub_tmp = sub nuw nsw i64 -8, %_mult_tmp
  %_sub_tmp21 = sub i64 %_sub_tmp, %_mult_tmp7
  %_sub_tmp23 = sub i64 %_sub_tmp21, %_mult_tmp8
  ; Byte strides for the three outer dimensions of b: 8*nb*nb, then *nx, *ny.
  %_mult_tmp73 = mul i64 %_mult_tmp, %a_b.i
  %_mult_tmp74 = mul i64 %_mult_tmp73, %a_b.i267
  %_mult_tmp75 = mul i64 %_mult_tmp74, %a_b.i265
  ; Bias for b, built the same way from its five strides.
  %_sub_tmp93 = sub i64 %_sub_tmp, %_mult_tmp73
  %_sub_tmp95 = sub i64 %_sub_tmp93, %_mult_tmp74
  %_sub_tmp97 = sub i64 %_sub_tmp95, %_mult_tmp75
  ; Skip everything when nz < 1.
  %_grt_tmp853288 = icmp slt i32 %_ind_val5, 1
  br i1 %_grt_tmp853288, label %_return_bb, label %k_loop.lr.ph

k_loop.lr.ph:                                     ; preds = %mat_times_vec_entry
  %_grt_tmp851279 = icmp slt i32 %_ind_val3, 1
  %_grt_tmp847270 = icmp slt i32 %_ind_val, 1
  ; Biased base pointers for y, x and b. The ptr-to-ptr bitcasts do not change
  ; the type and are kept exactly as they appear in the test input.
  %_aa_conv = bitcast ptr %y to ptr
  %_adda_ = getelementptr inbounds i8, ptr %_aa_conv, i64 %_sub_tmp23
  %_aa_conv434 = bitcast ptr %x to ptr
  %_adda_435 = getelementptr inbounds i8, ptr %_aa_conv434, i64 %_sub_tmp23
  %_aa_conv785 = bitcast ptr %b to ptr
  %_adda_786 = getelementptr inbounds i8, ptr %_aa_conv785, i64 %_sub_tmp97
  ; ny < 1: nothing to do.
  br i1 %_grt_tmp851279, label %k_loop.us.preheader, label %k_loop.lr.ph.split

k_loop.us.preheader:                              ; preds = %k_loop.lr.ph
  br label %_return_bb.loopexit

k_loop.lr.ph.split:                               ; preds = %k_loop.lr.ph
  ; nx < 1: nothing to do.
  %_grt_tmp849273 = icmp slt i32 %_ind_val1, 1
  br i1 %_grt_tmp849273, label %k_loop.us291.preheader, label %k_loop.lr.ph.split.split

k_loop.us291.preheader:                           ; preds = %k_loop.lr.ph.split
  br label %_return_bb.loopexit300

k_loop.lr.ph.split.split:                         ; preds = %k_loop.lr.ph.split
  ; nb < 1: nothing to do.
  br i1 %_grt_tmp847270, label %k_loop.us294.preheader, label %k_loop.preheader

k_loop.preheader:                                 ; preds = %k_loop.lr.ph.split.split
  ; Exit values for the induction variables: nb+1, nx+1, ny+1, nz+1.
  %0 = add i32 %_ind_val, 1
  %1 = add i32 %_ind_val1, 1
  %2 = add i32 %_ind_val3, 1
  %3 = add i32 %_ind_val5, 1
  br label %k_loop

k_loop.us294.preheader:                           ; preds = %k_loop.lr.ph.split.split
  br label %_return_bb.loopexit301

k_loop:                                           ; preds = %k_loop._label_18_crit_edge.split.split.split, %k_loop.preheader
  %indvars.iv316 = phi i64 [ 1, %k_loop.preheader ], [ %indvars.iv.next317, %k_loop._label_18_crit_edge.split.split.split ]
  %indvars.iv.next317 = add nuw nsw i64 %indvars.iv316, 1
  ; k-dimension byte offsets: y uses k+1, b and x use k.
  %_ix_x_len = mul i64 %_mult_tmp8, %indvars.iv.next317
  %_ix_x_len410 = mul i64 %_mult_tmp75, %indvars.iv316
  %_ix_x_len822 = mul i64 %_mult_tmp8, %indvars.iv316
  br label %j_loop

j_loop:                                           ; preds = %j_loop._label_15_crit_edge.split.split, %k_loop
  %indvars.iv312 = phi i64 [ %indvars.iv.next313, %j_loop._label_15_crit_edge.split.split ], [ 1, %k_loop ]
  ; j-dimension byte offsets: one shared by y and x, one for b.
  %_ix_x_len371 = mul i64 %_mult_tmp7, %indvars.iv312
  %_ix_x_len415 = mul i64 %_mult_tmp74, %indvars.iv312
  br label %i_loop

i_loop:                                           ; preds = %i_loop._label_12_crit_edge.split, %j_loop
  %indvars.iv307 = phi i64 [ %indvars.iv.next308, %i_loop._label_12_crit_edge.split ], [ 1, %j_loop ]
  ; i-dimension byte offsets: one shared by y and x, one for b.
  %_ix_x_len375 = mul i64 %_mult_tmp, %indvars.iv307
  %_ix_x_len420 = mul i64 %_mult_tmp73, %indvars.iv307
  br label %l_loop

l_loop:                                           ; preds = %l_loop._label_9_crit_edge, %i_loop
  %indvars.iv303 = phi i64 [ %indvars.iv.next304, %l_loop._label_9_crit_edge ], [ 1, %i_loop ]
  ; l is the unit-stride (8-byte) index of y and b.
  %_ix_x_len378 = shl nuw nsw i64 %indvars.iv303, 3
  br label %m_loop

m_loop:                                           ; preds = %m_loop, %l_loop
  %indvars.iv = phi i64 [ %indvars.iv.next, %m_loop ], [ 1, %l_loop ]
  ; m strides b by 8*nb bytes and x by 8 bytes.
  %_ix_x_len424 = mul i64 %_mult_tmp, %indvars.iv
  %_ix_x_len454 = shl nuw nsw i64 %indvars.iv, 3
  ; Address chains, built one dimension at a time:
  ;   y: %_adda_    -> ...gep -> 372 -> 376 -> 379        (k+1, j, i, l)
  ;   b: %_adda_786 -> 791 -> 796 -> 801 -> 806 -> 810    (k, j, i, l, m)
  ;   x: %_adda_435 -> 823 -> 828 -> 833 -> 837           (k, j, i, m)
  %_ixa_gep = getelementptr inbounds i8, ptr %_adda_, i64 %_ix_x_len
  %_ixa_gep791 = getelementptr inbounds i8, ptr %_adda_786, i64 %_ix_x_len410
  %_ixa_gep823 = getelementptr inbounds i8, ptr %_adda_435, i64 %_ix_x_len822
  %_ixa_gep372 = getelementptr inbounds i8, ptr %_ixa_gep, i64 %_ix_x_len371
  %_ixa_gep376 = getelementptr inbounds i8, ptr %_ixa_gep372, i64 %_ix_x_len375
  %_ixa_gep796 = getelementptr inbounds i8, ptr %_ixa_gep791, i64 %_ix_x_len415
  %_ixa_gep828 = getelementptr inbounds i8, ptr %_ixa_gep823, i64 %_ix_x_len371
  %_ixa_gep379 = getelementptr inbounds i8, ptr %_ixa_gep376, i64 %_ix_x_len378
  %_ixa_gep801 = getelementptr inbounds i8, ptr %_ixa_gep796, i64 %_ix_x_len420
  %_ixa_gep833 = getelementptr inbounds i8, ptr %_ixa_gep828, i64 %_ix_x_len375
  %_ixa_gep806 = getelementptr inbounds i8, ptr %_ixa_gep801, i64 %_ix_x_len378
  %_ixa_gep810 = getelementptr inbounds i8, ptr %_ixa_gep806, i64 %_ix_x_len424
  %_gepp = bitcast ptr %_ixa_gep379 to ptr
  %_gepp813 = bitcast ptr %_ixa_gep810 to ptr
  ; Load b[k][j][i][m][l].
  %_ind_val814 = load double, ptr %_gepp813, align 8
  %_ixa_gep837 = getelementptr inbounds i8, ptr %_ixa_gep833, i64 %_ix_x_len454
  %_gepp840 = bitcast ptr %_ixa_gep837 to ptr
  ; Load x[k][j][i][m].
  %_ind_val841 = load double, ptr %_gepp840, align 8
  ; y[k+1][j][i][l] = b * x (plain store, no read of y).
  %_mult_tmp842 = fmul double %_ind_val814, %_ind_val841
  store double %_mult_tmp842, ptr %_gepp, align 8
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Widened exit values for all five loops. Only %wide.trip.count is used in
  ; this block; the other four are consumed by the enclosing loops' latches.
  ; %wide.trip.count and %wide.trip.count305 both widen %0 (nb+1).
  %wide.trip.count = zext i32 %0 to i64
  %wide.trip.count305 = zext i32 %0 to i64
  %wide.trip.count309 = zext i32 %1 to i64
  %wide.trip.count314 = zext i32 %2 to i64
  %wide.trip.count319 = zext i32 %3 to i64
  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %m_loop, label %l_loop._label_9_crit_edge

l_loop._label_9_crit_edge:                        ; preds = %m_loop
  ; l_loop latch: continue while l+1 != nb+1.
  %indvars.iv.next304 = add nuw nsw i64 %indvars.iv303, 1
  %exitcond306 = icmp ne i64 %indvars.iv.next304, %wide.trip.count305
  br i1 %exitcond306, label %l_loop, label %i_loop._label_12_crit_edge.split

i_loop._label_12_crit_edge.split:                 ; preds = %l_loop._label_9_crit_edge
  ; i_loop latch: continue while i+1 != nx+1.
  %indvars.iv.next308 = add nuw nsw i64 %indvars.iv307, 1
  %exitcond310 = icmp ne i64 %indvars.iv.next308, %wide.trip.count309
  br i1 %exitcond310, label %i_loop, label %j_loop._label_15_crit_edge.split.split

j_loop._label_15_crit_edge.split.split:           ; preds = %i_loop._label_12_crit_edge.split
  ; j_loop latch: continue while j+1 != ny+1.
  %indvars.iv.next313 = add nuw nsw i64 %indvars.iv312, 1
  %exitcond315 = icmp ne i64 %indvars.iv.next313, %wide.trip.count314
  br i1 %exitcond315, label %j_loop, label %k_loop._label_18_crit_edge.split.split.split

k_loop._label_18_crit_edge.split.split.split:     ; preds = %j_loop._label_15_crit_edge.split.split
  ; k_loop latch: k+1 was already computed in the k_loop header.
  %exitcond320 = icmp ne i64 %indvars.iv.next317, %wide.trip.count319
  br i1 %exitcond320, label %k_loop, label %_return_bb.loopexit302

_return_bb.loopexit:                              ; preds = %k_loop.us.preheader
  br label %_return_bb

_return_bb.loopexit300:                           ; preds = %k_loop.us291.preheader
  br label %_return_bb

_return_bb.loopexit301:                           ; preds = %k_loop.us294.preheader
  br label %_return_bb

_return_bb.loopexit302:                           ; preds = %k_loop._label_18_crit_edge.split.split.split
  br label %_return_bb

_return_bb:                                       ; preds = %_return_bb.loopexit302, %_return_bb.loopexit301, %_return_bb.loopexit300, %_return_bb.loopexit, %mat_times_vec_entry
  ret void
}