1 /* The tracer pass for the GNU compiler. 2 Contributed by Jan Hubicka, SuSE Labs. 3 Adapted to work on GIMPLE instead of RTL by Robert Kidd, UIUC. 4 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 5 Free Software Foundation, Inc. 6 7 This file is part of GCC. 8 9 GCC is free software; you can redistribute it and/or modify it 10 under the terms of the GNU General Public License as published by 11 the Free Software Foundation; either version 3, or (at your option) 12 any later version. 13 14 GCC is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 17 License for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with GCC; see the file COPYING3. If not see 21 <http://www.gnu.org/licenses/>. */ 22 23 /* This pass performs the tail duplication needed for superblock formation. 24 For more information see: 25 26 Design and Analysis of Profile-Based Optimization in Compaq's 27 Compilation Tools for Alpha; Journal of Instruction-Level 28 Parallelism 3 (2000) 1-25 29 30 Unlike Compaq's implementation we don't do the loop peeling as most 31 probably a better job can be done by a special pass and we don't 32 need to worry too much about the code size implications as the tail 33 duplicates are crossjumped again if optimizations are not 34 performed. */ 35 36 37 #include "config.h" 38 #include "system.h" 39 #include "coretypes.h" 40 #include "tm.h" 41 #include "tree.h" 42 #include "rtl.h" 43 #include "hard-reg-set.h" 44 #include "basic-block.h" 45 #include "output.h" 46 #include "cfglayout.h" 47 #include "fibheap.h" 48 #include "flags.h" 49 #include "timevar.h" 50 #include "params.h" 51 #include "coverage.h" 52 #include "tree-pass.h" 53 #include "tree-flow.h" 54 #include "tree-inline.h" 55 56 static int count_insns (basic_block); 57 static bool ignore_bb_p (const_basic_block); 58 static bool better_p (const_edge, const_edge); 59 static edge find_best_successor (basic_block); 60 static edge find_best_predecessor (basic_block); 61 static int find_trace (basic_block, basic_block *); 62 static void tail_duplicate (void); 63 64 /* Minimal outgoing edge probability considered for superblock formation. */ 65 static int probability_cutoff; 66 static int branch_ratio_cutoff; 67 68 /* A bit BB->index is set if BB has already been seen, i.e. it is 69 connected to some trace already. */ 70 sbitmap bb_seen; 71 72 static inline void 73 mark_bb_seen (basic_block bb) 74 { 75 unsigned int size = SBITMAP_SIZE_BYTES (bb_seen) * 8; 76 77 if ((unsigned int)bb->index >= size) 78 bb_seen = sbitmap_resize (bb_seen, size * 2, 0); 79 80 SET_BIT (bb_seen, bb->index); 81 } 82 83 static inline bool 84 bb_seen_p (basic_block bb) 85 { 86 return TEST_BIT (bb_seen, bb->index); 87 } 88 89 /* Return true if we should ignore the basic block for purposes of tracing. */ 90 static bool 91 ignore_bb_p (const_basic_block bb) 92 { 93 if (bb->index < NUM_FIXED_BLOCKS) 94 return true; 95 if (optimize_bb_for_size_p (bb)) 96 return true; 97 return false; 98 } 99 100 /* Return number of instructions in the block. */ 101 102 static int 103 count_insns (basic_block bb) 104 { 105 gimple_stmt_iterator gsi; 106 gimple stmt; 107 int n = 0; 108 109 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 110 { 111 stmt = gsi_stmt (gsi); 112 n += estimate_num_insns (stmt, &eni_size_weights); 113 } 114 return n; 115 } 116 117 /* Return true if E1 is more frequent than E2. */ 118 static bool 119 better_p (const_edge e1, const_edge e2) 120 { 121 if (e1->count != e2->count) 122 return e1->count > e2->count; 123 if (e1->src->frequency * e1->probability != 124 e2->src->frequency * e2->probability) 125 return (e1->src->frequency * e1->probability 126 > e2->src->frequency * e2->probability); 127 /* This is needed to avoid changes in the decision after 128 CFG is modified. */ 129 if (e1->src != e2->src) 130 return e1->src->index > e2->src->index; 131 return e1->dest->index > e2->dest->index; 132 } 133 134 /* Return most frequent successor of basic block BB. */ 135 136 static edge 137 find_best_successor (basic_block bb) 138 { 139 edge e; 140 edge best = NULL; 141 edge_iterator ei; 142 143 FOR_EACH_EDGE (e, ei, bb->succs) 144 if (!best || better_p (e, best)) 145 best = e; 146 if (!best || ignore_bb_p (best->dest)) 147 return NULL; 148 if (best->probability <= probability_cutoff) 149 return NULL; 150 return best; 151 } 152 153 /* Return most frequent predecessor of basic block BB. */ 154 155 static edge 156 find_best_predecessor (basic_block bb) 157 { 158 edge e; 159 edge best = NULL; 160 edge_iterator ei; 161 162 FOR_EACH_EDGE (e, ei, bb->preds) 163 if (!best || better_p (e, best)) 164 best = e; 165 if (!best || ignore_bb_p (best->src)) 166 return NULL; 167 if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE 168 < bb->frequency * branch_ratio_cutoff) 169 return NULL; 170 return best; 171 } 172 173 /* Find the trace using bb and record it in the TRACE array. 174 Return number of basic blocks recorded. */ 175 176 static int 177 find_trace (basic_block bb, basic_block *trace) 178 { 179 int i = 0; 180 edge e; 181 182 if (dump_file) 183 fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->frequency); 184 185 while ((e = find_best_predecessor (bb)) != NULL) 186 { 187 basic_block bb2 = e->src; 188 if (bb_seen_p (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) 189 || find_best_successor (bb2) != e) 190 break; 191 if (dump_file) 192 fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); 193 bb = bb2; 194 } 195 if (dump_file) 196 fprintf (dump_file, " forward %i [%i]", bb->index, bb->frequency); 197 trace[i++] = bb; 198 199 /* Follow the trace in forward direction. */ 200 while ((e = find_best_successor (bb)) != NULL) 201 { 202 bb = e->dest; 203 if (bb_seen_p (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) 204 || find_best_predecessor (bb) != e) 205 break; 206 if (dump_file) 207 fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); 208 trace[i++] = bb; 209 } 210 if (dump_file) 211 fprintf (dump_file, "\n"); 212 return i; 213 } 214 215 /* Look for basic blocks in frequency order, construct traces and tail duplicate 216 if profitable. */ 217 218 static void 219 tail_duplicate (void) 220 { 221 fibnode_t *blocks = XCNEWVEC (fibnode_t, last_basic_block); 222 basic_block *trace = XNEWVEC (basic_block, n_basic_blocks); 223 int *counts = XNEWVEC (int, last_basic_block); 224 int ninsns = 0, nduplicated = 0; 225 gcov_type weighted_insns = 0, traced_insns = 0; 226 fibheap_t heap = fibheap_new (); 227 gcov_type cover_insns; 228 int max_dup_insns; 229 basic_block bb; 230 231 /* Create an oversized sbitmap to reduce the chance that we need to 232 resize it. */ 233 bb_seen = sbitmap_alloc (last_basic_block * 2); 234 sbitmap_zero (bb_seen); 235 initialize_original_copy_tables (); 236 237 if (profile_info && flag_branch_probabilities) 238 probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK); 239 else 240 probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY); 241 probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff; 242 243 branch_ratio_cutoff = 244 (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO)); 245 246 FOR_EACH_BB (bb) 247 { 248 int n = count_insns (bb); 249 if (!ignore_bb_p (bb)) 250 blocks[bb->index] = fibheap_insert (heap, -bb->frequency, 251 bb); 252 253 counts [bb->index] = n; 254 ninsns += n; 255 weighted_insns += n * bb->frequency; 256 } 257 258 if (profile_info && flag_branch_probabilities) 259 cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK); 260 else 261 cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE); 262 cover_insns = (weighted_insns * cover_insns + 50) / 100; 263 max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100; 264 265 while (traced_insns < cover_insns && nduplicated < max_dup_insns 266 && !fibheap_empty (heap)) 267 { 268 basic_block bb = (basic_block) fibheap_extract_min (heap); 269 int n, pos; 270 271 if (!bb) 272 break; 273 274 blocks[bb->index] = NULL; 275 276 if (ignore_bb_p (bb)) 277 continue; 278 gcc_assert (!bb_seen_p (bb)); 279 280 n = find_trace (bb, trace); 281 282 bb = trace[0]; 283 traced_insns += bb->frequency * counts [bb->index]; 284 if (blocks[bb->index]) 285 { 286 fibheap_delete_node (heap, blocks[bb->index]); 287 blocks[bb->index] = NULL; 288 } 289 290 for (pos = 1; pos < n; pos++) 291 { 292 basic_block bb2 = trace[pos]; 293 294 if (blocks[bb2->index]) 295 { 296 fibheap_delete_node (heap, blocks[bb2->index]); 297 blocks[bb2->index] = NULL; 298 } 299 traced_insns += bb2->frequency * counts [bb2->index]; 300 if (EDGE_COUNT (bb2->preds) > 1 301 && can_duplicate_block_p (bb2)) 302 { 303 edge e; 304 basic_block copy; 305 306 nduplicated += counts [bb2->index]; 307 308 e = find_edge (bb, bb2); 309 310 copy = duplicate_block (bb2, e, bb); 311 flush_pending_stmts (e); 312 313 add_phi_args_after_copy (©, 1, NULL); 314 315 /* Reconsider the original copy of block we've duplicated. 316 Removing the most common predecessor may make it to be 317 head. */ 318 blocks[bb2->index] = 319 fibheap_insert (heap, -bb2->frequency, bb2); 320 321 if (dump_file) 322 fprintf (dump_file, "Duplicated %i as %i [%i]\n", 323 bb2->index, copy->index, copy->frequency); 324 325 bb2 = copy; 326 } 327 mark_bb_seen (bb2); 328 bb = bb2; 329 /* In case the trace became infrequent, stop duplicating. */ 330 if (ignore_bb_p (bb)) 331 break; 332 } 333 if (dump_file) 334 fprintf (dump_file, " covered now %.1f\n\n", 335 traced_insns * 100.0 / weighted_insns); 336 } 337 if (dump_file) 338 fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated, 339 nduplicated * 100 / ninsns); 340 341 free_original_copy_tables (); 342 sbitmap_free (bb_seen); 343 free (blocks); 344 free (trace); 345 free (counts); 346 fibheap_delete (heap); 347 } 348 349 /* Main entry point to this file. */ 350 351 static unsigned int 352 tracer (void) 353 { 354 gcc_assert (current_ir_type () == IR_GIMPLE); 355 356 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1) 357 return 0; 358 359 mark_dfs_back_edges (); 360 if (dump_file) 361 dump_flow_info (dump_file, dump_flags); 362 363 /* Trace formation is done on the fly inside tail_duplicate */ 364 tail_duplicate (); 365 366 /* FIXME: We really only need to do this when we know tail duplication 367 has altered the CFG. */ 368 free_dominance_info (CDI_DOMINATORS); 369 if (dump_file) 370 dump_flow_info (dump_file, dump_flags); 371 372 return 0; 373 } 374 375 static bool 376 gate_tracer (void) 377 { 378 return (optimize > 0 && flag_tracer && flag_reorder_blocks); 379 } 380 381 struct gimple_opt_pass pass_tracer = 382 { 383 { 384 GIMPLE_PASS, 385 "tracer", /* name */ 386 gate_tracer, /* gate */ 387 tracer, /* execute */ 388 NULL, /* sub */ 389 NULL, /* next */ 390 0, /* static_pass_number */ 391 TV_TRACER, /* tv_id */ 392 0, /* properties_required */ 393 0, /* properties_provided */ 394 0, /* properties_destroyed */ 395 0, /* todo_flags_start */ 396 TODO_dump_func 397 | TODO_update_ssa 398 | TODO_verify_ssa /* todo_flags_finish */ 399 } 400 }; 401