1 /* Tail merging for gimple. 2 Copyright (C) 2011-2015 Free Software Foundation, Inc. 3 Contributed by Tom de Vries (tom@codesourcery.com) 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 GCC is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 /* Pass overview. 22 23 24 MOTIVATIONAL EXAMPLE 25 26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at 27 28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601) 29 { 30 struct FILED.1638 * fpD.2605; 31 charD.1 fileNameD.2604[1000]; 32 intD.0 D.3915; 33 const charD.1 * restrict outputFileName.0D.3914; 34 35 # BLOCK 2 freq:10000 36 # PRED: ENTRY [100.0%] (fallthru,exec) 37 # PT = nonlocal { D.3926 } (restr) 38 outputFileName.0D.3914_3 39 = (const charD.1 * restrict) outputFileNameD.2600_2(D); 40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)> 41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3); 44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13> 45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1); 48 if (D.3915_4 == 0) 49 goto <bb 3>; 50 else 51 goto <bb 4>; 52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec) 53 54 # BLOCK 3 freq:1000 55 # PRED: 2 [10.0%] (true,exec) 56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14> 57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 59 freeD.898 (ctxD.2601_5(D)); 60 goto <bb 7>; 61 # SUCC: 7 [100.0%] (fallthru,exec) 62 63 # BLOCK 4 freq:9000 64 # PRED: 2 [90.0%] (false,exec) 65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14> 66 # PT = nonlocal escaped 67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B); 70 if (fpD.2605_8 == 0B) 71 goto <bb 5>; 72 else 73 goto <bb 6>; 74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec) 75 76 # BLOCK 5 freq:173 77 # PRED: 4 [1.9%] (true,exec) 78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16> 79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 81 freeD.898 (ctxD.2601_5(D)); 82 goto <bb 7>; 83 # SUCC: 7 [100.0%] (fallthru,exec) 84 85 # BLOCK 6 freq:8827 86 # PRED: 4 [98.1%] (false,exec) 87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16> 88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8); 91 # SUCC: 7 [100.0%] (fallthru,exec) 92 93 # BLOCK 7 freq:10000 94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec) 95 6 [100.0%] (fallthru,exec) 96 # PT = nonlocal null 97 98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)> 99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5), 100 .MEMD.3923_18(6)> 101 # VUSE <.MEMD.3923_11> 102 return ctxD.2601_1; 103 # SUCC: EXIT [100.0%] 104 } 105 106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the 107 same successors, and the same operations. 108 109 110 CONTEXT 111 112 A technique called tail merging (or cross jumping) can fix the example 113 above. For a block, we look for common code at the end (the tail) of the 114 predecessor blocks, and insert jumps from one block to the other. 115 The example is a special case for tail merging, in that 2 whole blocks 116 can be merged, rather than just the end parts of it. 117 We currently only focus on whole block merging, so in that sense 118 calling this pass tail merge is a bit of a misnomer. 119 120 We distinguish 2 kinds of situations in which blocks can be merged: 121 - same operations, same predecessors. The successor edges coming from one 122 block are redirected to come from the other block. 123 - same operations, same successors. The predecessor edges entering one block 124 are redirected to enter the other block. Note that this operation might 125 involve introducing phi operations. 126 127 For efficient implementation, we would like to value numbers the blocks, and 128 have a comparison operator that tells us whether the blocks are equal. 129 Besides being runtime efficient, block value numbering should also abstract 130 from irrelevant differences in order of operations, much like normal value 131 numbering abstracts from irrelevant order of operations. 132 133 For the first situation (same_operations, same predecessors), normal value 134 numbering fits well. We can calculate a block value number based on the 135 value numbers of the defs and vdefs. 136 137 For the second situation (same operations, same successors), this approach 138 doesn't work so well. We can illustrate this using the example. The calls 139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will 140 remain different in value numbering, since they represent different memory 141 states. So the resulting vdefs of the frees will be different in value 142 numbering, so the block value numbers will be different. 143 144 The reason why we call the blocks equal is not because they define the same 145 values, but because uses in the blocks use (possibly different) defs in the 146 same way. To be able to detect this efficiently, we need to do some kind of 147 reverse value numbering, meaning number the uses rather than the defs, and 148 calculate a block value number based on the value number of the uses. 149 Ideally, a block comparison operator will also indicate which phis are needed 150 to merge the blocks. 151 152 For the moment, we don't do block value numbering, but we do insn-by-insn 153 matching, using scc value numbers to match operations with results, and 154 structural comparison otherwise, while ignoring vop mismatches. 155 156 157 IMPLEMENTATION 158 159 1. The pass first determines all groups of blocks with the same successor 160 blocks. 161 2. Within each group, it tries to determine clusters of equal basic blocks. 162 3. The clusters are applied. 163 4. The same successor groups are updated. 164 5. This process is repeated from 2 onwards, until no more changes. 165 166 167 LIMITATIONS/TODO 168 169 - block only 170 - handles only 'same operations, same successors'. 171 It handles same predecessors as a special subcase though. 172 - does not implement the reverse value numbering and block value numbering. 173 - improve memory allocation: use garbage collected memory, obstacks, 174 allocpools where appropriate. 175 - no insertion of gimple_reg phis, We only introduce vop-phis. 176 - handle blocks with gimple_reg phi_nodes. 177 178 179 PASS PLACEMENT 180 This 'pass' is not a stand-alone gimple pass, but runs as part of 181 pass_pre, in order to share the value numbering. 182 183 184 SWITCHES 185 186 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */ 187 188 #include "config.h" 189 #include "system.h" 190 #include "coretypes.h" 191 #include "tm.h" 192 #include "hash-set.h" 193 #include "machmode.h" 194 #include "vec.h" 195 #include "double-int.h" 196 #include "input.h" 197 #include "alias.h" 198 #include "symtab.h" 199 #include "wide-int.h" 200 #include "inchash.h" 201 #include "real.h" 202 #include "tree.h" 203 #include "fold-const.h" 204 #include "stor-layout.h" 205 #include "trans-mem.h" 206 #include "inchash.h" 207 #include "tm_p.h" 208 #include "predict.h" 209 #include "hard-reg-set.h" 210 #include "input.h" 211 #include "function.h" 212 #include "dominance.h" 213 #include "cfg.h" 214 #include "cfganal.h" 215 #include "cfgcleanup.h" 216 #include "basic-block.h" 217 #include "flags.h" 218 #include "hash-table.h" 219 #include "tree-ssa-alias.h" 220 #include "internal-fn.h" 221 #include "tree-eh.h" 222 #include "gimple-expr.h" 223 #include "is-a.h" 224 #include "gimple.h" 225 #include "gimple-iterator.h" 226 #include "gimple-ssa.h" 227 #include "tree-cfg.h" 228 #include "tree-phinodes.h" 229 #include "ssa-iterators.h" 230 #include "tree-into-ssa.h" 231 #include "params.h" 232 #include "gimple-pretty-print.h" 233 #include "tree-ssa-sccvn.h" 234 #include "tree-dump.h" 235 #include "cfgloop.h" 236 #include "tree-pass.h" 237 #include "trans-mem.h" 238 #include "stringpool.h" 239 #include "tree-ssanames.h" 240 #include "tree-eh.h" 241 242 /* Describes a group of bbs with the same successors. The successor bbs are 243 cached in succs, and the successor edge flags are cached in succ_flags. 244 If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags, 245 it's marked in inverse. 246 Additionally, the hash value for the struct is cached in hashval, and 247 in_worklist indicates whether it's currently part of worklist. */ 248 249 struct same_succ_def 250 { 251 /* The bbs that have the same successor bbs. */ 252 bitmap bbs; 253 /* The successor bbs. */ 254 bitmap succs; 255 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for 256 bb. */ 257 bitmap inverse; 258 /* The edge flags for each of the successor bbs. */ 259 vec<int> succ_flags; 260 /* Indicates whether the struct is currently in the worklist. */ 261 bool in_worklist; 262 /* The hash value of the struct. */ 263 hashval_t hashval; 264 265 /* hash_table support. */ 266 typedef same_succ_def value_type; 267 typedef same_succ_def compare_type; 268 static inline hashval_t hash (const value_type *); 269 static int equal (const value_type *, const compare_type *); 270 static void remove (value_type *); 271 }; 272 typedef struct same_succ_def *same_succ; 273 typedef const struct same_succ_def *const_same_succ; 274 275 /* hash routine for hash_table support, returns hashval of E. */ 276 277 inline hashval_t 278 same_succ_def::hash (const value_type *e) 279 { 280 return e->hashval; 281 } 282 283 /* A group of bbs where 1 bb from bbs can replace the other bbs. */ 284 285 struct bb_cluster_def 286 { 287 /* The bbs in the cluster. */ 288 bitmap bbs; 289 /* The preds of the bbs in the cluster. */ 290 bitmap preds; 291 /* Index in all_clusters vector. */ 292 int index; 293 /* The bb to replace the cluster with. */ 294 basic_block rep_bb; 295 }; 296 typedef struct bb_cluster_def *bb_cluster; 297 typedef const struct bb_cluster_def *const_bb_cluster; 298 299 /* Per bb-info. */ 300 301 struct aux_bb_info 302 { 303 /* The number of non-debug statements in the bb. */ 304 int size; 305 /* The same_succ that this bb is a member of. */ 306 same_succ bb_same_succ; 307 /* The cluster that this bb is a member of. */ 308 bb_cluster cluster; 309 /* The vop state at the exit of a bb. This is shortlived data, used to 310 communicate data between update_block_by and update_vuses. */ 311 tree vop_at_exit; 312 /* The bb that either contains or is dominated by the dependencies of the 313 bb. */ 314 basic_block dep_bb; 315 }; 316 317 /* Macros to access the fields of struct aux_bb_info. */ 318 319 #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size) 320 #define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ) 321 #define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster) 322 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit) 323 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb) 324 325 /* Returns true if the only effect a statement STMT has, is to define locally 326 used SSA_NAMEs. */ 327 328 static bool 329 stmt_local_def (gimple stmt) 330 { 331 basic_block bb, def_bb; 332 imm_use_iterator iter; 333 use_operand_p use_p; 334 tree val; 335 def_operand_p def_p; 336 337 if (gimple_vdef (stmt) != NULL_TREE 338 || gimple_has_side_effects (stmt) 339 || gimple_could_trap_p_1 (stmt, false, false) 340 || gimple_vuse (stmt) != NULL_TREE) 341 return false; 342 343 def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF); 344 if (def_p == NULL) 345 return false; 346 347 val = DEF_FROM_PTR (def_p); 348 if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME) 349 return false; 350 351 def_bb = gimple_bb (stmt); 352 353 FOR_EACH_IMM_USE_FAST (use_p, iter, val) 354 { 355 if (is_gimple_debug (USE_STMT (use_p))) 356 continue; 357 bb = gimple_bb (USE_STMT (use_p)); 358 if (bb == def_bb) 359 continue; 360 361 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI 362 && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb) 363 continue; 364 365 return false; 366 } 367 368 return true; 369 } 370 371 /* Let GSI skip forwards over local defs. */ 372 373 static void 374 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi) 375 { 376 gimple stmt; 377 378 while (true) 379 { 380 if (gsi_end_p (*gsi)) 381 return; 382 stmt = gsi_stmt (*gsi); 383 if (!stmt_local_def (stmt)) 384 return; 385 gsi_next_nondebug (gsi); 386 } 387 } 388 389 /* VAL1 and VAL2 are either: 390 - uses in BB1 and BB2, or 391 - phi alternatives for BB1 and BB2. 392 Return true if the uses have the same gvn value. */ 393 394 static bool 395 gvn_uses_equal (tree val1, tree val2) 396 { 397 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE); 398 399 if (val1 == val2) 400 return true; 401 402 if (vn_valueize (val1) != vn_valueize (val2)) 403 return false; 404 405 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1)) 406 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2))); 407 } 408 409 /* Prints E to FILE. */ 410 411 static void 412 same_succ_print (FILE *file, const same_succ e) 413 { 414 unsigned int i; 415 bitmap_print (file, e->bbs, "bbs:", "\n"); 416 bitmap_print (file, e->succs, "succs:", "\n"); 417 bitmap_print (file, e->inverse, "inverse:", "\n"); 418 fprintf (file, "flags:"); 419 for (i = 0; i < e->succ_flags.length (); ++i) 420 fprintf (file, " %x", e->succ_flags[i]); 421 fprintf (file, "\n"); 422 } 423 424 /* Prints same_succ VE to VFILE. */ 425 426 inline int 427 ssa_same_succ_print_traverse (same_succ *pe, FILE *file) 428 { 429 const same_succ e = *pe; 430 same_succ_print (file, e); 431 return 1; 432 } 433 434 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */ 435 436 static void 437 update_dep_bb (basic_block use_bb, tree val) 438 { 439 basic_block dep_bb; 440 441 /* Not a dep. */ 442 if (TREE_CODE (val) != SSA_NAME) 443 return; 444 445 /* Skip use of global def. */ 446 if (SSA_NAME_IS_DEFAULT_DEF (val)) 447 return; 448 449 /* Skip use of local def. */ 450 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val)); 451 if (dep_bb == use_bb) 452 return; 453 454 if (BB_DEP_BB (use_bb) == NULL 455 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb))) 456 BB_DEP_BB (use_bb) = dep_bb; 457 } 458 459 /* Update BB_DEP_BB, given the dependencies in STMT. */ 460 461 static void 462 stmt_update_dep_bb (gimple stmt) 463 { 464 ssa_op_iter iter; 465 use_operand_p use; 466 467 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE) 468 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use)); 469 } 470 471 /* Calculates hash value for same_succ VE. */ 472 473 static hashval_t 474 same_succ_hash (const_same_succ e) 475 { 476 inchash::hash hstate (bitmap_hash (e->succs)); 477 int flags; 478 unsigned int i; 479 unsigned int first = bitmap_first_set_bit (e->bbs); 480 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, first); 481 int size = 0; 482 gimple stmt; 483 tree arg; 484 unsigned int s; 485 bitmap_iterator bs; 486 487 for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb); 488 !gsi_end_p (gsi); gsi_next_nondebug (&gsi)) 489 { 490 stmt = gsi_stmt (gsi); 491 stmt_update_dep_bb (stmt); 492 if (stmt_local_def (stmt)) 493 continue; 494 size++; 495 496 hstate.add_int (gimple_code (stmt)); 497 if (is_gimple_assign (stmt)) 498 hstate.add_int (gimple_assign_rhs_code (stmt)); 499 if (!is_gimple_call (stmt)) 500 continue; 501 if (gimple_call_internal_p (stmt)) 502 hstate.add_int (gimple_call_internal_fn (stmt)); 503 else 504 { 505 inchash::add_expr (gimple_call_fn (stmt), hstate); 506 if (gimple_call_chain (stmt)) 507 inchash::add_expr (gimple_call_chain (stmt), hstate); 508 } 509 for (i = 0; i < gimple_call_num_args (stmt); i++) 510 { 511 arg = gimple_call_arg (stmt, i); 512 arg = vn_valueize (arg); 513 inchash::add_expr (arg, hstate); 514 } 515 } 516 517 hstate.add_int (size); 518 BB_SIZE (bb) = size; 519 520 for (i = 0; i < e->succ_flags.length (); ++i) 521 { 522 flags = e->succ_flags[i]; 523 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); 524 hstate.add_int (flags); 525 } 526 527 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs) 528 { 529 int n = find_edge (bb, BASIC_BLOCK_FOR_FN (cfun, s))->dest_idx; 530 for (gphi_iterator gsi = gsi_start_phis (BASIC_BLOCK_FOR_FN (cfun, s)); 531 !gsi_end_p (gsi); 532 gsi_next (&gsi)) 533 { 534 gphi *phi = gsi.phi (); 535 tree lhs = gimple_phi_result (phi); 536 tree val = gimple_phi_arg_def (phi, n); 537 538 if (virtual_operand_p (lhs)) 539 continue; 540 update_dep_bb (bb, val); 541 } 542 } 543 544 return hstate.end (); 545 } 546 547 /* Returns true if E1 and E2 have 2 successors, and if the successor flags 548 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for 549 the other edge flags. */ 550 551 static bool 552 inverse_flags (const_same_succ e1, const_same_succ e2) 553 { 554 int f1a, f1b, f2a, f2b; 555 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); 556 557 if (e1->succ_flags.length () != 2) 558 return false; 559 560 f1a = e1->succ_flags[0]; 561 f1b = e1->succ_flags[1]; 562 f2a = e2->succ_flags[0]; 563 f2b = e2->succ_flags[1]; 564 565 if (f1a == f2a && f1b == f2b) 566 return false; 567 568 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask); 569 } 570 571 /* Compares SAME_SUCCs E1 and E2. */ 572 573 int 574 same_succ_def::equal (const value_type *e1, const compare_type *e2) 575 { 576 unsigned int i, first1, first2; 577 gimple_stmt_iterator gsi1, gsi2; 578 gimple s1, s2; 579 basic_block bb1, bb2; 580 581 if (e1->hashval != e2->hashval) 582 return 0; 583 584 if (e1->succ_flags.length () != e2->succ_flags.length ()) 585 return 0; 586 587 if (!bitmap_equal_p (e1->succs, e2->succs)) 588 return 0; 589 590 if (!inverse_flags (e1, e2)) 591 { 592 for (i = 0; i < e1->succ_flags.length (); ++i) 593 if (e1->succ_flags[i] != e2->succ_flags[i]) 594 return 0; 595 } 596 597 first1 = bitmap_first_set_bit (e1->bbs); 598 first2 = bitmap_first_set_bit (e2->bbs); 599 600 bb1 = BASIC_BLOCK_FOR_FN (cfun, first1); 601 bb2 = BASIC_BLOCK_FOR_FN (cfun, first2); 602 603 if (BB_SIZE (bb1) != BB_SIZE (bb2)) 604 return 0; 605 606 gsi1 = gsi_start_nondebug_bb (bb1); 607 gsi2 = gsi_start_nondebug_bb (bb2); 608 gsi_advance_fw_nondebug_nonlocal (&gsi1); 609 gsi_advance_fw_nondebug_nonlocal (&gsi2); 610 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2))) 611 { 612 s1 = gsi_stmt (gsi1); 613 s2 = gsi_stmt (gsi2); 614 if (gimple_code (s1) != gimple_code (s2)) 615 return 0; 616 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2)) 617 return 0; 618 gsi_next_nondebug (&gsi1); 619 gsi_next_nondebug (&gsi2); 620 gsi_advance_fw_nondebug_nonlocal (&gsi1); 621 gsi_advance_fw_nondebug_nonlocal (&gsi2); 622 } 623 624 return 1; 625 } 626 627 /* Alloc and init a new SAME_SUCC. */ 628 629 static same_succ 630 same_succ_alloc (void) 631 { 632 same_succ same = XNEW (struct same_succ_def); 633 634 same->bbs = BITMAP_ALLOC (NULL); 635 same->succs = BITMAP_ALLOC (NULL); 636 same->inverse = BITMAP_ALLOC (NULL); 637 same->succ_flags.create (10); 638 same->in_worklist = false; 639 640 return same; 641 } 642 643 /* Delete same_succ E. */ 644 645 void 646 same_succ_def::remove (same_succ e) 647 { 648 BITMAP_FREE (e->bbs); 649 BITMAP_FREE (e->succs); 650 BITMAP_FREE (e->inverse); 651 e->succ_flags.release (); 652 653 XDELETE (e); 654 } 655 656 /* Reset same_succ SAME. */ 657 658 static void 659 same_succ_reset (same_succ same) 660 { 661 bitmap_clear (same->bbs); 662 bitmap_clear (same->succs); 663 bitmap_clear (same->inverse); 664 same->succ_flags.truncate (0); 665 } 666 667 static hash_table<same_succ_def> *same_succ_htab; 668 669 /* Array that is used to store the edge flags for a successor. */ 670 671 static int *same_succ_edge_flags; 672 673 /* Bitmap that is used to mark bbs that are recently deleted. */ 674 675 static bitmap deleted_bbs; 676 677 /* Bitmap that is used to mark predecessors of bbs that are 678 deleted. */ 679 680 static bitmap deleted_bb_preds; 681 682 /* Prints same_succ_htab to stderr. */ 683 684 extern void debug_same_succ (void); 685 DEBUG_FUNCTION void 686 debug_same_succ ( void) 687 { 688 same_succ_htab->traverse <FILE *, ssa_same_succ_print_traverse> (stderr); 689 } 690 691 692 /* Vector of bbs to process. */ 693 694 static vec<same_succ> worklist; 695 696 /* Prints worklist to FILE. */ 697 698 static void 699 print_worklist (FILE *file) 700 { 701 unsigned int i; 702 for (i = 0; i < worklist.length (); ++i) 703 same_succ_print (file, worklist[i]); 704 } 705 706 /* Adds SAME to worklist. */ 707 708 static void 709 add_to_worklist (same_succ same) 710 { 711 if (same->in_worklist) 712 return; 713 714 if (bitmap_count_bits (same->bbs) < 2) 715 return; 716 717 same->in_worklist = true; 718 worklist.safe_push (same); 719 } 720 721 /* Add BB to same_succ_htab. */ 722 723 static void 724 find_same_succ_bb (basic_block bb, same_succ *same_p) 725 { 726 unsigned int j; 727 bitmap_iterator bj; 728 same_succ same = *same_p; 729 same_succ *slot; 730 edge_iterator ei; 731 edge e; 732 733 if (bb == NULL 734 /* Be conservative with loop structure. It's not evident that this test 735 is sufficient. Before tail-merge, we've just called 736 loop_optimizer_finalize, and LOOPS_MAY_HAVE_MULTIPLE_LATCHES is now 737 set, so there's no guarantee that the loop->latch value is still valid. 738 But we assume that, since we've forced LOOPS_HAVE_SIMPLE_LATCHES at the 739 start of pre, we've kept that property intact throughout pre, and are 740 keeping it throughout tail-merge using this test. */ 741 || bb->loop_father->latch == bb) 742 return; 743 bitmap_set_bit (same->bbs, bb->index); 744 FOR_EACH_EDGE (e, ei, bb->succs) 745 { 746 int index = e->dest->index; 747 bitmap_set_bit (same->succs, index); 748 same_succ_edge_flags[index] = e->flags; 749 } 750 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj) 751 same->succ_flags.safe_push (same_succ_edge_flags[j]); 752 753 same->hashval = same_succ_hash (same); 754 755 slot = same_succ_htab->find_slot_with_hash (same, same->hashval, INSERT); 756 if (*slot == NULL) 757 { 758 *slot = same; 759 BB_SAME_SUCC (bb) = same; 760 add_to_worklist (same); 761 *same_p = NULL; 762 } 763 else 764 { 765 bitmap_set_bit ((*slot)->bbs, bb->index); 766 BB_SAME_SUCC (bb) = *slot; 767 add_to_worklist (*slot); 768 if (inverse_flags (same, *slot)) 769 bitmap_set_bit ((*slot)->inverse, bb->index); 770 same_succ_reset (same); 771 } 772 } 773 774 /* Find bbs with same successors. */ 775 776 static void 777 find_same_succ (void) 778 { 779 same_succ same = same_succ_alloc (); 780 basic_block bb; 781 782 FOR_EACH_BB_FN (bb, cfun) 783 { 784 find_same_succ_bb (bb, &same); 785 if (same == NULL) 786 same = same_succ_alloc (); 787 } 788 789 same_succ_def::remove (same); 790 } 791 792 /* Initializes worklist administration. */ 793 794 static void 795 init_worklist (void) 796 { 797 alloc_aux_for_blocks (sizeof (struct aux_bb_info)); 798 same_succ_htab = new hash_table<same_succ_def> (n_basic_blocks_for_fn (cfun)); 799 same_succ_edge_flags = XCNEWVEC (int, last_basic_block_for_fn (cfun)); 800 deleted_bbs = BITMAP_ALLOC (NULL); 801 deleted_bb_preds = BITMAP_ALLOC (NULL); 802 worklist.create (n_basic_blocks_for_fn (cfun)); 803 find_same_succ (); 804 805 if (dump_file && (dump_flags & TDF_DETAILS)) 806 { 807 fprintf (dump_file, "initial worklist:\n"); 808 print_worklist (dump_file); 809 } 810 } 811 812 /* Deletes worklist administration. */ 813 814 static void 815 delete_worklist (void) 816 { 817 free_aux_for_blocks (); 818 delete same_succ_htab; 819 same_succ_htab = NULL; 820 XDELETEVEC (same_succ_edge_flags); 821 same_succ_edge_flags = NULL; 822 BITMAP_FREE (deleted_bbs); 823 BITMAP_FREE (deleted_bb_preds); 824 worklist.release (); 825 } 826 827 /* Mark BB as deleted, and mark its predecessors. */ 828 829 static void 830 mark_basic_block_deleted (basic_block bb) 831 { 832 edge e; 833 edge_iterator ei; 834 835 bitmap_set_bit (deleted_bbs, bb->index); 836 837 FOR_EACH_EDGE (e, ei, bb->preds) 838 bitmap_set_bit (deleted_bb_preds, e->src->index); 839 } 840 841 /* Removes BB from its corresponding same_succ. */ 842 843 static void 844 same_succ_flush_bb (basic_block bb) 845 { 846 same_succ same = BB_SAME_SUCC (bb); 847 if (! same) 848 return; 849 850 BB_SAME_SUCC (bb) = NULL; 851 if (bitmap_single_bit_set_p (same->bbs)) 852 same_succ_htab->remove_elt_with_hash (same, same->hashval); 853 else 854 bitmap_clear_bit (same->bbs, bb->index); 855 } 856 857 /* Removes all bbs in BBS from their corresponding same_succ. */ 858 859 static void 860 same_succ_flush_bbs (bitmap bbs) 861 { 862 unsigned int i; 863 bitmap_iterator bi; 864 865 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi) 866 same_succ_flush_bb (BASIC_BLOCK_FOR_FN (cfun, i)); 867 } 868 869 /* Release the last vdef in BB, either normal or phi result. */ 870 871 static void 872 release_last_vdef (basic_block bb) 873 { 874 for (gimple_stmt_iterator i = gsi_last_bb (bb); !gsi_end_p (i); 875 gsi_prev_nondebug (&i)) 876 { 877 gimple stmt = gsi_stmt (i); 878 if (gimple_vdef (stmt) == NULL_TREE) 879 continue; 880 881 mark_virtual_operand_for_renaming (gimple_vdef (stmt)); 882 return; 883 } 884 885 for (gphi_iterator i = gsi_start_phis (bb); !gsi_end_p (i); 886 gsi_next (&i)) 887 { 888 gphi *phi = i.phi (); 889 tree res = gimple_phi_result (phi); 890 891 if (!virtual_operand_p (res)) 892 continue; 893 894 mark_virtual_phi_result_for_renaming (phi); 895 return; 896 } 897 898 } 899 900 /* For deleted_bb_preds, find bbs with same successors. */ 901 902 static void 903 update_worklist (void) 904 { 905 unsigned int i; 906 bitmap_iterator bi; 907 basic_block bb; 908 same_succ same; 909 910 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs); 911 bitmap_clear (deleted_bbs); 912 913 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK); 914 same_succ_flush_bbs (deleted_bb_preds); 915 916 same = same_succ_alloc (); 917 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi) 918 { 919 bb = BASIC_BLOCK_FOR_FN (cfun, i); 920 gcc_assert (bb != NULL); 921 find_same_succ_bb (bb, &same); 922 if (same == NULL) 923 same = same_succ_alloc (); 924 } 925 same_succ_def::remove (same); 926 bitmap_clear (deleted_bb_preds); 927 } 928 929 /* Prints cluster C to FILE. */ 930 931 static void 932 print_cluster (FILE *file, bb_cluster c) 933 { 934 if (c == NULL) 935 return; 936 bitmap_print (file, c->bbs, "bbs:", "\n"); 937 bitmap_print (file, c->preds, "preds:", "\n"); 938 } 939 940 /* Prints cluster C to stderr. */ 941 942 extern void debug_cluster (bb_cluster); 943 DEBUG_FUNCTION void 944 debug_cluster (bb_cluster c) 945 { 946 print_cluster (stderr, c); 947 } 948 949 /* Update C->rep_bb, given that BB is added to the cluster. */ 950 951 static void 952 update_rep_bb (bb_cluster c, basic_block bb) 953 { 954 /* Initial. */ 955 if (c->rep_bb == NULL) 956 { 957 c->rep_bb = bb; 958 return; 959 } 960 961 /* Current needs no deps, keep it. */ 962 if (BB_DEP_BB (c->rep_bb) == NULL) 963 return; 964 965 /* Bb needs no deps, change rep_bb. */ 966 if (BB_DEP_BB (bb) == NULL) 967 { 968 c->rep_bb = bb; 969 return; 970 } 971 972 /* Bb needs last deps earlier than current, change rep_bb. A potential 973 problem with this, is that the first deps might also be earlier, which 974 would mean we prefer longer lifetimes for the deps. To be able to check 975 for this, we would have to trace BB_FIRST_DEP_BB as well, besides 976 BB_DEP_BB, which is really BB_LAST_DEP_BB. 977 The benefit of choosing the bb with last deps earlier, is that it can 978 potentially be used as replacement for more bbs. */ 979 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb))) 980 c->rep_bb = bb; 981 } 982 983 /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */ 984 985 static void 986 add_bb_to_cluster (bb_cluster c, basic_block bb) 987 { 988 edge e; 989 edge_iterator ei; 990 991 bitmap_set_bit (c->bbs, bb->index); 992 993 FOR_EACH_EDGE (e, ei, bb->preds) 994 bitmap_set_bit (c->preds, e->src->index); 995 996 update_rep_bb (c, bb); 997 } 998 999 /* Allocate and init new cluster. */ 1000 1001 static bb_cluster 1002 new_cluster (void) 1003 { 1004 bb_cluster c; 1005 c = XCNEW (struct bb_cluster_def); 1006 c->bbs = BITMAP_ALLOC (NULL); 1007 c->preds = BITMAP_ALLOC (NULL); 1008 c->rep_bb = NULL; 1009 return c; 1010 } 1011 1012 /* Delete clusters. */ 1013 1014 static void 1015 delete_cluster (bb_cluster c) 1016 { 1017 if (c == NULL) 1018 return; 1019 BITMAP_FREE (c->bbs); 1020 BITMAP_FREE (c->preds); 1021 XDELETE (c); 1022 } 1023 1024 1025 /* Array that contains all clusters. */ 1026 1027 static vec<bb_cluster> all_clusters; 1028 1029 /* Allocate all cluster vectors. */ 1030 1031 static void 1032 alloc_cluster_vectors (void) 1033 { 1034 all_clusters.create (n_basic_blocks_for_fn (cfun)); 1035 } 1036 1037 /* Reset all cluster vectors. */ 1038 1039 static void 1040 reset_cluster_vectors (void) 1041 { 1042 unsigned int i; 1043 basic_block bb; 1044 for (i = 0; i < all_clusters.length (); ++i) 1045 delete_cluster (all_clusters[i]); 1046 all_clusters.truncate (0); 1047 FOR_EACH_BB_FN (bb, cfun) 1048 BB_CLUSTER (bb) = NULL; 1049 } 1050 1051 /* Delete all cluster vectors. */ 1052 1053 static void 1054 delete_cluster_vectors (void) 1055 { 1056 unsigned int i; 1057 for (i = 0; i < all_clusters.length (); ++i) 1058 delete_cluster (all_clusters[i]); 1059 all_clusters.release (); 1060 } 1061 1062 /* Merge cluster C2 into C1. */ 1063 1064 static void 1065 merge_clusters (bb_cluster c1, bb_cluster c2) 1066 { 1067 bitmap_ior_into (c1->bbs, c2->bbs); 1068 bitmap_ior_into (c1->preds, c2->preds); 1069 } 1070 1071 /* Register equivalence of BB1 and BB2 (members of cluster C). Store c in 1072 all_clusters, or merge c with existing cluster. */ 1073 1074 static void 1075 set_cluster (basic_block bb1, basic_block bb2) 1076 { 1077 basic_block merge_bb, other_bb; 1078 bb_cluster merge, old, c; 1079 1080 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL) 1081 { 1082 c = new_cluster (); 1083 add_bb_to_cluster (c, bb1); 1084 add_bb_to_cluster (c, bb2); 1085 BB_CLUSTER (bb1) = c; 1086 BB_CLUSTER (bb2) = c; 1087 c->index = all_clusters.length (); 1088 all_clusters.safe_push (c); 1089 } 1090 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL) 1091 { 1092 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1; 1093 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2; 1094 merge = BB_CLUSTER (merge_bb); 1095 add_bb_to_cluster (merge, other_bb); 1096 BB_CLUSTER (other_bb) = merge; 1097 } 1098 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2)) 1099 { 1100 unsigned int i; 1101 bitmap_iterator bi; 1102 1103 old = BB_CLUSTER (bb2); 1104 merge = BB_CLUSTER (bb1); 1105 merge_clusters (merge, old); 1106 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi) 1107 BB_CLUSTER (BASIC_BLOCK_FOR_FN (cfun, i)) = merge; 1108 all_clusters[old->index] = NULL; 1109 update_rep_bb (merge, old->rep_bb); 1110 delete_cluster (old); 1111 } 1112 else 1113 gcc_unreachable (); 1114 } 1115 1116 /* Return true if gimple operands T1 and T2 have the same value. */ 1117 1118 static bool 1119 gimple_operand_equal_value_p (tree t1, tree t2) 1120 { 1121 if (t1 == t2) 1122 return true; 1123 1124 if (t1 == NULL_TREE 1125 || t2 == NULL_TREE) 1126 return false; 1127 1128 if (operand_equal_p (t1, t2, 0)) 1129 return true; 1130 1131 return gvn_uses_equal (t1, t2); 1132 } 1133 1134 /* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and 1135 gimple_bb (s2) are members of SAME_SUCC. */ 1136 1137 static bool 1138 gimple_equal_p (same_succ same_succ, gimple s1, gimple s2) 1139 { 1140 unsigned int i; 1141 tree lhs1, lhs2; 1142 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2); 1143 tree t1, t2; 1144 bool inv_cond; 1145 enum tree_code code1, code2; 1146 1147 if (gimple_code (s1) != gimple_code (s2)) 1148 return false; 1149 1150 switch (gimple_code (s1)) 1151 { 1152 case GIMPLE_CALL: 1153 if (!gimple_call_same_target_p (s1, s2)) 1154 return false; 1155 1156 t1 = gimple_call_chain (s1); 1157 t2 = gimple_call_chain (s2); 1158 if (!gimple_operand_equal_value_p (t1, t2)) 1159 return false; 1160 1161 if (gimple_call_num_args (s1) != gimple_call_num_args (s2)) 1162 return false; 1163 1164 for (i = 0; i < gimple_call_num_args (s1); ++i) 1165 { 1166 t1 = gimple_call_arg (s1, i); 1167 t2 = gimple_call_arg (s2, i); 1168 if (!gimple_operand_equal_value_p (t1, t2)) 1169 return false; 1170 } 1171 1172 lhs1 = gimple_get_lhs (s1); 1173 lhs2 = gimple_get_lhs (s2); 1174 if (lhs1 == NULL_TREE && lhs2 == NULL_TREE) 1175 return true; 1176 if (lhs1 == NULL_TREE || lhs2 == NULL_TREE) 1177 return false; 1178 if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME) 1179 return vn_valueize (lhs1) == vn_valueize (lhs2); 1180 return operand_equal_p (lhs1, lhs2, 0); 1181 1182 case GIMPLE_ASSIGN: 1183 lhs1 = gimple_get_lhs (s1); 1184 lhs2 = gimple_get_lhs (s2); 1185 if (TREE_CODE (lhs1) != SSA_NAME 1186 && TREE_CODE (lhs2) != SSA_NAME) 1187 return (operand_equal_p (lhs1, lhs2, 0) 1188 && gimple_operand_equal_value_p (gimple_assign_rhs1 (s1), 1189 gimple_assign_rhs1 (s2))); 1190 else if (TREE_CODE (lhs1) == SSA_NAME 1191 && TREE_CODE (lhs2) == SSA_NAME) 1192 return operand_equal_p (gimple_assign_rhs1 (s1), 1193 gimple_assign_rhs1 (s2), 0); 1194 return false; 1195 1196 case GIMPLE_COND: 1197 t1 = gimple_cond_lhs (s1); 1198 t2 = gimple_cond_lhs (s2); 1199 if (!gimple_operand_equal_value_p (t1, t2)) 1200 return false; 1201 1202 t1 = gimple_cond_rhs (s1); 1203 t2 = gimple_cond_rhs (s2); 1204 if (!gimple_operand_equal_value_p (t1, t2)) 1205 return false; 1206 1207 code1 = gimple_expr_code (s1); 1208 code2 = gimple_expr_code (s2); 1209 inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index) 1210 != bitmap_bit_p (same_succ->inverse, bb2->index)); 1211 if (inv_cond) 1212 { 1213 bool honor_nans = HONOR_NANS (t1); 1214 code2 = invert_tree_comparison (code2, honor_nans); 1215 } 1216 return code1 == code2; 1217 1218 default: 1219 return false; 1220 } 1221 } 1222 1223 /* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE. 1224 Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the 1225 processed statements. */ 1226 1227 static void 1228 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse, 1229 bool *vuse_escaped) 1230 { 1231 gimple stmt; 1232 tree lvuse; 1233 1234 while (true) 1235 { 1236 if (gsi_end_p (*gsi)) 1237 return; 1238 stmt = gsi_stmt (*gsi); 1239 1240 lvuse = gimple_vuse (stmt); 1241 if (lvuse != NULL_TREE) 1242 { 1243 *vuse = lvuse; 1244 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF)) 1245 *vuse_escaped = true; 1246 } 1247 1248 if (!stmt_local_def (stmt)) 1249 return; 1250 gsi_prev_nondebug (gsi); 1251 } 1252 } 1253 1254 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so, 1255 clusters them. */ 1256 1257 static void 1258 find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2) 1259 { 1260 gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1); 1261 gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2); 1262 tree vuse1 = NULL_TREE, vuse2 = NULL_TREE; 1263 bool vuse_escaped = false; 1264 1265 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped); 1266 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped); 1267 1268 while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2)) 1269 { 1270 gimple stmt1 = gsi_stmt (gsi1); 1271 gimple stmt2 = gsi_stmt (gsi2); 1272 1273 /* What could be better than to this this here is to blacklist the bb 1274 containing the stmt, when encountering the stmt f.i. in 1275 same_succ_hash. */ 1276 if (is_tm_ending (stmt1) 1277 || is_tm_ending (stmt2)) 1278 return; 1279 1280 /* Verify EH landing pads. */ 1281 if (lookup_stmt_eh_lp_fn (cfun, stmt1) 1282 != lookup_stmt_eh_lp_fn (cfun, stmt2)) 1283 return; 1284 1285 if (!gimple_equal_p (same_succ, stmt1, stmt2)) 1286 return; 1287 1288 gsi_prev_nondebug (&gsi1); 1289 gsi_prev_nondebug (&gsi2); 1290 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped); 1291 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped); 1292 } 1293 1294 if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2))) 1295 return; 1296 1297 /* If the incoming vuses are not the same, and the vuse escaped into an 1298 SSA_OP_DEF, then merging the 2 blocks will change the value of the def, 1299 which potentially means the semantics of one of the blocks will be changed. 1300 TODO: make this check more precise. */ 1301 if (vuse_escaped && vuse1 != vuse2) 1302 return; 1303 1304 if (dump_file) 1305 fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n", 1306 bb1->index, bb2->index); 1307 1308 set_cluster (bb1, bb2); 1309 } 1310 1311 /* Returns whether for all phis in DEST the phi alternatives for E1 and 1312 E2 are equal. */ 1313 1314 static bool 1315 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2) 1316 { 1317 int n1 = e1->dest_idx, n2 = e2->dest_idx; 1318 gphi_iterator gsi; 1319 1320 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi)) 1321 { 1322 gphi *phi = gsi.phi (); 1323 tree lhs = gimple_phi_result (phi); 1324 tree val1 = gimple_phi_arg_def (phi, n1); 1325 tree val2 = gimple_phi_arg_def (phi, n2); 1326 1327 if (virtual_operand_p (lhs)) 1328 continue; 1329 1330 if (operand_equal_for_phi_arg_p (val1, val2)) 1331 continue; 1332 if (gvn_uses_equal (val1, val2)) 1333 continue; 1334 1335 return false; 1336 } 1337 1338 return true; 1339 } 1340 1341 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the 1342 phi alternatives for BB1 and BB2 are equal. */ 1343 1344 static bool 1345 same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2) 1346 { 1347 unsigned int s; 1348 bitmap_iterator bs; 1349 edge e1, e2; 1350 basic_block succ; 1351 1352 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs) 1353 { 1354 succ = BASIC_BLOCK_FOR_FN (cfun, s); 1355 e1 = find_edge (bb1, succ); 1356 e2 = find_edge (bb2, succ); 1357 if (e1->flags & EDGE_COMPLEX 1358 || e2->flags & EDGE_COMPLEX) 1359 return false; 1360 1361 /* For all phis in bb, the phi alternatives for e1 and e2 need to have 1362 the same value. */ 1363 if (!same_phi_alternatives_1 (succ, e1, e2)) 1364 return false; 1365 } 1366 1367 return true; 1368 } 1369 1370 /* Return true if BB has non-vop phis. */ 1371 1372 static bool 1373 bb_has_non_vop_phi (basic_block bb) 1374 { 1375 gimple_seq phis = phi_nodes (bb); 1376 gimple phi; 1377 1378 if (phis == NULL) 1379 return false; 1380 1381 if (!gimple_seq_singleton_p (phis)) 1382 return true; 1383 1384 phi = gimple_seq_first_stmt (phis); 1385 return !virtual_operand_p (gimple_phi_result (phi)); 1386 } 1387 1388 /* Returns true if redirecting the incoming edges of FROM to TO maintains the 1389 invariant that uses in FROM are dominates by their defs. */ 1390 1391 static bool 1392 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to) 1393 { 1394 basic_block cd, dep_bb = BB_DEP_BB (to); 1395 edge_iterator ei; 1396 edge e; 1397 bitmap from_preds = BITMAP_ALLOC (NULL); 1398 1399 if (dep_bb == NULL) 1400 return true; 1401 1402 FOR_EACH_EDGE (e, ei, from->preds) 1403 bitmap_set_bit (from_preds, e->src->index); 1404 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds); 1405 BITMAP_FREE (from_preds); 1406 1407 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd); 1408 } 1409 1410 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its 1411 replacement bb) and vice versa maintains the invariant that uses in the 1412 replacement are dominates by their defs. */ 1413 1414 static bool 1415 deps_ok_for_redirect (basic_block bb1, basic_block bb2) 1416 { 1417 if (BB_CLUSTER (bb1) != NULL) 1418 bb1 = BB_CLUSTER (bb1)->rep_bb; 1419 1420 if (BB_CLUSTER (bb2) != NULL) 1421 bb2 = BB_CLUSTER (bb2)->rep_bb; 1422 1423 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2) 1424 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1)); 1425 } 1426 1427 /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */ 1428 1429 static void 1430 find_clusters_1 (same_succ same_succ) 1431 { 1432 basic_block bb1, bb2; 1433 unsigned int i, j; 1434 bitmap_iterator bi, bj; 1435 int nr_comparisons; 1436 int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS); 1437 1438 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi) 1439 { 1440 bb1 = BASIC_BLOCK_FOR_FN (cfun, i); 1441 1442 /* TODO: handle blocks with phi-nodes. We'll have to find corresponding 1443 phi-nodes in bb1 and bb2, with the same alternatives for the same 1444 preds. */ 1445 if (bb_has_non_vop_phi (bb1)) 1446 continue; 1447 1448 nr_comparisons = 0; 1449 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj) 1450 { 1451 bb2 = BASIC_BLOCK_FOR_FN (cfun, j); 1452 1453 if (bb_has_non_vop_phi (bb2)) 1454 continue; 1455 1456 if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2)) 1457 continue; 1458 1459 /* Limit quadratic behaviour. */ 1460 nr_comparisons++; 1461 if (nr_comparisons > max_comparisons) 1462 break; 1463 1464 /* This is a conservative dependency check. We could test more 1465 precise for allowed replacement direction. */ 1466 if (!deps_ok_for_redirect (bb1, bb2)) 1467 continue; 1468 1469 if (!(same_phi_alternatives (same_succ, bb1, bb2))) 1470 continue; 1471 1472 find_duplicate (same_succ, bb1, bb2); 1473 } 1474 } 1475 } 1476 1477 /* Find clusters of bbs which can be merged. */ 1478 1479 static void 1480 find_clusters (void) 1481 { 1482 same_succ same; 1483 1484 while (!worklist.is_empty ()) 1485 { 1486 same = worklist.pop (); 1487 same->in_worklist = false; 1488 if (dump_file && (dump_flags & TDF_DETAILS)) 1489 { 1490 fprintf (dump_file, "processing worklist entry\n"); 1491 same_succ_print (dump_file, same); 1492 } 1493 find_clusters_1 (same); 1494 } 1495 } 1496 1497 /* Returns the vop phi of BB, if any. */ 1498 1499 static gphi * 1500 vop_phi (basic_block bb) 1501 { 1502 gphi *stmt; 1503 gphi_iterator gsi; 1504 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1505 { 1506 stmt = gsi.phi (); 1507 if (! virtual_operand_p (gimple_phi_result (stmt))) 1508 continue; 1509 return stmt; 1510 } 1511 return NULL; 1512 } 1513 1514 /* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed. */ 1515 1516 static void 1517 replace_block_by (basic_block bb1, basic_block bb2) 1518 { 1519 edge pred_edge; 1520 edge e1, e2; 1521 edge_iterator ei; 1522 unsigned int i; 1523 gphi *bb2_phi; 1524 1525 bb2_phi = vop_phi (bb2); 1526 1527 /* Mark the basic block as deleted. */ 1528 mark_basic_block_deleted (bb1); 1529 1530 /* Redirect the incoming edges of bb1 to bb2. */ 1531 for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i) 1532 { 1533 pred_edge = EDGE_PRED (bb1, i - 1); 1534 pred_edge = redirect_edge_and_branch (pred_edge, bb2); 1535 gcc_assert (pred_edge != NULL); 1536 1537 if (bb2_phi == NULL) 1538 continue; 1539 1540 /* The phi might have run out of capacity when the redirect added an 1541 argument, which means it could have been replaced. Refresh it. */ 1542 bb2_phi = vop_phi (bb2); 1543 1544 add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)), 1545 pred_edge, UNKNOWN_LOCATION); 1546 } 1547 1548 bb2->frequency += bb1->frequency; 1549 if (bb2->frequency > BB_FREQ_MAX) 1550 bb2->frequency = BB_FREQ_MAX; 1551 1552 bb2->count += bb1->count; 1553 1554 /* Merge the outgoing edge counts from bb1 onto bb2. */ 1555 gcov_type out_sum = 0; 1556 FOR_EACH_EDGE (e1, ei, bb1->succs) 1557 { 1558 e2 = find_edge (bb2, e1->dest); 1559 gcc_assert (e2); 1560 e2->count += e1->count; 1561 out_sum += e2->count; 1562 } 1563 /* Recompute the edge probabilities from the new merged edge count. 1564 Use the sum of the new merged edge counts computed above instead 1565 of bb2's merged count, in case there are profile count insanities 1566 making the bb count inconsistent with the edge weights. */ 1567 FOR_EACH_EDGE (e2, ei, bb2->succs) 1568 { 1569 e2->probability = GCOV_COMPUTE_SCALE (e2->count, out_sum); 1570 } 1571 1572 /* Clear range info from all stmts in BB2 -- this transformation 1573 could make them out of date. */ 1574 reset_flow_sensitive_info_in_bb (bb2); 1575 1576 /* Do updates that use bb1, before deleting bb1. */ 1577 release_last_vdef (bb1); 1578 same_succ_flush_bb (bb1); 1579 1580 delete_basic_block (bb1); 1581 } 1582 1583 /* Bbs for which update_debug_stmt need to be called. */ 1584 1585 static bitmap update_bbs; 1586 1587 /* For each cluster in all_clusters, merge all cluster->bbs. Returns 1588 number of bbs removed. */ 1589 1590 static int 1591 apply_clusters (void) 1592 { 1593 basic_block bb1, bb2; 1594 bb_cluster c; 1595 unsigned int i, j; 1596 bitmap_iterator bj; 1597 int nr_bbs_removed = 0; 1598 1599 for (i = 0; i < all_clusters.length (); ++i) 1600 { 1601 c = all_clusters[i]; 1602 if (c == NULL) 1603 continue; 1604 1605 bb2 = c->rep_bb; 1606 bitmap_set_bit (update_bbs, bb2->index); 1607 1608 bitmap_clear_bit (c->bbs, bb2->index); 1609 EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj) 1610 { 1611 bb1 = BASIC_BLOCK_FOR_FN (cfun, j); 1612 bitmap_clear_bit (update_bbs, bb1->index); 1613 1614 replace_block_by (bb1, bb2); 1615 nr_bbs_removed++; 1616 } 1617 } 1618 1619 return nr_bbs_removed; 1620 } 1621 1622 /* Resets debug statement STMT if it has uses that are not dominated by their 1623 defs. */ 1624 1625 static void 1626 update_debug_stmt (gimple stmt) 1627 { 1628 use_operand_p use_p; 1629 ssa_op_iter oi; 1630 basic_block bbuse; 1631 1632 if (!gimple_debug_bind_p (stmt)) 1633 return; 1634 1635 bbuse = gimple_bb (stmt); 1636 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE) 1637 { 1638 tree name = USE_FROM_PTR (use_p); 1639 gimple def_stmt = SSA_NAME_DEF_STMT (name); 1640 basic_block bbdef = gimple_bb (def_stmt); 1641 if (bbdef == NULL || bbuse == bbdef 1642 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)) 1643 continue; 1644 1645 gimple_debug_bind_reset_value (stmt); 1646 update_stmt (stmt); 1647 break; 1648 } 1649 } 1650 1651 /* Resets all debug statements that have uses that are not 1652 dominated by their defs. */ 1653 1654 static void 1655 update_debug_stmts (void) 1656 { 1657 basic_block bb; 1658 bitmap_iterator bi; 1659 unsigned int i; 1660 1661 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi) 1662 { 1663 gimple stmt; 1664 gimple_stmt_iterator gsi; 1665 1666 bb = BASIC_BLOCK_FOR_FN (cfun, i); 1667 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1668 { 1669 stmt = gsi_stmt (gsi); 1670 if (!is_gimple_debug (stmt)) 1671 continue; 1672 update_debug_stmt (stmt); 1673 } 1674 } 1675 } 1676 1677 /* Runs tail merge optimization. */ 1678 1679 unsigned int 1680 tail_merge_optimize (unsigned int todo) 1681 { 1682 int nr_bbs_removed_total = 0; 1683 int nr_bbs_removed; 1684 bool loop_entered = false; 1685 int iteration_nr = 0; 1686 int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS); 1687 1688 if (!flag_tree_tail_merge 1689 || max_iterations == 0) 1690 return 0; 1691 1692 timevar_push (TV_TREE_TAIL_MERGE); 1693 1694 if (!dom_info_available_p (CDI_DOMINATORS)) 1695 { 1696 /* PRE can leave us with unreachable blocks, remove them now. */ 1697 delete_unreachable_blocks (); 1698 calculate_dominance_info (CDI_DOMINATORS); 1699 } 1700 init_worklist (); 1701 1702 while (!worklist.is_empty ()) 1703 { 1704 if (!loop_entered) 1705 { 1706 loop_entered = true; 1707 alloc_cluster_vectors (); 1708 update_bbs = BITMAP_ALLOC (NULL); 1709 } 1710 else 1711 reset_cluster_vectors (); 1712 1713 iteration_nr++; 1714 if (dump_file && (dump_flags & TDF_DETAILS)) 1715 fprintf (dump_file, "worklist iteration #%d\n", iteration_nr); 1716 1717 find_clusters (); 1718 gcc_assert (worklist.is_empty ()); 1719 if (all_clusters.is_empty ()) 1720 break; 1721 1722 nr_bbs_removed = apply_clusters (); 1723 nr_bbs_removed_total += nr_bbs_removed; 1724 if (nr_bbs_removed == 0) 1725 break; 1726 1727 free_dominance_info (CDI_DOMINATORS); 1728 1729 if (iteration_nr == max_iterations) 1730 break; 1731 1732 calculate_dominance_info (CDI_DOMINATORS); 1733 update_worklist (); 1734 } 1735 1736 if (dump_file && (dump_flags & TDF_DETAILS)) 1737 fprintf (dump_file, "htab collision / search: %f\n", 1738 same_succ_htab->collisions ()); 1739 1740 if (nr_bbs_removed_total > 0) 1741 { 1742 if (MAY_HAVE_DEBUG_STMTS) 1743 { 1744 calculate_dominance_info (CDI_DOMINATORS); 1745 update_debug_stmts (); 1746 } 1747 1748 if (dump_file && (dump_flags & TDF_DETAILS)) 1749 { 1750 fprintf (dump_file, "Before TODOs.\n"); 1751 dump_function_to_file (current_function_decl, dump_file, dump_flags); 1752 } 1753 1754 mark_virtual_operands_for_renaming (cfun); 1755 } 1756 1757 delete_worklist (); 1758 if (loop_entered) 1759 { 1760 delete_cluster_vectors (); 1761 BITMAP_FREE (update_bbs); 1762 } 1763 1764 timevar_pop (TV_TREE_TAIL_MERGE); 1765 1766 return todo; 1767 } 1768