1 /* Tail merging for gimple. 2 Copyright (C) 2011-2013 Free Software Foundation, Inc. 3 Contributed by Tom de Vries (tom@codesourcery.com) 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 GCC is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 /* Pass overview. 22 23 24 MOTIVATIONAL EXAMPLE 25 26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at 27 28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601) 29 { 30 struct FILED.1638 * fpD.2605; 31 charD.1 fileNameD.2604[1000]; 32 intD.0 D.3915; 33 const charD.1 * restrict outputFileName.0D.3914; 34 35 # BLOCK 2 freq:10000 36 # PRED: ENTRY [100.0%] (fallthru,exec) 37 # PT = nonlocal { D.3926 } (restr) 38 outputFileName.0D.3914_3 39 = (const charD.1 * restrict) outputFileNameD.2600_2(D); 40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)> 41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3); 44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13> 45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1); 48 if (D.3915_4 == 0) 49 goto <bb 3>; 50 else 51 goto <bb 4>; 52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec) 53 54 # BLOCK 3 freq:1000 55 # PRED: 2 [10.0%] (true,exec) 56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14> 57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 59 freeD.898 (ctxD.2601_5(D)); 60 goto <bb 7>; 61 # SUCC: 7 [100.0%] (fallthru,exec) 62 63 # BLOCK 4 freq:9000 64 # PRED: 2 [90.0%] (false,exec) 65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14> 66 # PT = nonlocal escaped 67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B); 70 if (fpD.2605_8 == 0B) 71 goto <bb 5>; 72 else 73 goto <bb 6>; 74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec) 75 76 # BLOCK 5 freq:173 77 # PRED: 4 [1.9%] (true,exec) 78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16> 79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 81 freeD.898 (ctxD.2601_5(D)); 82 goto <bb 7>; 83 # SUCC: 7 [100.0%] (fallthru,exec) 84 85 # BLOCK 6 freq:8827 86 # PRED: 4 [98.1%] (false,exec) 87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16> 88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8); 91 # SUCC: 7 [100.0%] (fallthru,exec) 92 93 # BLOCK 7 freq:10000 94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec) 95 6 [100.0%] (fallthru,exec) 96 # PT = nonlocal null 97 98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)> 99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5), 100 .MEMD.3923_18(6)> 101 # VUSE <.MEMD.3923_11> 102 return ctxD.2601_1; 103 # SUCC: EXIT [100.0%] 104 } 105 106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the 107 same successors, and the same operations. 108 109 110 CONTEXT 111 112 A technique called tail merging (or cross jumping) can fix the example 113 above. For a block, we look for common code at the end (the tail) of the 114 predecessor blocks, and insert jumps from one block to the other. 115 The example is a special case for tail merging, in that 2 whole blocks 116 can be merged, rather than just the end parts of it. 117 We currently only focus on whole block merging, so in that sense 118 calling this pass tail merge is a bit of a misnomer. 119 120 We distinguish 2 kinds of situations in which blocks can be merged: 121 - same operations, same predecessors. The successor edges coming from one 122 block are redirected to come from the other block. 123 - same operations, same successors. The predecessor edges entering one block 124 are redirected to enter the other block. Note that this operation might 125 involve introducing phi operations. 126 127 For efficient implementation, we would like to value numbers the blocks, and 128 have a comparison operator that tells us whether the blocks are equal. 129 Besides being runtime efficient, block value numbering should also abstract 130 from irrelevant differences in order of operations, much like normal value 131 numbering abstracts from irrelevant order of operations. 132 133 For the first situation (same_operations, same predecessors), normal value 134 numbering fits well. We can calculate a block value number based on the 135 value numbers of the defs and vdefs. 136 137 For the second situation (same operations, same successors), this approach 138 doesn't work so well. We can illustrate this using the example. The calls 139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will 140 remain different in value numbering, since they represent different memory 141 states. So the resulting vdefs of the frees will be different in value 142 numbering, so the block value numbers will be different. 143 144 The reason why we call the blocks equal is not because they define the same 145 values, but because uses in the blocks use (possibly different) defs in the 146 same way. To be able to detect this efficiently, we need to do some kind of 147 reverse value numbering, meaning number the uses rather than the defs, and 148 calculate a block value number based on the value number of the uses. 149 Ideally, a block comparison operator will also indicate which phis are needed 150 to merge the blocks. 151 152 For the moment, we don't do block value numbering, but we do insn-by-insn 153 matching, using scc value numbers to match operations with results, and 154 structural comparison otherwise, while ignoring vop mismatches. 155 156 157 IMPLEMENTATION 158 159 1. The pass first determines all groups of blocks with the same successor 160 blocks. 161 2. Within each group, it tries to determine clusters of equal basic blocks. 162 3. The clusters are applied. 163 4. The same successor groups are updated. 164 5. This process is repeated from 2 onwards, until no more changes. 165 166 167 LIMITATIONS/TODO 168 169 - block only 170 - handles only 'same operations, same successors'. 171 It handles same predecessors as a special subcase though. 172 - does not implement the reverse value numbering and block value numbering. 173 - improve memory allocation: use garbage collected memory, obstacks, 174 allocpools where appropriate. 175 - no insertion of gimple_reg phis, We only introduce vop-phis. 176 - handle blocks with gimple_reg phi_nodes. 177 178 179 SWITCHES 180 181 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */ 182 183 #include "config.h" 184 #include "system.h" 185 #include "coretypes.h" 186 #include "tm.h" 187 #include "tree.h" 188 #include "tm_p.h" 189 #include "basic-block.h" 190 #include "flags.h" 191 #include "function.h" 192 #include "tree-flow.h" 193 #include "bitmap.h" 194 #include "tree-ssa-alias.h" 195 #include "params.h" 196 #include "hash-table.h" 197 #include "gimple-pretty-print.h" 198 #include "tree-ssa-sccvn.h" 199 #include "tree-dump.h" 200 201 /* ??? This currently runs as part of tree-ssa-pre. Why is this not 202 a stand-alone GIMPLE pass? */ 203 #include "tree-pass.h" 204 205 /* Describes a group of bbs with the same successors. The successor bbs are 206 cached in succs, and the successor edge flags are cached in succ_flags. 207 If a bb has the EDGE_TRUE/VALSE_VALUE flags swapped compared to succ_flags, 208 it's marked in inverse. 209 Additionally, the hash value for the struct is cached in hashval, and 210 in_worklist indicates whether it's currently part of worklist. */ 211 212 struct same_succ_def 213 { 214 /* The bbs that have the same successor bbs. */ 215 bitmap bbs; 216 /* The successor bbs. */ 217 bitmap succs; 218 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for 219 bb. */ 220 bitmap inverse; 221 /* The edge flags for each of the successor bbs. */ 222 vec<int> succ_flags; 223 /* Indicates whether the struct is currently in the worklist. */ 224 bool in_worklist; 225 /* The hash value of the struct. */ 226 hashval_t hashval; 227 228 /* hash_table support. */ 229 typedef same_succ_def value_type; 230 typedef same_succ_def compare_type; 231 static inline hashval_t hash (const value_type *); 232 static int equal (const value_type *, const compare_type *); 233 static void remove (value_type *); 234 }; 235 typedef struct same_succ_def *same_succ; 236 typedef const struct same_succ_def *const_same_succ; 237 238 /* hash routine for hash_table support, returns hashval of E. */ 239 240 inline hashval_t 241 same_succ_def::hash (const value_type *e) 242 { 243 return e->hashval; 244 } 245 246 /* A group of bbs where 1 bb from bbs can replace the other bbs. */ 247 248 struct bb_cluster_def 249 { 250 /* The bbs in the cluster. */ 251 bitmap bbs; 252 /* The preds of the bbs in the cluster. */ 253 bitmap preds; 254 /* Index in all_clusters vector. */ 255 int index; 256 /* The bb to replace the cluster with. */ 257 basic_block rep_bb; 258 }; 259 typedef struct bb_cluster_def *bb_cluster; 260 typedef const struct bb_cluster_def *const_bb_cluster; 261 262 /* Per bb-info. */ 263 264 struct aux_bb_info 265 { 266 /* The number of non-debug statements in the bb. */ 267 int size; 268 /* The same_succ that this bb is a member of. */ 269 same_succ bb_same_succ; 270 /* The cluster that this bb is a member of. */ 271 bb_cluster cluster; 272 /* The vop state at the exit of a bb. This is shortlived data, used to 273 communicate data between update_block_by and update_vuses. */ 274 tree vop_at_exit; 275 /* The bb that either contains or is dominated by the dependencies of the 276 bb. */ 277 basic_block dep_bb; 278 }; 279 280 /* Macros to access the fields of struct aux_bb_info. */ 281 282 #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size) 283 #define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ) 284 #define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster) 285 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit) 286 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb) 287 288 /* Returns true if the only effect a statement STMT has, is to define locally 289 used SSA_NAMEs. */ 290 291 static bool 292 stmt_local_def (gimple stmt) 293 { 294 basic_block bb, def_bb; 295 imm_use_iterator iter; 296 use_operand_p use_p; 297 tree val; 298 def_operand_p def_p; 299 300 if (gimple_has_side_effects (stmt) 301 || gimple_vdef (stmt) != NULL_TREE 302 || gimple_vuse (stmt) != NULL_TREE) 303 return false; 304 305 def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF); 306 if (def_p == NULL) 307 return false; 308 309 val = DEF_FROM_PTR (def_p); 310 if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME) 311 return false; 312 313 def_bb = gimple_bb (stmt); 314 315 FOR_EACH_IMM_USE_FAST (use_p, iter, val) 316 { 317 if (is_gimple_debug (USE_STMT (use_p))) 318 continue; 319 bb = gimple_bb (USE_STMT (use_p)); 320 if (bb == def_bb) 321 continue; 322 323 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI 324 && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb) 325 continue; 326 327 return false; 328 } 329 330 return true; 331 } 332 333 /* Let GSI skip forwards over local defs. */ 334 335 static void 336 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi) 337 { 338 gimple stmt; 339 340 while (true) 341 { 342 if (gsi_end_p (*gsi)) 343 return; 344 stmt = gsi_stmt (*gsi); 345 if (!stmt_local_def (stmt)) 346 return; 347 gsi_next_nondebug (gsi); 348 } 349 } 350 351 /* VAL1 and VAL2 are either: 352 - uses in BB1 and BB2, or 353 - phi alternatives for BB1 and BB2. 354 Return true if the uses have the same gvn value. */ 355 356 static bool 357 gvn_uses_equal (tree val1, tree val2) 358 { 359 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE); 360 361 if (val1 == val2) 362 return true; 363 364 if (vn_valueize (val1) != vn_valueize (val2)) 365 return false; 366 367 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1)) 368 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2))); 369 } 370 371 /* Prints E to FILE. */ 372 373 static void 374 same_succ_print (FILE *file, const same_succ e) 375 { 376 unsigned int i; 377 bitmap_print (file, e->bbs, "bbs:", "\n"); 378 bitmap_print (file, e->succs, "succs:", "\n"); 379 bitmap_print (file, e->inverse, "inverse:", "\n"); 380 fprintf (file, "flags:"); 381 for (i = 0; i < e->succ_flags.length (); ++i) 382 fprintf (file, " %x", e->succ_flags[i]); 383 fprintf (file, "\n"); 384 } 385 386 /* Prints same_succ VE to VFILE. */ 387 388 inline int 389 ssa_same_succ_print_traverse (same_succ *pe, FILE *file) 390 { 391 const same_succ e = *pe; 392 same_succ_print (file, e); 393 return 1; 394 } 395 396 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */ 397 398 static void 399 update_dep_bb (basic_block use_bb, tree val) 400 { 401 basic_block dep_bb; 402 403 /* Not a dep. */ 404 if (TREE_CODE (val) != SSA_NAME) 405 return; 406 407 /* Skip use of global def. */ 408 if (SSA_NAME_IS_DEFAULT_DEF (val)) 409 return; 410 411 /* Skip use of local def. */ 412 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val)); 413 if (dep_bb == use_bb) 414 return; 415 416 if (BB_DEP_BB (use_bb) == NULL 417 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb))) 418 BB_DEP_BB (use_bb) = dep_bb; 419 } 420 421 /* Update BB_DEP_BB, given the dependencies in STMT. */ 422 423 static void 424 stmt_update_dep_bb (gimple stmt) 425 { 426 ssa_op_iter iter; 427 use_operand_p use; 428 429 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE) 430 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use)); 431 } 432 433 /* Calculates hash value for same_succ VE. */ 434 435 static hashval_t 436 same_succ_hash (const_same_succ e) 437 { 438 hashval_t hashval = bitmap_hash (e->succs); 439 int flags; 440 unsigned int i; 441 unsigned int first = bitmap_first_set_bit (e->bbs); 442 basic_block bb = BASIC_BLOCK (first); 443 int size = 0; 444 gimple_stmt_iterator gsi; 445 gimple stmt; 446 tree arg; 447 unsigned int s; 448 bitmap_iterator bs; 449 450 for (gsi = gsi_start_nondebug_bb (bb); 451 !gsi_end_p (gsi); gsi_next_nondebug (&gsi)) 452 { 453 stmt = gsi_stmt (gsi); 454 stmt_update_dep_bb (stmt); 455 if (stmt_local_def (stmt)) 456 continue; 457 size++; 458 459 hashval = iterative_hash_hashval_t (gimple_code (stmt), hashval); 460 if (is_gimple_assign (stmt)) 461 hashval = iterative_hash_hashval_t (gimple_assign_rhs_code (stmt), 462 hashval); 463 if (!is_gimple_call (stmt)) 464 continue; 465 if (gimple_call_internal_p (stmt)) 466 hashval = iterative_hash_hashval_t 467 ((hashval_t) gimple_call_internal_fn (stmt), hashval); 468 else 469 hashval = iterative_hash_expr (gimple_call_fn (stmt), hashval); 470 for (i = 0; i < gimple_call_num_args (stmt); i++) 471 { 472 arg = gimple_call_arg (stmt, i); 473 arg = vn_valueize (arg); 474 hashval = iterative_hash_expr (arg, hashval); 475 } 476 } 477 478 hashval = iterative_hash_hashval_t (size, hashval); 479 BB_SIZE (bb) = size; 480 481 for (i = 0; i < e->succ_flags.length (); ++i) 482 { 483 flags = e->succ_flags[i]; 484 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); 485 hashval = iterative_hash_hashval_t (flags, hashval); 486 } 487 488 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs) 489 { 490 int n = find_edge (bb, BASIC_BLOCK (s))->dest_idx; 491 for (gsi = gsi_start_phis (BASIC_BLOCK (s)); !gsi_end_p (gsi); 492 gsi_next (&gsi)) 493 { 494 gimple phi = gsi_stmt (gsi); 495 tree lhs = gimple_phi_result (phi); 496 tree val = gimple_phi_arg_def (phi, n); 497 498 if (virtual_operand_p (lhs)) 499 continue; 500 update_dep_bb (bb, val); 501 } 502 } 503 504 return hashval; 505 } 506 507 /* Returns true if E1 and E2 have 2 successors, and if the successor flags 508 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for 509 the other edge flags. */ 510 511 static bool 512 inverse_flags (const_same_succ e1, const_same_succ e2) 513 { 514 int f1a, f1b, f2a, f2b; 515 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); 516 517 if (e1->succ_flags.length () != 2) 518 return false; 519 520 f1a = e1->succ_flags[0]; 521 f1b = e1->succ_flags[1]; 522 f2a = e2->succ_flags[0]; 523 f2b = e2->succ_flags[1]; 524 525 if (f1a == f2a && f1b == f2b) 526 return false; 527 528 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask); 529 } 530 531 /* Compares SAME_SUCCs E1 and E2. */ 532 533 int 534 same_succ_def::equal (const value_type *e1, const compare_type *e2) 535 { 536 unsigned int i, first1, first2; 537 gimple_stmt_iterator gsi1, gsi2; 538 gimple s1, s2; 539 basic_block bb1, bb2; 540 541 if (e1->hashval != e2->hashval) 542 return 0; 543 544 if (e1->succ_flags.length () != e2->succ_flags.length ()) 545 return 0; 546 547 if (!bitmap_equal_p (e1->succs, e2->succs)) 548 return 0; 549 550 if (!inverse_flags (e1, e2)) 551 { 552 for (i = 0; i < e1->succ_flags.length (); ++i) 553 if (e1->succ_flags[i] != e2->succ_flags[i]) 554 return 0; 555 } 556 557 first1 = bitmap_first_set_bit (e1->bbs); 558 first2 = bitmap_first_set_bit (e2->bbs); 559 560 bb1 = BASIC_BLOCK (first1); 561 bb2 = BASIC_BLOCK (first2); 562 563 if (BB_SIZE (bb1) != BB_SIZE (bb2)) 564 return 0; 565 566 gsi1 = gsi_start_nondebug_bb (bb1); 567 gsi2 = gsi_start_nondebug_bb (bb2); 568 gsi_advance_fw_nondebug_nonlocal (&gsi1); 569 gsi_advance_fw_nondebug_nonlocal (&gsi2); 570 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2))) 571 { 572 s1 = gsi_stmt (gsi1); 573 s2 = gsi_stmt (gsi2); 574 if (gimple_code (s1) != gimple_code (s2)) 575 return 0; 576 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2)) 577 return 0; 578 gsi_next_nondebug (&gsi1); 579 gsi_next_nondebug (&gsi2); 580 gsi_advance_fw_nondebug_nonlocal (&gsi1); 581 gsi_advance_fw_nondebug_nonlocal (&gsi2); 582 } 583 584 return 1; 585 } 586 587 /* Alloc and init a new SAME_SUCC. */ 588 589 static same_succ 590 same_succ_alloc (void) 591 { 592 same_succ same = XNEW (struct same_succ_def); 593 594 same->bbs = BITMAP_ALLOC (NULL); 595 same->succs = BITMAP_ALLOC (NULL); 596 same->inverse = BITMAP_ALLOC (NULL); 597 same->succ_flags.create (10); 598 same->in_worklist = false; 599 600 return same; 601 } 602 603 /* Delete same_succ E. */ 604 605 void 606 same_succ_def::remove (same_succ e) 607 { 608 BITMAP_FREE (e->bbs); 609 BITMAP_FREE (e->succs); 610 BITMAP_FREE (e->inverse); 611 e->succ_flags.release (); 612 613 XDELETE (e); 614 } 615 616 /* Reset same_succ SAME. */ 617 618 static void 619 same_succ_reset (same_succ same) 620 { 621 bitmap_clear (same->bbs); 622 bitmap_clear (same->succs); 623 bitmap_clear (same->inverse); 624 same->succ_flags.truncate (0); 625 } 626 627 static hash_table <same_succ_def> same_succ_htab; 628 629 /* Array that is used to store the edge flags for a successor. */ 630 631 static int *same_succ_edge_flags; 632 633 /* Bitmap that is used to mark bbs that are recently deleted. */ 634 635 static bitmap deleted_bbs; 636 637 /* Bitmap that is used to mark predecessors of bbs that are 638 deleted. */ 639 640 static bitmap deleted_bb_preds; 641 642 /* Prints same_succ_htab to stderr. */ 643 644 extern void debug_same_succ (void); 645 DEBUG_FUNCTION void 646 debug_same_succ ( void) 647 { 648 same_succ_htab.traverse <FILE *, ssa_same_succ_print_traverse> (stderr); 649 } 650 651 652 /* Vector of bbs to process. */ 653 654 static vec<same_succ> worklist; 655 656 /* Prints worklist to FILE. */ 657 658 static void 659 print_worklist (FILE *file) 660 { 661 unsigned int i; 662 for (i = 0; i < worklist.length (); ++i) 663 same_succ_print (file, worklist[i]); 664 } 665 666 /* Adds SAME to worklist. */ 667 668 static void 669 add_to_worklist (same_succ same) 670 { 671 if (same->in_worklist) 672 return; 673 674 if (bitmap_count_bits (same->bbs) < 2) 675 return; 676 677 same->in_worklist = true; 678 worklist.safe_push (same); 679 } 680 681 /* Add BB to same_succ_htab. */ 682 683 static void 684 find_same_succ_bb (basic_block bb, same_succ *same_p) 685 { 686 unsigned int j; 687 bitmap_iterator bj; 688 same_succ same = *same_p; 689 same_succ *slot; 690 edge_iterator ei; 691 edge e; 692 693 if (bb == NULL) 694 return; 695 bitmap_set_bit (same->bbs, bb->index); 696 FOR_EACH_EDGE (e, ei, bb->succs) 697 { 698 int index = e->dest->index; 699 bitmap_set_bit (same->succs, index); 700 same_succ_edge_flags[index] = e->flags; 701 } 702 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj) 703 same->succ_flags.safe_push (same_succ_edge_flags[j]); 704 705 same->hashval = same_succ_hash (same); 706 707 slot = same_succ_htab.find_slot_with_hash (same, same->hashval, INSERT); 708 if (*slot == NULL) 709 { 710 *slot = same; 711 BB_SAME_SUCC (bb) = same; 712 add_to_worklist (same); 713 *same_p = NULL; 714 } 715 else 716 { 717 bitmap_set_bit ((*slot)->bbs, bb->index); 718 BB_SAME_SUCC (bb) = *slot; 719 add_to_worklist (*slot); 720 if (inverse_flags (same, *slot)) 721 bitmap_set_bit ((*slot)->inverse, bb->index); 722 same_succ_reset (same); 723 } 724 } 725 726 /* Find bbs with same successors. */ 727 728 static void 729 find_same_succ (void) 730 { 731 same_succ same = same_succ_alloc (); 732 basic_block bb; 733 734 FOR_EACH_BB (bb) 735 { 736 find_same_succ_bb (bb, &same); 737 if (same == NULL) 738 same = same_succ_alloc (); 739 } 740 741 same_succ_def::remove (same); 742 } 743 744 /* Initializes worklist administration. */ 745 746 static void 747 init_worklist (void) 748 { 749 alloc_aux_for_blocks (sizeof (struct aux_bb_info)); 750 same_succ_htab.create (n_basic_blocks); 751 same_succ_edge_flags = XCNEWVEC (int, last_basic_block); 752 deleted_bbs = BITMAP_ALLOC (NULL); 753 deleted_bb_preds = BITMAP_ALLOC (NULL); 754 worklist.create (n_basic_blocks); 755 find_same_succ (); 756 757 if (dump_file && (dump_flags & TDF_DETAILS)) 758 { 759 fprintf (dump_file, "initial worklist:\n"); 760 print_worklist (dump_file); 761 } 762 } 763 764 /* Deletes worklist administration. */ 765 766 static void 767 delete_worklist (void) 768 { 769 free_aux_for_blocks (); 770 same_succ_htab.dispose (); 771 XDELETEVEC (same_succ_edge_flags); 772 same_succ_edge_flags = NULL; 773 BITMAP_FREE (deleted_bbs); 774 BITMAP_FREE (deleted_bb_preds); 775 worklist.release (); 776 } 777 778 /* Mark BB as deleted, and mark its predecessors. */ 779 780 static void 781 mark_basic_block_deleted (basic_block bb) 782 { 783 edge e; 784 edge_iterator ei; 785 786 bitmap_set_bit (deleted_bbs, bb->index); 787 788 FOR_EACH_EDGE (e, ei, bb->preds) 789 bitmap_set_bit (deleted_bb_preds, e->src->index); 790 } 791 792 /* Removes BB from its corresponding same_succ. */ 793 794 static void 795 same_succ_flush_bb (basic_block bb) 796 { 797 same_succ same = BB_SAME_SUCC (bb); 798 BB_SAME_SUCC (bb) = NULL; 799 if (bitmap_single_bit_set_p (same->bbs)) 800 same_succ_htab.remove_elt_with_hash (same, same->hashval); 801 else 802 bitmap_clear_bit (same->bbs, bb->index); 803 } 804 805 /* Removes all bbs in BBS from their corresponding same_succ. */ 806 807 static void 808 same_succ_flush_bbs (bitmap bbs) 809 { 810 unsigned int i; 811 bitmap_iterator bi; 812 813 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi) 814 same_succ_flush_bb (BASIC_BLOCK (i)); 815 } 816 817 /* Release the last vdef in BB, either normal or phi result. */ 818 819 static void 820 release_last_vdef (basic_block bb) 821 { 822 gimple_stmt_iterator i; 823 824 for (i = gsi_last_bb (bb); !gsi_end_p (i); gsi_prev_nondebug (&i)) 825 { 826 gimple stmt = gsi_stmt (i); 827 if (gimple_vdef (stmt) == NULL_TREE) 828 continue; 829 830 mark_virtual_operand_for_renaming (gimple_vdef (stmt)); 831 return; 832 } 833 834 for (i = gsi_start_phis (bb); !gsi_end_p (i); gsi_next (&i)) 835 { 836 gimple phi = gsi_stmt (i); 837 tree res = gimple_phi_result (phi); 838 839 if (!virtual_operand_p (res)) 840 continue; 841 842 mark_virtual_phi_result_for_renaming (phi); 843 return; 844 } 845 846 } 847 848 /* For deleted_bb_preds, find bbs with same successors. */ 849 850 static void 851 update_worklist (void) 852 { 853 unsigned int i; 854 bitmap_iterator bi; 855 basic_block bb; 856 same_succ same; 857 858 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs); 859 bitmap_clear (deleted_bbs); 860 861 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK); 862 same_succ_flush_bbs (deleted_bb_preds); 863 864 same = same_succ_alloc (); 865 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi) 866 { 867 bb = BASIC_BLOCK (i); 868 gcc_assert (bb != NULL); 869 find_same_succ_bb (bb, &same); 870 if (same == NULL) 871 same = same_succ_alloc (); 872 } 873 same_succ_def::remove (same); 874 bitmap_clear (deleted_bb_preds); 875 } 876 877 /* Prints cluster C to FILE. */ 878 879 static void 880 print_cluster (FILE *file, bb_cluster c) 881 { 882 if (c == NULL) 883 return; 884 bitmap_print (file, c->bbs, "bbs:", "\n"); 885 bitmap_print (file, c->preds, "preds:", "\n"); 886 } 887 888 /* Prints cluster C to stderr. */ 889 890 extern void debug_cluster (bb_cluster); 891 DEBUG_FUNCTION void 892 debug_cluster (bb_cluster c) 893 { 894 print_cluster (stderr, c); 895 } 896 897 /* Update C->rep_bb, given that BB is added to the cluster. */ 898 899 static void 900 update_rep_bb (bb_cluster c, basic_block bb) 901 { 902 /* Initial. */ 903 if (c->rep_bb == NULL) 904 { 905 c->rep_bb = bb; 906 return; 907 } 908 909 /* Current needs no deps, keep it. */ 910 if (BB_DEP_BB (c->rep_bb) == NULL) 911 return; 912 913 /* Bb needs no deps, change rep_bb. */ 914 if (BB_DEP_BB (bb) == NULL) 915 { 916 c->rep_bb = bb; 917 return; 918 } 919 920 /* Bb needs last deps earlier than current, change rep_bb. A potential 921 problem with this, is that the first deps might also be earlier, which 922 would mean we prefer longer lifetimes for the deps. To be able to check 923 for this, we would have to trace BB_FIRST_DEP_BB as well, besides 924 BB_DEP_BB, which is really BB_LAST_DEP_BB. 925 The benefit of choosing the bb with last deps earlier, is that it can 926 potentially be used as replacement for more bbs. */ 927 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb))) 928 c->rep_bb = bb; 929 } 930 931 /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */ 932 933 static void 934 add_bb_to_cluster (bb_cluster c, basic_block bb) 935 { 936 edge e; 937 edge_iterator ei; 938 939 bitmap_set_bit (c->bbs, bb->index); 940 941 FOR_EACH_EDGE (e, ei, bb->preds) 942 bitmap_set_bit (c->preds, e->src->index); 943 944 update_rep_bb (c, bb); 945 } 946 947 /* Allocate and init new cluster. */ 948 949 static bb_cluster 950 new_cluster (void) 951 { 952 bb_cluster c; 953 c = XCNEW (struct bb_cluster_def); 954 c->bbs = BITMAP_ALLOC (NULL); 955 c->preds = BITMAP_ALLOC (NULL); 956 c->rep_bb = NULL; 957 return c; 958 } 959 960 /* Delete clusters. */ 961 962 static void 963 delete_cluster (bb_cluster c) 964 { 965 if (c == NULL) 966 return; 967 BITMAP_FREE (c->bbs); 968 BITMAP_FREE (c->preds); 969 XDELETE (c); 970 } 971 972 973 /* Array that contains all clusters. */ 974 975 static vec<bb_cluster> all_clusters; 976 977 /* Allocate all cluster vectors. */ 978 979 static void 980 alloc_cluster_vectors (void) 981 { 982 all_clusters.create (n_basic_blocks); 983 } 984 985 /* Reset all cluster vectors. */ 986 987 static void 988 reset_cluster_vectors (void) 989 { 990 unsigned int i; 991 basic_block bb; 992 for (i = 0; i < all_clusters.length (); ++i) 993 delete_cluster (all_clusters[i]); 994 all_clusters.truncate (0); 995 FOR_EACH_BB (bb) 996 BB_CLUSTER (bb) = NULL; 997 } 998 999 /* Delete all cluster vectors. */ 1000 1001 static void 1002 delete_cluster_vectors (void) 1003 { 1004 unsigned int i; 1005 for (i = 0; i < all_clusters.length (); ++i) 1006 delete_cluster (all_clusters[i]); 1007 all_clusters.release (); 1008 } 1009 1010 /* Merge cluster C2 into C1. */ 1011 1012 static void 1013 merge_clusters (bb_cluster c1, bb_cluster c2) 1014 { 1015 bitmap_ior_into (c1->bbs, c2->bbs); 1016 bitmap_ior_into (c1->preds, c2->preds); 1017 } 1018 1019 /* Register equivalence of BB1 and BB2 (members of cluster C). Store c in 1020 all_clusters, or merge c with existing cluster. */ 1021 1022 static void 1023 set_cluster (basic_block bb1, basic_block bb2) 1024 { 1025 basic_block merge_bb, other_bb; 1026 bb_cluster merge, old, c; 1027 1028 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL) 1029 { 1030 c = new_cluster (); 1031 add_bb_to_cluster (c, bb1); 1032 add_bb_to_cluster (c, bb2); 1033 BB_CLUSTER (bb1) = c; 1034 BB_CLUSTER (bb2) = c; 1035 c->index = all_clusters.length (); 1036 all_clusters.safe_push (c); 1037 } 1038 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL) 1039 { 1040 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1; 1041 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2; 1042 merge = BB_CLUSTER (merge_bb); 1043 add_bb_to_cluster (merge, other_bb); 1044 BB_CLUSTER (other_bb) = merge; 1045 } 1046 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2)) 1047 { 1048 unsigned int i; 1049 bitmap_iterator bi; 1050 1051 old = BB_CLUSTER (bb2); 1052 merge = BB_CLUSTER (bb1); 1053 merge_clusters (merge, old); 1054 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi) 1055 BB_CLUSTER (BASIC_BLOCK (i)) = merge; 1056 all_clusters[old->index] = NULL; 1057 update_rep_bb (merge, old->rep_bb); 1058 delete_cluster (old); 1059 } 1060 else 1061 gcc_unreachable (); 1062 } 1063 1064 /* Return true if gimple operands T1 and T2 have the same value. */ 1065 1066 static bool 1067 gimple_operand_equal_value_p (tree t1, tree t2) 1068 { 1069 if (t1 == t2) 1070 return true; 1071 1072 if (t1 == NULL_TREE 1073 || t2 == NULL_TREE) 1074 return false; 1075 1076 if (operand_equal_p (t1, t2, 0)) 1077 return true; 1078 1079 return gvn_uses_equal (t1, t2); 1080 } 1081 1082 /* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and 1083 gimple_bb (s2) are members of SAME_SUCC. */ 1084 1085 static bool 1086 gimple_equal_p (same_succ same_succ, gimple s1, gimple s2) 1087 { 1088 unsigned int i; 1089 tree lhs1, lhs2; 1090 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2); 1091 tree t1, t2; 1092 bool equal, inv_cond; 1093 enum tree_code code1, code2; 1094 1095 if (gimple_code (s1) != gimple_code (s2)) 1096 return false; 1097 1098 switch (gimple_code (s1)) 1099 { 1100 case GIMPLE_CALL: 1101 if (gimple_call_num_args (s1) != gimple_call_num_args (s2)) 1102 return false; 1103 if (!gimple_call_same_target_p (s1, s2)) 1104 return false; 1105 1106 /* Eventually, we'll significantly complicate the CFG by adding 1107 back edges to properly model the effects of transaction restart. 1108 For the bulk of optimization this does not matter, but what we 1109 cannot recover from is tail merging blocks between two separate 1110 transactions. Avoid that by making commit not match. */ 1111 if (gimple_call_builtin_p (s1, BUILT_IN_TM_COMMIT)) 1112 return false; 1113 1114 equal = true; 1115 for (i = 0; i < gimple_call_num_args (s1); ++i) 1116 { 1117 t1 = gimple_call_arg (s1, i); 1118 t2 = gimple_call_arg (s2, i); 1119 if (operand_equal_p (t1, t2, 0)) 1120 continue; 1121 if (gvn_uses_equal (t1, t2)) 1122 continue; 1123 equal = false; 1124 break; 1125 } 1126 if (!equal) 1127 return false; 1128 1129 lhs1 = gimple_get_lhs (s1); 1130 lhs2 = gimple_get_lhs (s2); 1131 if (lhs1 == NULL_TREE && lhs2 == NULL_TREE) 1132 return true; 1133 if (lhs1 == NULL_TREE || lhs2 == NULL_TREE) 1134 return false; 1135 if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME) 1136 return vn_valueize (lhs1) == vn_valueize (lhs2); 1137 return operand_equal_p (lhs1, lhs2, 0); 1138 1139 case GIMPLE_ASSIGN: 1140 lhs1 = gimple_get_lhs (s1); 1141 lhs2 = gimple_get_lhs (s2); 1142 if (TREE_CODE (lhs1) != SSA_NAME 1143 && TREE_CODE (lhs2) != SSA_NAME) 1144 return (operand_equal_p (lhs1, lhs2, 0) 1145 && gimple_operand_equal_value_p (gimple_assign_rhs1 (s1), 1146 gimple_assign_rhs1 (s2))); 1147 else if (TREE_CODE (lhs1) == SSA_NAME 1148 && TREE_CODE (lhs2) == SSA_NAME) 1149 return operand_equal_p (gimple_assign_rhs1 (s1), 1150 gimple_assign_rhs1 (s2), 0); 1151 return false; 1152 1153 case GIMPLE_COND: 1154 t1 = gimple_cond_lhs (s1); 1155 t2 = gimple_cond_lhs (s2); 1156 if (!operand_equal_p (t1, t2, 0) 1157 && !gvn_uses_equal (t1, t2)) 1158 return false; 1159 1160 t1 = gimple_cond_rhs (s1); 1161 t2 = gimple_cond_rhs (s2); 1162 if (!operand_equal_p (t1, t2, 0) 1163 && !gvn_uses_equal (t1, t2)) 1164 return false; 1165 1166 code1 = gimple_expr_code (s1); 1167 code2 = gimple_expr_code (s2); 1168 inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index) 1169 != bitmap_bit_p (same_succ->inverse, bb2->index)); 1170 if (inv_cond) 1171 { 1172 bool honor_nans 1173 = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (s1)))); 1174 code2 = invert_tree_comparison (code2, honor_nans); 1175 } 1176 return code1 == code2; 1177 1178 default: 1179 return false; 1180 } 1181 } 1182 1183 /* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE. 1184 Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the 1185 processed statements. */ 1186 1187 static void 1188 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse, 1189 bool *vuse_escaped) 1190 { 1191 gimple stmt; 1192 tree lvuse; 1193 1194 while (true) 1195 { 1196 if (gsi_end_p (*gsi)) 1197 return; 1198 stmt = gsi_stmt (*gsi); 1199 1200 lvuse = gimple_vuse (stmt); 1201 if (lvuse != NULL_TREE) 1202 { 1203 *vuse = lvuse; 1204 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF)) 1205 *vuse_escaped = true; 1206 } 1207 1208 if (!stmt_local_def (stmt)) 1209 return; 1210 gsi_prev_nondebug (gsi); 1211 } 1212 } 1213 1214 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so, 1215 clusters them. */ 1216 1217 static void 1218 find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2) 1219 { 1220 gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1); 1221 gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2); 1222 tree vuse1 = NULL_TREE, vuse2 = NULL_TREE; 1223 bool vuse_escaped = false; 1224 1225 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped); 1226 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped); 1227 1228 while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2)) 1229 { 1230 gimple stmt1 = gsi_stmt (gsi1); 1231 gimple stmt2 = gsi_stmt (gsi2); 1232 1233 if (!gimple_equal_p (same_succ, stmt1, stmt2)) 1234 return; 1235 1236 // We cannot tail-merge the builtins that end transactions. 1237 // ??? The alternative being unsharing of BBs in the tm_init pass. 1238 if (flag_tm 1239 && is_gimple_call (stmt1) 1240 && (gimple_call_flags (stmt1) & ECF_TM_BUILTIN) 1241 && is_tm_ending_fndecl (gimple_call_fndecl (stmt1))) 1242 return; 1243 1244 gsi_prev_nondebug (&gsi1); 1245 gsi_prev_nondebug (&gsi2); 1246 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped); 1247 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped); 1248 } 1249 1250 if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2))) 1251 return; 1252 1253 /* If the incoming vuses are not the same, and the vuse escaped into an 1254 SSA_OP_DEF, then merging the 2 blocks will change the value of the def, 1255 which potentially means the semantics of one of the blocks will be changed. 1256 TODO: make this check more precise. */ 1257 if (vuse_escaped && vuse1 != vuse2) 1258 return; 1259 1260 if (dump_file) 1261 fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n", 1262 bb1->index, bb2->index); 1263 1264 set_cluster (bb1, bb2); 1265 } 1266 1267 /* Returns whether for all phis in DEST the phi alternatives for E1 and 1268 E2 are equal. */ 1269 1270 static bool 1271 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2) 1272 { 1273 int n1 = e1->dest_idx, n2 = e2->dest_idx; 1274 gimple_stmt_iterator gsi; 1275 1276 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi)) 1277 { 1278 gimple phi = gsi_stmt (gsi); 1279 tree lhs = gimple_phi_result (phi); 1280 tree val1 = gimple_phi_arg_def (phi, n1); 1281 tree val2 = gimple_phi_arg_def (phi, n2); 1282 1283 if (virtual_operand_p (lhs)) 1284 continue; 1285 1286 if (operand_equal_for_phi_arg_p (val1, val2)) 1287 continue; 1288 if (gvn_uses_equal (val1, val2)) 1289 continue; 1290 1291 return false; 1292 } 1293 1294 return true; 1295 } 1296 1297 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the 1298 phi alternatives for BB1 and BB2 are equal. */ 1299 1300 static bool 1301 same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2) 1302 { 1303 unsigned int s; 1304 bitmap_iterator bs; 1305 edge e1, e2; 1306 basic_block succ; 1307 1308 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs) 1309 { 1310 succ = BASIC_BLOCK (s); 1311 e1 = find_edge (bb1, succ); 1312 e2 = find_edge (bb2, succ); 1313 if (e1->flags & EDGE_COMPLEX 1314 || e2->flags & EDGE_COMPLEX) 1315 return false; 1316 1317 /* For all phis in bb, the phi alternatives for e1 and e2 need to have 1318 the same value. */ 1319 if (!same_phi_alternatives_1 (succ, e1, e2)) 1320 return false; 1321 } 1322 1323 return true; 1324 } 1325 1326 /* Return true if BB has non-vop phis. */ 1327 1328 static bool 1329 bb_has_non_vop_phi (basic_block bb) 1330 { 1331 gimple_seq phis = phi_nodes (bb); 1332 gimple phi; 1333 1334 if (phis == NULL) 1335 return false; 1336 1337 if (!gimple_seq_singleton_p (phis)) 1338 return true; 1339 1340 phi = gimple_seq_first_stmt (phis); 1341 return !virtual_operand_p (gimple_phi_result (phi)); 1342 } 1343 1344 /* Returns true if redirecting the incoming edges of FROM to TO maintains the 1345 invariant that uses in FROM are dominates by their defs. */ 1346 1347 static bool 1348 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to) 1349 { 1350 basic_block cd, dep_bb = BB_DEP_BB (to); 1351 edge_iterator ei; 1352 edge e; 1353 bitmap from_preds = BITMAP_ALLOC (NULL); 1354 1355 if (dep_bb == NULL) 1356 return true; 1357 1358 FOR_EACH_EDGE (e, ei, from->preds) 1359 bitmap_set_bit (from_preds, e->src->index); 1360 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds); 1361 BITMAP_FREE (from_preds); 1362 1363 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd); 1364 } 1365 1366 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its 1367 replacement bb) and vice versa maintains the invariant that uses in the 1368 replacement are dominates by their defs. */ 1369 1370 static bool 1371 deps_ok_for_redirect (basic_block bb1, basic_block bb2) 1372 { 1373 if (BB_CLUSTER (bb1) != NULL) 1374 bb1 = BB_CLUSTER (bb1)->rep_bb; 1375 1376 if (BB_CLUSTER (bb2) != NULL) 1377 bb2 = BB_CLUSTER (bb2)->rep_bb; 1378 1379 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2) 1380 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1)); 1381 } 1382 1383 /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */ 1384 1385 static void 1386 find_clusters_1 (same_succ same_succ) 1387 { 1388 basic_block bb1, bb2; 1389 unsigned int i, j; 1390 bitmap_iterator bi, bj; 1391 int nr_comparisons; 1392 int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS); 1393 1394 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi) 1395 { 1396 bb1 = BASIC_BLOCK (i); 1397 1398 /* TODO: handle blocks with phi-nodes. We'll have to find corresponding 1399 phi-nodes in bb1 and bb2, with the same alternatives for the same 1400 preds. */ 1401 if (bb_has_non_vop_phi (bb1)) 1402 continue; 1403 1404 nr_comparisons = 0; 1405 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj) 1406 { 1407 bb2 = BASIC_BLOCK (j); 1408 1409 if (bb_has_non_vop_phi (bb2)) 1410 continue; 1411 1412 if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2)) 1413 continue; 1414 1415 /* Limit quadratic behaviour. */ 1416 nr_comparisons++; 1417 if (nr_comparisons > max_comparisons) 1418 break; 1419 1420 /* This is a conservative dependency check. We could test more 1421 precise for allowed replacement direction. */ 1422 if (!deps_ok_for_redirect (bb1, bb2)) 1423 continue; 1424 1425 if (!(same_phi_alternatives (same_succ, bb1, bb2))) 1426 continue; 1427 1428 find_duplicate (same_succ, bb1, bb2); 1429 } 1430 } 1431 } 1432 1433 /* Find clusters of bbs which can be merged. */ 1434 1435 static void 1436 find_clusters (void) 1437 { 1438 same_succ same; 1439 1440 while (!worklist.is_empty ()) 1441 { 1442 same = worklist.pop (); 1443 same->in_worklist = false; 1444 if (dump_file && (dump_flags & TDF_DETAILS)) 1445 { 1446 fprintf (dump_file, "processing worklist entry\n"); 1447 same_succ_print (dump_file, same); 1448 } 1449 find_clusters_1 (same); 1450 } 1451 } 1452 1453 /* Returns the vop phi of BB, if any. */ 1454 1455 static gimple 1456 vop_phi (basic_block bb) 1457 { 1458 gimple stmt; 1459 gimple_stmt_iterator gsi; 1460 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1461 { 1462 stmt = gsi_stmt (gsi); 1463 if (! virtual_operand_p (gimple_phi_result (stmt))) 1464 continue; 1465 return stmt; 1466 } 1467 return NULL; 1468 } 1469 1470 /* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed. */ 1471 1472 static void 1473 replace_block_by (basic_block bb1, basic_block bb2) 1474 { 1475 edge pred_edge; 1476 unsigned int i; 1477 gimple bb2_phi; 1478 1479 bb2_phi = vop_phi (bb2); 1480 1481 /* Mark the basic block as deleted. */ 1482 mark_basic_block_deleted (bb1); 1483 1484 /* Redirect the incoming edges of bb1 to bb2. */ 1485 for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i) 1486 { 1487 pred_edge = EDGE_PRED (bb1, i - 1); 1488 pred_edge = redirect_edge_and_branch (pred_edge, bb2); 1489 gcc_assert (pred_edge != NULL); 1490 1491 if (bb2_phi == NULL) 1492 continue; 1493 1494 /* The phi might have run out of capacity when the redirect added an 1495 argument, which means it could have been replaced. Refresh it. */ 1496 bb2_phi = vop_phi (bb2); 1497 1498 add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)), 1499 pred_edge, UNKNOWN_LOCATION); 1500 } 1501 1502 bb2->frequency += bb1->frequency; 1503 if (bb2->frequency > BB_FREQ_MAX) 1504 bb2->frequency = BB_FREQ_MAX; 1505 1506 bb2->count += bb1->count; 1507 1508 /* Do updates that use bb1, before deleting bb1. */ 1509 release_last_vdef (bb1); 1510 same_succ_flush_bb (bb1); 1511 1512 delete_basic_block (bb1); 1513 } 1514 1515 /* Bbs for which update_debug_stmt need to be called. */ 1516 1517 static bitmap update_bbs; 1518 1519 /* For each cluster in all_clusters, merge all cluster->bbs. Returns 1520 number of bbs removed. */ 1521 1522 static int 1523 apply_clusters (void) 1524 { 1525 basic_block bb1, bb2; 1526 bb_cluster c; 1527 unsigned int i, j; 1528 bitmap_iterator bj; 1529 int nr_bbs_removed = 0; 1530 1531 for (i = 0; i < all_clusters.length (); ++i) 1532 { 1533 c = all_clusters[i]; 1534 if (c == NULL) 1535 continue; 1536 1537 bb2 = c->rep_bb; 1538 bitmap_set_bit (update_bbs, bb2->index); 1539 1540 bitmap_clear_bit (c->bbs, bb2->index); 1541 EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj) 1542 { 1543 bb1 = BASIC_BLOCK (j); 1544 bitmap_clear_bit (update_bbs, bb1->index); 1545 1546 replace_block_by (bb1, bb2); 1547 nr_bbs_removed++; 1548 } 1549 } 1550 1551 return nr_bbs_removed; 1552 } 1553 1554 /* Resets debug statement STMT if it has uses that are not dominated by their 1555 defs. */ 1556 1557 static void 1558 update_debug_stmt (gimple stmt) 1559 { 1560 use_operand_p use_p; 1561 ssa_op_iter oi; 1562 basic_block bbdef, bbuse; 1563 gimple def_stmt; 1564 tree name; 1565 1566 if (!gimple_debug_bind_p (stmt)) 1567 return; 1568 1569 bbuse = gimple_bb (stmt); 1570 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE) 1571 { 1572 name = USE_FROM_PTR (use_p); 1573 gcc_assert (TREE_CODE (name) == SSA_NAME); 1574 1575 def_stmt = SSA_NAME_DEF_STMT (name); 1576 gcc_assert (def_stmt != NULL); 1577 1578 bbdef = gimple_bb (def_stmt); 1579 if (bbdef == NULL || bbuse == bbdef 1580 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)) 1581 continue; 1582 1583 gimple_debug_bind_reset_value (stmt); 1584 update_stmt (stmt); 1585 } 1586 } 1587 1588 /* Resets all debug statements that have uses that are not 1589 dominated by their defs. */ 1590 1591 static void 1592 update_debug_stmts (void) 1593 { 1594 basic_block bb; 1595 bitmap_iterator bi; 1596 unsigned int i; 1597 1598 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi) 1599 { 1600 gimple stmt; 1601 gimple_stmt_iterator gsi; 1602 1603 bb = BASIC_BLOCK (i); 1604 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1605 { 1606 stmt = gsi_stmt (gsi); 1607 if (!is_gimple_debug (stmt)) 1608 continue; 1609 update_debug_stmt (stmt); 1610 } 1611 } 1612 } 1613 1614 /* Runs tail merge optimization. */ 1615 1616 unsigned int 1617 tail_merge_optimize (unsigned int todo) 1618 { 1619 int nr_bbs_removed_total = 0; 1620 int nr_bbs_removed; 1621 bool loop_entered = false; 1622 int iteration_nr = 0; 1623 int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS); 1624 1625 if (!flag_tree_tail_merge || max_iterations == 0) 1626 return 0; 1627 1628 timevar_push (TV_TREE_TAIL_MERGE); 1629 1630 if (!dom_info_available_p (CDI_DOMINATORS)) 1631 { 1632 /* PRE can leave us with unreachable blocks, remove them now. */ 1633 delete_unreachable_blocks (); 1634 calculate_dominance_info (CDI_DOMINATORS); 1635 } 1636 init_worklist (); 1637 1638 while (!worklist.is_empty ()) 1639 { 1640 if (!loop_entered) 1641 { 1642 loop_entered = true; 1643 alloc_cluster_vectors (); 1644 update_bbs = BITMAP_ALLOC (NULL); 1645 } 1646 else 1647 reset_cluster_vectors (); 1648 1649 iteration_nr++; 1650 if (dump_file && (dump_flags & TDF_DETAILS)) 1651 fprintf (dump_file, "worklist iteration #%d\n", iteration_nr); 1652 1653 find_clusters (); 1654 gcc_assert (worklist.is_empty ()); 1655 if (all_clusters.is_empty ()) 1656 break; 1657 1658 nr_bbs_removed = apply_clusters (); 1659 nr_bbs_removed_total += nr_bbs_removed; 1660 if (nr_bbs_removed == 0) 1661 break; 1662 1663 free_dominance_info (CDI_DOMINATORS); 1664 1665 if (iteration_nr == max_iterations) 1666 break; 1667 1668 calculate_dominance_info (CDI_DOMINATORS); 1669 update_worklist (); 1670 } 1671 1672 if (dump_file && (dump_flags & TDF_DETAILS)) 1673 fprintf (dump_file, "htab collision / search: %f\n", 1674 same_succ_htab.collisions ()); 1675 1676 if (nr_bbs_removed_total > 0) 1677 { 1678 if (MAY_HAVE_DEBUG_STMTS) 1679 { 1680 calculate_dominance_info (CDI_DOMINATORS); 1681 update_debug_stmts (); 1682 } 1683 1684 if (dump_file && (dump_flags & TDF_DETAILS)) 1685 { 1686 fprintf (dump_file, "Before TODOs.\n"); 1687 dump_function_to_file (current_function_decl, dump_file, dump_flags); 1688 } 1689 1690 todo |= (TODO_verify_ssa | TODO_verify_stmts | TODO_verify_flow); 1691 mark_virtual_operands_for_renaming (cfun); 1692 } 1693 1694 delete_worklist (); 1695 if (loop_entered) 1696 { 1697 delete_cluster_vectors (); 1698 BITMAP_FREE (update_bbs); 1699 } 1700 1701 timevar_pop (TV_TREE_TAIL_MERGE); 1702 1703 return todo; 1704 } 1705