/* Tail merging for gimple.
   Copyright (C) 2011-2017 Free Software Foundation, Inc.
   Contributed by Tom de Vries (tom@codesourcery.com)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* Pass overview.


   MOTIVATIONAL EXAMPLE

   gimple representation of gcc/testsuite/gcc.dg/pr43864.c at

   hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
   {
     struct FILED.1638 * fpD.2605;
     charD.1 fileNameD.2604[1000];
     intD.0 D.3915;
     const charD.1 * restrict outputFileName.0D.3914;

     # BLOCK 2 freq:10000
     # PRED: ENTRY [100.0%] (fallthru,exec)
     # PT = nonlocal { D.3926 } (restr)
     outputFileName.0D.3914_3
       = (const charD.1 * restrict) outputFileNameD.2600_2(D);
     # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
     # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
     if (D.3915_4 == 0)
       goto <bb 3>;
     else
       goto <bb 4>;
     # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec)

     # BLOCK 3 freq:1000
     # PRED: 2 [10.0%] (true,exec)
     # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     freeD.898 (ctxD.2601_5(D));
     goto <bb 7>;
     # SUCC: 7 [100.0%] (fallthru,exec)

     # BLOCK 4 freq:9000
     # PRED: 2 [90.0%] (false,exec)
     # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
     # PT = nonlocal escaped
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
     if (fpD.2605_8 == 0B)
       goto <bb 5>;
     else
       goto <bb 6>;
     # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec)

     # BLOCK 5 freq:173
     # PRED: 4 [1.9%] (true,exec)
     # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     freeD.898 (ctxD.2601_5(D));
     goto <bb 7>;
     # SUCC: 7 [100.0%] (fallthru,exec)

     # BLOCK 6 freq:8827
     # PRED: 4 [98.1%] (false,exec)
     # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
     # SUCC: 7 [100.0%] (fallthru,exec)

     # BLOCK 7 freq:10000
     # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec)
             6 [100.0%] (fallthru,exec)
     # PT = nonlocal null

     # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
     # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
                            .MEMD.3923_18(6)>
     # VUSE <.MEMD.3923_11>
     return ctxD.2601_1;
     # SUCC: EXIT [100.0%]
   }

   bb 3 and bb 5 can be merged.  The blocks have different predecessors, but
   the same successors, and the same operations.
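
   For reference, a C-level sketch of the function above, reconstructed from
   the dump (the actual testcase may differ in details):

     char *
     hprofStartupp (char *outputFileName, char *ctx)
     {
       char fileName[1000];
       FILE *fp;

       sprintf (fileName, outputFileName);
       if (access (fileName, 1) == 0)
         {
           free (ctx);
           return 0;
         }
       fp = fopen (fileName, 0);
       if (fp == 0)
         {
           free (ctx);
           return 0;
         }
       foo (outputFileName, fp);
       return ctx;
     }

   The two 'free (ctx); return 0;' arms correspond to bb 3 and bb 5.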


   CONTEXT

   A technique called tail merging (or cross jumping) can fix the example
   above.  For a block, we look for common code at the end (the tail) of the
   predecessor blocks, and insert jumps from one block to the other.
   The example is a special case for tail merging, in that 2 whole blocks
   can be merged, rather than just the end parts of them.
   We currently only focus on whole block merging, so in that sense
   calling this pass tail merge is a bit of a misnomer.

   We distinguish 2 kinds of situations in which blocks can be merged:
   - same operations, same predecessors.  The successor edges coming from one
     block are redirected to come from the other block.
   - same operations, same successors.  The predecessor edges entering one
     block are redirected to enter the other block.  Note that this operation
     might involve introducing phi operations.

   For efficient implementation, we would like to value number the blocks,
   and have a comparison operator that tells us whether the blocks are equal.
   Besides being runtime efficient, block value numbering should also abstract
   from irrelevant differences in order of operations, much like normal value
   numbering abstracts from irrelevant order of operations.

   For the first situation (same operations, same predecessors), normal value
   numbering fits well.  We can calculate a block value number based on the
   value numbers of the defs and vdefs.

   For the second situation (same operations, same successors), this approach
   doesn't work so well.  We can illustrate this using the example.  The calls
   to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
   remain different in value numbering, since they represent different memory
   states.  So the resulting vdefs of the frees will be different in value
   numbering, so the block value numbers will be different.

   The reason why we call the blocks equal is not because they define the same
   values, but because uses in the blocks use (possibly different) defs in the
   same way.  To be able to detect this efficiently, we need to do some kind
   of reverse value numbering, meaning number the uses rather than the defs,
   and calculate a block value number based on the value number of the uses.
   Ideally, a block comparison operator will also indicate which phis are
   needed to merge the blocks.

   For the moment, we don't do block value numbering, but we do insn-by-insn
   matching, using SCC value numbers to match operations with results, and
   structural comparison otherwise, while ignoring vop mismatches.


   IMPLEMENTATION

   1. The pass first determines all groups of blocks with the same successor
      blocks.
   2. Within each group, it tries to determine clusters of equal basic blocks.
   3. The clusters are applied.
   4. The same successor groups are updated.
   5. This process is repeated from 2 onwards, until no more changes occur
      (see the function map below).
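
   In terms of the functions below, these steps map roughly as follows (a
   navigation aid, not a precise call graph):
   1. init_worklist / find_same_succ fill same_succ_htab and the worklist.
   2. find_clusters / find_clusters_1 / find_duplicate / set_cluster fill
      all_clusters.
   3. apply_clusters / replace_block_by merge the bbs of each cluster into
      its rep_bb.
   4. update_worklist recomputes the groups for bbs affected by the merges.
   5. The loop in tail_merge_optimize repeats this until the worklist is
      empty, nothing was removed, or PARAM_MAX_TAIL_MERGE_ITERATIONS is
      reached.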


   LIMITATIONS/TODO

   - block only
   - handles only 'same operations, same successors'.
     It handles same predecessors as a special subcase though.
   - does not implement the reverse value numbering and block value
     numbering.
   - improve memory allocation: use garbage collected memory, obstacks,
     allocpools where appropriate.
   - no insertion of gimple_reg phis; we only introduce vop-phis.
   - handle blocks with gimple_reg phi_nodes.


   PASS PLACEMENT
   This 'pass' is not a stand-alone gimple pass, but runs as part of
   pass_pre, in order to share the value numbering.


   SWITCHES

   - ftree-tail-merge.  On at -O2.  We may have to enable it only at -Os.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "trans-mem.h"
#include "cfganal.h"
#include "cfgcleanup.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "params.h"
#include "tree-ssa-sccvn.h"
#include "cfgloop.h"
#include "tree-eh.h"

/* Describes a group of bbs with the same successors.  The successor bbs are
   cached in succs, and the successor edge flags are cached in succ_flags.
   If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags,
   it's marked in inverse.
   Additionally, the hash value for the struct is cached in hashval, and
   in_worklist indicates whether it's currently part of worklist.  */

struct same_succ : pointer_hash <same_succ>
{
  /* The bbs that have the same successor bbs.  */
  bitmap bbs;
  /* The successor bbs.  */
  bitmap succs;
  /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped
     for bb.  */
  bitmap inverse;
  /* The edge flags for each of the successor bbs.  */
  vec<int> succ_flags;
  /* Indicates whether the struct is currently in the worklist.  */
  bool in_worklist;
  /* The hash value of the struct.  */
  hashval_t hashval;

  /* hash_table support.  */
  static inline hashval_t hash (const same_succ *);
  static int equal (const same_succ *, const same_succ *);
  static void remove (same_succ *);
};

/* Hash routine for hash_table support, returns the hashval of E.  */

inline hashval_t
same_succ::hash (const same_succ *e)
{
  return e->hashval;
}

/* A group of bbs where 1 bb from bbs can replace the other bbs.  */

struct bb_cluster
{
  /* The bbs in the cluster.  */
  bitmap bbs;
  /* The preds of the bbs in the cluster.  */
  bitmap preds;
  /* Index in all_clusters vector.  */
  int index;
  /* The bb to replace the cluster with.  */
  basic_block rep_bb;
};

/* Per bb-info.  */

struct aux_bb_info
{
  /* The number of non-debug statements in the bb.  */
  int size;
  /* The same_succ that this bb is a member of.  */
  same_succ *bb_same_succ;
  /* The cluster that this bb is a member of.  */
  bb_cluster *cluster;
  /* The vop state at the exit of a bb.  This is short-lived data, used to
     communicate data between update_block_by and update_vuses.  */
  tree vop_at_exit;
  /* The bb that either contains or is dominated by the dependencies of the
     bb.  */
  basic_block dep_bb;
};

/* Macros to access the fields of struct aux_bb_info.  */

#define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
#define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
#define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
#define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
#define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)

/* Returns true if the only effect statement STMT has is to define locally
   used SSA_NAMEs.  */

static bool
stmt_local_def (gimple *stmt)
{
  basic_block bb, def_bb;
  imm_use_iterator iter;
  use_operand_p use_p;
  tree val;
  def_operand_p def_p;

  if (gimple_vdef (stmt) != NULL_TREE
      || gimple_has_side_effects (stmt)
      || gimple_could_trap_p_1 (stmt, false, false)
      || gimple_vuse (stmt) != NULL_TREE
      /* Copied from tree-ssa-ifcombine.c:bb_no_side_effects_p():
	 const calls don't match any of the above, yet they could
	 still have some side-effects - they could contain
	 gimple_could_trap_p statements, like floating point
	 exceptions or integer division by zero.  See PR70586.
	 FIXME: perhaps gimple_has_side_effects or gimple_could_trap_p
	 should handle this.  */
      || is_gimple_call (stmt))
    return false;

  def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
  if (def_p == NULL)
    return false;

  val = DEF_FROM_PTR (def_p);
  if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
    return false;

  def_bb = gimple_bb (stmt);

  FOR_EACH_IMM_USE_FAST (use_p, iter, val)
    {
      if (is_gimple_debug (USE_STMT (use_p)))
	continue;
      bb = gimple_bb (USE_STMT (use_p));
      if (bb == def_bb)
	continue;

      if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
	  && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
	continue;

      return false;
    }

  return true;
}
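
/* For instance (a hypothetical illustration, not from a testcase), given

     tmp_1 = x_2 + 1;
     if (tmp_1 != 0) ...

   in a single bb, the statement defining tmp_1 is a local def: two bbs that
   differ only in such names can still be merged, so stmt_local_def allows
   the bb comparison below to skip it.  */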

/* Let GSI skip forwards over local defs.  */

static void
gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
{
  gimple *stmt;

  while (true)
    {
      if (gsi_end_p (*gsi))
	return;
      stmt = gsi_stmt (*gsi);
      if (!stmt_local_def (stmt))
	return;
      gsi_next_nondebug (gsi);
    }
}

/* VAL1 and VAL2 are either:
   - uses in BB1 and BB2, or
   - phi alternatives for BB1 and BB2.
   Return true if the uses have the same gvn value.  */

static bool
gvn_uses_equal (tree val1, tree val2)
{
  gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);

  if (val1 == val2)
    return true;

  if (vn_valueize (val1) != vn_valueize (val2))
    return false;

  return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
	  && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
}

/* Prints E to FILE.  */

static void
same_succ_print (FILE *file, const same_succ *e)
{
  unsigned int i;
  bitmap_print (file, e->bbs, "bbs:", "\n");
  bitmap_print (file, e->succs, "succs:", "\n");
  bitmap_print (file, e->inverse, "inverse:", "\n");
  fprintf (file, "flags:");
  for (i = 0; i < e->succ_flags.length (); ++i)
    fprintf (file, " %x", e->succ_flags[i]);
  fprintf (file, "\n");
}

/* Prints same_succ *PE to FILE.  */

inline int
ssa_same_succ_print_traverse (same_succ **pe, FILE *file)
{
  const same_succ *e = *pe;
  same_succ_print (file, e);
  return 1;
}

/* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB.  */

static void
update_dep_bb (basic_block use_bb, tree val)
{
  basic_block dep_bb;

  /* Not a dep.  */
  if (TREE_CODE (val) != SSA_NAME)
    return;

  /* Skip use of global def.  */
  if (SSA_NAME_IS_DEFAULT_DEF (val))
    return;

  /* Skip use of local def.  */
  dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
  if (dep_bb == use_bb)
    return;

  if (BB_DEP_BB (use_bb) == NULL
      || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
    BB_DEP_BB (use_bb) = dep_bb;
}

/* Update BB_DEP_BB, given the dependencies in STMT.  */

static void
stmt_update_dep_bb (gimple *stmt)
{
  ssa_op_iter iter;
  use_operand_p use;

  FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
    update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
}

/* Calculates hash value for same_succ E.  */

static hashval_t
same_succ_hash (const same_succ *e)
{
  inchash::hash hstate (bitmap_hash (e->succs));
  int flags;
  unsigned int i;
  unsigned int first = bitmap_first_set_bit (e->bbs);
  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, first);
  int size = 0;
  gimple *stmt;
  tree arg;
  unsigned int s;
  bitmap_iterator bs;

  for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
       !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
    {
      stmt = gsi_stmt (gsi);
      stmt_update_dep_bb (stmt);
      if (stmt_local_def (stmt))
	continue;
      size++;

      hstate.add_int (gimple_code (stmt));
      if (is_gimple_assign (stmt))
	hstate.add_int (gimple_assign_rhs_code (stmt));
      if (!is_gimple_call (stmt))
	continue;
      if (gimple_call_internal_p (stmt))
	hstate.add_int (gimple_call_internal_fn (stmt));
      else
	{
	  inchash::add_expr (gimple_call_fn (stmt), hstate);
	  if (gimple_call_chain (stmt))
	    inchash::add_expr (gimple_call_chain (stmt), hstate);
	}
      for (i = 0; i < gimple_call_num_args (stmt); i++)
	{
	  arg = gimple_call_arg (stmt, i);
	  arg = vn_valueize (arg);
	  inchash::add_expr (arg, hstate);
	}
    }

  hstate.add_int (size);
  BB_SIZE (bb) = size;

  for (i = 0; i < e->succ_flags.length (); ++i)
    {
      flags = e->succ_flags[i];
      flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      hstate.add_int (flags);
    }

  EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
    {
      int n = find_edge (bb, BASIC_BLOCK_FOR_FN (cfun, s))->dest_idx;
      for (gphi_iterator gsi = gsi_start_phis (BASIC_BLOCK_FOR_FN (cfun, s));
	   !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  tree lhs = gimple_phi_result (phi);
	  tree val = gimple_phi_arg_def (phi, n);

	  if (virtual_operand_p (lhs))
	    continue;
	  update_dep_bb (bb, val);
	}
    }

  return hstate.end ();
}
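
/* Note that same_succ_hash masks EDGE_TRUE_VALUE and EDGE_FALSE_VALUE out of
   the successor edge flags before hashing them.  Two bbs that end in
   inverted conditions branching to the same successors therefore hash to
   the same value; whether the flags match exactly or are inverted is
   decided later, in inverse_flags and same_succ::equal.  */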

/* Returns true if E1 and E2 have 2 successors, and if the successor flags
   are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal
   for the other edge flags.  */

static bool
inverse_flags (const same_succ *e1, const same_succ *e2)
{
  int f1a, f1b, f2a, f2b;
  int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);

  if (e1->succ_flags.length () != 2)
    return false;

  f1a = e1->succ_flags[0];
  f1b = e1->succ_flags[1];
  f2a = e2->succ_flags[0];
  f2b = e2->succ_flags[1];

  if (f1a == f2a && f1b == f2b)
    return false;

  return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
}
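
/* For example (hypothetical flags), for a pair with
   e1->succ_flags = { EDGE_TRUE_VALUE, EDGE_FALSE_VALUE } and
   e2->succ_flags = { EDGE_FALSE_VALUE, EDGE_TRUE_VALUE }, the flags are not
   pairwise equal but agree once EDGE_TRUE/FALSE_VALUE are masked out, so
   inverse_flags returns true: the bbs end in inverted conditions.  */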

/* Compares SAME_SUCCs E1 and E2.  */

int
same_succ::equal (const same_succ *e1, const same_succ *e2)
{
  unsigned int i, first1, first2;
  gimple_stmt_iterator gsi1, gsi2;
  gimple *s1, *s2;
  basic_block bb1, bb2;

  if (e1 == e2)
    return 1;

  if (e1->hashval != e2->hashval)
    return 0;

  if (e1->succ_flags.length () != e2->succ_flags.length ())
    return 0;

  if (!bitmap_equal_p (e1->succs, e2->succs))
    return 0;

  if (!inverse_flags (e1, e2))
    {
      for (i = 0; i < e1->succ_flags.length (); ++i)
	if (e1->succ_flags[i] != e2->succ_flags[i])
	  return 0;
    }

  first1 = bitmap_first_set_bit (e1->bbs);
  first2 = bitmap_first_set_bit (e2->bbs);

  bb1 = BASIC_BLOCK_FOR_FN (cfun, first1);
  bb2 = BASIC_BLOCK_FOR_FN (cfun, first2);

  if (BB_SIZE (bb1) != BB_SIZE (bb2))
    return 0;

  gsi1 = gsi_start_nondebug_bb (bb1);
  gsi2 = gsi_start_nondebug_bb (bb2);
  gsi_advance_fw_nondebug_nonlocal (&gsi1);
  gsi_advance_fw_nondebug_nonlocal (&gsi2);
  while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
    {
      s1 = gsi_stmt (gsi1);
      s2 = gsi_stmt (gsi2);
      if (gimple_code (s1) != gimple_code (s2))
	return 0;
      if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
	return 0;
      gsi_next_nondebug (&gsi1);
      gsi_next_nondebug (&gsi2);
      gsi_advance_fw_nondebug_nonlocal (&gsi1);
      gsi_advance_fw_nondebug_nonlocal (&gsi2);
    }

  return 1;
}

/* Alloc and init a new SAME_SUCC.  */

static same_succ *
same_succ_alloc (void)
{
  same_succ *same = XNEW (struct same_succ);

  same->bbs = BITMAP_ALLOC (NULL);
  same->succs = BITMAP_ALLOC (NULL);
  same->inverse = BITMAP_ALLOC (NULL);
  same->succ_flags.create (10);
  same->in_worklist = false;

  return same;
}

/* Delete same_succ E.  */

void
same_succ::remove (same_succ *e)
{
  BITMAP_FREE (e->bbs);
  BITMAP_FREE (e->succs);
  BITMAP_FREE (e->inverse);
  e->succ_flags.release ();

  XDELETE (e);
}

/* Reset same_succ SAME.  */

static void
same_succ_reset (same_succ *same)
{
  bitmap_clear (same->bbs);
  bitmap_clear (same->succs);
  bitmap_clear (same->inverse);
  same->succ_flags.truncate (0);
}

static hash_table<same_succ> *same_succ_htab;

/* Array that is used to store the edge flags for a successor.  */

static int *same_succ_edge_flags;

/* Bitmap that is used to mark bbs that are recently deleted.  */

static bitmap deleted_bbs;

/* Bitmap that is used to mark predecessors of bbs that are
   deleted.  */

static bitmap deleted_bb_preds;

/* Prints same_succ_htab to stderr.  */

extern void debug_same_succ (void);
DEBUG_FUNCTION void
debug_same_succ (void)
{
  same_succ_htab->traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
}


/* Vector of bbs to process.  */

static vec<same_succ *> worklist;

/* Prints worklist to FILE.  */

static void
print_worklist (FILE *file)
{
  unsigned int i;
  for (i = 0; i < worklist.length (); ++i)
    same_succ_print (file, worklist[i]);
}

/* Adds SAME to worklist.  */

static void
add_to_worklist (same_succ *same)
{
  if (same->in_worklist)
    return;

  if (bitmap_count_bits (same->bbs) < 2)
    return;

  same->in_worklist = true;
  worklist.safe_push (same);
}

/* Add BB to same_succ_htab.  */

static void
find_same_succ_bb (basic_block bb, same_succ **same_p)
{
  unsigned int j;
  bitmap_iterator bj;
  same_succ *same = *same_p;
  same_succ **slot;
  edge_iterator ei;
  edge e;

  if (bb == NULL
      /* Be conservative with loop structure.  It's not evident that this
	 test is sufficient.  Before tail-merge, we've just called
	 loop_optimizer_finalize, and LOOPS_MAY_HAVE_MULTIPLE_LATCHES is now
	 set, so there's no guarantee that the loop->latch value is still
	 valid.  But we assume that, since we've forced
	 LOOPS_HAVE_SIMPLE_LATCHES at the start of pre, we've kept that
	 property intact throughout pre, and are keeping it throughout
	 tail-merge using this test.  */
      || bb->loop_father->latch == bb)
    return;
  bitmap_set_bit (same->bbs, bb->index);
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      int index = e->dest->index;
      bitmap_set_bit (same->succs, index);
      same_succ_edge_flags[index] = e->flags;
    }
  EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
    same->succ_flags.safe_push (same_succ_edge_flags[j]);

  same->hashval = same_succ_hash (same);

  slot = same_succ_htab->find_slot_with_hash (same, same->hashval, INSERT);
  if (*slot == NULL)
    {
      *slot = same;
      BB_SAME_SUCC (bb) = same;
      add_to_worklist (same);
      *same_p = NULL;
    }
  else
    {
      bitmap_set_bit ((*slot)->bbs, bb->index);
      BB_SAME_SUCC (bb) = *slot;
      add_to_worklist (*slot);
      if (inverse_flags (same, *slot))
	bitmap_set_bit ((*slot)->inverse, bb->index);
      same_succ_reset (same);
    }
}

/* Find bbs with same successors.  */

static void
find_same_succ (void)
{
  same_succ *same = same_succ_alloc ();
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      find_same_succ_bb (bb, &same);
      if (same == NULL)
	same = same_succ_alloc ();
    }

  same_succ::remove (same);
}

/* Initializes worklist administration.  */

static void
init_worklist (void)
{
  alloc_aux_for_blocks (sizeof (struct aux_bb_info));
  same_succ_htab = new hash_table<same_succ> (n_basic_blocks_for_fn (cfun));
  same_succ_edge_flags = XCNEWVEC (int, last_basic_block_for_fn (cfun));
  deleted_bbs = BITMAP_ALLOC (NULL);
  deleted_bb_preds = BITMAP_ALLOC (NULL);
  worklist.create (n_basic_blocks_for_fn (cfun));
  find_same_succ ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "initial worklist:\n");
      print_worklist (dump_file);
    }
}

/* Deletes worklist administration.  */

static void
delete_worklist (void)
{
  free_aux_for_blocks ();
  delete same_succ_htab;
  same_succ_htab = NULL;
  XDELETEVEC (same_succ_edge_flags);
  same_succ_edge_flags = NULL;
  BITMAP_FREE (deleted_bbs);
  BITMAP_FREE (deleted_bb_preds);
  worklist.release ();
}

/* Mark BB as deleted, and mark its predecessors.  */

static void
mark_basic_block_deleted (basic_block bb)
{
  edge e;
  edge_iterator ei;

  bitmap_set_bit (deleted_bbs, bb->index);

  FOR_EACH_EDGE (e, ei, bb->preds)
    bitmap_set_bit (deleted_bb_preds, e->src->index);
}

/* Removes BB from its corresponding same_succ.  */

static void
same_succ_flush_bb (basic_block bb)
{
  same_succ *same = BB_SAME_SUCC (bb);
  if (! same)
    return;

  BB_SAME_SUCC (bb) = NULL;
  if (bitmap_single_bit_set_p (same->bbs))
    same_succ_htab->remove_elt_with_hash (same, same->hashval);
  else
    bitmap_clear_bit (same->bbs, bb->index);
}

/* Removes all bbs in BBS from their corresponding same_succ.  */

static void
same_succ_flush_bbs (bitmap bbs)
{
  unsigned int i;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
    same_succ_flush_bb (BASIC_BLOCK_FOR_FN (cfun, i));
}

/* Release the last vdef in BB, either normal or phi result.  */

static void
release_last_vdef (basic_block bb)
{
  for (gimple_stmt_iterator i = gsi_last_bb (bb); !gsi_end_p (i);
       gsi_prev_nondebug (&i))
    {
      gimple *stmt = gsi_stmt (i);
      if (gimple_vdef (stmt) == NULL_TREE)
	continue;

      mark_virtual_operand_for_renaming (gimple_vdef (stmt));
      return;
    }

  for (gphi_iterator i = gsi_start_phis (bb); !gsi_end_p (i);
       gsi_next (&i))
    {
      gphi *phi = i.phi ();
      tree res = gimple_phi_result (phi);

      if (!virtual_operand_p (res))
	continue;

      mark_virtual_phi_result_for_renaming (phi);
      return;
    }
}

/* For deleted_bb_preds, find bbs with same successors.  */

static void
update_worklist (void)
{
  unsigned int i;
  bitmap_iterator bi;
  basic_block bb;
  same_succ *same;

  bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
  bitmap_clear (deleted_bbs);

  bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
  same_succ_flush_bbs (deleted_bb_preds);

  same = same_succ_alloc ();
  EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      gcc_assert (bb != NULL);
      find_same_succ_bb (bb, &same);
      if (same == NULL)
	same = same_succ_alloc ();
    }
  same_succ::remove (same);
  bitmap_clear (deleted_bb_preds);
}

/* Prints cluster C to FILE.  */

static void
print_cluster (FILE *file, bb_cluster *c)
{
  if (c == NULL)
    return;
  bitmap_print (file, c->bbs, "bbs:", "\n");
  bitmap_print (file, c->preds, "preds:", "\n");
}

/* Prints cluster C to stderr.  */

extern void debug_cluster (bb_cluster *);
DEBUG_FUNCTION void
debug_cluster (bb_cluster *c)
{
  print_cluster (stderr, c);
}

/* Update C->rep_bb, given that BB is added to the cluster.  */

static void
update_rep_bb (bb_cluster *c, basic_block bb)
{
  /* Initial.  */
  if (c->rep_bb == NULL)
    {
      c->rep_bb = bb;
      return;
    }

  /* Current needs no deps, keep it.  */
  if (BB_DEP_BB (c->rep_bb) == NULL)
    return;

  /* Bb needs no deps, change rep_bb.  */
  if (BB_DEP_BB (bb) == NULL)
    {
      c->rep_bb = bb;
      return;
    }

  /* Bb needs last deps earlier than current, change rep_bb.  A potential
     problem with this is that the first deps might also be earlier, which
     would mean we prefer longer lifetimes for the deps.  To be able to check
     for this, we would have to trace BB_FIRST_DEP_BB as well, besides
     BB_DEP_BB, which is really BB_LAST_DEP_BB.
     The benefit of choosing the bb with last deps earlier is that it can
     potentially be used as replacement for more bbs.  */
  if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
    c->rep_bb = bb;
}
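
/* For instance (hypothetical bbs), if BB_DEP_BB (c->rep_bb) == bb4 and
   BB_DEP_BB (bb) == bb2, with bb2 dominating bb4, then bb's last dep is
   earlier: dominated_by_p above returns true and bb becomes the new rep_bb,
   since more potential predecessors are dominated by its deps.  */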

/* Add BB to cluster C.  Sets BB in C->bbs, and preds of BB in C->preds.  */

static void
add_bb_to_cluster (bb_cluster *c, basic_block bb)
{
  edge e;
  edge_iterator ei;

  bitmap_set_bit (c->bbs, bb->index);

  FOR_EACH_EDGE (e, ei, bb->preds)
    bitmap_set_bit (c->preds, e->src->index);

  update_rep_bb (c, bb);
}

/* Allocate and init new cluster.  */

static bb_cluster *
new_cluster (void)
{
  bb_cluster *c;
  c = XCNEW (bb_cluster);
  c->bbs = BITMAP_ALLOC (NULL);
  c->preds = BITMAP_ALLOC (NULL);
  c->rep_bb = NULL;
  return c;
}

/* Delete cluster C.  */

static void
delete_cluster (bb_cluster *c)
{
  if (c == NULL)
    return;
  BITMAP_FREE (c->bbs);
  BITMAP_FREE (c->preds);
  XDELETE (c);
}


/* Array that contains all clusters.  */

static vec<bb_cluster *> all_clusters;

/* Allocate all cluster vectors.  */

static void
alloc_cluster_vectors (void)
{
  all_clusters.create (n_basic_blocks_for_fn (cfun));
}

/* Reset all cluster vectors.  */

static void
reset_cluster_vectors (void)
{
  unsigned int i;
  basic_block bb;
  for (i = 0; i < all_clusters.length (); ++i)
    delete_cluster (all_clusters[i]);
  all_clusters.truncate (0);
  FOR_EACH_BB_FN (bb, cfun)
    BB_CLUSTER (bb) = NULL;
}

/* Delete all cluster vectors.  */

static void
delete_cluster_vectors (void)
{
  unsigned int i;
  for (i = 0; i < all_clusters.length (); ++i)
    delete_cluster (all_clusters[i]);
  all_clusters.release ();
}

/* Merge cluster C2 into C1.  */

static void
merge_clusters (bb_cluster *c1, bb_cluster *c2)
{
  bitmap_ior_into (c1->bbs, c2->bbs);
  bitmap_ior_into (c1->preds, c2->preds);
}

/* Register equivalence of BB1 and BB2 by putting them in the same cluster.
   Store the cluster in all_clusters, or merge it with an existing
   cluster.  */

static void
set_cluster (basic_block bb1, basic_block bb2)
{
  basic_block merge_bb, other_bb;
  bb_cluster *merge, *old, *c;

  if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
    {
      c = new_cluster ();
      add_bb_to_cluster (c, bb1);
      add_bb_to_cluster (c, bb2);
      BB_CLUSTER (bb1) = c;
      BB_CLUSTER (bb2) = c;
      c->index = all_clusters.length ();
      all_clusters.safe_push (c);
    }
  else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
    {
      merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
      other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
      merge = BB_CLUSTER (merge_bb);
      add_bb_to_cluster (merge, other_bb);
      BB_CLUSTER (other_bb) = merge;
    }
  else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
    {
      unsigned int i;
      bitmap_iterator bi;

      old = BB_CLUSTER (bb2);
      merge = BB_CLUSTER (bb1);
      merge_clusters (merge, old);
      EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
	BB_CLUSTER (BASIC_BLOCK_FOR_FN (cfun, i)) = merge;
      all_clusters[old->index] = NULL;
      update_rep_bb (merge, old->rep_bb);
      delete_cluster (old);
    }
  else
    gcc_unreachable ();
}

/* Return true if gimple operands T1 and T2 have the same value.  */

static bool
gimple_operand_equal_value_p (tree t1, tree t2)
{
  if (t1 == t2)
    return true;

  if (t1 == NULL_TREE
      || t2 == NULL_TREE)
    return false;

  if (operand_equal_p (t1, t2, OEP_MATCH_SIDE_EFFECTS))
    return true;

  return gvn_uses_equal (t1, t2);
}
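
/* For example (hypothetical SSA names), after

     x_1 = a_3 + 1;
     y_2 = a_3 + 1;

   SCCVN will typically valueize x_1 and y_2 to the same value, so
   gimple_operand_equal_value_p (x_1, y_2) returns true via gvn_uses_equal,
   even though the names themselves differ.  */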

/* Return true if gimple statements S1 and S2 are equal.  Gimple_bb (s1) and
   gimple_bb (s2) are members of SAME_SUCC.  */

static bool
gimple_equal_p (same_succ *same_succ, gimple *s1, gimple *s2)
{
  unsigned int i;
  tree lhs1, lhs2;
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
  tree t1, t2;
  bool inv_cond;
  enum tree_code code1, code2;

  if (gimple_code (s1) != gimple_code (s2))
    return false;

  switch (gimple_code (s1))
    {
    case GIMPLE_CALL:
      if (!gimple_call_same_target_p (s1, s2))
	return false;

      t1 = gimple_call_chain (s1);
      t2 = gimple_call_chain (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
	return false;

      if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
	return false;

      for (i = 0; i < gimple_call_num_args (s1); ++i)
	{
	  t1 = gimple_call_arg (s1, i);
	  t2 = gimple_call_arg (s2, i);
	  if (!gimple_operand_equal_value_p (t1, t2))
	    return false;
	}

      lhs1 = gimple_get_lhs (s1);
      lhs2 = gimple_get_lhs (s2);
      if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
	return true;
      if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
	return false;
      if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
	return vn_valueize (lhs1) == vn_valueize (lhs2);
      return operand_equal_p (lhs1, lhs2, 0);

    case GIMPLE_ASSIGN:
      lhs1 = gimple_get_lhs (s1);
      lhs2 = gimple_get_lhs (s2);
      if (TREE_CODE (lhs1) != SSA_NAME
	  && TREE_CODE (lhs2) != SSA_NAME)
	return (operand_equal_p (lhs1, lhs2, 0)
		&& gimple_operand_equal_value_p (gimple_assign_rhs1 (s1),
						 gimple_assign_rhs1 (s2)));
      else if (TREE_CODE (lhs1) == SSA_NAME
	       && TREE_CODE (lhs2) == SSA_NAME)
	return operand_equal_p (gimple_assign_rhs1 (s1),
				gimple_assign_rhs1 (s2), 0);
      return false;

    case GIMPLE_COND:
      t1 = gimple_cond_lhs (s1);
      t2 = gimple_cond_lhs (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
	return false;

      t1 = gimple_cond_rhs (s1);
      t2 = gimple_cond_rhs (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
	return false;

      code1 = gimple_expr_code (s1);
      code2 = gimple_expr_code (s2);
      inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
		  != bitmap_bit_p (same_succ->inverse, bb2->index));
      if (inv_cond)
	{
	  bool honor_nans = HONOR_NANS (t1);
	  code2 = invert_tree_comparison (code2, honor_nans);
	}
      return code1 == code2;

    default:
      return false;
    }
}
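
/* As an illustration of the GIMPLE_COND case (hypothetical names):
   'if (a_1 < b_2)' in bb1 and 'if (a_1 >= b_2)' in bb2, with the true/false
   successor edges swapped, differ only in an inverted condition.  Exactly
   one of the bbs is then marked in same_succ->inverse, so
   invert_tree_comparison maps GE_EXPR back to LT_EXPR (valid here for
   integer operands, where NaNs need not be honored), and the statements
   compare equal.  */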

/* Let GSI skip backwards over local defs.  Return the earliest vuse in VUSE.
   Set VUSE_ESCAPED to true if the vuse influenced an SSA_OP_DEF of one of
   the processed statements.  */

static void
gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
				  bool *vuse_escaped)
{
  gimple *stmt;
  tree lvuse;

  while (true)
    {
      if (gsi_end_p (*gsi))
	return;
      stmt = gsi_stmt (*gsi);

      lvuse = gimple_vuse (stmt);
      if (lvuse != NULL_TREE)
	{
	  *vuse = lvuse;
	  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
	    *vuse_escaped = true;
	}

      if (!stmt_local_def (stmt))
	return;
      gsi_prev_nondebug (gsi);
    }
}

/* Return true if equal (in the sense of gimple_equal_p) statements STMT1 and
   STMT2 are allowed to be merged.  */

static bool
merge_stmts_p (gimple *stmt1, gimple *stmt2)
{
  /* A better approach would be to blacklist the bb containing the stmt,
     when encountering the stmt e.g. in same_succ_hash.  */
  if (is_tm_ending (stmt1))
    return false;

  /* Verify EH landing pads.  */
  if (lookup_stmt_eh_lp_fn (cfun, stmt1) != lookup_stmt_eh_lp_fn (cfun, stmt2))
    return false;

  if (is_gimple_call (stmt1)
      && gimple_call_internal_p (stmt1))
    switch (gimple_call_internal_fn (stmt1))
      {
      case IFN_UBSAN_NULL:
      case IFN_UBSAN_BOUNDS:
      case IFN_UBSAN_VPTR:
      case IFN_UBSAN_CHECK_ADD:
      case IFN_UBSAN_CHECK_SUB:
      case IFN_UBSAN_CHECK_MUL:
      case IFN_UBSAN_OBJECT_SIZE:
      case IFN_ASAN_CHECK:
	/* For these internal functions, gimple_location is an implicit
	   parameter, which will be used explicitly after expansion.
	   Merging these statements may cause confusing line numbers in
	   sanitizer messages.  */
	return gimple_location (stmt1) == gimple_location (stmt2);
      default:
	break;
      }

  return true;
}

/* Determines whether BB1 and BB2 (members of same_succ) are duplicates.  If
   so, clusters them.  */

static void
find_duplicate (same_succ *same_succ, basic_block bb1, basic_block bb2)
{
  gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
  gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
  tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
  bool vuse_escaped = false;

  gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
  gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);

  while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
    {
      gimple *stmt1 = gsi_stmt (gsi1);
      gimple *stmt2 = gsi_stmt (gsi2);

      if (gimple_code (stmt1) == GIMPLE_LABEL
	  && gimple_code (stmt2) == GIMPLE_LABEL)
	break;

      if (!gimple_equal_p (same_succ, stmt1, stmt2))
	return;

      if (!merge_stmts_p (stmt1, stmt2))
	return;

      gsi_prev_nondebug (&gsi1);
      gsi_prev_nondebug (&gsi2);
      gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
      gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
    }

  while (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
    {
      tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi1)));
      if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
	return;
      gsi_prev (&gsi1);
    }
  while (!gsi_end_p (gsi2) && gimple_code (gsi_stmt (gsi2)) == GIMPLE_LABEL)
    {
      tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi2)));
      if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
	return;
      gsi_prev (&gsi2);
    }
  if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
    return;

  /* If the incoming vuses are not the same, and the vuse escaped into an
     SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
     which potentially means the semantics of one of the blocks will be
     changed.  TODO: make this check more precise.  */
  if (vuse_escaped && vuse1 != vuse2)
    return;

  if (dump_file)
    fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
	     bb1->index, bb2->index);

  set_cluster (bb1, bb2);
}

/* Returns whether for all phis in DEST the phi alternatives for E1 and
   E2 are equal.  */

static bool
same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
{
  int n1 = e1->dest_idx, n2 = e2->dest_idx;
  gphi_iterator gsi;

  for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree lhs = gimple_phi_result (phi);
      tree val1 = gimple_phi_arg_def (phi, n1);
      tree val2 = gimple_phi_arg_def (phi, n2);

      if (virtual_operand_p (lhs))
	continue;

      if (operand_equal_for_phi_arg_p (val1, val2))
	continue;
      if (gvn_uses_equal (val1, val2))
	continue;

      return false;
    }

  return true;
}
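
/* In the motivational example at the top of the file, bb 7 has the phi

     ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>

   and the alternatives for the edges from bb 3 and bb 5 are both 0B, so
   same_phi_alternatives_1 does not stand in the way of merging bb 3 and
   bb 5.  */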

/* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC),
   the phi alternatives for BB1 and BB2 are equal.  */

static bool
same_phi_alternatives (same_succ *same_succ, basic_block bb1, basic_block bb2)
{
  unsigned int s;
  bitmap_iterator bs;
  edge e1, e2;
  basic_block succ;

  EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
    {
      succ = BASIC_BLOCK_FOR_FN (cfun, s);
      e1 = find_edge (bb1, succ);
      e2 = find_edge (bb2, succ);
      if (e1->flags & EDGE_COMPLEX
	  || e2->flags & EDGE_COMPLEX)
	return false;

      /* For all phis in bb, the phi alternatives for e1 and e2 need to have
	 the same value.  */
      if (!same_phi_alternatives_1 (succ, e1, e2))
	return false;
    }

  return true;
}

/* Return true if BB has non-vop phis.  */

static bool
bb_has_non_vop_phi (basic_block bb)
{
  gimple_seq phis = phi_nodes (bb);
  gimple *phi;

  if (phis == NULL)
    return false;

  if (!gimple_seq_singleton_p (phis))
    return true;

  phi = gimple_seq_first_stmt (phis);
  return !virtual_operand_p (gimple_phi_result (phi));
}

/* Returns true if redirecting the incoming edges of FROM to TO maintains the
   invariant that uses in TO are dominated by their defs.  */

static bool
deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
{
  basic_block cd, dep_bb = BB_DEP_BB (to);
  edge_iterator ei;
  edge e;

  if (dep_bb == NULL)
    return true;

  bitmap from_preds = BITMAP_ALLOC (NULL);
  FOR_EACH_EDGE (e, ei, from->preds)
    bitmap_set_bit (from_preds, e->src->index);
  cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
  BITMAP_FREE (from_preds);

  /* The defs that TO depends on must dominate all of FROM's preds, i.e.
     dep_bb must dominate their nearest common dominator CD.  */
  return dominated_by_p (CDI_DOMINATORS, cd, dep_bb);
}

/* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
   replacement bb) and vice versa maintains the invariant that uses in the
   replacement are dominated by their defs.  */

static bool
deps_ok_for_redirect (basic_block bb1, basic_block bb2)
{
  if (BB_CLUSTER (bb1) != NULL)
    bb1 = BB_CLUSTER (bb1)->rep_bb;

  if (BB_CLUSTER (bb2) != NULL)
    bb2 = BB_CLUSTER (bb2)->rep_bb;

  return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
	  && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
}

/* Within SAME_SUCC->bbs, find clusters of bbs which can be merged.  */

static void
find_clusters_1 (same_succ *same_succ)
{
  basic_block bb1, bb2;
  unsigned int i, j;
  bitmap_iterator bi, bj;
  int nr_comparisons;
  int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);

  EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
    {
      bb1 = BASIC_BLOCK_FOR_FN (cfun, i);

      /* TODO: handle blocks with phi-nodes.  We'll have to find
	 corresponding phi-nodes in bb1 and bb2, with the same alternatives
	 for the same preds.  */
      if (bb_has_non_vop_phi (bb1) || bb_has_eh_pred (bb1)
	  || bb_has_abnormal_pred (bb1))
	continue;

      nr_comparisons = 0;
      EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
	{
	  bb2 = BASIC_BLOCK_FOR_FN (cfun, j);

	  if (bb_has_non_vop_phi (bb2) || bb_has_eh_pred (bb2)
	      || bb_has_abnormal_pred (bb2))
	    continue;

	  if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
	    continue;

	  /* Limit quadratic behavior.  */
	  nr_comparisons++;
	  if (nr_comparisons > max_comparisons)
	    break;

	  /* This is a conservative dependency check.  We could test more
	     precisely for the allowed replacement direction.  */
	  if (!deps_ok_for_redirect (bb1, bb2))
	    continue;

	  if (!(same_phi_alternatives (same_succ, bb1, bb2)))
	    continue;

	  find_duplicate (same_succ, bb1, bb2);
	}
    }
}

/* Find clusters of bbs which can be merged.  */

static void
find_clusters (void)
{
  same_succ *same;

  while (!worklist.is_empty ())
    {
      same = worklist.pop ();
      same->in_worklist = false;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "processing worklist entry\n");
	  same_succ_print (dump_file, same);
	}
      find_clusters_1 (same);
    }
}

/* Returns the vop phi of BB, if any.  */

static gphi *
vop_phi (basic_block bb)
{
  gphi *stmt;
  gphi_iterator gsi;
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt = gsi.phi ();
      if (! virtual_operand_p (gimple_phi_result (stmt)))
	continue;
      return stmt;
    }
  return NULL;
}

/* Redirect all edges from BB1 to BB2, remove BB1 and mark it as removed.  */

static void
replace_block_by (basic_block bb1, basic_block bb2)
{
  edge pred_edge;
  edge e1, e2;
  edge_iterator ei;
  unsigned int i;
  gphi *bb2_phi;

  bb2_phi = vop_phi (bb2);

  /* Mark the basic block as deleted.  */
  mark_basic_block_deleted (bb1);

  /* Redirect the incoming edges of bb1 to bb2.  */
  for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
    {
      pred_edge = EDGE_PRED (bb1, i - 1);
      pred_edge = redirect_edge_and_branch (pred_edge, bb2);
      gcc_assert (pred_edge != NULL);

      if (bb2_phi == NULL)
	continue;

      /* The phi might have run out of capacity when the redirect added an
	 argument, which means it could have been replaced.  Refresh it.  */
      bb2_phi = vop_phi (bb2);

      add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
		   pred_edge, UNKNOWN_LOCATION);
    }

  bb2->frequency += bb1->frequency;
  if (bb2->frequency > BB_FREQ_MAX)
    bb2->frequency = BB_FREQ_MAX;

  bb2->count += bb1->count;

  /* Merge the outgoing edge counts from bb1 onto bb2.  */
  gcov_type out_sum = 0;
  FOR_EACH_EDGE (e1, ei, bb1->succs)
    {
      e2 = find_edge (bb2, e1->dest);
      gcc_assert (e2);
      e2->count += e1->count;
      out_sum += e2->count;
    }
  /* Recompute the edge probabilities from the new merged edge count.
     Use the sum of the new merged edge counts computed above instead
     of bb2's merged count, in case there are profile count insanities
     making the bb count inconsistent with the edge weights.  E.g. merged
     counts of 30 and 70 scale to probabilities of 30% and 70%.  */
  FOR_EACH_EDGE (e2, ei, bb2->succs)
    {
      e2->probability = GCOV_COMPUTE_SCALE (e2->count, out_sum);
    }

  /* Move over any user labels from bb1 after the bb2 labels.  */
  gimple_stmt_iterator gsi1 = gsi_start_bb (bb1);
  if (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
    {
      gimple_stmt_iterator gsi2 = gsi_after_labels (bb2);
      while (!gsi_end_p (gsi1)
	     && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
	{
	  tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi1)));
	  gcc_assert (!DECL_NONLOCAL (label) && !FORCED_LABEL (label));
	  if (DECL_ARTIFICIAL (label))
	    gsi_next (&gsi1);
	  else
	    gsi_move_before (&gsi1, &gsi2);
	}
    }

  /* Clear range info from all stmts in BB2 -- this transformation
     could make them out of date.  */
  reset_flow_sensitive_info_in_bb (bb2);

  /* Do updates that use bb1, before deleting bb1.  */
  release_last_vdef (bb1);
  same_succ_flush_bb (bb1);

  delete_basic_block (bb1);
}

/* Bbs for which update_debug_stmt needs to be called.  */

static bitmap update_bbs;

/* For each cluster in all_clusters, merge all cluster->bbs.  Returns
   number of bbs removed.  */

static int
apply_clusters (void)
{
  basic_block bb1, bb2;
  bb_cluster *c;
  unsigned int i, j;
  bitmap_iterator bj;
  int nr_bbs_removed = 0;

  for (i = 0; i < all_clusters.length (); ++i)
    {
      c = all_clusters[i];
      if (c == NULL)
	continue;

      bb2 = c->rep_bb;
      bitmap_set_bit (update_bbs, bb2->index);

      bitmap_clear_bit (c->bbs, bb2->index);
      EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
	{
	  bb1 = BASIC_BLOCK_FOR_FN (cfun, j);
	  bitmap_clear_bit (update_bbs, bb1->index);

	  replace_block_by (bb1, bb2);
	  nr_bbs_removed++;
	}
    }

  return nr_bbs_removed;
}

/* Resets debug statement STMT if it has uses that are not dominated by their
   defs.  */

static void
update_debug_stmt (gimple *stmt)
{
  use_operand_p use_p;
  ssa_op_iter oi;
  basic_block bbuse;

  if (!gimple_debug_bind_p (stmt))
    return;

  bbuse = gimple_bb (stmt);
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
    {
      tree name = USE_FROM_PTR (use_p);
      gimple *def_stmt = SSA_NAME_DEF_STMT (name);
      basic_block bbdef = gimple_bb (def_stmt);
      if (bbdef == NULL || bbuse == bbdef
	  || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
	continue;

      gimple_debug_bind_reset_value (stmt);
      update_stmt (stmt);
      break;
    }
}

/* Resets all debug statements that have uses that are not
   dominated by their defs.  */

static void
update_debug_stmts (void)
{
  basic_block bb;
  bitmap_iterator bi;
  unsigned int i;

  EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
    {
      gimple *stmt;
      gimple_stmt_iterator gsi;

      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  stmt = gsi_stmt (gsi);
	  if (!is_gimple_debug (stmt))
	    continue;
	  update_debug_stmt (stmt);
	}
    }
}

/* Runs tail merge optimization.  */

unsigned int
tail_merge_optimize (unsigned int todo)
{
  int nr_bbs_removed_total = 0;
  int nr_bbs_removed;
  bool loop_entered = false;
  int iteration_nr = 0;
  int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);

  if (!flag_tree_tail_merge
      || max_iterations == 0)
    return 0;

  timevar_push (TV_TREE_TAIL_MERGE);

  if (!dom_info_available_p (CDI_DOMINATORS))
    {
      /* PRE can leave us with unreachable blocks, remove them now.  */
      delete_unreachable_blocks ();
      calculate_dominance_info (CDI_DOMINATORS);
    }
  init_worklist ();

  while (!worklist.is_empty ())
    {
      if (!loop_entered)
	{
	  loop_entered = true;
	  alloc_cluster_vectors ();
	  update_bbs = BITMAP_ALLOC (NULL);
	}
      else
	reset_cluster_vectors ();

      iteration_nr++;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);

      find_clusters ();
      gcc_assert (worklist.is_empty ());
      if (all_clusters.is_empty ())
	break;

      nr_bbs_removed = apply_clusters ();
      nr_bbs_removed_total += nr_bbs_removed;
      if (nr_bbs_removed == 0)
	break;

      free_dominance_info (CDI_DOMINATORS);

      if (iteration_nr == max_iterations)
	break;

      calculate_dominance_info (CDI_DOMINATORS);
      update_worklist ();
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "htab collision / search: %f\n",
	     same_succ_htab->collisions ());

  if (nr_bbs_removed_total > 0)
    {
      if (MAY_HAVE_DEBUG_STMTS)
	{
	  calculate_dominance_info (CDI_DOMINATORS);
	  update_debug_stmts ();
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Before TODOs.\n");
	  dump_function_to_file (current_function_decl, dump_file,
				 dump_flags);
	}

      mark_virtual_operands_for_renaming (cfun);
    }

  delete_worklist ();
  if (loop_entered)
    {
      delete_cluster_vectors ();
      BITMAP_FREE (update_bbs);
    }

  timevar_pop (TV_TREE_TAIL_MERGE);

  return todo;
}