/* Shrink-wrapping related optimizations.
   Copyright (C) 1987-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This file handles shrink-wrapping related optimizations.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "cfghooks.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "regs.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "output.h"
#include "tree-pass.h"
#include "cfgrtl.h"
#include "cfgbuild.h"
#include "bb-reorder.h"
#include "shrink-wrap.h"
#include "regcprop.h"
#include "rtl-iter.h"
#include "valtrack.h"
#include "function-abi.h"

/* Return true if INSN requires the stack frame to be set up.
   PROLOGUE_USED contains the hard registers used in the function
   prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
   prologue to set up for the function.  */
bool
requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
			HARD_REG_SET set_up_by_prologue)
{
  df_ref def, use;
  HARD_REG_SET hardregs;
  unsigned regno;

  /* Non-sibling calls need the frame; sibling calls do not.  */
  if (CALL_P (insn))
    return !SIBLING_CALL_P (insn);

  /* We need a frame to get the unique CFA expected by the unwinder.  */
  if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
    return true;

  /* Collect all hard registers defined by INSN and check whether any of
     them is also used by the prologue.  */
  CLEAR_HARD_REG_SET (hardregs);
  FOR_EACH_INSN_DEF (def, insn)
    {
      rtx dreg = DF_REF_REG (def);

      if (!REG_P (dreg))
	continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
    }
  if (hard_reg_set_intersect_p (hardregs, prologue_used))
    return true;
  /* A def of a call-saved (i.e. not call-clobbered) register that is
     live anywhere means its save/restore belongs in the prologue.  */
  hardregs &= ~crtl->abi->full_reg_clobbers ();
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (TEST_HARD_REG_BIT (hardregs, regno)
	&& df_regs_ever_live_p (regno))
      return true;

  /* Add INSN's uses on top of its defs (HARDREGS is not cleared here)
     and test against the registers the prologue sets up.  */
  FOR_EACH_INSN_USE (use, insn)
    {
      rtx reg = DF_REF_REG (use);

      if (!REG_P (reg))
	continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (reg),
			   REGNO (reg));
    }
  if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
    return true;

  return false;
}

/* See whether there is a single live edge from BB, whose dest uses
   [REGNO, END_REGNO).  Return the live edge if its dest bb has
   one or two predecessors.  Otherwise return NULL.  */

static edge
live_edge_for_reg (basic_block bb, int regno, int end_regno)
{
  edge e, live_edge;
  edge_iterator ei;
  bitmap live;
  int i;

  live_edge = NULL;
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      live = df_get_live_in (e->dest);
      for (i = regno; i < end_regno; i++)
	if (REGNO_REG_SET_P (live, i))
	  {
	    /* More than one live successor: no unique destination.  */
	    if (live_edge && live_edge != e)
	      return NULL;
	    live_edge = e;
	  }
    }

  /* We can sometimes encounter dead code.  Don't try to move it
     into the exit block.  */
  if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
    return NULL;

  /* Reject targets of abnormal edges.  This is needed for correctness
     on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
     exception edges even though it is generally treated as call-saved
     for the majority of the compilation.
     Moving across abnormal edges isn't going to be interesting for
     shrink-wrap usage anyway.  */
  if (live_edge->flags & EDGE_ABNORMAL)
    return NULL;

  /* When live_edge->dest->preds == 2, we can create a new block on
     the edge to make it meet the requirement.  */
  if (EDGE_COUNT (live_edge->dest->preds) > 2)
    return NULL;

  return live_edge;
}

/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the set of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   is split or not.  */

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
			   const_hard_reg_set uses,
			   const_hard_reg_set defs,
			   bool *split_p,
			   struct dead_debug_local *debug)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses = NULL, bb_defs = NULL;
  unsigned int i, dregno, end_dregno;
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;
  rtx_insn *dinsn;
  df_ref def;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  if (!INSN_P (insn))
    return false;
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
     (1) A (non-overlapping) register
     (2) A constant,
     (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */

  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      rtx src_inner = NULL_RTX;

      if (can_throw_internal (insn))
	return false;

      /* Walk all sub-rtxes of SRC; accept at most one inner REG and only
	 expression-like codes around it.  */
      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
	{
	  rtx x = *iter;
	  switch (GET_RTX_CLASS (GET_CODE (x)))
	    {
	    case RTX_CONST_OBJ:
	    case RTX_COMPARE:
	    case RTX_COMM_COMPARE:
	    case RTX_BIN_ARITH:
	    case RTX_COMM_ARITH:
	    case RTX_UNARY:
	    case RTX_TERNARY:
	      /* Constant or expression.  Continue.  */
	      break;

	    case RTX_OBJ:
	    case RTX_EXTRA:
	      switch (GET_CODE (x))
		{
		case UNSPEC:
		case SUBREG:
		case STRICT_LOW_PART:
		case PC:
		case LO_SUM:
		  /* Ok.  Continue.  */
		  break;

		case REG:
		  /* Fail if we see a second inner register.  */
		  if (src_inner != NULL)
		    return false;
		  src_inner = x;
		  break;

		default:
		  return false;
		}
	      break;

	    default:
	      return false;
	    }
	}

      if (src_inner != NULL)
	src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
	return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
	return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
	return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
	 all data structures are allocated.  */
      df_grow_bb_info (df_live);

      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
		  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once for a function.  */
      if (*split_p)
	return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  */
  do
    {
      if (MAY_HAVE_DEBUG_BIND_INSNS)
	{
	  /* Record debug uses of DEST between INSN and the end of BB so
	     they can be patched up after the move.  */
	  FOR_BB_INSNS_REVERSE (bb, dinsn)
	    if (DEBUG_BIND_INSN_P (dinsn))
	      {
		df_ref use;
		FOR_EACH_INSN_USE (use, dinsn)
		  if (refers_to_regno_p (dregno, end_dregno,
					 DF_REF_REG (use), (rtx *) NULL))
		    dead_debug_add (debug, use, DF_REF_REGNO (use));
	      }
	    else if (dinsn == insn)
	      break;
	}
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
	 INSN to BB if so.  Either way, DEST is no longer live on entry,
	 except for any part that overlaps SRC (next loop).  */
      if (!*split_p)
	{
	  bb_uses = &DF_LR_BB_INFO (bb)->use;
	  bb_defs = &DF_LR_BB_INFO (bb)->def;
	}
      if (df_live)
	{
	  for (i = dregno; i < end_dregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_uses, i)
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
	     Either way, SRC is now live on entry.  */
	  for (i = sregno; i < end_sregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}
      else
	{
	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
	     at -O1, just give up searching NEXT_BLOCK.  */
	  next_block = NULL;
	  for (i = dregno; i < end_dregno; i++)
	    {
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  for (i = sregno; i < end_sregno; i++)
	    {
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}

      /* If we don't need to add the move to BB, look for a single
	 successor block.  */
      if (next_block)
	{
	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
	    break;
	  next_block = live_edge->dest;
	}
    }
  while (next_block);

  /* For the newly created basic block, there is no dataflow info at all.
     So skip the following dataflow update and check.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
	 (next loop).  */
      for (i = dregno; i < end_dregno; i++)
	{
	  CLEAR_REGNO_REG_SET (bb_uses, i);
	  SET_REGNO_REG_SET (bb_defs, i);
	}

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
	SET_REGNO_REG_SET (bb_uses, i);
    }

  /* Insert debug temps for dead REGs used in subsequent debug insns.  */
  if (debug->used && !bitmap_empty_p (debug->used))
    FOR_EACH_INSN_DEF (def, insn)
      dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
			      DEBUG_TEMP_BEFORE_WITH_VALUE);

  rtx_insn *insn_copy = emit_insn_after (PATTERN (insn), bb_note (bb));
  /* Update the LABEL_NUSES count on any referenced labels.  The ideal
     solution here would be to actually move the instruction instead
     of copying/deleting it as this loses some notations on the
     insn.  */
  mark_jump_label (PATTERN (insn), insn_copy, 0);
  delete_insn (insn);
  return true;
}

/* Look for register copies in the first block of the function, and move
   them down into successor blocks if the register is used only on one
   path.  This exposes more opportunities for shrink-wrapping.  These
   kinds of sets often occur when incoming argument registers are moved
   to call-saved registers because their values are live across one or
   more calls during the function.  */

static void
prepare_shrink_wrap (basic_block entry_block)
{
  rtx_insn *insn, *curr;
  rtx x;
  HARD_REG_SET uses, defs;
  df_ref def, use;
  bool split_p = false;
  unsigned int i;
  struct dead_debug_local debug;

  if (JUMP_P (BB_END (entry_block)))
    {
      /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
	 to sink the copies from parameter to callee saved register out of
	 entry block.  copyprop_hardreg_forward_bb_without_debug_insn is called
	 to release some dependences.
	 */
      copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
    }

  dead_debug_local_init (&debug, NULL, NULL);
  CLEAR_HARD_REG_SET (uses);
  CLEAR_HARD_REG_SET (defs);

  /* Walk the entry block backwards; whenever an insn cannot be sunk,
     accumulate its defs and uses so later (earlier in the block) insns
     know what they must not overlap with.  */
  FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
    if (NONDEBUG_INSN_P (insn)
	&& !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
				       &split_p, &debug))
      {
	/* Add all defined registers to DEFs.  */
	FOR_EACH_INSN_DEF (def, insn)
	  {
	    x = DF_REF_REG (def);
	    if (REG_P (x) && HARD_REGISTER_P (x))
	      for (i = REGNO (x); i < END_REGNO (x); i++)
		SET_HARD_REG_BIT (defs, i);
	  }

	/* Add all used registers to USESs.  */
	FOR_EACH_INSN_USE (use, insn)
	  {
	    x = DF_REF_REG (use);
	    if (REG_P (x) && HARD_REGISTER_P (x))
	      for (i = REGNO (x); i < END_REGNO (x); i++)
		SET_HARD_REG_BIT (uses, i);
	  }
      }

  dead_debug_local_finish (&debug, NULL);
}

/* Return whether basic block PRO can get the prologue.  It cannot if it
   has incoming complex edges that need a prologue inserted (we make a new
   block for the prologue, so those edges would need to be redirected, which
   does not work).  It also cannot if there exist registers live on entry
   to PRO that are clobbered by the prologue.  */

static bool
can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered)
{
  edge e;
  edge_iterator ei;
  FOR_EACH_EDGE (e, ei, pro->preds)
    if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
	&& !dominated_by_p (CDI_DOMINATORS, e->src, pro))
      return false;

  HARD_REG_SET live;
  REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro));
  if (hard_reg_set_intersect_p (live, prologue_clobbered))
    return false;

  return true;
}

/* Return whether we can duplicate basic block BB for shrink wrapping.  We
   cannot if the block cannot be duplicated at all, or if any of its incoming
   edges are complex and come from a block that does not require a prologue
   (we cannot redirect such edges), or if the block is too big to copy.
   PRO is the basic block before which we would put the prologue, MAX_SIZE is
   the maximum size block we allow to be copied.  */

static bool
can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size)
{
  if (!can_duplicate_block_p (bb))
    return false;

  edge e;
  edge_iterator ei;
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
	&& !dominated_by_p (CDI_DOMINATORS, e->src, pro))
      return false;

  /* Sum the minimum instruction lengths; bail out early once the copy
     would exceed MAX_SIZE.  */
  unsigned size = 0;

  rtx_insn *insn;
  FOR_BB_INSNS (bb, insn)
    if (NONDEBUG_INSN_P (insn))
      {
	size += get_attr_min_length (insn);
	if (size > max_size)
	  return false;
      }

  return true;
}

/* Do whatever needs to be done for exits that run without prologue.
   Sibcalls need nothing done.  Normal exits get a simple_return inserted.  */

static void
handle_simple_exit (edge e)
{

  if (e->flags & EDGE_SIBCALL)
    {
      /* Tell function.c to take no further action on this edge.  */
      e->flags |= EDGE_IGNORE;

      e->flags &= ~EDGE_FALLTHRU;
      emit_barrier_after_bb (e->src);
      return;
    }

  /* If the basic block the edge comes from has multiple successors,
     split the edge.  */
  if (EDGE_COUNT (e->src->succs) > 1)
    {
      basic_block old_bb = e->src;
      rtx_insn *end = BB_END (old_bb);
      rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end);
      basic_block new_bb = create_basic_block (note, note, old_bb);
      BB_COPY_PARTITION (new_bb, old_bb);
      BB_END (old_bb) = end;

      redirect_edge_succ (e, new_bb);
      new_bb->count = e->count ();
      e->flags |= EDGE_FALLTHRU;

      e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
    }

  e->flags &= ~EDGE_FALLTHRU;
  rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (),
					     BB_END (e->src));
  JUMP_LABEL (ret) = simple_return_rtx;
  emit_barrier_after_bb (e->src);

  if (dump_file)
    fprintf (dump_file, "Made simple_return with UID %d in bb %d\n",
	     INSN_UID (ret), e->src->index);
}

/* Try to perform a kind of shrink-wrapping, making sure the
   prologue/epilogue is emitted only around those parts of the
   function that require it.

   There will be exactly one prologue, and it will be executed either
   zero or one time, on any path.  Depending on where the prologue is
   placed, some of the basic blocks can be reached via both paths with
   and without a prologue.  Such blocks will be duplicated here, and the
   edges changed to match.

   Paths that go to the exit without going through the prologue will use
   a simple_return instead of the epilogue.  We maximize the number of
   those, making sure to only duplicate blocks that can be duplicated.
   If the prologue can then still be placed in multiple locations, we
   place it as early as possible.

   An example, where we duplicate blocks with control flow (legend:
   _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should
   be taken to point down or to the right, to simplify the diagram; here,
   block 3 needs a prologue, the rest does not):


       B                 B
       |                 |
       2                 2
       |\                |\
       | 3    becomes    | 3
       |/                |  \
       4                 7   4
       |\                |\  |\
       | 5               | 8 | 5
       |/                |/  |/
       6                 9   6
       |                 |   |
       R                 S   R


   (bb 4 is duplicated to 7, and so on; the prologue is inserted on the
   edge 2->3).

   Another example, where part of a loop is duplicated (again, bb 3 is
   the only block that needs a prologue):


      B   3<--              B       ->3<--
      |   |   |             |      |  |   |
      |   v   |   becomes   |      |  v   |
      2---4---              2---5--   4---
          |                     |
          R                     S


   (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3).

   ENTRY_EDGE is the edge where the prologue will be placed, possibly
   changed by this function.  PROLOGUE_SEQ is the prologue we will insert.  */

void
try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq)
{
  /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes
     no sense to shrink-wrap: then do not shrink-wrap!  */

  if (!SHRINK_WRAPPING_ENABLED)
    return;

  if (crtl->profile && !targetm.profile_before_prologue ())
    return;

  if (crtl->calls_eh_return)
    return;

  /* A prologue consisting only of the PROLOGUE_END note is empty.  */
  bool empty_prologue = true;
  for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
    if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END))
      {
	empty_prologue = false;
	break;
      }
  if (empty_prologue)
    return;

  /* Move some code down to expose more shrink-wrapping opportunities.  */

  basic_block entry = (*entry_edge)->dest;
  prepare_shrink_wrap (entry);

  if (dump_file)
    fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");

  /* Compute the registers set and used in the prologue.  */

  HARD_REG_SET prologue_clobbered, prologue_used;
  CLEAR_HARD_REG_SET (prologue_clobbered);
  CLEAR_HARD_REG_SET (prologue_used);
  for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn))
      {
	HARD_REG_SET this_used;
	CLEAR_HARD_REG_SET (this_used);
	note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used);
	/* A register already clobbered earlier in the prologue is an
	   internal use, not a use of the pre-prologue value.  */
	this_used &= ~prologue_clobbered;
	prologue_used |= this_used;
	note_stores (insn, record_hard_reg_sets, &prologue_clobbered);
      }
  CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
  if (frame_pointer_needed)
    CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);

  /* Find out what registers are set up by the prologue; any use of these
     cannot happen before the prologue.  */

  struct hard_reg_set_container set_up_by_prologue;
  CLEAR_HARD_REG_SET (set_up_by_prologue.set);
  add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM);
  add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
  if (frame_pointer_needed)
    add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			 HARD_FRAME_POINTER_REGNUM);
  if (pic_offset_table_rtx
      && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			 PIC_OFFSET_TABLE_REGNUM);
  if (crtl->drap_reg)
    add_to_hard_reg_set (&set_up_by_prologue.set,
			 GET_MODE (crtl->drap_reg),
			 REGNO (crtl->drap_reg));
  if (targetm.set_up_by_prologue)
    targetm.set_up_by_prologue (&set_up_by_prologue);

  /* We will insert the prologue before the basic block PRO.  PRO should
     dominate all basic blocks that need the prologue to be executed
     before them.  First, make PRO the "tightest wrap" possible.  */

  calculate_dominance_info (CDI_DOMINATORS);

  basic_block pro = 0;

  basic_block bb;
  edge e;
  edge_iterator ei;
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn)
	    && requires_stack_frame_p (insn, prologue_used,
				       set_up_by_prologue.set))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Block %d needs the prologue.\n", bb->index);
	    pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb);
	    break;
	  }
    }

  /* If nothing needs a prologue, just put it at the start.  This really
     shouldn't happen, but we cannot fix it here.  */

  if (pro == 0)
    {
      if (dump_file)
	fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; "
			   "putting it at the start.\n");
      pro = entry;
    }

  if (dump_file)
    fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n",
	     pro->index);

  /* Now see if we can put the prologue at the start of PRO.  Putting it
     there might require duplicating a block that cannot be duplicated,
     or in some cases we cannot insert the prologue there at all.  If PRO
     won't do, try again with the immediate dominator of PRO, and so on.

     The blocks that need duplicating are those reachable from PRO but
     not dominated by it.  We keep in BB_WITH a bitmap of the blocks
     reachable from PRO that we already found, and in VEC a stack of
     those we still need to consider (to find successors).  */

  auto_bitmap bb_with;
  bitmap_set_bit (bb_with, pro->index);

  vec<basic_block> vec;
  vec.create (n_basic_blocks_for_fn (cfun));
  vec.quick_push (pro);

  unsigned max_grow_size = get_uncond_jump_length ();
  max_grow_size *= param_max_grow_copy_bb_insns;

  while (pro != entry)
    {
      /* Hoist PRO until the prologue can actually be inserted there.  */
      while (pro != entry && !can_get_prologue (pro, prologue_clobbered))
	{
	  pro = get_immediate_dominator (CDI_DOMINATORS, pro);

	  if (bitmap_set_bit (bb_with, pro->index))
	    vec.quick_push (pro);
	}

      if (vec.is_empty ())
	break;

      basic_block bb = vec.pop ();
      if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size))
	/* BB cannot be duplicated: hoist PRO until it dominates BB, so BB
	   no longer needs a copy.  */
	while (!dominated_by_p (CDI_DOMINATORS, bb, pro))
	  {
	    gcc_assert (pro != entry);

	    pro = get_immediate_dominator (CDI_DOMINATORS, pro);

	    if (bitmap_set_bit (bb_with, pro->index))
	      vec.quick_push (pro);
	  }

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && bitmap_set_bit (bb_with, e->dest->index))
	  vec.quick_push (e->dest);
    }

  if (dump_file)
    fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n",
	     pro->index);

  /* If we can move PRO back without having to duplicate more blocks, do so.
     We do this because putting the prologue earlier is better for scheduling.

     We can move back to a block PRE if every path from PRE will eventually
     need a prologue, that is, PRO is a post-dominator of PRE.  PRE needs
     to dominate every block reachable from itself.  We keep in BB_TMP a
     bitmap of the blocks reachable from PRE that we already found, and in
     VEC a stack of those we still need to consider.

     Any block reachable from PRE is also reachable from all predecessors
     of PRE, so if we find we need to move PRE back further we can leave
     everything not considered so far on the stack.  Any block dominated
     by PRE is also dominated by all other dominators of PRE, so anything
     found good for some PRE does not need to be reconsidered later.

     We don't need to update BB_WITH because none of the new blocks found
     can jump to a block that does not need the prologue.  */

  if (pro != entry)
    {
      calculate_dominance_info (CDI_POST_DOMINATORS);

      auto_bitmap bb_tmp;
      bitmap_copy (bb_tmp, bb_with);
      basic_block last_ok = pro;
      vec.truncate (0);

      while (pro != entry)
	{
	  basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro);
	  if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro))
	    break;

	  if (bitmap_set_bit (bb_tmp, pre->index))
	    vec.quick_push (pre);

	  bool ok = true;
	  while (!vec.is_empty ())
	    {
	      if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre))
		{
		  ok = false;
		  break;
		}

	      basic_block bb = vec.pop ();
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (bitmap_set_bit (bb_tmp, e->dest->index))
		  vec.quick_push (e->dest);
	    }

	  if (ok && can_get_prologue (pre, prologue_clobbered))
	    last_ok = pre;

	  pro = pre;
	}

      pro = last_ok;

      free_dominance_info (CDI_POST_DOMINATORS);
    }

  vec.release ();

  if (dump_file)
    fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n",
	     pro->index);

  /* Prologue at the very start: nothing to shrink-wrap.  */
  if (pro == entry)
    {
      free_dominance_info (CDI_DOMINATORS);
      return;
    }

  /* Compute what fraction of the frequency and count of the blocks that run
     both with and without prologue are for running with prologue.  This gives
     the correct answer for reducible flow graphs; for irreducible flow graphs
     our profile is messed up beyond repair anyway.  */

  profile_count num = profile_count::zero ();
  profile_count den = profile_count::zero ();

  FOR_EACH_EDGE (e, ei, pro->preds)
    if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
      {
	if (e->count ().initialized_p ())
	  num += e->count ();
	if (e->src->count.initialized_p ())
	  den += e->src->count;
      }

  /* All is okay, so do it.  */

  crtl->shrink_wrapped = true;
  if (dump_file)
    fprintf (dump_file, "Performing shrink-wrapping.\n");

  /* Copy the blocks that can run both with and without prologue.  The
     originals run with prologue, the copies without.  Store a pointer to
     the copy in the ->aux field of the original.  */

  FOR_EACH_BB_FN (bb, cfun)
    if (bitmap_bit_p (bb_with, bb->index)
	&& !dominated_by_p (CDI_DOMINATORS, bb, pro))
      {
	basic_block dup = duplicate_block (bb, 0, 0);

	bb->aux = dup;

	if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup)))
	  emit_barrier_after_bb (dup);

	if (EDGE_COUNT (dup->succs) == 0)
	  emit_barrier_after_bb (dup);

	if (dump_file)
	  fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);

	if (num == profile_count::zero () || den.nonzero_p ())
	  bb->count = bb->count.apply_scale (num, den);
	dup->count -= bb->count;
      }

  /* Now change the edges to point to the copies, where appropriate.  */

  FOR_EACH_BB_FN (bb, cfun)
    if (!dominated_by_p (CDI_DOMINATORS, bb, pro))
      {
	basic_block src = bb;
	if (bitmap_bit_p (bb_with, bb->index))
	  src = (basic_block) bb->aux;

	FOR_EACH_EDGE (e, ei, src->succs)
	  {
	    if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	      continue;

	    if (bitmap_bit_p (bb_with, e->dest->index)
		&& !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
	      {
		if (dump_file)
		  fprintf (dump_file, "Redirecting edge %d->%d to %d\n",
			   e->src->index, e->dest->index,
			   ((basic_block) e->dest->aux)->index);
		redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
	      }
	    else if (e->flags & EDGE_FALLTHRU
		     && bitmap_bit_p (bb_with, bb->index))
	      force_nonfallthru (e);
	  }
      }

  /* Also redirect the function entry edge if necessary.  */

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
    if (bitmap_bit_p (bb_with, e->dest->index)
	&& !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
      {
	basic_block split_bb = split_edge (e);
	e = single_succ_edge (split_bb);
	redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
      }

  /* Make a simple_return for those exits that run without prologue.  */

  FOR_EACH_BB_REVERSE_FN (bb, cfun)
    if (!bitmap_bit_p (bb_with, bb->index))
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	  handle_simple_exit (e);

  /* Finally, we want a single edge to put the prologue on.  Make a new
     block before the PRO block; the edge between them is the edge we want.
     Then redirect those edges into PRO that come from blocks without the
     prologue, to point to the new block instead.  The new prologue block
     is put at the end of the insn chain.
*/ 990 991 basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); 992 BB_COPY_PARTITION (new_bb, pro); 993 new_bb->count = profile_count::zero (); 994 if (dump_file) 995 fprintf (dump_file, "Made prologue block %d\n", new_bb->index); 996 997 for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); ) 998 { 999 if (bitmap_bit_p (bb_with, e->src->index) 1000 || dominated_by_p (CDI_DOMINATORS, e->src, pro)) 1001 { 1002 ei_next (&ei); 1003 continue; 1004 } 1005 1006 new_bb->count += e->count (); 1007 1008 redirect_edge_and_branch_force (e, new_bb); 1009 if (dump_file) 1010 fprintf (dump_file, "Redirected edge from %d\n", e->src->index); 1011 } 1012 1013 *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU); 1014 force_nonfallthru (*entry_edge); 1015 1016 free_dominance_info (CDI_DOMINATORS); 1017 } 1018 1019 /* Separate shrink-wrapping 1020 1021 Instead of putting all of the prologue and epilogue in one spot, we 1022 can put parts of it in places where those components are executed less 1023 frequently. The following code does this, for prologue and epilogue 1024 components that can be put in more than one location, and where those 1025 components can be executed more than once (the epilogue component will 1026 always be executed before the prologue component is executed a second 1027 time). 1028 1029 What exactly is a component is target-dependent. The more usual 1030 components are simple saves/restores to/from the frame of callee-saved 1031 registers. This code treats components abstractly (as an sbitmap), 1032 letting the target handle all details. 1033 1034 Prologue components are placed in such a way that for every component 1035 the prologue is executed as infrequently as possible. We do this by 1036 walking the dominator tree, comparing the cost of placing a prologue 1037 component before a block to the sum of costs determined for all subtrees 1038 of that block. 
   From this placement, we then determine for each component all blocks
   where at least one of this block's dominators (including itself) will
   get a prologue inserted.  That then is how the components are placed.
   We could place the epilogue components a bit smarter (we can save a
   bit of code size sometimes); this is a possible future improvement.

   Prologues and epilogues are preferably placed into a block, either at
   the beginning or end of it, if it is needed for all predecessor resp.
   successor edges; or placed on the edge otherwise.

   If the placement of any prologue/epilogue leads to a situation we cannot
   handle (for example, an abnormal edge would need to be split, or some
   targets want to use some specific registers that may not be available
   where we want to put them), separate shrink-wrapping for the components
   in that prologue/epilogue is aborted.  */


/* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the
   label LABEL.  All callers ensure DUMP_FILE is non-null before calling
   this.  */
static void
dump_components (const char *label, sbitmap components)
{
  if (bitmap_empty_p (components))
    return;

  fprintf (dump_file, " [%s", label);

  for (unsigned int j = 0; j < components->n_bits; j++)
    if (bitmap_bit_p (components, j))
      fprintf (dump_file, " %u", j);

  fprintf (dump_file, "]");
}

/* The data we collect for each bb.  */
struct sw {
  /* What components does this BB need?  */
  sbitmap needs_components;

  /* What components does this BB have?  This is the main decision this
     pass makes.  */
  sbitmap has_components;

  /* The components for which we placed code at the start of the BB (instead
     of on all incoming edges).  */
  sbitmap head_components;

  /* The components for which we placed code at the end of the BB (instead
     of on all outgoing edges).  */
  sbitmap tail_components;

  /* The frequency of executing the prologue for this BB, if a prologue is
     placed on this BB.  This is a pessimistic estimate (no prologue is
     needed for edges from blocks that have the component under consideration
     active already).  */
  gcov_type own_cost;

  /* The frequency of executing the prologue for this BB and all BBs
     dominated by it.  */
  gcov_type total_cost;
};

/* A helper function for accessing the pass-specific info.  */
static inline struct sw *
SW (basic_block bb)
{
  gcc_assert (bb->aux);
  return (struct sw *) bb->aux;
}

/* Create the pass-specific data structures for separately shrink-wrapping
   with components COMPONENTS.  */
static void
init_separate_shrink_wrap (sbitmap components)
{
  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    {
      /* Per-block pass data; released in fini_separate_shrink_wrap.  */
      bb->aux = xcalloc (1, sizeof (struct sw));

      SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb);

      /* Mark all basic blocks without successor as needing all components.
         This avoids problems in at least cfgcleanup, sel-sched, and
         regrename (largely to do with all paths to such a block still
         needing the same dwarf CFI info).  */
      if (EDGE_COUNT (bb->succs) == 0)
        bitmap_copy (SW (bb)->needs_components, components);

      if (dump_file)
        {
          fprintf (dump_file, "bb %d components:", bb->index);
          dump_components ("has", SW (bb)->needs_components);
          fprintf (dump_file, "\n");
        }

      SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components));
      SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components));
      SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components));
      bitmap_clear (SW (bb)->has_components);
    }
}

/* Destroy the pass-specific data.  */
static void
fini_separate_shrink_wrap (void)
{
  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    if (bb->aux)
      {
        sbitmap_free (SW (bb)->needs_components);
        sbitmap_free (SW (bb)->has_components);
        sbitmap_free (SW (bb)->head_components);
        sbitmap_free (SW (bb)->tail_components);
        free (bb->aux);
        bb->aux = 0;
      }
}

/* Place the prologue for component WHICH, in the basic blocks dominated
   by HEAD.  Do a DFS over the dominator tree, and set bit WHICH in the
   HAS_COMPONENTS of a block if either the block has that bit set in
   NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all
   dominator subtrees separately.  */
static void
place_prologue_for_one_component (unsigned int which, basic_block head)
{
  /* The block we are currently dealing with.  */
  basic_block bb = head;
  /* Is this the first time we visit this block, i.e. have we just gone
     down the tree.  */
  bool first_visit = true;

  /* Walk the dominator tree, visit one block per iteration of this loop.
     Each basic block is visited twice: once before visiting any children
     of the block, and once after visiting all of them (leaf nodes are
     visited only once).  As an optimization, we do not visit subtrees
     that can no longer influence the prologue placement.  */
  for (;;)
    {
      /* First visit of a block: set the (children) cost accumulator to zero;
         if the block does not have the component itself, walk down.  */
      if (first_visit)
        {
          /* Initialize the cost.  The cost is the block execution frequency
             that does not come from backedges.  Calculating this by simply
             adding the cost of all edges that aren't backedges does not
             work: this does not always add up to the block frequency at
             all, and even if it does, rounding error makes for bad
             decisions.  */
          SW (bb)->own_cost = bb->count.to_frequency (cfun);

          edge e;
          edge_iterator ei;
          FOR_EACH_EDGE (e, ei, bb->preds)
            if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
              {
                /* Clamp at zero rather than going negative.  */
                if (SW (bb)->own_cost > EDGE_FREQUENCY (e))
                  SW (bb)->own_cost -= EDGE_FREQUENCY (e);
                else
                  SW (bb)->own_cost = 0;
              }

          SW (bb)->total_cost = 0;

          if (!bitmap_bit_p (SW (bb)->needs_components, which)
              && first_dom_son (CDI_DOMINATORS, bb))
            {
              bb = first_dom_son (CDI_DOMINATORS, bb);
              continue;
            }
        }

      /* If this block does need the component itself, or it is cheaper to
         put the prologue here than in all the descendants that need it,
         mark it so.  If this block's immediate post-dominator is dominated
         by this block, and that needs the prologue, we can put it on this
         block as well (earlier is better).  */
      if (bitmap_bit_p (SW (bb)->needs_components, which)
          || SW (bb)->total_cost > SW (bb)->own_cost)
        {
          SW (bb)->total_cost = SW (bb)->own_cost;
          bitmap_set_bit (SW (bb)->has_components, which);
        }
      else
        {
          basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
          if (dominated_by_p (CDI_DOMINATORS, kid, bb)
              && bitmap_bit_p (SW (kid)->has_components, which))
            {
              SW (bb)->total_cost = SW (bb)->own_cost;
              bitmap_set_bit (SW (bb)->has_components, which);
            }
        }

      /* We are back where we started, so we are done now.  */
      if (bb == head)
        return;

      /* We now know the cost of the subtree rooted at the current block.
         Accumulate this cost in the parent.  */
      basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
      SW (parent)->total_cost += SW (bb)->total_cost;

      /* Don't walk the tree down unless necessary.  */
      if (next_dom_son (CDI_DOMINATORS, bb)
          && SW (parent)->total_cost <= SW (parent)->own_cost)
        {
          bb = next_dom_son (CDI_DOMINATORS, bb);
          first_visit = true;
        }
      else
        {
          bb = parent;
          first_visit = false;
        }
    }
}

/* Set HAS_COMPONENTS in every block to the maximum it can be set to without
   setting it on any path from entry to exit where it was not already set
   somewhere (or, for blocks that have no path to the exit, consider only
   paths from the entry to the block itself).  Return whether any changes
   were made to some HAS_COMPONENTS.  */
static bool
spread_components (sbitmap components)
{
  basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
  basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun);

  /* A stack of all blocks left to consider, and a bitmap of all blocks
     on that stack (a block is on the stack iff its bit in SEEN is set,
     which keeps a block from being pushed twice).  */
  vec<basic_block> todo;
  todo.create (n_basic_blocks_for_fn (cfun));
  auto_bitmap seen;

  auto_sbitmap old (SBITMAP_SIZE (components));

  /* Find for every block the components that are *not* needed on some path
     from the entry to that block.  Do this with a flood fill from the entry
     block.  Every block can be visited at most as often as the number of
     components (plus one), and usually much less often.  */

  if (dump_file)
    fprintf (dump_file, "Spreading down...\n");

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->head_components);

  bitmap_copy (SW (entry_block)->head_components, components);

  edge e;
  edge_iterator ei;

  todo.quick_push (single_succ (entry_block));
  bitmap_set_bit (seen, single_succ (entry_block)->index);
  while (!todo.is_empty ())
    {
      bb = todo.pop ();

      bitmap_copy (old, SW (bb)->head_components);

      FOR_EACH_EDGE (e, ei, bb->preds)
        bitmap_ior (SW (bb)->head_components, SW (bb)->head_components,
                    SW (e->src)->head_components);

      bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components,
                        SW (bb)->has_components);

      /* Only propagate to successors if something actually changed.  */
      if (!bitmap_equal_p (old, SW (bb)->head_components))
        FOR_EACH_EDGE (e, ei, bb->succs)
          if (bitmap_set_bit (seen, e->dest->index))
            todo.quick_push (e->dest);

      bitmap_clear_bit (seen, bb->index);
    }

  /* Find for every block the components that are *not* needed on some reverse
     path from the exit to that block.  */

  if (dump_file)
    fprintf (dump_file, "Spreading up...\n");

  /* First, mark all blocks not reachable from the exit block as not needing
     any component on any path to the exit.  Mark everything, and then clear
     again by a flood fill.  */

  FOR_ALL_BB_FN (bb, cfun)
    bitmap_copy (SW (bb)->tail_components, components);

  FOR_EACH_EDGE (e, ei, exit_block->preds)
    {
      todo.quick_push (e->src);
      bitmap_set_bit (seen, e->src->index);
    }

  while (!todo.is_empty ())
    {
      bb = todo.pop ();

      if (!bitmap_empty_p (SW (bb)->tail_components))
        FOR_EACH_EDGE (e, ei, bb->preds)
          if (bitmap_set_bit (seen, e->src->index))
            todo.quick_push (e->src);

      bitmap_clear (SW (bb)->tail_components);

      bitmap_clear_bit (seen, bb->index);
    }

  /* And then, flood fill backwards to find for every block the components
     not needed on some path to the exit.  */

  bitmap_copy (SW (exit_block)->tail_components, components);

  FOR_EACH_EDGE (e, ei, exit_block->preds)
    {
      todo.quick_push (e->src);
      bitmap_set_bit (seen, e->src->index);
    }

  while (!todo.is_empty ())
    {
      bb = todo.pop ();

      bitmap_copy (old, SW (bb)->tail_components);

      FOR_EACH_EDGE (e, ei, bb->succs)
        bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components,
                    SW (e->dest)->tail_components);

      bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components,
                        SW (bb)->has_components);

      if (!bitmap_equal_p (old, SW (bb)->tail_components))
        FOR_EACH_EDGE (e, ei, bb->preds)
          if (bitmap_set_bit (seen, e->src->index))
            todo.quick_push (e->src);

      bitmap_clear_bit (seen, bb->index);
    }

  todo.release ();

  /* Finally, mark everything not needed both forwards and backwards.  */

  bool did_changes = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      bitmap_copy (old, SW (bb)->has_components);

      bitmap_and (SW (bb)->head_components, SW (bb)->head_components,
                  SW (bb)->tail_components);
      bitmap_and_compl (SW (bb)->has_components, components,
                        SW (bb)->head_components);

      if (!did_changes && !bitmap_equal_p (old, SW (bb)->has_components))
        did_changes = true;
    }

  FOR_ALL_BB_FN (bb, cfun)
    {
      if (dump_file)
        {
          fprintf (dump_file, "bb %d components:", bb->index);
          dump_components ("has", SW (bb)->has_components);
          fprintf (dump_file, "\n");
        }
    }

  return did_changes;
}

/* If we cannot handle placing some component's prologues or epilogues where
   we decided we should place them, unmark that component in COMPONENTS so
   that it is not wrapped separately.  */
static void
disqualify_problematic_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          /* Find which components we want pro/epilogues for here.  */
          bitmap_and_compl (epi, SW (e->src)->has_components,
                            SW (e->dest)->has_components);
          bitmap_and_compl (pro, SW (e->dest)->has_components,
                            SW (e->src)->has_components);

          /* Ask the target what it thinks about things.  */
          if (!bitmap_empty_p (epi))
            targetm.shrink_wrap.disqualify_components (components, e, epi,
                                                       false);
          if (!bitmap_empty_p (pro))
            targetm.shrink_wrap.disqualify_components (components, e, pro,
                                                       true);

          /* If this edge doesn't need splitting, we're fine.  */
          if (single_pred_p (e->dest)
              && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
            continue;

          /* If the edge can be split, that is fine too.  */
          if ((e->flags & EDGE_ABNORMAL) == 0)
            continue;

          /* We also can handle sibcalls.  */
          if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
            {
              gcc_assert (e->flags & EDGE_SIBCALL);
              continue;
            }

          /* Remove from consideration those components we would need
             pro/epilogues for on edges where we cannot insert them.  */
          bitmap_and_compl (components, components, epi);
          bitmap_and_compl (components, components, pro);

          if (dump_file && !bitmap_subset_p (epi, components))
            {
              fprintf (dump_file, " BAD epi %d->%d", e->src->index,
                       e->dest->index);
              if (e->flags & EDGE_EH)
                fprintf (dump_file, " for EH");
              dump_components ("epi", epi);
              fprintf (dump_file, "\n");
            }

          if (dump_file && !bitmap_subset_p (pro, components))
            {
              fprintf (dump_file, " BAD pro %d->%d", e->src->index,
                       e->dest->index);
              if (e->flags & EDGE_EH)
                fprintf (dump_file, " for EH");
              dump_components ("pro", pro);
              fprintf (dump_file, "\n");
            }
        }
    }
}

/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the start of basic blocks.  */
static void
emit_common_heads_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));
  auto_sbitmap tmp (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->head_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
         predecessor edges to this block.  */

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->preds)
        {
          /* Abnormal incoming edges cannot take any pro/epilogue code,
             so nothing can be placed at the head of this block.  */
          if (e->flags & EDGE_ABNORMAL)
            {
              bitmap_clear (epi);
              bitmap_clear (pro);
              break;
            }

          /* Deselect those epilogue components that should not be inserted
             for this edge.  */
          bitmap_and_compl (tmp, SW (e->src)->has_components,
                            SW (e->dest)->has_components);
          bitmap_and (epi, epi, tmp);

          /* Similarly, for the prologue.  */
          bitmap_and_compl (tmp, SW (e->dest)->has_components,
                            SW (e->src)->has_components);
          bitmap_and (pro, pro, tmp);
        }

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
        fprintf (dump_file, " bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
        dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
        dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
        fprintf (dump_file, "\n");

      /* Place code after the BB note.  */
      if (!bitmap_empty_p (pro))
        {
          start_sequence ();
          targetm.shrink_wrap.emit_prologue_components (pro);
          rtx_insn *seq = get_insns ();
          end_sequence ();
          record_prologue_seq (seq);

          emit_insn_after (seq, bb_note (bb));

          bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
        }

      if (!bitmap_empty_p (epi))
        {
          start_sequence ();
          targetm.shrink_wrap.emit_epilogue_components (epi);
          rtx_insn *seq = get_insns ();
          end_sequence ();
          record_epilogue_seq (seq);

          emit_insn_after (seq, bb_note (bb));

          bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
        }
    }
}

/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the end of basic blocks.
   */
static void
emit_common_tails_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));
  auto_sbitmap tmp (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->tail_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
         successor edges from this block.  */
      if (EDGE_COUNT (bb->succs) == 0)
        continue;

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          /* Abnormal outgoing edges cannot take any pro/epilogue code,
             so nothing can be placed at the tail of this block.  */
          if (e->flags & EDGE_ABNORMAL)
            {
              bitmap_clear (epi);
              bitmap_clear (pro);
              break;
            }

          /* Deselect those epilogue components that should not be inserted
             for this edge, and also those that are already put at the head
             of the successor block.  */
          bitmap_and_compl (tmp, SW (e->src)->has_components,
                            SW (e->dest)->has_components);
          bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
          bitmap_and (epi, epi, tmp);

          /* Similarly, for the prologue.  */
          bitmap_and_compl (tmp, SW (e->dest)->has_components,
                            SW (e->src)->has_components);
          bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
          bitmap_and (pro, pro, tmp);
        }

      /* If the last insn of this block is a control flow insn we cannot
         put anything after it.  We can put our code before it instead,
         but only if that jump insn is a simple jump.  */
      rtx_insn *last_insn = BB_END (bb);
      if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
        {
          bitmap_clear (epi);
          bitmap_clear (pro);
        }

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
        fprintf (dump_file, " bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
        dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
        dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
        fprintf (dump_file, "\n");

      /* Put the code at the end of the BB, but before any final jump.  */
      if (!bitmap_empty_p (epi))
        {
          start_sequence ();
          targetm.shrink_wrap.emit_epilogue_components (epi);
          rtx_insn *seq = get_insns ();
          end_sequence ();
          record_epilogue_seq (seq);

          if (control_flow_insn_p (last_insn))
            emit_insn_before (seq, last_insn);
          else
            emit_insn_after (seq, last_insn);

          bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
        }

      if (!bitmap_empty_p (pro))
        {
          start_sequence ();
          targetm.shrink_wrap.emit_prologue_components (pro);
          rtx_insn *seq = get_insns ();
          end_sequence ();
          record_prologue_seq (seq);

          if (control_flow_insn_p (last_insn))
            emit_insn_before (seq, last_insn);
          else
            emit_insn_after (seq, last_insn);

          bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
        }
    }
}

/* Place prologues and epilogues for COMPONENTS on edges, if we haven't already
   placed them inside blocks directly.
   */
static void
insert_prologue_epilogue_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      if (!bb->aux)
        continue;

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          /* Find which pro/epilogue components are needed on this edge.  */
          bitmap_and_compl (epi, SW (e->src)->has_components,
                            SW (e->dest)->has_components);
          bitmap_and_compl (pro, SW (e->dest)->has_components,
                            SW (e->src)->has_components);
          bitmap_and (epi, epi, components);
          bitmap_and (pro, pro, components);

          /* Deselect those we already have put at the head or tail of the
             edge's dest resp. src.  */
          bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
          bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
          bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
          bitmap_and_compl (pro, pro, SW (e->src)->tail_components);

          if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
            {
              if (dump_file)
                {
                  fprintf (dump_file, " %d->%d", e->src->index,
                           e->dest->index);
                  dump_components ("epi", epi);
                  dump_components ("pro", pro);
                  if (e->flags & EDGE_SIBCALL)
                    fprintf (dump_file, " (SIBCALL)");
                  else if (e->flags & EDGE_ABNORMAL)
                    fprintf (dump_file, " (ABNORMAL)");
                  fprintf (dump_file, "\n");
                }

              /* Put the epilogue components in place.  */
              start_sequence ();
              targetm.shrink_wrap.emit_epilogue_components (epi);
              rtx_insn *seq = get_insns ();
              end_sequence ();
              record_epilogue_seq (seq);

              if (e->flags & EDGE_SIBCALL)
                {
                  gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));

                  /* The epilogue goes just before the sibling call.  */
                  rtx_insn *insn = BB_END (e->src);
                  gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
                  emit_insn_before (seq, insn);
                }
              else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
                {
                  gcc_assert (e->flags & EDGE_FALLTHRU);
                  basic_block new_bb = split_edge (e);
                  emit_insn_after (seq, BB_END (new_bb));
                }
              else
                insert_insn_on_edge (seq, e);

              /* Put the prologue components in place.  Prologue components
                 always go on the edge itself.  */
              start_sequence ();
              targetm.shrink_wrap.emit_prologue_components (pro);
              seq = get_insns ();
              end_sequence ();
              record_prologue_seq (seq);

              insert_insn_on_edge (seq, e);
            }
        }
    }

  commit_edge_insertions ();
}

/* The main entry point to this subpass.  FIRST_BB is where the prologue
   would be normally put.  */
void
try_shrink_wrapping_separate (basic_block first_bb)
{
  /* Separate shrink-wrapping is not done for cc0 targets.  */
  if (HAVE_cc0)
    return;

  if (!(SHRINK_WRAPPING_ENABLED
        && flag_shrink_wrap_separate
        && optimize_function_for_speed_p (cfun)
        && targetm.shrink_wrap.get_separate_components))
    return;

  /* We don't handle "strange" functions.  */
  if (cfun->calls_alloca
      || cfun->calls_setjmp
      || cfun->can_throw_non_call_exceptions
      || crtl->calls_eh_return
      || crtl->has_nonlocal_goto
      || crtl->saves_all_registers)
    return;

  /* Ask the target what components there are.  If it returns NULL, don't
     do anything.  */
  sbitmap components = targetm.shrink_wrap.get_separate_components ();
  if (!components)
    return;

  /* We need LIVE info, not defining anything in the entry block and not
     using anything in the exit block.  A block then needs a component if
     the register for that component is in the IN or GEN or KILL set for
     that block.  */
  df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT;
  df_update_entry_exit_and_calls ();
  df_live_add_problem ();
  df_live_set_all_dirty ();
  df_analyze ();

  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  init_separate_shrink_wrap (components);

  sbitmap_iterator sbi;
  unsigned int j;
  EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
    place_prologue_for_one_component (j, first_bb);

  /* Try to minimize the number of saves and restores.  Do this as long as
     it changes anything.  This does not iterate more than a few times.  */
  int spread_times = 0;
  while (spread_components (components))
    {
      spread_times++;

      if (dump_file)
        fprintf (dump_file, "Now spread %d times.\n", spread_times);
    }

  disqualify_problematic_components (components);

  /* Don't separately shrink-wrap anything where the "main" prologue will
     go; the target code can often optimize things if it is presented with
     all components together (say, if it generates store-multiple insns).  */
  bitmap_and_compl (components, components, SW (first_bb)->has_components);

  if (bitmap_empty_p (components))
    {
      if (dump_file)
        fprintf (dump_file, "Not wrapping anything separately.\n");
    }
  else
    {
      if (dump_file)
        {
          fprintf (dump_file, "The components we wrap separately are");
          dump_components ("sep", components);
          fprintf (dump_file, "\n");

          fprintf (dump_file, "... Inserting common heads...\n");
        }

      emit_common_heads_for_components (components);

      if (dump_file)
        fprintf (dump_file, "... Inserting common tails...\n");

      emit_common_tails_for_components (components);

      if (dump_file)
        fprintf (dump_file, "... Inserting the more difficult ones...\n");

      insert_prologue_epilogue_for_components (components);

      if (dump_file)
        fprintf (dump_file, "... Done.\n");

      targetm.shrink_wrap.set_handled_components (components);

      crtl->shrink_wrapped_separate = true;
    }

  fini_separate_shrink_wrap ();

  sbitmap_free (components);
  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);

  /* All done.  */
  df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT;
  df_update_entry_exit_and_calls ();
  df_live_set_all_dirty ();
  df_analyze ();
}