/* Shrink-wrapping related optimizations.
   Copyright (C) 1987-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This file handles shrink-wrapping related optimizations.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "cfghooks.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "regs.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "output.h"
#include "tree-pass.h"
#include "cfgrtl.h"
#include "cfgbuild.h"
#include "bb-reorder.h"
#include "shrink-wrap.h"
#include "regcprop.h"
#include "rtl-iter.h"
#include "valtrack.h"
#include "function-abi.h"

/* Return true if INSN requires the stack frame to be set up.
   PROLOGUE_USED contains the hard registers used in the function
   prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
   prologue to set up for the function.  */
bool
requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
                        HARD_REG_SET set_up_by_prologue)
{
  df_ref def, use;
  HARD_REG_SET hardregs;
  unsigned regno;

  if (CALL_P (insn))
    return !SIBLING_CALL_P (insn);

  /* We need a frame to get the unique CFA expected by the unwinder.  */
  if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
    return true;

  CLEAR_HARD_REG_SET (hardregs);
  FOR_EACH_INSN_DEF (def, insn)
    {
      rtx dreg = DF_REF_REG (def);

      if (!REG_P (dreg))
        continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
    }
  if (hard_reg_set_intersect_p (hardregs, prologue_used))
    return true;
  hardregs &= ~crtl->abi->full_reg_clobbers ();
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (TEST_HARD_REG_BIT (hardregs, regno)
        && df_regs_ever_live_p (regno))
      return true;

  FOR_EACH_INSN_USE (use, insn)
    {
      rtx reg = DF_REF_REG (use);

      if (!REG_P (reg))
        continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (reg),
                           REGNO (reg));
    }
  if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
    return true;

  return false;
}

/* See whether there is a single live edge out of BB whose dest uses
   [REGNO, END_REGNO).  Return the live edge if its dest bb has
   one or two predecessors.  Otherwise return NULL.  */
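
/* For example, if D is the only successor of BB whose live-in set
   contains REGNO, and D has a single predecessor, an insn in BB that
   sets REGNO can be sunk straight into D; if D has two predecessors,
   the caller can still split the BB->D edge and sink the insn into
   the new block (see move_insn_for_shrink_wrap below).  */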

static edge
live_edge_for_reg (basic_block bb, int regno, int end_regno)
{
  edge e, live_edge;
  edge_iterator ei;
  bitmap live;
  int i;

  live_edge = NULL;
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      live = df_get_live_in (e->dest);
      for (i = regno; i < end_regno; i++)
        if (REGNO_REG_SET_P (live, i))
          {
            if (live_edge && live_edge != e)
              return NULL;
            live_edge = e;
          }
    }

  /* We can sometimes encounter dead code.  Don't try to move it
     into the exit block.  */
  if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
    return NULL;

  /* Reject targets of abnormal edges.  This is needed for correctness
     on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
     exception edges even though it is generally treated as call-saved
     for the majority of the compilation.  Moving across abnormal edges
     isn't going to be interesting for shrink-wrap usage anyway.  */
  if (live_edge->flags & EDGE_ABNORMAL)
    return NULL;

  /* When live_edge->dest->preds == 2, we can create a new block on
     the edge to make it meet the requirement.  */
  if (EDGE_COUNT (live_edge->dest->preds) > 2)
    return NULL;

  return live_edge;
}

/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the sets of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   is split or not.  */

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
                           const_hard_reg_set uses,
                           const_hard_reg_set defs,
                           bool *split_p,
                           struct dead_debug_local *debug)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses = NULL, bb_defs = NULL;
  unsigned int i, dregno, end_dregno;
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;
  rtx_insn *dinsn;
  df_ref def;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  if (!INSN_P (insn))
    return false;
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
     (1) A (non-overlapping) register
     (2) A constant,
     (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */
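
  /* As a purely illustrative example (the register numbers are made
     up), an insn such as
       (set (reg:DI 5) (plus:DI (reg:DI 4) (const_int 8)))
     has exactly one inner source register, so the overlap checks
     below handle it just like the plain register move
       (set (reg:DI 5) (reg:DI 4)).  */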

  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      rtx src_inner = NULL_RTX;

      if (can_throw_internal (insn))
        return false;

      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
        {
          rtx x = *iter;
          switch (GET_RTX_CLASS (GET_CODE (x)))
            {
            case RTX_CONST_OBJ:
            case RTX_COMPARE:
            case RTX_COMM_COMPARE:
            case RTX_BIN_ARITH:
            case RTX_COMM_ARITH:
            case RTX_UNARY:
            case RTX_TERNARY:
              /* Constant or expression.  Continue.  */
              break;

            case RTX_OBJ:
            case RTX_EXTRA:
              switch (GET_CODE (x))
                {
                case UNSPEC:
                case SUBREG:
                case STRICT_LOW_PART:
                case PC:
                case LO_SUM:
                  /* Ok.  Continue.  */
                  break;

                case REG:
                  /* Fail if we see a second inner register.  */
                  if (src_inner != NULL)
                    return false;
                  src_inner = x;
                  break;

                default:
                  return false;
                }
              break;

            default:
              return false;
            }
        }

      if (src_inner != NULL)
        src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
        return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
        return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
        return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
         all data structures are allocated.  */
      df_grow_bb_info (df_live);

      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
                  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once for a function.  */
      if (*split_p)
        return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  */
  do
    {
      if (MAY_HAVE_DEBUG_BIND_INSNS)
        {
          FOR_BB_INSNS_REVERSE (bb, dinsn)
            if (DEBUG_BIND_INSN_P (dinsn))
              {
                df_ref use;
                FOR_EACH_INSN_USE (use, dinsn)
                  if (refers_to_regno_p (dregno, end_dregno,
                                         DF_REF_REG (use), (rtx *) NULL))
                    dead_debug_add (debug, use, DF_REF_REGNO (use));
              }
            else if (dinsn == insn)
              break;
        }
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
         INSN to BB if so.  Either way, DEST is no longer live on entry,
         except for any part that overlaps SRC (next loop).  */
      if (!*split_p)
        {
          bb_uses = &DF_LR_BB_INFO (bb)->use;
          bb_defs = &DF_LR_BB_INFO (bb)->def;
        }
      if (df_live)
        {
          for (i = dregno; i < end_dregno; i++)
            {
              if (*split_p
                  || REGNO_REG_SET_P (bb_uses, i)
                  || REGNO_REG_SET_P (bb_defs, i)
                  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
                next_block = NULL;
              CLEAR_REGNO_REG_SET (live_out, i);
              CLEAR_REGNO_REG_SET (live_in, i);
            }

          /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
             Either way, SRC is now live on entry.  */
          for (i = sregno; i < end_sregno; i++)
            {
              if (*split_p
                  || REGNO_REG_SET_P (bb_defs, i)
                  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
                next_block = NULL;
              SET_REGNO_REG_SET (live_out, i);
              SET_REGNO_REG_SET (live_in, i);
            }
        }
      else
        {
          /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
             DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
             at -O1, just give up searching NEXT_BLOCK.  */
          next_block = NULL;
          for (i = dregno; i < end_dregno; i++)
            {
              CLEAR_REGNO_REG_SET (live_out, i);
              CLEAR_REGNO_REG_SET (live_in, i);
            }

          for (i = sregno; i < end_sregno; i++)
            {
              SET_REGNO_REG_SET (live_out, i);
              SET_REGNO_REG_SET (live_in, i);
            }
        }

      /* If we don't need to add the move to BB, look for a single
         successor block.  */
      if (next_block)
        {
          live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
          if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
            break;
          next_block = live_edge->dest;
        }
    }
  while (next_block);

  /* For the newly created basic block, there is no dataflow info at all.
     So skip the following dataflow update and check.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
         (next loop).  */
      for (i = dregno; i < end_dregno; i++)
        {
          CLEAR_REGNO_REG_SET (bb_uses, i);
          SET_REGNO_REG_SET (bb_defs, i);
        }

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
        SET_REGNO_REG_SET (bb_uses, i);
    }

  /* Insert debug temps for dead REGs used in subsequent debug insns.  */
  if (debug->used && !bitmap_empty_p (debug->used))
    FOR_EACH_INSN_DEF (def, insn)
      dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
                              DEBUG_TEMP_BEFORE_WITH_VALUE);

  rtx_insn *insn_copy = emit_insn_after (PATTERN (insn), bb_note (bb));
  /* Update the LABEL_NUSES count on any referenced labels.  The ideal
     solution here would be to actually move the instruction instead
     of copying/deleting it as this loses some notations on the
     insn.  */
  mark_jump_label (PATTERN (insn), insn_copy, 0);
  delete_insn (insn);
  return true;
}

/* Look for register copies in the first block of the function, and move
   them down into successor blocks if the register is used only on one
   path.  This exposes more opportunities for shrink-wrapping.  These
   kinds of sets often occur when incoming argument registers are moved
   to call-saved registers because their values are live across one or
   more calls during the function.  */
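
/* For illustration (the register numbers are hypothetical): an entry
   block that begins with
     (set (reg:DI 36) (reg:DI 5))
   copying an incoming argument into a call-saved register, followed by
   an early-exit test, can often have that copy sunk into the non-exit
   path; the early-return path then no longer references the call-saved
   register, and so no longer needs the prologue that saves it.  */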

static void
prepare_shrink_wrap (basic_block entry_block)
{
  rtx_insn *insn, *curr;
  rtx x;
  HARD_REG_SET uses, defs;
  df_ref def, use;
  bool split_p = false;
  unsigned int i;
  struct dead_debug_local debug;

  if (JUMP_P (BB_END (entry_block)))
    {
      /* To have more shrink-wrapping opportunities, prepare_shrink_wrap
         tries to sink copies from parameter registers to callee-saved
         registers out of the entry block.  Call
         copyprop_hardreg_forward_bb_without_debug_insn first to break
         some of the dependences that would otherwise block sinking.  */
      copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
    }

  dead_debug_local_init (&debug, NULL, NULL);
  CLEAR_HARD_REG_SET (uses);
  CLEAR_HARD_REG_SET (defs);

  FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
    if (NONDEBUG_INSN_P (insn)
        && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
                                       &split_p, &debug))
      {
        /* Add all defined registers to DEFS.  */
        FOR_EACH_INSN_DEF (def, insn)
          {
            x = DF_REF_REG (def);
            if (REG_P (x) && HARD_REGISTER_P (x))
              for (i = REGNO (x); i < END_REGNO (x); i++)
                SET_HARD_REG_BIT (defs, i);
          }

        /* Add all used registers to USES.  */
        FOR_EACH_INSN_USE (use, insn)
          {
            x = DF_REF_REG (use);
            if (REG_P (x) && HARD_REGISTER_P (x))
              for (i = REGNO (x); i < END_REGNO (x); i++)
                SET_HARD_REG_BIT (uses, i);
          }
      }

  dead_debug_local_finish (&debug, NULL);
}

/* Return whether basic block PRO can get the prologue.  It cannot if it
   has incoming complex edges that need a prologue inserted (we make a new
   block for the prologue, so those edges would need to be redirected, which
   does not work).  It also cannot if there exist registers live on entry
   to PRO that are clobbered by the prologue.  */

static bool
can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered)
{
  edge e;
  edge_iterator ei;
  FOR_EACH_EDGE (e, ei, pro->preds)
    if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
        && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
      return false;

  HARD_REG_SET live;
  REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro));
  if (hard_reg_set_intersect_p (live, prologue_clobbered))
    return false;

  return true;
}

/* Return whether we can duplicate basic block BB for shrink wrapping.  We
   cannot if the block cannot be duplicated at all, or if any of its incoming
   edges are complex and come from a block that does not require a prologue
   (we cannot redirect such edges), or if the block is too big to copy.
   PRO is the basic block before which we would put the prologue, MAX_SIZE is
   the maximum size block we allow to be copied.  */

static bool
can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size)
{
  if (!can_duplicate_block_p (bb))
    return false;

  edge e;
  edge_iterator ei;
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
        && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
      return false;

  unsigned size = 0;

  rtx_insn *insn;
  FOR_BB_INSNS (bb, insn)
    if (NONDEBUG_INSN_P (insn))
      {
        size += get_attr_min_length (insn);
        if (size > max_size)
          return false;
      }

  return true;
}

/* Do whatever needs to be done for exits that run without prologue.
   Sibcalls need nothing done.  Normal exits get a simple_return inserted.  */

static void
handle_simple_exit (edge e)
{
  if (e->flags & EDGE_SIBCALL)
    {
      /* Tell function.c to take no further action on this edge.  */
      e->flags |= EDGE_IGNORE;

      e->flags &= ~EDGE_FALLTHRU;
      emit_barrier_after_bb (e->src);
      return;
    }

  /* If the basic block the edge comes from has multiple successors,
     split the edge.  */
  if (EDGE_COUNT (e->src->succs) > 1)
    {
      basic_block old_bb = e->src;
      rtx_insn *end = BB_END (old_bb);
      rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end);
      basic_block new_bb = create_basic_block (note, note, old_bb);
      BB_COPY_PARTITION (new_bb, old_bb);
      BB_END (old_bb) = end;

      redirect_edge_succ (e, new_bb);
      new_bb->count = e->count ();
      e->flags |= EDGE_FALLTHRU;

      e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
    }

  e->flags &= ~EDGE_FALLTHRU;
  rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (),
                                             BB_END (e->src));
  JUMP_LABEL (ret) = simple_return_rtx;
  emit_barrier_after_bb (e->src);

  if (dump_file)
    fprintf (dump_file, "Made simple_return with UID %d in bb %d\n",
             INSN_UID (ret), e->src->index);
}

/* Try to perform a kind of shrink-wrapping, making sure the
   prologue/epilogue is emitted only around those parts of the
   function that require it.

   There will be exactly one prologue, and it will be executed either
   zero or one time, on any path.  Depending on where the prologue is
   placed, some of the basic blocks can be reached via both paths with
   and without a prologue.  Such blocks will be duplicated here, and the
   edges changed to match.

   Paths that go to the exit without going through the prologue will use
   a simple_return instead of the epilogue.  We maximize the number of
   those, making sure to only duplicate blocks that can be duplicated.
   If the prologue can then still be placed in multiple locations, we
   place it as early as possible.

   An example, where we duplicate blocks with control flow (legend:
   _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should
   be taken to point down or to the right, to simplify the diagram; here,
   block 3 needs a prologue, the rest does not):


       B                 B
       |                 |
       2                 2
       |\                |\
       | 3    becomes    | 3
       |/                |  \
       4                 7   4
       |\                |\  |\
       | 5               | 8 | 5
       |/                |/  |/
       6                 9   6
       |                 |   |
       R                 S   R


   (bb 4 is duplicated to 7, and so on; the prologue is inserted on the
   edge 2->3).

   Another example, where part of a loop is duplicated (again, bb 3 is
   the only block that needs a prologue):


       B   3<--            B       ->3<--
       |   |   |           |      |  |   |
       |   v   |  becomes  |      |  v   |
       2---4---            2---5--   4---
           |                   |     |
           R                   S     R


   (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3).

   ENTRY_EDGE is the edge where the prologue will be placed, possibly
   changed by this function.  PROLOGUE_SEQ is the prologue we will insert.  */

void
try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq)
{
  /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes
     no sense to shrink-wrap: then do not shrink-wrap!  */

  if (!SHRINK_WRAPPING_ENABLED)
    return;

  if (crtl->profile && !targetm.profile_before_prologue ())
    return;

  if (crtl->calls_eh_return)
    return;

  bool empty_prologue = true;
  for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
    if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END))
      {
        empty_prologue = false;
        break;
      }
  if (empty_prologue)
    return;

  /* Move some code down to expose more shrink-wrapping opportunities.  */

  basic_block entry = (*entry_edge)->dest;
  prepare_shrink_wrap (entry);

  if (dump_file)
    fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");

  /* Compute the registers set and used in the prologue.  */

  HARD_REG_SET prologue_clobbered, prologue_used;
  CLEAR_HARD_REG_SET (prologue_clobbered);
  CLEAR_HARD_REG_SET (prologue_used);
  for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn))
      {
        HARD_REG_SET this_used;
        CLEAR_HARD_REG_SET (this_used);
        note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used);
        this_used &= ~prologue_clobbered;
        prologue_used |= this_used;
        note_stores (insn, record_hard_reg_sets, &prologue_clobbered);
      }
  CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
  if (frame_pointer_needed)
    CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);

  /* Find out what registers are set up by the prologue; any use of these
     cannot happen before the prologue.  */

  struct hard_reg_set_container set_up_by_prologue;
  CLEAR_HARD_REG_SET (set_up_by_prologue.set);
  add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM);
  add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
  if (frame_pointer_needed)
    add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
                         HARD_FRAME_POINTER_REGNUM);
  if (pic_offset_table_rtx
      && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
                         PIC_OFFSET_TABLE_REGNUM);
  if (crtl->drap_reg)
    add_to_hard_reg_set (&set_up_by_prologue.set,
                         GET_MODE (crtl->drap_reg),
                         REGNO (crtl->drap_reg));
  if (targetm.set_up_by_prologue)
    targetm.set_up_by_prologue (&set_up_by_prologue);

  /* We will insert the prologue before the basic block PRO.  PRO should
     dominate all basic blocks that need the prologue to be executed
     before them.  First, make PRO the "tightest wrap" possible.  */

  calculate_dominance_info (CDI_DOMINATORS);

  basic_block pro = 0;

  basic_block bb;
  edge e;
  edge_iterator ei;
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
        if (NONDEBUG_INSN_P (insn)
            && requires_stack_frame_p (insn, prologue_used,
                                       set_up_by_prologue.set))
          {
            if (dump_file)
              fprintf (dump_file, "Block %d needs the prologue.\n", bb->index);
            pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb);
            break;
          }
    }

  /* If nothing needs a prologue, just put it at the start.  This really
     shouldn't happen, but we cannot fix it here.  */

  if (pro == 0)
    {
      if (dump_file)
        fprintf (dump_file, "Nothing needs a prologue, but it isn't empty; "
                 "putting it at the start.\n");
      pro = entry;
    }

  if (dump_file)
    fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n",
             pro->index);

  /* Now see if we can put the prologue at the start of PRO.  Putting it
     there might require duplicating a block that cannot be duplicated,
     or in some cases we cannot insert the prologue there at all.  If PRO
     won't do, try again with the immediate dominator of PRO, and so on.

     The blocks that need duplicating are those reachable from PRO but
     not dominated by it.  We keep in BB_WITH a bitmap of the blocks
     reachable from PRO that we already found, and in VEC a stack of
     those we still need to consider (to find successors).  */

  auto_bitmap bb_with;
  bitmap_set_bit (bb_with, pro->index);

  vec<basic_block> vec;
  vec.create (n_basic_blocks_for_fn (cfun));
  vec.quick_push (pro);

  unsigned max_grow_size = get_uncond_jump_length ();
  max_grow_size *= param_max_grow_copy_bb_insns;

  while (pro != entry)
    {
      while (pro != entry && !can_get_prologue (pro, prologue_clobbered))
        {
          pro = get_immediate_dominator (CDI_DOMINATORS, pro);

          if (bitmap_set_bit (bb_with, pro->index))
            vec.quick_push (pro);
        }

      if (vec.is_empty ())
        break;

      basic_block bb = vec.pop ();
      if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size))
        while (!dominated_by_p (CDI_DOMINATORS, bb, pro))
          {
            gcc_assert (pro != entry);

            pro = get_immediate_dominator (CDI_DOMINATORS, pro);

            if (bitmap_set_bit (bb_with, pro->index))
              vec.quick_push (pro);
          }

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
            && bitmap_set_bit (bb_with, e->dest->index))
          vec.quick_push (e->dest);
    }

  if (dump_file)
    fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n",
             pro->index);

  /* If we can move PRO back without having to duplicate more blocks, do so.
     We do this because putting the prologue earlier is better for scheduling.

     We can move back to a block PRE if every path from PRE will eventually
     need a prologue, that is, PRO is a post-dominator of PRE.  PRE needs
     to dominate every block reachable from itself.  We keep in BB_TMP a
     bitmap of the blocks reachable from PRE that we already found, and in
     VEC a stack of those we still need to consider.

     Any block reachable from PRE is also reachable from all predecessors
     of PRE, so if we find we need to move PRE back further we can leave
     everything not considered so far on the stack.  Any block dominated
     by PRE is also dominated by all other dominators of PRE, so anything
     found good for some PRE does not need to be reconsidered later.

     We don't need to update BB_WITH because none of the new blocks found
     can jump to a block that does not need the prologue.  */

  if (pro != entry)
    {
      calculate_dominance_info (CDI_POST_DOMINATORS);

      auto_bitmap bb_tmp;
      bitmap_copy (bb_tmp, bb_with);
      basic_block last_ok = pro;
      vec.truncate (0);

      while (pro != entry)
        {
          basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro);
          if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro))
            break;

          if (bitmap_set_bit (bb_tmp, pre->index))
            vec.quick_push (pre);

          bool ok = true;
          while (!vec.is_empty ())
            {
              if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre))
                {
                  ok = false;
                  break;
                }

              basic_block bb = vec.pop ();
              FOR_EACH_EDGE (e, ei, bb->succs)
                if (bitmap_set_bit (bb_tmp, e->dest->index))
                  vec.quick_push (e->dest);
            }

          if (ok && can_get_prologue (pre, prologue_clobbered))
            last_ok = pre;

          pro = pre;
        }

      pro = last_ok;

      free_dominance_info (CDI_POST_DOMINATORS);
    }

  vec.release ();

  if (dump_file)
    fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n",
             pro->index);

  if (pro == entry)
    {
      free_dominance_info (CDI_DOMINATORS);
      return;
    }

  /* Compute what fraction of the frequency and count of the blocks that run
     both with and without prologue are for running with prologue.  This gives
     the correct answer for reducible flow graphs; for irreducible flow graphs
     our profile is messed up beyond repair anyway.  */

  profile_count num = profile_count::zero ();
  profile_count den = profile_count::zero ();

  FOR_EACH_EDGE (e, ei, pro->preds)
    if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
      {
        if (e->count ().initialized_p ())
          num += e->count ();
        if (e->src->count.initialized_p ())
          den += e->src->count;
      }

  /* All is okay, so do it.  */

  crtl->shrink_wrapped = true;
  if (dump_file)
    fprintf (dump_file, "Performing shrink-wrapping.\n");

  /* Copy the blocks that can run both with and without prologue.  The
     originals run with prologue, the copies without.  Store a pointer to
     the copy in the ->aux field of the original.  */

  FOR_EACH_BB_FN (bb, cfun)
    if (bitmap_bit_p (bb_with, bb->index)
        && !dominated_by_p (CDI_DOMINATORS, bb, pro))
      {
        basic_block dup = duplicate_block (bb, 0, 0);

        bb->aux = dup;

        if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup)))
          emit_barrier_after_bb (dup);

        if (EDGE_COUNT (dup->succs) == 0)
          emit_barrier_after_bb (dup);

        if (dump_file)
          fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);

        if (num == profile_count::zero () || den.nonzero_p ())
          bb->count = bb->count.apply_scale (num, den);
        dup->count -= bb->count;
      }

  /* Now change the edges to point to the copies, where appropriate.  */

  FOR_EACH_BB_FN (bb, cfun)
    if (!dominated_by_p (CDI_DOMINATORS, bb, pro))
      {
        basic_block src = bb;
        if (bitmap_bit_p (bb_with, bb->index))
          src = (basic_block) bb->aux;

        FOR_EACH_EDGE (e, ei, src->succs)
          {
            if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
              continue;

            if (bitmap_bit_p (bb_with, e->dest->index)
                && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
              {
                if (dump_file)
                  fprintf (dump_file, "Redirecting edge %d->%d to %d\n",
                           e->src->index, e->dest->index,
                           ((basic_block) e->dest->aux)->index);
                redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
              }
            else if (e->flags & EDGE_FALLTHRU
                     && bitmap_bit_p (bb_with, bb->index))
              force_nonfallthru (e);
          }
      }

  /* Also redirect the function entry edge if necessary.  */

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
    if (bitmap_bit_p (bb_with, e->dest->index)
        && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
      {
        basic_block split_bb = split_edge (e);
        e = single_succ_edge (split_bb);
        redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
      }

  /* Make a simple_return for those exits that run without prologue.  */

  FOR_EACH_BB_REVERSE_FN (bb, cfun)
    if (!bitmap_bit_p (bb_with, bb->index))
      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
          handle_simple_exit (e);

  /* Finally, we want a single edge to put the prologue on.  Make a new
     block before the PRO block; the edge between them is the edge we want.
     Then redirect those edges into PRO that come from blocks without the
     prologue, to point to the new block instead.  The new prologue block
     is put at the end of the insn chain.  */

  basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
  BB_COPY_PARTITION (new_bb, pro);
  new_bb->count = profile_count::zero ();
  if (dump_file)
    fprintf (dump_file, "Made prologue block %d\n", new_bb->index);

  for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); )
    {
      if (bitmap_bit_p (bb_with, e->src->index)
          || dominated_by_p (CDI_DOMINATORS, e->src, pro))
        {
          ei_next (&ei);
          continue;
        }

      new_bb->count += e->count ();

      redirect_edge_and_branch_force (e, new_bb);
      if (dump_file)
        fprintf (dump_file, "Redirected edge from %d\n", e->src->index);
    }

  *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU);
  force_nonfallthru (*entry_edge);

  free_dominance_info (CDI_DOMINATORS);
}

/* Separate shrink-wrapping

   Instead of putting all of the prologue and epilogue in one spot, we
   can put parts of it in places where those components are executed less
   frequently.  The following code does this, for prologue and epilogue
   components that can be put in more than one location, and where those
   components can be executed more than once (the epilogue component will
   always be executed before the prologue component is executed a second
   time).

   What exactly is a component is target-dependent.  The more usual
   components are simple saves/restores to/from the frame of callee-saved
   registers.  This code treats components abstractly (as an sbitmap),
   letting the target handle all details.

   Prologue components are placed in such a way that for every component
   the prologue is executed as infrequently as possible.  We do this by
   walking the dominator tree, comparing the cost of placing a prologue
   component before a block to the sum of costs determined for all subtrees
   of that block.

   From this placement, we then determine for each component all blocks
   where at least one of this block's dominators (including itself) will
   get a prologue inserted.  That then is how the components are placed.
   We could place the epilogue components a bit smarter (we can save a
   bit of code size sometimes); this is a possible future improvement.

   Prologues and epilogues are preferably placed into a block, either at
   the beginning or end of it, if they are needed for all predecessor
   resp. successor edges; otherwise they are placed on the edges.

   If the placement of any prologue/epilogue leads to a situation we cannot
   handle (for example, an abnormal edge would need to be split, or some
   targets want to use some specific registers that may not be available
   where we want to put them), separate shrink-wrapping for the components
   in that prologue/epilogue is aborted.  */
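
/* As a concrete illustration: on targets whose hooks model each
   callee-saved register save/restore as one component, a register that
   is used only on a cold path gets its save and restore placed on that
   path alone, instead of in the prologue and epilogue proper.  */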


/* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the
   label LABEL.  */
static void
dump_components (const char *label, sbitmap components)
{
  if (bitmap_empty_p (components))
    return;

  fprintf (dump_file, " [%s", label);

  for (unsigned int j = 0; j < components->n_bits; j++)
    if (bitmap_bit_p (components, j))
      fprintf (dump_file, " %u", j);

  fprintf (dump_file, "]");
}

/* The data we collect for each bb.  */
struct sw {
  /* What components does this BB need?  */
  sbitmap needs_components;

  /* What components does this BB have?  This is the main decision this
     pass makes.  */
  sbitmap has_components;

  /* The components for which we placed code at the start of the BB (instead
     of on all incoming edges).  */
  sbitmap head_components;

  /* The components for which we placed code at the end of the BB (instead
     of on all outgoing edges).  */
  sbitmap tail_components;

  /* The frequency of executing the prologue for this BB, if a prologue is
     placed on this BB.  This is a pessimistic estimate (no prologue is
     needed for edges from blocks that have the component under consideration
     active already).  */
  gcov_type own_cost;

  /* The frequency of executing the prologue for this BB and all BBs
     dominated by it.  */
  gcov_type total_cost;
};

/* A helper function for accessing the pass-specific info.  */
static inline struct sw *
SW (basic_block bb)
{
  gcc_assert (bb->aux);
  return (struct sw *) bb->aux;
}

/* Create the pass-specific data structures for separately shrink-wrapping
   with components COMPONENTS.  */
static void
init_separate_shrink_wrap (sbitmap components)
{
  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    {
      bb->aux = xcalloc (1, sizeof (struct sw));

      SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb);

      /* Mark all basic blocks without successor as needing all components.
         This avoids problems in at least cfgcleanup, sel-sched, and
         regrename (largely to do with all paths to such a block still
         needing the same dwarf CFI info).  */
      if (EDGE_COUNT (bb->succs) == 0)
        bitmap_copy (SW (bb)->needs_components, components);

      if (dump_file)
        {
          fprintf (dump_file, "bb %d components:", bb->index);
          dump_components ("needs", SW (bb)->needs_components);
          fprintf (dump_file, "\n");
        }

      SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components));
      SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components));
      SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components));
      bitmap_clear (SW (bb)->has_components);
    }
}

/* Destroy the pass-specific data.  */
static void
fini_separate_shrink_wrap (void)
{
  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    if (bb->aux)
      {
        sbitmap_free (SW (bb)->needs_components);
        sbitmap_free (SW (bb)->has_components);
        sbitmap_free (SW (bb)->head_components);
        sbitmap_free (SW (bb)->tail_components);
        free (bb->aux);
        bb->aux = 0;
      }
}

/* Place the prologue for component WHICH, in the basic blocks dominated
   by HEAD.  Do a DFS over the dominator tree, and set bit WHICH in the
   HAS_COMPONENTS of a block if either the block has that bit set in
   NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all
   dominator subtrees separately.  */
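
/* A small worked example of the cost rule below: if a block that does
   not itself need component WHICH has execution frequency 100, while
   the placements already chosen in its dominator subtrees have total
   frequency 70 + 40 = 110, hoisting the prologue to the block itself
   (cost 100) is cheaper, so the block's HAS_COMPONENTS bit is set;
   with subtree costs 70 + 20 = 90 the subtrees keep their placements.  */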
static void
place_prologue_for_one_component (unsigned int which, basic_block head)
{
  /* The block we are currently dealing with.  */
  basic_block bb = head;
  /* Is this the first time we visit this block, i.e. have we just gone
     down the tree.  */
  bool first_visit = true;

  /* Walk the dominator tree, visit one block per iteration of this loop.
     Each basic block is visited twice: once before visiting any children
     of the block, and once after visiting all of them (leaf nodes are
     visited only once).  As an optimization, we do not visit subtrees
     that can no longer influence the prologue placement.  */
  for (;;)
    {
      /* First visit of a block: set the (children) cost accumulator to zero;
         if the block does not have the component itself, walk down.  */
      if (first_visit)
        {
          /* Initialize the cost.  The cost is the block execution frequency
             that does not come from backedges.  Calculating this by simply
             adding the cost of all edges that aren't backedges does not
             work: this does not always add up to the block frequency at
             all, and even if it does, rounding error makes for bad
             decisions.  */
          SW (bb)->own_cost = bb->count.to_frequency (cfun);

          edge e;
          edge_iterator ei;
          FOR_EACH_EDGE (e, ei, bb->preds)
            if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
              {
                if (SW (bb)->own_cost > EDGE_FREQUENCY (e))
                  SW (bb)->own_cost -= EDGE_FREQUENCY (e);
                else
                  SW (bb)->own_cost = 0;
              }

          SW (bb)->total_cost = 0;

          if (!bitmap_bit_p (SW (bb)->needs_components, which)
              && first_dom_son (CDI_DOMINATORS, bb))
            {
              bb = first_dom_son (CDI_DOMINATORS, bb);
              continue;
            }
        }

      /* If this block does need the component itself, or it is cheaper to
         put the prologue here than in all the descendants that need it,
         mark it so.  If this block's immediate post-dominator is dominated
         by this block, and that needs the prologue, we can put it on this
         block as well (earlier is better).  */
      if (bitmap_bit_p (SW (bb)->needs_components, which)
          || SW (bb)->total_cost > SW (bb)->own_cost)
        {
          SW (bb)->total_cost = SW (bb)->own_cost;
          bitmap_set_bit (SW (bb)->has_components, which);
        }
      else
        {
          basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
          if (dominated_by_p (CDI_DOMINATORS, kid, bb)
              && bitmap_bit_p (SW (kid)->has_components, which))
            {
              SW (bb)->total_cost = SW (bb)->own_cost;
              bitmap_set_bit (SW (bb)->has_components, which);
            }
        }

      /* We are back where we started, so we are done now.  */
      if (bb == head)
        return;

      /* We now know the cost of the subtree rooted at the current block.
         Accumulate this cost in the parent.  */
      basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
      SW (parent)->total_cost += SW (bb)->total_cost;

      /* Don't walk the tree down unless necessary.  */
      if (next_dom_son (CDI_DOMINATORS, bb)
          && SW (parent)->total_cost <= SW (parent)->own_cost)
        {
          bb = next_dom_son (CDI_DOMINATORS, bb);
          first_visit = true;
        }
      else
        {
          bb = parent;
          first_visit = false;
        }
    }
}

/* Set HAS_COMPONENTS in every block to the maximum it can be set to without
   setting it on any path from entry to exit where it was not already set
   somewhere (or, for blocks that have no path to the exit, consider only
   paths from the entry to the block itself).  Return whether any changes
   were made to some HAS_COMPONENTS.  */
static bool
spread_components (sbitmap components)
{
  basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
  basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun);

  /* A stack of all blocks left to consider, and a bitmap of all blocks
     on that stack.  */
  vec<basic_block> todo;
  todo.create (n_basic_blocks_for_fn (cfun));
  auto_bitmap seen;

  auto_sbitmap old (SBITMAP_SIZE (components));

  /* Find for every block the components that are *not* needed on some path
     from the entry to that block.  Do this with a flood fill from the entry
     block.  Every block can be visited at most as often as the number of
     components (plus one), and usually much less often.  */

  if (dump_file)
    fprintf (dump_file, "Spreading down...\n");

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->head_components);

  bitmap_copy (SW (entry_block)->head_components, components);

  edge e;
  edge_iterator ei;

  todo.quick_push (single_succ (entry_block));
  bitmap_set_bit (seen, single_succ (entry_block)->index);
  while (!todo.is_empty ())
    {
      bb = todo.pop ();

      bitmap_copy (old, SW (bb)->head_components);

      FOR_EACH_EDGE (e, ei, bb->preds)
        bitmap_ior (SW (bb)->head_components, SW (bb)->head_components,
                    SW (e->src)->head_components);

      bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components,
                        SW (bb)->has_components);

      if (!bitmap_equal_p (old, SW (bb)->head_components))
        FOR_EACH_EDGE (e, ei, bb->succs)
          if (bitmap_set_bit (seen, e->dest->index))
            todo.quick_push (e->dest);

      bitmap_clear_bit (seen, bb->index);
    }

  /* Find for every block the components that are *not* needed on some reverse
     path from the exit to that block.  */

  if (dump_file)
    fprintf (dump_file, "Spreading up...\n");

  /* First, mark all blocks not reachable from the exit block as not needing
     any component on any path to the exit.  Mark everything, and then clear
     again by a flood fill.  */

  FOR_ALL_BB_FN (bb, cfun)
    bitmap_copy (SW (bb)->tail_components, components);

  FOR_EACH_EDGE (e, ei, exit_block->preds)
    {
      todo.quick_push (e->src);
      bitmap_set_bit (seen, e->src->index);
    }

  while (!todo.is_empty ())
    {
      bb = todo.pop ();

      if (!bitmap_empty_p (SW (bb)->tail_components))
        FOR_EACH_EDGE (e, ei, bb->preds)
          if (bitmap_set_bit (seen, e->src->index))
            todo.quick_push (e->src);

      bitmap_clear (SW (bb)->tail_components);

      bitmap_clear_bit (seen, bb->index);
    }

  /* And then, flood fill backwards to find for every block the components
     not needed on some path to the exit.  */

  bitmap_copy (SW (exit_block)->tail_components, components);

  FOR_EACH_EDGE (e, ei, exit_block->preds)
    {
      todo.quick_push (e->src);
      bitmap_set_bit (seen, e->src->index);
    }

  while (!todo.is_empty ())
    {
      bb = todo.pop ();

      bitmap_copy (old, SW (bb)->tail_components);

      FOR_EACH_EDGE (e, ei, bb->succs)
        bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components,
                    SW (e->dest)->tail_components);

      bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components,
                        SW (bb)->has_components);

      if (!bitmap_equal_p (old, SW (bb)->tail_components))
        FOR_EACH_EDGE (e, ei, bb->preds)
          if (bitmap_set_bit (seen, e->src->index))
            todo.quick_push (e->src);

      bitmap_clear_bit (seen, bb->index);
    }

  todo.release ();

  /* Finally, mark everything not needed both forwards and backwards.  */

  bool did_changes = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      bitmap_copy (old, SW (bb)->has_components);

      bitmap_and (SW (bb)->head_components, SW (bb)->head_components,
                  SW (bb)->tail_components);
      bitmap_and_compl (SW (bb)->has_components, components,
                        SW (bb)->head_components);

      if (!did_changes && !bitmap_equal_p (old, SW (bb)->has_components))
        did_changes = true;
    }

  FOR_ALL_BB_FN (bb, cfun)
    {
      if (dump_file)
        {
          fprintf (dump_file, "bb %d components:", bb->index);
          dump_components ("has", SW (bb)->has_components);
          fprintf (dump_file, "\n");
        }
    }

  return did_changes;
}

/* If we cannot handle placing some component's prologues or epilogues where
   we decided we should place them, unmark that component in COMPONENTS so
   that it is not wrapped separately.  */
static void
disqualify_problematic_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          /* Find which components we want pro/epilogues for here.  */
          bitmap_and_compl (epi, SW (e->src)->has_components,
                            SW (e->dest)->has_components);
          bitmap_and_compl (pro, SW (e->dest)->has_components,
                            SW (e->src)->has_components);

          /* Ask the target what it thinks about things.  */
          if (!bitmap_empty_p (epi))
            targetm.shrink_wrap.disqualify_components (components, e, epi,
                                                       false);
          if (!bitmap_empty_p (pro))
            targetm.shrink_wrap.disqualify_components (components, e, pro,
                                                       true);

          /* If this edge doesn't need splitting, we're fine.  */
          if (single_pred_p (e->dest)
              && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
            continue;

          /* If the edge can be split, that is fine too.  */
          if ((e->flags & EDGE_ABNORMAL) == 0)
            continue;

          /* We also can handle sibcalls.  */
          if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
            {
              gcc_assert (e->flags & EDGE_SIBCALL);
              continue;
            }

          /* Remove from consideration those components we would need
             pro/epilogues for on edges where we cannot insert them.  */
          bitmap_and_compl (components, components, epi);
          bitmap_and_compl (components, components, pro);

          if (dump_file && !bitmap_subset_p (epi, components))
            {
              fprintf (dump_file, "  BAD epi %d->%d", e->src->index,
                       e->dest->index);
              if (e->flags & EDGE_EH)
                fprintf (dump_file, " for EH");
              dump_components ("epi", epi);
              fprintf (dump_file, "\n");
            }

          if (dump_file && !bitmap_subset_p (pro, components))
            {
              fprintf (dump_file, "  BAD pro %d->%d", e->src->index,
                       e->dest->index);
              if (e->flags & EDGE_EH)
                fprintf (dump_file, " for EH");
              dump_components ("pro", pro);
              fprintf (dump_file, "\n");
            }
        }
    }
}

/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the start of basic blocks.  */
static void
emit_common_heads_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));
  auto_sbitmap tmp (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->head_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
         predecessor edges to this block.  */

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->preds)
        {
          if (e->flags & EDGE_ABNORMAL)
            {
              bitmap_clear (epi);
              bitmap_clear (pro);
              break;
            }

          /* Deselect those epilogue components that should not be inserted
             for this edge.  */
          bitmap_and_compl (tmp, SW (e->src)->has_components,
                            SW (e->dest)->has_components);
          bitmap_and (epi, epi, tmp);

          /* Similarly, for the prologue.  */
          bitmap_and_compl (tmp, SW (e->dest)->has_components,
                            SW (e->src)->has_components);
          bitmap_and (pro, pro, tmp);
        }

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
        fprintf (dump_file, "  bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
        dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
        dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
        fprintf (dump_file, "\n");

      /* Place code after the BB note.  */
      if (!bitmap_empty_p (pro))
        {
          start_sequence ();
          targetm.shrink_wrap.emit_prologue_components (pro);
          rtx_insn *seq = get_insns ();
          end_sequence ();
          record_prologue_seq (seq);

          emit_insn_after (seq, bb_note (bb));

          bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
        }

      if (!bitmap_empty_p (epi))
        {
          start_sequence ();
          targetm.shrink_wrap.emit_epilogue_components (epi);
          rtx_insn *seq = get_insns ();
          end_sequence ();
          record_epilogue_seq (seq);

          emit_insn_after (seq, bb_note (bb));

          bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
        }
    }
}

/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the end of basic blocks.  */
static void
emit_common_tails_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));
  auto_sbitmap tmp (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->tail_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
         successor edges from this block.  */
      if (EDGE_COUNT (bb->succs) == 0)
        continue;

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          if (e->flags & EDGE_ABNORMAL)
            {
              bitmap_clear (epi);
              bitmap_clear (pro);
              break;
            }

          /* Deselect those epilogue components that should not be inserted
             for this edge, and also those that are already put at the head
             of the successor block.  */
          bitmap_and_compl (tmp, SW (e->src)->has_components,
                            SW (e->dest)->has_components);
          bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
          bitmap_and (epi, epi, tmp);

          /* Similarly, for the prologue.  */
          bitmap_and_compl (tmp, SW (e->dest)->has_components,
                            SW (e->src)->has_components);
          bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
          bitmap_and (pro, pro, tmp);
        }

      /* If the last insn of this block is a control flow insn we cannot
         put anything after it.  We can put our code before it instead,
         but only if that jump insn is a simple jump.  */
      rtx_insn *last_insn = BB_END (bb);
      if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
        {
          bitmap_clear (epi);
          bitmap_clear (pro);
        }

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
        fprintf (dump_file, "  bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
        dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
        dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
        fprintf (dump_file, "\n");

      /* Put the code at the end of the BB, but before any final jump.  */
      if (!bitmap_empty_p (epi))
        {
          start_sequence ();
          targetm.shrink_wrap.emit_epilogue_components (epi);
          rtx_insn *seq = get_insns ();
          end_sequence ();
          record_epilogue_seq (seq);

          if (control_flow_insn_p (last_insn))
            emit_insn_before (seq, last_insn);
          else
            emit_insn_after (seq, last_insn);

          bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
        }

      if (!bitmap_empty_p (pro))
        {
          start_sequence ();
          targetm.shrink_wrap.emit_prologue_components (pro);
          rtx_insn *seq = get_insns ();
          end_sequence ();
          record_prologue_seq (seq);

          if (control_flow_insn_p (last_insn))
            emit_insn_before (seq, last_insn);
          else
            emit_insn_after (seq, last_insn);

          bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
        }
    }
}

/* Place prologues and epilogues for COMPONENTS on edges, if we haven't
   already placed them inside blocks directly.  */
static void
insert_prologue_epilogue_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      if (!bb->aux)
        continue;

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          /* Find which pro/epilogue components are needed on this edge.  */
          bitmap_and_compl (epi, SW (e->src)->has_components,
                            SW (e->dest)->has_components);
          bitmap_and_compl (pro, SW (e->dest)->has_components,
                            SW (e->src)->has_components);
          bitmap_and (epi, epi, components);
          bitmap_and (pro, pro, components);

          /* Deselect those we already have put at the head or tail of the
             edge's dest resp. src.  */
          bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
          bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
          bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
          bitmap_and_compl (pro, pro, SW (e->src)->tail_components);

          if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
            {
              if (dump_file)
                {
                  fprintf (dump_file, "  %d->%d", e->src->index,
                           e->dest->index);
                  dump_components ("epi", epi);
                  dump_components ("pro", pro);
                  if (e->flags & EDGE_SIBCALL)
                    fprintf (dump_file, "  (SIBCALL)");
                  else if (e->flags & EDGE_ABNORMAL)
                    fprintf (dump_file, "  (ABNORMAL)");
                  fprintf (dump_file, "\n");
                }

              /* Put the epilogue components in place.  */
              start_sequence ();
              targetm.shrink_wrap.emit_epilogue_components (epi);
              rtx_insn *seq = get_insns ();
              end_sequence ();
              record_epilogue_seq (seq);

              if (e->flags & EDGE_SIBCALL)
                {
                  gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));

                  rtx_insn *insn = BB_END (e->src);
                  gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
                  emit_insn_before (seq, insn);
                }
              else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
                {
                  gcc_assert (e->flags & EDGE_FALLTHRU);
                  basic_block new_bb = split_edge (e);
                  emit_insn_after (seq, BB_END (new_bb));
                }
              else
                insert_insn_on_edge (seq, e);

              /* Put the prologue components in place.  */
              start_sequence ();
              targetm.shrink_wrap.emit_prologue_components (pro);
              seq = get_insns ();
              end_sequence ();
              record_prologue_seq (seq);

              insert_insn_on_edge (seq, e);
            }
        }
    }

  commit_edge_insertions ();
}

/* The main entry point to this subpass.  FIRST_BB is where the prologue
   would normally be put.  */
void
try_shrink_wrapping_separate (basic_block first_bb)
{
  if (HAVE_cc0)
    return;

  if (!(SHRINK_WRAPPING_ENABLED
        && flag_shrink_wrap_separate
        && optimize_function_for_speed_p (cfun)
        && targetm.shrink_wrap.get_separate_components))
    return;

  /* We don't handle "strange" functions.  */
  if (cfun->calls_alloca
      || cfun->calls_setjmp
      || cfun->can_throw_non_call_exceptions
      || crtl->calls_eh_return
      || crtl->has_nonlocal_goto
      || crtl->saves_all_registers)
    return;

  /* Ask the target what components there are.  If it returns NULL, don't
     do anything.  */
  sbitmap components = targetm.shrink_wrap.get_separate_components ();
  if (!components)
    return;

  /* We need LIVE info, not defining anything in the entry block and not
     using anything in the exit block.  A block then needs a component if
     the register for that component is in the IN or GEN or KILL set for
     that block.  */
  df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT;
  df_update_entry_exit_and_calls ();
  df_live_add_problem ();
  df_live_set_all_dirty ();
  df_analyze ();

  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  init_separate_shrink_wrap (components);

  sbitmap_iterator sbi;
  unsigned int j;
  EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
    place_prologue_for_one_component (j, first_bb);

  /* Try to minimize the number of saves and restores.  Do this as long as
     it changes anything.  This does not iterate more than a few times.  */
  int spread_times = 0;
  while (spread_components (components))
    {
      spread_times++;

      if (dump_file)
        fprintf (dump_file, "Now spread %d times.\n", spread_times);
    }

  disqualify_problematic_components (components);

  /* Don't separately shrink-wrap anything where the "main" prologue will
     go; the target code can often optimize things if it is presented with
     all components together (say, if it generates store-multiple insns).  */
  bitmap_and_compl (components, components, SW (first_bb)->has_components);

  if (bitmap_empty_p (components))
    {
      if (dump_file)
        fprintf (dump_file, "Not wrapping anything separately.\n");
    }
  else
    {
      if (dump_file)
        {
          fprintf (dump_file, "The components we wrap separately are");
          dump_components ("sep", components);
          fprintf (dump_file, "\n");

          fprintf (dump_file, "... Inserting common heads...\n");
        }

      emit_common_heads_for_components (components);

      if (dump_file)
        fprintf (dump_file, "... Inserting common tails...\n");

      emit_common_tails_for_components (components);

      if (dump_file)
        fprintf (dump_file, "... Inserting the more difficult ones...\n");

      insert_prologue_epilogue_for_components (components);

      if (dump_file)
        fprintf (dump_file, "... Done.\n");

      targetm.shrink_wrap.set_handled_components (components);

      crtl->shrink_wrapped_separate = true;
    }

  fini_separate_shrink_wrap ();

  sbitmap_free (components);
  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);

  /* All done.  */
  df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT;
  df_update_entry_exit_and_calls ();
  df_live_set_all_dirty ();
  df_analyze ();
}