xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/shrink-wrap.c (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 /* Shrink-wrapping related optimizations.
2    Copyright (C) 1987-2019 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This file handles shrink-wrapping related optimizations.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "cfghooks.h"
30 #include "df.h"
31 #include "memmodel.h"
32 #include "tm_p.h"
33 #include "regs.h"
34 #include "insn-config.h"
35 #include "emit-rtl.h"
36 #include "output.h"
37 #include "tree-pass.h"
38 #include "cfgrtl.h"
39 #include "cfgbuild.h"
40 #include "params.h"
41 #include "bb-reorder.h"
42 #include "shrink-wrap.h"
43 #include "regcprop.h"
44 #include "rtl-iter.h"
45 #include "valtrack.h"
46 
47 
48 /* Return true if INSN requires the stack frame to be set up.
49    PROLOGUE_USED contains the hard registers used in the function
50    prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
51    prologue to set up for the function.  */
52 bool
53 requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
54 			HARD_REG_SET set_up_by_prologue)
55 {
56   df_ref def, use;
57   HARD_REG_SET hardregs;
58   unsigned regno;
59 
60   if (CALL_P (insn))
61     return !SIBLING_CALL_P (insn);
62 
63   /* We need a frame to get the unique CFA expected by the unwinder.  */
64   if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
65     return true;
66 
67   CLEAR_HARD_REG_SET (hardregs);
68   FOR_EACH_INSN_DEF (def, insn)
69     {
70       rtx dreg = DF_REF_REG (def);
71 
72       if (!REG_P (dreg))
73 	continue;
74 
75       add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
76     }
77   if (hard_reg_set_intersect_p (hardregs, prologue_used))
78     return true;
79   AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
80   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
81     if (TEST_HARD_REG_BIT (hardregs, regno)
82 	&& df_regs_ever_live_p (regno))
83       return true;
84 
85   FOR_EACH_INSN_USE (use, insn)
86     {
87       rtx reg = DF_REF_REG (use);
88 
89       if (!REG_P (reg))
90 	continue;
91 
92       add_to_hard_reg_set (&hardregs, GET_MODE (reg),
93 			   REGNO (reg));
94     }
95   if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
96     return true;
97 
98   return false;
99 }
100 
101 /* See whether there has a single live edge from BB, which dest uses
102    [REGNO, END_REGNO).  Return the live edge if its dest bb has
103    one or two predecessors.  Otherwise return NULL.  */
104 
105 static edge
106 live_edge_for_reg (basic_block bb, int regno, int end_regno)
107 {
108   edge e, live_edge;
109   edge_iterator ei;
110   bitmap live;
111   int i;
112 
113   live_edge = NULL;
114   FOR_EACH_EDGE (e, ei, bb->succs)
115     {
116       live = df_get_live_in (e->dest);
117       for (i = regno; i < end_regno; i++)
118 	if (REGNO_REG_SET_P (live, i))
119 	  {
120 	    if (live_edge && live_edge != e)
121 	      return NULL;
122 	    live_edge = e;
123 	  }
124     }
125 
126   /* We can sometimes encounter dead code.  Don't try to move it
127      into the exit block.  */
128   if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
129     return NULL;
130 
131   /* Reject targets of abnormal edges.  This is needed for correctness
132      on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
133      exception edges even though it is generally treated as call-saved
134      for the majority of the compilation.  Moving across abnormal edges
135      isn't going to be interesting for shrink-wrap usage anyway.  */
136   if (live_edge->flags & EDGE_ABNORMAL)
137     return NULL;
138 
139   /* When live_edge->dest->preds == 2, we can create a new block on
140      the edge to make it meet the requirement.  */
141   if (EDGE_COUNT (live_edge->dest->preds) > 2)
142     return NULL;
143 
144   return live_edge;
145 }
146 
/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the set of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   is split or not.  DEBUG records debug-insn uses of the moved
   registers so that debug temps can be inserted for them.  */

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
			   const HARD_REG_SET uses,
			   const HARD_REG_SET defs,
			   bool *split_p,
			   struct dead_debug_local *debug)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses = NULL, bb_defs = NULL;
  unsigned int i, dregno, end_dregno;
  /* FIRST_PSEUDO_REGISTER here means "no source register seen yet"; the
     register-range loops below then iterate zero times.  */
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;
  rtx_insn *dinsn;
  df_ref def;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  if (!INSN_P (insn))
    return false;
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
      (1) A (non-overlapping) register
      (2) A constant,
      (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */

  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      rtx src_inner = NULL_RTX;

      /* Moving an insn that can throw would alter the CFG.  */
      if (can_throw_internal (insn))
	return false;

      /* Walk SRC and check it mentions at most one register; remember
	 that register in SRC_INNER for the overlap checks below.  */
      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
	{
	  rtx x = *iter;
	  switch (GET_RTX_CLASS (GET_CODE (x)))
	    {
	    case RTX_CONST_OBJ:
	    case RTX_COMPARE:
	    case RTX_COMM_COMPARE:
	    case RTX_BIN_ARITH:
	    case RTX_COMM_ARITH:
	    case RTX_UNARY:
	    case RTX_TERNARY:
	      /* Constant or expression.  Continue.  */
	      break;

	    case RTX_OBJ:
	    case RTX_EXTRA:
	      switch (GET_CODE (x))
		{
		case UNSPEC:
		case SUBREG:
		case STRICT_LOW_PART:
		case PC:
		case LO_SUM:
		  /* Ok.  Continue.  */
		  break;

		case REG:
		  /* Fail if we see a second inner register.  */
		  if (src_inner != NULL)
		    return false;
		  src_inner = x;
		  break;

		default:
		  return false;
		}
	      break;

	    default:
	      return false;
	    }
	}

      if (src_inner != NULL)
	src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
	return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
	return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
	return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
	 all data structures are allocated.  */
      df_grow_bb_info (df_live);

      /* Live-in of the new block is what was live out of BB restricted
	 to what the old destination needed.  */
      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
		  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once for a function.  */
      if (*split_p)
	return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  */
  do
    {
      /* Record debug-insn uses of DEST after INSN so debug temps can be
	 emitted for them before the original insn is deleted.  */
      if (MAY_HAVE_DEBUG_BIND_INSNS)
	{
	  FOR_BB_INSNS_REVERSE (bb, dinsn)
	    if (DEBUG_BIND_INSN_P (dinsn))
	      {
		df_ref use;
		FOR_EACH_INSN_USE (use, dinsn)
		  if (refers_to_regno_p (dregno, end_dregno,
					 DF_REF_REG (use), (rtx *) NULL))
		    dead_debug_add (debug, use, DF_REF_REGNO (use));
	      }
	    else if (dinsn == insn)
	      break;
	}
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
	 INSN to BB if so.  Either way, DEST is no longer live on entry,
	 except for any part that overlaps SRC (next loop).  */
      if (!*split_p)
	{
	  bb_uses = &DF_LR_BB_INFO (bb)->use;
	  bb_defs = &DF_LR_BB_INFO (bb)->def;
	}
      if (df_live)
	{
	  for (i = dregno; i < end_dregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_uses, i)
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
	     Either way, SRC is now live on entry.  */
	  for (i = sregno; i < end_sregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}
      else
	{
	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
	     at -O1, just give up searching NEXT_BLOCK.  */
	  next_block = NULL;
	  for (i = dregno; i < end_dregno; i++)
	    {
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  for (i = sregno; i < end_sregno; i++)
	    {
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}

      /* If we don't need to add the move to BB, look for a single
	 successor block.  */
      if (next_block)
	{
	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
	    break;
	  next_block = live_edge->dest;
	}
    }
  while (next_block);

  /* For the new created basic block, there is no dataflow info at all.
     So skip the following dataflow update and check.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
	 (next loop).  */
      for (i = dregno; i < end_dregno; i++)
	{
	  CLEAR_REGNO_REG_SET (bb_uses, i);
	  SET_REGNO_REG_SET (bb_defs, i);
	}

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
	SET_REGNO_REG_SET (bb_uses, i);
    }

  /* Insert debug temps for dead REGs used in subsequent debug insns.  */
  if (debug->used && !bitmap_empty_p (debug->used))
    FOR_EACH_INSN_DEF (def, insn)
      dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
			      DEBUG_TEMP_BEFORE_WITH_VALUE);

  /* Emit a copy of INSN in its new home and delete the original.  */
  rtx_insn *insn_copy = emit_insn_after (PATTERN (insn), bb_note (bb));
  /* Update the LABEL_NUSES count on any referenced labels. The ideal
     solution here would be to actually move the instruction instead
     of copying/deleting it as this loses some notations on the
     insn.  */
  mark_jump_label (PATTERN (insn), insn_copy, 0);
  delete_insn (insn);
  return true;
}
426 
427 /* Look for register copies in the first block of the function, and move
428    them down into successor blocks if the register is used only on one
429    path.  This exposes more opportunities for shrink-wrapping.  These
430    kinds of sets often occur when incoming argument registers are moved
431    to call-saved registers because their values are live across one or
432    more calls during the function.  */
433 
434 static void
435 prepare_shrink_wrap (basic_block entry_block)
436 {
437   rtx_insn *insn, *curr;
438   rtx x;
439   HARD_REG_SET uses, defs;
440   df_ref def, use;
441   bool split_p = false;
442   unsigned int i;
443   struct dead_debug_local debug;
444 
445   if (JUMP_P (BB_END (entry_block)))
446     {
447       /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
448 	 to sink the copies from parameter to callee saved register out of
449 	 entry block.  copyprop_hardreg_forward_bb_without_debug_insn is called
450 	 to release some dependences.  */
451       copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
452     }
453 
454   dead_debug_local_init (&debug, NULL, NULL);
455   CLEAR_HARD_REG_SET (uses);
456   CLEAR_HARD_REG_SET (defs);
457 
458   FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
459     if (NONDEBUG_INSN_P (insn)
460 	&& !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
461 				       &split_p, &debug))
462       {
463 	/* Add all defined registers to DEFs.  */
464 	FOR_EACH_INSN_DEF (def, insn)
465 	  {
466 	    x = DF_REF_REG (def);
467 	    if (REG_P (x) && HARD_REGISTER_P (x))
468 	      for (i = REGNO (x); i < END_REGNO (x); i++)
469 		SET_HARD_REG_BIT (defs, i);
470 	  }
471 
472 	/* Add all used registers to USESs.  */
473 	FOR_EACH_INSN_USE (use, insn)
474 	  {
475 	    x = DF_REF_REG (use);
476 	    if (REG_P (x) && HARD_REGISTER_P (x))
477 	      for (i = REGNO (x); i < END_REGNO (x); i++)
478 		SET_HARD_REG_BIT (uses, i);
479 	  }
480       }
481 
482   dead_debug_local_finish (&debug, NULL);
483 }
484 
485 /* Return whether basic block PRO can get the prologue.  It cannot if it
486    has incoming complex edges that need a prologue inserted (we make a new
487    block for the prologue, so those edges would need to be redirected, which
488    does not work).  It also cannot if there exist registers live on entry
489    to PRO that are clobbered by the prologue.  */
490 
491 static bool
492 can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered)
493 {
494   edge e;
495   edge_iterator ei;
496   FOR_EACH_EDGE (e, ei, pro->preds)
497     if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
498 	&& !dominated_by_p (CDI_DOMINATORS, e->src, pro))
499       return false;
500 
501   HARD_REG_SET live;
502   REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro));
503   if (hard_reg_set_intersect_p (live, prologue_clobbered))
504     return false;
505 
506   return true;
507 }
508 
509 /* Return whether we can duplicate basic block BB for shrink wrapping.  We
510    cannot if the block cannot be duplicated at all, or if any of its incoming
511    edges are complex and come from a block that does not require a prologue
512    (we cannot redirect such edges), or if the block is too big to copy.
513    PRO is the basic block before which we would put the prologue, MAX_SIZE is
514    the maximum size block we allow to be copied.  */
515 
516 static bool
517 can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size)
518 {
519   if (!can_duplicate_block_p (bb))
520     return false;
521 
522   edge e;
523   edge_iterator ei;
524   FOR_EACH_EDGE (e, ei, bb->preds)
525     if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
526 	&& !dominated_by_p (CDI_DOMINATORS, e->src, pro))
527       return false;
528 
529   unsigned size = 0;
530 
531   rtx_insn *insn;
532   FOR_BB_INSNS (bb, insn)
533     if (NONDEBUG_INSN_P (insn))
534       {
535 	size += get_attr_min_length (insn);
536 	if (size > max_size)
537 	  return false;
538       }
539 
540   return true;
541 }
542 
543 /* Do whatever needs to be done for exits that run without prologue.
544    Sibcalls need nothing done.  Normal exits get a simple_return inserted.  */
545 
546 static void
547 handle_simple_exit (edge e)
548 {
549 
550   if (e->flags & EDGE_SIBCALL)
551     {
552       /* Tell function.c to take no further action on this edge.  */
553       e->flags |= EDGE_IGNORE;
554 
555       e->flags &= ~EDGE_FALLTHRU;
556       emit_barrier_after_bb (e->src);
557       return;
558     }
559 
560   /* If the basic block the edge comes from has multiple successors,
561      split the edge.  */
562   if (EDGE_COUNT (e->src->succs) > 1)
563     {
564       basic_block old_bb = e->src;
565       rtx_insn *end = BB_END (old_bb);
566       rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end);
567       basic_block new_bb = create_basic_block (note, note, old_bb);
568       BB_COPY_PARTITION (new_bb, old_bb);
569       BB_END (old_bb) = end;
570 
571       redirect_edge_succ (e, new_bb);
572       new_bb->count = e->count ();
573       e->flags |= EDGE_FALLTHRU;
574 
575       e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
576     }
577 
578   e->flags &= ~EDGE_FALLTHRU;
579   rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (),
580 					     BB_END (e->src));
581   JUMP_LABEL (ret) = simple_return_rtx;
582   emit_barrier_after_bb (e->src);
583 
584   if (dump_file)
585     fprintf (dump_file, "Made simple_return with UID %d in bb %d\n",
586 	     INSN_UID (ret), e->src->index);
587 }
588 
589 /* Try to perform a kind of shrink-wrapping, making sure the
590    prologue/epilogue is emitted only around those parts of the
591    function that require it.
592 
593    There will be exactly one prologue, and it will be executed either
594    zero or one time, on any path.  Depending on where the prologue is
595    placed, some of the basic blocks can be reached via both paths with
596    and without a prologue.  Such blocks will be duplicated here, and the
597    edges changed to match.
598 
599    Paths that go to the exit without going through the prologue will use
600    a simple_return instead of the epilogue.  We maximize the number of
601    those, making sure to only duplicate blocks that can be duplicated.
602    If the prologue can then still be placed in multiple locations, we
603    place it as early as possible.
604 
605    An example, where we duplicate blocks with control flow (legend:
606    _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should
607    be taken to point down or to the right, to simplify the diagram; here,
608    block 3 needs a prologue, the rest does not):
609 
610 
611        B                 B
612        |                 |
613        2                 2
614        |\                |\
615        | 3    becomes    | 3
616        |/                |  \
617        4                 7   4
618        |\                |\  |\
619        | 5               | 8 | 5
620        |/                |/  |/
621        6                 9   6
622        |                 |   |
623        R                 S   R
624 
625 
626    (bb 4 is duplicated to 7, and so on; the prologue is inserted on the
627    edge 2->3).
628 
629    Another example, where part of a loop is duplicated (again, bb 3 is
630    the only block that needs a prologue):
631 
632 
633        B   3<--              B       ->3<--
634        |   |   |             |      |  |   |
635        |   v   |   becomes   |      |  v   |
636        2---4---              2---5--   4---
637            |                     |     |
638            R                     S     R
639 
640 
641    (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3).
642 
643    ENTRY_EDGE is the edge where the prologue will be placed, possibly
644    changed by this function.  PROLOGUE_SEQ is the prologue we will insert.  */
645 
646 void
647 try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq)
648 {
649   /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes
650      no sense to shrink-wrap: then do not shrink-wrap!  */
651 
652   if (!SHRINK_WRAPPING_ENABLED)
653     return;
654 
655   if (crtl->profile && !targetm.profile_before_prologue ())
656     return;
657 
658   if (crtl->calls_eh_return)
659     return;
660 
661   bool empty_prologue = true;
662   for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
663     if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END))
664       {
665 	empty_prologue = false;
666 	break;
667       }
668   if (empty_prologue)
669     return;
670 
671   /* Move some code down to expose more shrink-wrapping opportunities.  */
672 
673   basic_block entry = (*entry_edge)->dest;
674   prepare_shrink_wrap (entry);
675 
676   if (dump_file)
677     fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
678 
679   /* Compute the registers set and used in the prologue.  */
680 
681   HARD_REG_SET prologue_clobbered, prologue_used;
682   CLEAR_HARD_REG_SET (prologue_clobbered);
683   CLEAR_HARD_REG_SET (prologue_used);
684   for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
685     if (NONDEBUG_INSN_P (insn))
686       {
687 	HARD_REG_SET this_used;
688 	CLEAR_HARD_REG_SET (this_used);
689 	note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used);
690 	AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
691 	IOR_HARD_REG_SET (prologue_used, this_used);
692 	note_stores (PATTERN (insn), record_hard_reg_sets, &prologue_clobbered);
693       }
694   CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
695   if (frame_pointer_needed)
696     CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
697 
698   /* Find out what registers are set up by the prologue; any use of these
699      cannot happen before the prologue.  */
700 
701   struct hard_reg_set_container set_up_by_prologue;
702   CLEAR_HARD_REG_SET (set_up_by_prologue.set);
703   add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM);
704   add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
705   if (frame_pointer_needed)
706     add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
707 			 HARD_FRAME_POINTER_REGNUM);
708   if (pic_offset_table_rtx
709       && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
710     add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
711 			 PIC_OFFSET_TABLE_REGNUM);
712   if (crtl->drap_reg)
713     add_to_hard_reg_set (&set_up_by_prologue.set,
714 			 GET_MODE (crtl->drap_reg),
715 			 REGNO (crtl->drap_reg));
716   if (targetm.set_up_by_prologue)
717     targetm.set_up_by_prologue (&set_up_by_prologue);
718 
719   /* We will insert the prologue before the basic block PRO.  PRO should
720      dominate all basic blocks that need the prologue to be executed
721      before them.  First, make PRO the "tightest wrap" possible.  */
722 
723   calculate_dominance_info (CDI_DOMINATORS);
724 
725   basic_block pro = 0;
726 
727   basic_block bb;
728   edge e;
729   edge_iterator ei;
730   FOR_EACH_BB_FN (bb, cfun)
731     {
732       rtx_insn *insn;
733       FOR_BB_INSNS (bb, insn)
734 	if (NONDEBUG_INSN_P (insn)
735 	    && requires_stack_frame_p (insn, prologue_used,
736 				       set_up_by_prologue.set))
737 	  {
738 	    if (dump_file)
739 	      fprintf (dump_file, "Block %d needs the prologue.\n", bb->index);
740 	    pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb);
741 	    break;
742 	  }
743     }
744 
745   /* If nothing needs a prologue, just put it at the start.  This really
746      shouldn't happen, but we cannot fix it here.  */
747 
748   if (pro == 0)
749     {
750       if (dump_file)
751 	fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; "
752 			   "putting it at the start.\n");
753       pro = entry;
754     }
755 
756   if (dump_file)
757     fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n",
758 	     pro->index);
759 
760   /* Now see if we can put the prologue at the start of PRO.  Putting it
761      there might require duplicating a block that cannot be duplicated,
762      or in some cases we cannot insert the prologue there at all.  If PRO
     won't do, try again with the immediate dominator of PRO, and so on.
764 
765      The blocks that need duplicating are those reachable from PRO but
766      not dominated by it.  We keep in BB_WITH a bitmap of the blocks
767      reachable from PRO that we already found, and in VEC a stack of
768      those we still need to consider (to find successors).  */
769 
770   auto_bitmap bb_with;
771   bitmap_set_bit (bb_with, pro->index);
772 
773   vec<basic_block> vec;
774   vec.create (n_basic_blocks_for_fn (cfun));
775   vec.quick_push (pro);
776 
777   unsigned max_grow_size = get_uncond_jump_length ();
778   max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
779 
780   while (!vec.is_empty () && pro != entry)
781     {
782       while (pro != entry && !can_get_prologue (pro, prologue_clobbered))
783 	{
784 	  pro = get_immediate_dominator (CDI_DOMINATORS, pro);
785 
786 	  if (bitmap_set_bit (bb_with, pro->index))
787 	    vec.quick_push (pro);
788 	}
789 
790       basic_block bb = vec.pop ();
791       if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size))
792 	while (!dominated_by_p (CDI_DOMINATORS, bb, pro))
793 	  {
794 	    gcc_assert (pro != entry);
795 
796 	    pro = get_immediate_dominator (CDI_DOMINATORS, pro);
797 
798 	    if (bitmap_set_bit (bb_with, pro->index))
799 	      vec.quick_push (pro);
800 	  }
801 
802       FOR_EACH_EDGE (e, ei, bb->succs)
803 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
804 	    && bitmap_set_bit (bb_with, e->dest->index))
805 	  vec.quick_push (e->dest);
806     }
807 
808   if (dump_file)
809     fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n",
810 	     pro->index);
811 
812   /* If we can move PRO back without having to duplicate more blocks, do so.
813      We do this because putting the prologue earlier is better for scheduling.
814 
815      We can move back to a block PRE if every path from PRE will eventually
816      need a prologue, that is, PRO is a post-dominator of PRE.  PRE needs
817      to dominate every block reachable from itself.  We keep in BB_TMP a
818      bitmap of the blocks reachable from PRE that we already found, and in
819      VEC a stack of those we still need to consider.
820 
821      Any block reachable from PRE is also reachable from all predecessors
822      of PRE, so if we find we need to move PRE back further we can leave
823      everything not considered so far on the stack.  Any block dominated
824      by PRE is also dominated by all other dominators of PRE, so anything
825      found good for some PRE does not need to be reconsidered later.
826 
827      We don't need to update BB_WITH because none of the new blocks found
828      can jump to a block that does not need the prologue.  */
829 
830   if (pro != entry)
831     {
832       calculate_dominance_info (CDI_POST_DOMINATORS);
833 
834       auto_bitmap bb_tmp;
835       bitmap_copy (bb_tmp, bb_with);
836       basic_block last_ok = pro;
837       vec.truncate (0);
838 
839       while (pro != entry)
840 	{
841 	  basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro);
842 	  if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro))
843 	    break;
844 
845 	  if (bitmap_set_bit (bb_tmp, pre->index))
846 	    vec.quick_push (pre);
847 
848 	  bool ok = true;
849 	  while (!vec.is_empty ())
850 	    {
851 	      if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre))
852 		{
853 		  ok = false;
854 		  break;
855 		}
856 
857 	      basic_block bb = vec.pop ();
858 	      FOR_EACH_EDGE (e, ei, bb->succs)
859 		if (bitmap_set_bit (bb_tmp, e->dest->index))
860 		  vec.quick_push (e->dest);
861 	    }
862 
863 	  if (ok && can_get_prologue (pre, prologue_clobbered))
864 	    last_ok = pre;
865 
866 	  pro = pre;
867 	}
868 
869       pro = last_ok;
870 
871       free_dominance_info (CDI_POST_DOMINATORS);
872     }
873 
874   vec.release ();
875 
876   if (dump_file)
877     fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n",
878 	     pro->index);
879 
880   if (pro == entry)
881     {
882       free_dominance_info (CDI_DOMINATORS);
883       return;
884     }
885 
886   /* Compute what fraction of the frequency and count of the blocks that run
887      both with and without prologue are for running with prologue.  This gives
888      the correct answer for reducible flow graphs; for irreducible flow graphs
889      our profile is messed up beyond repair anyway.  */
890 
891   profile_count num = profile_count::zero ();
892   profile_count den = profile_count::zero ();
893 
894   FOR_EACH_EDGE (e, ei, pro->preds)
895     if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
896       {
897 	if (e->count ().initialized_p ())
898 	  num += e->count ();
899 	if (e->src->count.initialized_p ())
900 	  den += e->src->count;
901       }
902 
903   /* All is okay, so do it.  */
904 
905   crtl->shrink_wrapped = true;
906   if (dump_file)
907     fprintf (dump_file, "Performing shrink-wrapping.\n");
908 
909   /* Copy the blocks that can run both with and without prologue.  The
910      originals run with prologue, the copies without.  Store a pointer to
911      the copy in the ->aux field of the original.  */
912 
913   FOR_EACH_BB_FN (bb, cfun)
914     if (bitmap_bit_p (bb_with, bb->index)
915 	&& !dominated_by_p (CDI_DOMINATORS, bb, pro))
916       {
917 	basic_block dup = duplicate_block (bb, 0, 0);
918 
919 	bb->aux = dup;
920 
921 	if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup)))
922 	  emit_barrier_after_bb (dup);
923 
924 	if (EDGE_COUNT (dup->succs) == 0)
925 	  emit_barrier_after_bb (dup);
926 
927 	if (dump_file)
928 	  fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);
929 
930 	if (num == profile_count::zero () || den.nonzero_p ())
931 	  bb->count = bb->count.apply_scale (num, den);
932 	dup->count -= bb->count;
933       }
934 
935   /* Now change the edges to point to the copies, where appropriate.  */
936 
937   FOR_EACH_BB_FN (bb, cfun)
938     if (!dominated_by_p (CDI_DOMINATORS, bb, pro))
939       {
940 	basic_block src = bb;
941 	if (bitmap_bit_p (bb_with, bb->index))
942 	  src = (basic_block) bb->aux;
943 
944 	FOR_EACH_EDGE (e, ei, src->succs)
945 	  {
946 	    if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
947 	      continue;
948 
949 	    if (bitmap_bit_p (bb_with, e->dest->index)
950 		&& !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
951 	      {
952 		if (dump_file)
953 		  fprintf (dump_file, "Redirecting edge %d->%d to %d\n",
954 			   e->src->index, e->dest->index,
955 			   ((basic_block) e->dest->aux)->index);
956 		redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
957 	      }
958 	    else if (e->flags & EDGE_FALLTHRU
959 		     && bitmap_bit_p (bb_with, bb->index))
960 	      force_nonfallthru (e);
961 	  }
962       }
963 
964   /* Also redirect the function entry edge if necessary.  */
965 
966   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
967     if (bitmap_bit_p (bb_with, e->dest->index)
968 	&& !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
969       {
970 	basic_block split_bb = split_edge (e);
971 	e = single_succ_edge (split_bb);
972 	redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
973       }
974 
975   /* Make a simple_return for those exits that run without prologue.  */
976 
977   FOR_EACH_BB_REVERSE_FN (bb, cfun)
978     if (!bitmap_bit_p (bb_with, bb->index))
979       FOR_EACH_EDGE (e, ei, bb->succs)
980 	if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
981 	  handle_simple_exit (e);
982 
983   /* Finally, we want a single edge to put the prologue on.  Make a new
   984      block before the PRO block; the edge between them is the edge we want.
985      Then redirect those edges into PRO that come from blocks without the
986      prologue, to point to the new block instead.  The new prologue block
987      is put at the end of the insn chain.  */
988 
989   basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
990   BB_COPY_PARTITION (new_bb, pro);
991   new_bb->count = profile_count::zero ();
992   if (dump_file)
993     fprintf (dump_file, "Made prologue block %d\n", new_bb->index);
994 
995   for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); )
996     {
997       if (bitmap_bit_p (bb_with, e->src->index)
998 	  || dominated_by_p (CDI_DOMINATORS, e->src, pro))
999 	{
1000 	  ei_next (&ei);
1001 	  continue;
1002 	}
1003 
1004       new_bb->count += e->count ();
1005 
1006       redirect_edge_and_branch_force (e, new_bb);
1007       if (dump_file)
1008 	fprintf (dump_file, "Redirected edge from %d\n", e->src->index);
1009     }
1010 
1011   *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU);
1012   force_nonfallthru (*entry_edge);
1013 
1014   free_dominance_info (CDI_DOMINATORS);
1015 }
1016 
1017 /* Separate shrink-wrapping
1018 
1019    Instead of putting all of the prologue and epilogue in one spot, we
1020    can put parts of it in places where those components are executed less
1021    frequently.  The following code does this, for prologue and epilogue
1022    components that can be put in more than one location, and where those
1023    components can be executed more than once (the epilogue component will
1024    always be executed before the prologue component is executed a second
1025    time).
1026 
1027    What exactly is a component is target-dependent.  The more usual
1028    components are simple saves/restores to/from the frame of callee-saved
1029    registers.  This code treats components abstractly (as an sbitmap),
1030    letting the target handle all details.
1031 
1032    Prologue components are placed in such a way that for every component
1033    the prologue is executed as infrequently as possible.  We do this by
1034    walking the dominator tree, comparing the cost of placing a prologue
1035    component before a block to the sum of costs determined for all subtrees
1036    of that block.
1037 
1038    From this placement, we then determine for each component all blocks
1039    where at least one of this block's dominators (including itself) will
1040    get a prologue inserted.  That then is how the components are placed.
1041    We could place the epilogue components a bit smarter (we can save a
1042    bit of code size sometimes); this is a possible future improvement.
1043 
1044    Prologues and epilogues are preferably placed into a block, either at
1045    the beginning or end of it, if it is needed for all predecessor resp.
1046    successor edges; or placed on the edge otherwise.
1047 
1048    If the placement of any prologue/epilogue leads to a situation we cannot
1049    handle (for example, an abnormal edge would need to be split, or some
1050    targets want to use some specific registers that may not be available
1051    where we want to put them), separate shrink-wrapping for the components
1052    in that prologue/epilogue is aborted.  */
1053 
1054 
1055 /* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the
1056    label LABEL.  */
1057 static void
1058 dump_components (const char *label, sbitmap components)
1059 {
1060   if (bitmap_empty_p (components))
1061     return;
1062 
1063   fprintf (dump_file, " [%s", label);
1064 
1065   for (unsigned int j = 0; j < components->n_bits; j++)
1066     if (bitmap_bit_p (components, j))
1067       fprintf (dump_file, " %u", j);
1068 
1069   fprintf (dump_file, "]");
1070 }
1071 
/* The data we collect for each bb.  Lives in the bb's ->aux field for the
   duration of the separate shrink-wrapping subpass (see SW () below);
   allocated in init_separate_shrink_wrap and freed again in
   fini_separate_shrink_wrap.  */
struct sw {
  /* What components does this BB need?  Supplied by the target via
     targetm.shrink_wrap.components_for_bb.  */
  sbitmap needs_components;

  /* What components does this BB have?  This is the main decision this
     pass makes.  */
  sbitmap has_components;

  /* The components for which we placed code at the start of the BB (instead
     of on all incoming edges).  */
  sbitmap head_components;

  /* The components for which we placed code at the end of the BB (instead
     of on all outgoing edges).  */
  sbitmap tail_components;

  /* The frequency of executing the prologue for this BB, if a prologue is
     placed on this BB.  This is a pessimistic estimate (no prologue is
     needed for edges from blocks that have the component under consideration
     active already).  */
  gcov_type own_cost;

  /* The frequency of executing the prologue for this BB and all BBs
     dominated by it.  */
  gcov_type total_cost;
};
1099 
1100 /* A helper function for accessing the pass-specific info.  */
1101 static inline struct sw *
1102 SW (basic_block bb)
1103 {
1104   gcc_assert (bb->aux);
1105   return (struct sw *) bb->aux;
1106 }
1107 
1108 /* Create the pass-specific data structures for separately shrink-wrapping
1109    with components COMPONENTS.  */
1110 static void
1111 init_separate_shrink_wrap (sbitmap components)
1112 {
1113   basic_block bb;
1114   FOR_ALL_BB_FN (bb, cfun)
1115     {
1116       bb->aux = xcalloc (1, sizeof (struct sw));
1117 
1118       SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb);
1119 
1120       /* Mark all basic blocks without successor as needing all components.
1121 	 This avoids problems in at least cfgcleanup, sel-sched, and
1122 	 regrename (largely to do with all paths to such a block still
1123 	 needing the same dwarf CFI info).  */
1124       if (EDGE_COUNT (bb->succs) == 0)
1125 	bitmap_copy (SW (bb)->needs_components, components);
1126 
1127       if (dump_file)
1128 	{
1129 	  fprintf (dump_file, "bb %d components:", bb->index);
1130 	  dump_components ("has", SW (bb)->needs_components);
1131 	  fprintf (dump_file, "\n");
1132 	}
1133 
1134       SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components));
1135       SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components));
1136       SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components));
1137       bitmap_clear (SW (bb)->has_components);
1138     }
1139 }
1140 
1141 /* Destroy the pass-specific data.  */
1142 static void
1143 fini_separate_shrink_wrap (void)
1144 {
1145   basic_block bb;
1146   FOR_ALL_BB_FN (bb, cfun)
1147     if (bb->aux)
1148       {
1149 	sbitmap_free (SW (bb)->needs_components);
1150 	sbitmap_free (SW (bb)->has_components);
1151 	sbitmap_free (SW (bb)->head_components);
1152 	sbitmap_free (SW (bb)->tail_components);
1153 	free (bb->aux);
1154 	bb->aux = 0;
1155       }
1156 }
1157 
/* Place the prologue for component WHICH, in the basic blocks dominated
   by HEAD.  Do a DFS over the dominator tree, and set bit WHICH in the
   HAS_COMPONENTS of a block if either the block has that bit set in
   NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all
   dominator subtrees separately.  */
static void
place_prologue_for_one_component (unsigned int which, basic_block head)
{
  /* The block we are currently dealing with.  */
  basic_block bb = head;
  /* Is this the first time we visit this block, i.e. have we just gone
     down the tree.  */
  bool first_visit = true;

  /* Walk the dominator tree, visit one block per iteration of this loop.
     Each basic block is visited twice: once before visiting any children
     of the block, and once after visiting all of them (leaf nodes are
     visited only once).  As an optimization, we do not visit subtrees
     that can no longer influence the prologue placement.  */
  for (;;)
    {
      /* First visit of a block: set the (children) cost accumulator to zero;
	 if the block does not have the component itself, walk down.  */
      if (first_visit)
	{
	  /* Initialize the cost.  The cost is the block execution frequency
	     that does not come from backedges.  Calculating this by simply
	     adding the cost of all edges that aren't backedges does not
	     work: this does not always add up to the block frequency at
	     all, and even if it does, rounding error makes for bad
	     decisions.  */
	  SW (bb)->own_cost = bb->count.to_frequency (cfun);

	  /* Subtract the frequency of each incoming edge whose source is
	     dominated by BB (a backedge), clamping the result at zero.  */
	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
	      {
		if (SW (bb)->own_cost > EDGE_FREQUENCY (e))
		  SW (bb)->own_cost -= EDGE_FREQUENCY (e);
		else
		  SW (bb)->own_cost = 0;
	      }

	  SW (bb)->total_cost = 0;

	  /* If BB does not itself need the component, first find out what
	     placing it in the dominator children would cost; their subtree
	     costs accumulate into BB's TOTAL_COST via the post-visit code
	     below.  */
	  if (!bitmap_bit_p (SW (bb)->needs_components, which)
	      && first_dom_son (CDI_DOMINATORS, bb))
	    {
	      bb = first_dom_son (CDI_DOMINATORS, bb);
	      continue;
	    }
	}

      /* If this block does need the component itself, or it is cheaper to
	 put the prologue here than in all the descendants that need it,
	 mark it so.  If this block's immediate post-dominator is dominated
	 by this block, and that needs the prologue, we can put it on this
	 block as well (earlier is better).  */
      if (bitmap_bit_p (SW (bb)->needs_components, which)
	  || SW (bb)->total_cost > SW (bb)->own_cost)
	{
	  SW (bb)->total_cost = SW (bb)->own_cost;
	  bitmap_set_bit (SW (bb)->has_components, which);
	}
      else
	{
	  basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
	  if (dominated_by_p (CDI_DOMINATORS, kid, bb)
	      && bitmap_bit_p (SW (kid)->has_components, which))
	    {
	      SW (bb)->total_cost = SW (bb)->own_cost;
	      bitmap_set_bit (SW (bb)->has_components, which);
	    }
	}

      /* We are back where we started, so we are done now.  */
      if (bb == head)
	return;

      /* We now know the cost of the subtree rooted at the current block.
	 Accumulate this cost in the parent.  */
      basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
      SW (parent)->total_cost += SW (bb)->total_cost;

      /* Don't walk the tree down unless necessary: once the children's
	 accumulated cost already exceeds the parent's own cost, the
	 decision for the parent is fixed and further siblings cannot
	 change it.  */
      if (next_dom_son (CDI_DOMINATORS, bb)
          && SW (parent)->total_cost <= SW (parent)->own_cost)
	{
	  bb = next_dom_son (CDI_DOMINATORS, bb);
	  first_visit = true;
	}
      else
	{
	  bb = parent;
	  first_visit = false;
	}
    }
}
1257 
/* Set HAS_COMPONENTS in every block to the maximum it can be set to without
   setting it on any path from entry to exit where it was not already set
   somewhere (or, for blocks that have no path to the exit, consider only
   paths from the entry to the block itself).  Return whether any changes
   were made to some HAS_COMPONENTS.  */
static bool
spread_components (sbitmap components)
{
  basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
  basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun);

  /* A stack of all blocks left to consider, and a bitmap of all blocks
     on that stack.  */
  vec<basic_block> todo;
  todo.create (n_basic_blocks_for_fn (cfun));
  auto_bitmap seen;

  /* Scratch copy of a block's bitmap, used to detect whether a flood-fill
     step changed anything and propagation should continue.  */
  auto_sbitmap old (SBITMAP_SIZE (components));

  /* Find for every block the components that are *not* needed on some path
     from the entry to that block.  Do this with a flood fill from the entry
     block.  Every block can be visited at most as often as the number of
     components (plus one), and usually much less often.  */

  if (dump_file)
    fprintf (dump_file, "Spreading down...\n");

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->head_components);

  /* At the entry, no component has been placed yet, so all of them are
     "not needed so far".  */
  bitmap_copy (SW (entry_block)->head_components, components);

  edge e;
  edge_iterator ei;

  todo.quick_push (single_succ (entry_block));
  bitmap_set_bit (seen, single_succ (entry_block)->index);
  while (!todo.is_empty ())
    {
      bb = todo.pop ();

      bitmap_copy (old, SW (bb)->head_components);

      /* A component is not needed up to BB if it was not needed up to any
	 predecessor...  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	bitmap_ior (SW (bb)->head_components, SW (bb)->head_components,
		    SW (e->src)->head_components);

      /* ... unless BB itself places that component.  */
      bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components,
			SW (bb)->has_components);

      /* Only re-queue the successors if anything changed here.  */
      if (!bitmap_equal_p (old, SW (bb)->head_components))
	FOR_EACH_EDGE (e, ei, bb->succs)
	  if (bitmap_set_bit (seen, e->dest->index))
	    todo.quick_push (e->dest);

      bitmap_clear_bit (seen, bb->index);
    }

  /* Find for every block the components that are *not* needed on some reverse
     path from the exit to that block.  */

  if (dump_file)
    fprintf (dump_file, "Spreading up...\n");

  /* First, mark all blocks not reachable from the exit block as not needing
     any component on any path to the exit.  Mark everything, and then clear
     again by a flood fill.  */

  FOR_ALL_BB_FN (bb, cfun)
    bitmap_copy (SW (bb)->tail_components, components);

  FOR_EACH_EDGE (e, ei, exit_block->preds)
    {
      todo.quick_push (e->src);
      bitmap_set_bit (seen, e->src->index);
    }

  /* This flood fill clears TAIL_COMPONENTS of exactly the blocks that can
     reach the exit; the unreachable ones keep the full set.  */
  while (!todo.is_empty ())
    {
      bb = todo.pop ();

      if (!bitmap_empty_p (SW (bb)->tail_components))
	FOR_EACH_EDGE (e, ei, bb->preds)
	  if (bitmap_set_bit (seen, e->src->index))
	    todo.quick_push (e->src);

      bitmap_clear (SW (bb)->tail_components);

      bitmap_clear_bit (seen, bb->index);
    }

  /* And then, flood fill backwards to find for every block the components
     not needed on some path to the exit.  */

  bitmap_copy (SW (exit_block)->tail_components, components);

  FOR_EACH_EDGE (e, ei, exit_block->preds)
    {
      todo.quick_push (e->src);
      bitmap_set_bit (seen, e->src->index);
    }

  while (!todo.is_empty ())
    {
      bb = todo.pop ();

      bitmap_copy (old, SW (bb)->tail_components);

      /* Mirror image of the forward fill: a component is not needed from
	 BB on if it is not needed from some successor on, unless BB itself
	 places it.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components,
		    SW (e->dest)->tail_components);

      bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components,
			SW (bb)->has_components);

      if (!bitmap_equal_p (old, SW (bb)->tail_components))
	FOR_EACH_EDGE (e, ei, bb->preds)
	  if (bitmap_set_bit (seen, e->src->index))
	    todo.quick_push (e->src);

      bitmap_clear_bit (seen, bb->index);
    }

  todo.release ();

  /* Finally, give each block every component that is not known to be
     unneeded both on some path from the entry and on some path to the
     exit; i.e. mark everything "not not-needed" both forwards and
     backwards.  */

  bool did_changes = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      bitmap_copy (old, SW (bb)->has_components);

      bitmap_and (SW (bb)->head_components, SW (bb)->head_components,
		  SW (bb)->tail_components);
      bitmap_and_compl (SW (bb)->has_components, components,
			SW (bb)->head_components);

      if (!did_changes && !bitmap_equal_p (old, SW (bb)->has_components))
	did_changes = true;
    }

  FOR_ALL_BB_FN (bb, cfun)
    {
      if (dump_file)
	{
	  fprintf (dump_file, "bb %d components:", bb->index);
	  dump_components ("has", SW (bb)->has_components);
	  fprintf (dump_file, "\n");
	}
    }

  return did_changes;
}
1413 
/* If we cannot handle placing some component's prologues or epilogues where
   we decided we should place them, unmark that component in COMPONENTS so
   that it is not wrapped separately.  */
static void
disqualify_problematic_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Find which components we want pro/epilogues for here.  */
	  bitmap_and_compl (epi, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (pro, SW (e->dest)->has_components,
			    SW (e->src)->has_components);

	  /* Ask the target what it thinks about things.  */
	  if (!bitmap_empty_p (epi))
	    targetm.shrink_wrap.disqualify_components (components, e, epi,
						       false);
	  if (!bitmap_empty_p (pro))
	    targetm.shrink_wrap.disqualify_components (components, e, pro,
						       true);

	  /* If this edge doesn't need splitting, we're fine.  */
	  if (single_pred_p (e->dest)
	      && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	    continue;

	  /* If the edge can be split, that is fine too.  */
	  if ((e->flags & EDGE_ABNORMAL) == 0)
	    continue;

	  /* We also can handle sibcalls.  */
	  if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	    {
	      gcc_assert (e->flags & EDGE_SIBCALL);
	      continue;
	    }

	  /* Remove from consideration those components we would need
	     pro/epilogues for on edges where we cannot insert them.  */
	  bitmap_and_compl (components, components, epi);
	  bitmap_and_compl (components, components, pro);

	  /* Note the subset tests below deliberately run after the removal
	     above: anything this edge wanted that is no longer in
	     COMPONENTS is reported as "BAD".  */
	  if (dump_file && !bitmap_subset_p (epi, components))
	    {
	      fprintf (dump_file, "  BAD epi %d->%d", e->src->index,
		       e->dest->index);
	      if (e->flags & EDGE_EH)
		fprintf (dump_file, " for EH");
	      dump_components ("epi", epi);
	      fprintf (dump_file, "\n");
	    }

	  if (dump_file && !bitmap_subset_p (pro, components))
	    {
	      fprintf (dump_file, "  BAD pro %d->%d", e->src->index,
		       e->dest->index);
	      if (e->flags & EDGE_EH)
		fprintf (dump_file, " for EH");
	      dump_components ("pro", pro);
	      fprintf (dump_file, "\n");
	    }
	}
    }
}
1487 
/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the start of basic blocks.  A component qualifies for a
   block's head only if every incoming edge wants it; what is placed here
   is recorded in HEAD_COMPONENTS so later stages do not emit it again.  */
static void
emit_common_heads_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));
  auto_sbitmap tmp (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->head_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
	 predecessor edges to this block.  */

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  /* Nothing can be hoisted to the head of a block with an
	     abnormal incoming edge.  */
	  if (e->flags & EDGE_ABNORMAL)
	    {
	      bitmap_clear (epi);
	      bitmap_clear (pro);
	      break;
	    }

	  /* Deselect those epilogue components that should not be inserted
	     for this edge.  */
	  bitmap_and_compl (tmp, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and (epi, epi, tmp);

	  /* Similar, for the prologue.  */
	  bitmap_and_compl (tmp, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and (pro, pro, tmp);
	}

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "  bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
	dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
	dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "\n");

      /* Place code after the BB note.  */
      if (!bitmap_empty_p (pro))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_prologue_components (pro);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_prologue_seq (seq);

	  emit_insn_after (seq, bb_note (bb));

	  bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
	}

      /* The epilogue sequence is also emitted right after the BB note, so
	 it ends up before the prologue sequence emitted just above.  */
      if (!bitmap_empty_p (epi))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_epilogue_components (epi);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_epilogue_seq (seq);

	  emit_insn_after (seq, bb_note (bb));

	  bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
	}
    }
}
1572 
/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the end of basic blocks.  A component qualifies for a
   block's tail only if every outgoing edge wants it and the successor's
   head did not already get it; what is placed here is recorded in
   TAIL_COMPONENTS so later stages do not emit it again.  */
static void
emit_common_tails_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));
  auto_sbitmap tmp (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->tail_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
	 successor edges from this block.  */
      if (EDGE_COUNT (bb->succs) == 0)
	continue;

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Nothing can be sunk to the tail of a block with an abnormal
	     outgoing edge.  */
	  if (e->flags & EDGE_ABNORMAL)
	    {
	      bitmap_clear (epi);
	      bitmap_clear (pro);
	      break;
	    }

	  /* Deselect those epilogue components that should not be inserted
	     for this edge, and also those that are already put at the head
	     of the successor block.  */
	  bitmap_and_compl (tmp, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
	  bitmap_and (epi, epi, tmp);

	  /* Similarly, for the prologue.  */
	  bitmap_and_compl (tmp, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
	  bitmap_and (pro, pro, tmp);
	}

      /* If the last insn of this block is a control flow insn we cannot
	 put anything after it.  We can put our code before it instead,
	 but only if that jump insn is a simple jump.  */
      rtx_insn *last_insn = BB_END (bb);
      if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
	{
	  bitmap_clear (epi);
	  bitmap_clear (pro);
	}

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "  bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
	dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
	dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "\n");

      /* Put the code at the end of the BB, but before any final jump.
	 Both insertions below are made relative to the captured LAST_INSN,
	 the original end of the block.  */
      if (!bitmap_empty_p (epi))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_epilogue_components (epi);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_epilogue_seq (seq);

	  if (control_flow_insn_p (last_insn))
	    emit_insn_before (seq, last_insn);
	  else
	    emit_insn_after (seq, last_insn);

	  bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
	}

      if (!bitmap_empty_p (pro))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_prologue_components (pro);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_prologue_seq (seq);

	  if (control_flow_insn_p (last_insn))
	    emit_insn_before (seq, last_insn);
	  else
	    emit_insn_after (seq, last_insn);

	  bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
	}
    }
}
1678 
/* Place prologues and epilogues for COMPONENTS on edges, if we haven't already
   placed them inside blocks directly (as recorded in the HEAD_COMPONENTS and
   TAIL_COMPONENTS of the blocks involved).  */
static void
insert_prologue_epilogue_for_components (sbitmap components)
{
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Skip blocks without pass data (presumably blocks created after
	 init_separate_shrink_wrap ran, e.g. by edge splitting — TODO
	 confirm); they carry no component decisions.  */
      if (!bb->aux)
	continue;

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Find which pro/epilogue components are needed on this edge.  */
	  bitmap_and_compl (epi, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (pro, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and (epi, epi, components);
	  bitmap_and (pro, pro, components);

	  /* Deselect those we already have put at the head or tail of the
	     edge's dest resp. src.  */
	  bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
	  bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
	  bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
	  bitmap_and_compl (pro, pro, SW (e->src)->tail_components);

	  if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "  %d->%d", e->src->index,
			   e->dest->index);
		  dump_components ("epi", epi);
		  dump_components ("pro", pro);
		  if (e->flags & EDGE_SIBCALL)
		    fprintf (dump_file, "  (SIBCALL)");
		  else if (e->flags & EDGE_ABNORMAL)
		    fprintf (dump_file, "  (ABNORMAL)");
		  fprintf (dump_file, "\n");
		}

	      /* Put the epilogue components in place.  */
	      start_sequence ();
	      targetm.shrink_wrap.emit_epilogue_components (epi);
	      rtx_insn *seq = get_insns ();
	      end_sequence ();
	      record_epilogue_seq (seq);

	      if (e->flags & EDGE_SIBCALL)
		{
		  /* A sibcall edge cannot be split; emit the epilogue
		     directly before the sibling call insn.  */
		  gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));

		  rtx_insn *insn = BB_END (e->src);
		  gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
		  emit_insn_before (seq, insn);
		}
	      else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
		{
		  /* Insertion on an edge into the exit block is not
		     supported; split the edge and append the code to the
		     new block instead.  */
		  gcc_assert (e->flags & EDGE_FALLTHRU);
		  basic_block new_bb = split_edge (e);
		  emit_insn_after (seq, BB_END (new_bb));
		}
	      else
		insert_insn_on_edge (seq, e);

	      /* Put the prologue components in place.  */
	      start_sequence ();
	      targetm.shrink_wrap.emit_prologue_components (pro);
	      seq = get_insns ();
	      end_sequence ();
	      record_prologue_seq (seq);

	      insert_insn_on_edge (seq, e);
	    }
	}
    }

  /* Materialize all pending edge insertions.  */
  commit_edge_insertions ();
}
1765 
1766 /* The main entry point to this subpass.  FIRST_BB is where the prologue
1767    would be normally put.  */
1768 void
1769 try_shrink_wrapping_separate (basic_block first_bb)
1770 {
1771   if (HAVE_cc0)
1772     return;
1773 
1774   if (!(SHRINK_WRAPPING_ENABLED
1775 	&& flag_shrink_wrap_separate
1776 	&& optimize_function_for_speed_p (cfun)
1777 	&& targetm.shrink_wrap.get_separate_components))
1778     return;
1779 
1780   /* We don't handle "strange" functions.  */
1781   if (cfun->calls_alloca
1782       || cfun->calls_setjmp
1783       || cfun->can_throw_non_call_exceptions
1784       || crtl->calls_eh_return
1785       || crtl->has_nonlocal_goto
1786       || crtl->saves_all_registers)
1787     return;
1788 
1789   /* Ask the target what components there are.  If it returns NULL, don't
1790      do anything.  */
1791   sbitmap components = targetm.shrink_wrap.get_separate_components ();
1792   if (!components)
1793     return;
1794 
1795   /* We need LIVE info, not defining anything in the entry block and not
1796      using anything in the exit block.  A block then needs a component if
1797      the register for that component is in the IN or GEN or KILL set for
1798      that block.  */
1799   df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT;
1800   df_update_entry_exit_and_calls ();
1801   df_live_add_problem ();
1802   df_live_set_all_dirty ();
1803   df_analyze ();
1804 
1805   calculate_dominance_info (CDI_DOMINATORS);
1806   calculate_dominance_info (CDI_POST_DOMINATORS);
1807 
1808   init_separate_shrink_wrap (components);
1809 
1810   sbitmap_iterator sbi;
1811   unsigned int j;
1812   EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
1813     place_prologue_for_one_component (j, first_bb);
1814 
1815   /* Try to minimize the number of saves and restores.  Do this as long as
1816      it changes anything.  This does not iterate more than a few times.  */
1817   int spread_times = 0;
1818   while (spread_components (components))
1819     {
1820       spread_times++;
1821 
1822       if (dump_file)
1823 	fprintf (dump_file, "Now spread %d times.\n", spread_times);
1824     }
1825 
1826   disqualify_problematic_components (components);
1827 
1828   /* Don't separately shrink-wrap anything where the "main" prologue will
1829      go; the target code can often optimize things if it is presented with
1830      all components together (say, if it generates store-multiple insns).  */
1831   bitmap_and_compl (components, components, SW (first_bb)->has_components);
1832 
1833   if (bitmap_empty_p (components))
1834     {
1835       if (dump_file)
1836 	fprintf (dump_file, "Not wrapping anything separately.\n");
1837     }
1838   else
1839     {
1840       if (dump_file)
1841 	{
1842 	  fprintf (dump_file, "The components we wrap separately are");
1843 	  dump_components ("sep", components);
1844 	  fprintf (dump_file, "\n");
1845 
1846 	  fprintf (dump_file, "... Inserting common heads...\n");
1847 	}
1848 
1849       emit_common_heads_for_components (components);
1850 
1851       if (dump_file)
1852 	fprintf (dump_file, "... Inserting common tails...\n");
1853 
1854       emit_common_tails_for_components (components);
1855 
1856       if (dump_file)
1857 	fprintf (dump_file, "... Inserting the more difficult ones...\n");
1858 
1859       insert_prologue_epilogue_for_components (components);
1860 
1861       if (dump_file)
1862 	fprintf (dump_file, "... Done.\n");
1863 
1864       targetm.shrink_wrap.set_handled_components (components);
1865 
1866       crtl->shrink_wrapped_separate = true;
1867     }
1868 
1869   fini_separate_shrink_wrap ();
1870 
1871   sbitmap_free (components);
1872   free_dominance_info (CDI_DOMINATORS);
1873   free_dominance_info (CDI_POST_DOMINATORS);
1874 
1875   /* All done.  */
1876   df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT;
1877   df_update_entry_exit_and_calls ();
1878   df_live_set_all_dirty ();
1879   df_analyze ();
1880 }
1881