xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/shrink-wrap.c (revision cef8759bd76c1b621f8eab8faa6f208faabc2e15)
1 /* Shrink-wrapping related optimizations.
2    Copyright (C) 1987-2017 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This file handles shrink-wrapping related optimizations.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "cfghooks.h"
30 #include "df.h"
31 #include "memmodel.h"
32 #include "tm_p.h"
33 #include "regs.h"
34 #include "insn-config.h"
35 #include "emit-rtl.h"
36 #include "output.h"
37 #include "tree-pass.h"
38 #include "cfgrtl.h"
39 #include "cfgbuild.h"
40 #include "params.h"
41 #include "bb-reorder.h"
42 #include "shrink-wrap.h"
43 #include "regcprop.h"
44 #include "rtl-iter.h"
45 #include "valtrack.h"
46 
47 
48 /* Return true if INSN requires the stack frame to be set up.
49    PROLOGUE_USED contains the hard registers used in the function
50    prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
51    prologue to set up for the function.  */
52 bool
53 requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
54 			HARD_REG_SET set_up_by_prologue)
55 {
56   df_ref def, use;
57   HARD_REG_SET hardregs;
58   unsigned regno;
59 
60   if (CALL_P (insn))
61     return !SIBLING_CALL_P (insn);
62 
63   /* We need a frame to get the unique CFA expected by the unwinder.  */
64   if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
65     return true;
66 
67   CLEAR_HARD_REG_SET (hardregs);
68   FOR_EACH_INSN_DEF (def, insn)
69     {
70       rtx dreg = DF_REF_REG (def);
71 
72       if (!REG_P (dreg))
73 	continue;
74 
75       add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
76     }
77   if (hard_reg_set_intersect_p (hardregs, prologue_used))
78     return true;
79   AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
80   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
81     if (TEST_HARD_REG_BIT (hardregs, regno)
82 	&& df_regs_ever_live_p (regno))
83       return true;
84 
85   FOR_EACH_INSN_USE (use, insn)
86     {
87       rtx reg = DF_REF_REG (use);
88 
89       if (!REG_P (reg))
90 	continue;
91 
92       add_to_hard_reg_set (&hardregs, GET_MODE (reg),
93 			   REGNO (reg));
94     }
95   if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
96     return true;
97 
98   return false;
99 }
100 
101 /* See whether there has a single live edge from BB, which dest uses
102    [REGNO, END_REGNO).  Return the live edge if its dest bb has
103    one or two predecessors.  Otherwise return NULL.  */
104 
105 static edge
106 live_edge_for_reg (basic_block bb, int regno, int end_regno)
107 {
108   edge e, live_edge;
109   edge_iterator ei;
110   bitmap live;
111   int i;
112 
113   live_edge = NULL;
114   FOR_EACH_EDGE (e, ei, bb->succs)
115     {
116       live = df_get_live_in (e->dest);
117       for (i = regno; i < end_regno; i++)
118 	if (REGNO_REG_SET_P (live, i))
119 	  {
120 	    if (live_edge && live_edge != e)
121 	      return NULL;
122 	    live_edge = e;
123 	  }
124     }
125 
126   /* We can sometimes encounter dead code.  Don't try to move it
127      into the exit block.  */
128   if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
129     return NULL;
130 
131   /* Reject targets of abnormal edges.  This is needed for correctness
132      on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
133      exception edges even though it is generally treated as call-saved
134      for the majority of the compilation.  Moving across abnormal edges
135      isn't going to be interesting for shrink-wrap usage anyway.  */
136   if (live_edge->flags & EDGE_ABNORMAL)
137     return NULL;
138 
139   /* When live_edge->dest->preds == 2, we can create a new block on
140      the edge to make it meet the requirement.  */
141   if (EDGE_COUNT (live_edge->dest->preds) > 2)
142     return NULL;
143 
144   return live_edge;
145 }
146 
/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the set of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   has been split (at most one edge is ever split per function).
   DEBUG accumulates bookkeeping for debug insns that mention
   registers whose defining insn gets moved away.  */

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
			   const HARD_REG_SET uses,
			   const HARD_REG_SET defs,
			   bool *split_p,
			   struct dead_debug_local *debug)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses = NULL, bb_defs = NULL;
  unsigned int i, dregno, end_dregno;
  /* SREGNO..END_SREGNO is an empty range unless SRC turns out to be
     a register below.  */
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;
  rtx_insn *dinsn;
  df_ref def;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  if (!INSN_P (insn))
    return false;
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
      (1) A (non-overlapping) register
      (2) A constant,
      (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */

  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      /* Walk SRC; accept arithmetic over at most one inner register,
	 recording that register in SRC_INNER.  */
      rtx src_inner = NULL_RTX;

      if (can_throw_internal (insn))
	return false;

      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
	{
	  rtx x = *iter;
	  switch (GET_RTX_CLASS (GET_CODE (x)))
	    {
	    case RTX_CONST_OBJ:
	    case RTX_COMPARE:
	    case RTX_COMM_COMPARE:
	    case RTX_BIN_ARITH:
	    case RTX_COMM_ARITH:
	    case RTX_UNARY:
	    case RTX_TERNARY:
	      /* Constant or expression.  Continue.  */
	      break;

	    case RTX_OBJ:
	    case RTX_EXTRA:
	      switch (GET_CODE (x))
		{
		case UNSPEC:
		case SUBREG:
		case STRICT_LOW_PART:
		case PC:
		case LO_SUM:
		  /* Ok.  Continue.  */
		  break;

		case REG:
		  /* Fail if we see a second inner register.  */
		  if (src_inner != NULL)
		    return false;
		  src_inner = x;
		  break;

		default:
		  return false;
		}
	      break;

	    default:
	      return false;
	    }
	}

      /* From here on, the overlap checks only need the single inner
	 register (if any), not the whole expression.  */
      if (src_inner != NULL)
	src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
	return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
	return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
	return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
	 all data structures are allocated.  */
      df_grow_bb_info (df_live);

      /* Seed the new block's live-in from what flows along the split
	 edge: live out of BB intersected with live into OLD_DEST.  */
      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
		  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once for a function.
	 NOTE: this check deliberately comes after split_edge; bailing
	 out here leaves an (empty, harmless) split block behind.  */
      if (*split_p)
	return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  */
  do
    {
      if (MAY_HAVE_DEBUG_INSNS)
	{
	  /* Record debug uses of DEST occurring after INSN in BB, so
	     debug temps can be inserted for them once INSN moves.
	     The reverse walk stops when it reaches INSN itself.  */
	  FOR_BB_INSNS_REVERSE (bb, dinsn)
	    if (DEBUG_INSN_P (dinsn))
	      {
		df_ref use;
		FOR_EACH_INSN_USE (use, dinsn)
		  if (refers_to_regno_p (dregno, end_dregno,
					 DF_REF_REG (use), (rtx *) NULL))
		    dead_debug_add (debug, use, DF_REF_REGNO (use));
	      }
	    else if (dinsn == insn)
	      break;
	}
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
	 INSN to BB if so.  Either way, DEST is no longer live on entry,
	 except for any part that overlaps SRC (next loop).  */
      if (!*split_p)
	{
	  bb_uses = &DF_LR_BB_INFO (bb)->use;
	  bb_defs = &DF_LR_BB_INFO (bb)->def;
	}
      if (df_live)
	{
	  for (i = dregno; i < end_dregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_uses, i)
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
	     Either way, SRC is now live on entry.  */
	  for (i = sregno; i < end_sregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}
      else
	{
	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
	     at -O1, just give up searching NEXT_BLOCK.  */
	  next_block = NULL;
	  for (i = dregno; i < end_dregno; i++)
	    {
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  for (i = sregno; i < end_sregno; i++)
	    {
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}

      /* If we don't need to add the move to BB, look for a single
	 successor block.  */
      if (next_block)
	{
	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
	    break;
	  next_block = live_edge->dest;
	}
    }
  while (next_block);

  /* For the new created basic block, there is no dataflow info at all.
     So skip the following dataflow update and check.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
	 (next loop).  */
      for (i = dregno; i < end_dregno; i++)
	{
	  CLEAR_REGNO_REG_SET (bb_uses, i);
	  SET_REGNO_REG_SET (bb_defs, i);
	}

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
	SET_REGNO_REG_SET (bb_uses, i);
    }

  /* Insert debug temps for dead REGs used in subsequent debug insns.  */
  if (debug->used && !bitmap_empty_p (debug->used))
    FOR_EACH_INSN_DEF (def, insn)
      dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
			      DEBUG_TEMP_BEFORE_WITH_VALUE);

  /* Physically move the pattern: re-emit it at the head of BB (after
     the block note) and delete the original insn.  */
  emit_insn_after (PATTERN (insn), bb_note (bb));
  delete_insn (insn);
  return true;
}
421 
422 /* Look for register copies in the first block of the function, and move
423    them down into successor blocks if the register is used only on one
424    path.  This exposes more opportunities for shrink-wrapping.  These
425    kinds of sets often occur when incoming argument registers are moved
426    to call-saved registers because their values are live across one or
427    more calls during the function.  */
428 
429 static void
430 prepare_shrink_wrap (basic_block entry_block)
431 {
432   rtx_insn *insn, *curr;
433   rtx x;
434   HARD_REG_SET uses, defs;
435   df_ref def, use;
436   bool split_p = false;
437   unsigned int i;
438   struct dead_debug_local debug;
439 
440   if (JUMP_P (BB_END (entry_block)))
441     {
442       /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
443 	 to sink the copies from parameter to callee saved register out of
444 	 entry block.  copyprop_hardreg_forward_bb_without_debug_insn is called
445 	 to release some dependences.  */
446       copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
447     }
448 
449   dead_debug_local_init (&debug, NULL, NULL);
450   CLEAR_HARD_REG_SET (uses);
451   CLEAR_HARD_REG_SET (defs);
452 
453   FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
454     if (NONDEBUG_INSN_P (insn)
455 	&& !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
456 				       &split_p, &debug))
457       {
458 	/* Add all defined registers to DEFs.  */
459 	FOR_EACH_INSN_DEF (def, insn)
460 	  {
461 	    x = DF_REF_REG (def);
462 	    if (REG_P (x) && HARD_REGISTER_P (x))
463 	      for (i = REGNO (x); i < END_REGNO (x); i++)
464 		SET_HARD_REG_BIT (defs, i);
465 	  }
466 
467 	/* Add all used registers to USESs.  */
468 	FOR_EACH_INSN_USE (use, insn)
469 	  {
470 	    x = DF_REF_REG (use);
471 	    if (REG_P (x) && HARD_REGISTER_P (x))
472 	      for (i = REGNO (x); i < END_REGNO (x); i++)
473 		SET_HARD_REG_BIT (uses, i);
474 	  }
475       }
476 
477   dead_debug_local_finish (&debug, NULL);
478 }
479 
480 /* Return whether basic block PRO can get the prologue.  It can not if it
481    has incoming complex edges that need a prologue inserted (we make a new
482    block for the prologue, so those edges would need to be redirected, which
483    does not work).  It also can not if there exist registers live on entry
484    to PRO that are clobbered by the prologue.  */
485 
486 static bool
487 can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered)
488 {
489   edge e;
490   edge_iterator ei;
491   FOR_EACH_EDGE (e, ei, pro->preds)
492     if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
493 	&& !dominated_by_p (CDI_DOMINATORS, e->src, pro))
494       return false;
495 
496   HARD_REG_SET live;
497   REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro));
498   if (hard_reg_set_intersect_p (live, prologue_clobbered))
499     return false;
500 
501   return true;
502 }
503 
504 /* Return whether we can duplicate basic block BB for shrink wrapping.  We
505    cannot if the block cannot be duplicated at all, or if any of its incoming
506    edges are complex and come from a block that does not require a prologue
507    (we cannot redirect such edges), or if the block is too big to copy.
508    PRO is the basic block before which we would put the prologue, MAX_SIZE is
509    the maximum size block we allow to be copied.  */
510 
511 static bool
512 can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size)
513 {
514   if (!can_duplicate_block_p (bb))
515     return false;
516 
517   edge e;
518   edge_iterator ei;
519   FOR_EACH_EDGE (e, ei, bb->preds)
520     if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
521 	&& !dominated_by_p (CDI_DOMINATORS, e->src, pro))
522       return false;
523 
524   unsigned size = 0;
525 
526   rtx_insn *insn;
527   FOR_BB_INSNS (bb, insn)
528     if (NONDEBUG_INSN_P (insn))
529       {
530 	size += get_attr_min_length (insn);
531 	if (size > max_size)
532 	  return false;
533       }
534 
535   return true;
536 }
537 
538 /* Do whatever needs to be done for exits that run without prologue.
539    Sibcalls need nothing done.  Normal exits get a simple_return inserted.  */
540 
541 static void
542 handle_simple_exit (edge e)
543 {
544 
545   if (e->flags & EDGE_SIBCALL)
546     {
547       /* Tell function.c to take no further action on this edge.  */
548       e->flags |= EDGE_IGNORE;
549 
550       e->flags &= ~EDGE_FALLTHRU;
551       emit_barrier_after_bb (e->src);
552       return;
553     }
554 
555   /* If the basic block the edge comes from has multiple successors,
556      split the edge.  */
557   if (EDGE_COUNT (e->src->succs) > 1)
558     {
559       basic_block old_bb = e->src;
560       rtx_insn *end = BB_END (old_bb);
561       rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end);
562       basic_block new_bb = create_basic_block (note, note, old_bb);
563       BB_COPY_PARTITION (new_bb, old_bb);
564       BB_END (old_bb) = end;
565 
566       redirect_edge_succ (e, new_bb);
567       e->flags |= EDGE_FALLTHRU;
568 
569       e = make_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
570     }
571 
572   e->flags &= ~EDGE_FALLTHRU;
573   rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (),
574 					     BB_END (e->src));
575   JUMP_LABEL (ret) = simple_return_rtx;
576   emit_barrier_after_bb (e->src);
577 
578   if (dump_file)
579     fprintf (dump_file, "Made simple_return with UID %d in bb %d\n",
580 	     INSN_UID (ret), e->src->index);
581 }
582 
583 /* Try to perform a kind of shrink-wrapping, making sure the
584    prologue/epilogue is emitted only around those parts of the
585    function that require it.
586 
587    There will be exactly one prologue, and it will be executed either
588    zero or one time, on any path.  Depending on where the prologue is
589    placed, some of the basic blocks can be reached via both paths with
590    and without a prologue.  Such blocks will be duplicated here, and the
591    edges changed to match.
592 
593    Paths that go to the exit without going through the prologue will use
594    a simple_return instead of the epilogue.  We maximize the number of
595    those, making sure to only duplicate blocks that can be duplicated.
596    If the prologue can then still be placed in multiple locations, we
597    place it as early as possible.
598 
599    An example, where we duplicate blocks with control flow (legend:
600    _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should
601    be taken to point down or to the right, to simplify the diagram; here,
602    block 3 needs a prologue, the rest does not):
603 
604 
605        B                 B
606        |                 |
607        2                 2
608        |\                |\
609        | 3    becomes    | 3
610        |/                |  \
611        4                 7   4
612        |\                |\  |\
613        | 5               | 8 | 5
614        |/                |/  |/
615        6                 9   6
616        |                 |   |
617        R                 S   R
618 
619 
620    (bb 4 is duplicated to 7, and so on; the prologue is inserted on the
621    edge 2->3).
622 
623    Another example, where part of a loop is duplicated (again, bb 3 is
624    the only block that needs a prologue):
625 
626 
627        B   3<--              B       ->3<--
628        |   |   |             |      |  |   |
629        |   v   |   becomes   |      |  v   |
630        2---4---              2---5--   4---
631            |                     |     |
632            R                     S     R
633 
634 
635    (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3).
636 
637    ENTRY_EDGE is the edge where the prologue will be placed, possibly
638    changed by this function.  PROLOGUE_SEQ is the prologue we will insert.  */
639 
640 void
641 try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq)
642 {
643   /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes
644      no sense to shrink-wrap: then do not shrink-wrap!  */
645 
646   if (!SHRINK_WRAPPING_ENABLED)
647     return;
648 
649   if (crtl->profile && !targetm.profile_before_prologue ())
650     return;
651 
652   if (crtl->calls_eh_return)
653     return;
654 
655   bool empty_prologue = true;
656   for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
657     if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END))
658       {
659 	empty_prologue = false;
660 	break;
661       }
662   if (empty_prologue)
663     return;
664 
665   /* Move some code down to expose more shrink-wrapping opportunities.  */
666 
667   basic_block entry = (*entry_edge)->dest;
668   prepare_shrink_wrap (entry);
669 
670   if (dump_file)
671     fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
672 
673   /* Compute the registers set and used in the prologue.  */
674 
675   HARD_REG_SET prologue_clobbered, prologue_used;
676   CLEAR_HARD_REG_SET (prologue_clobbered);
677   CLEAR_HARD_REG_SET (prologue_used);
678   for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
679     if (NONDEBUG_INSN_P (insn))
680       {
681 	HARD_REG_SET this_used;
682 	CLEAR_HARD_REG_SET (this_used);
683 	note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used);
684 	AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
685 	IOR_HARD_REG_SET (prologue_used, this_used);
686 	note_stores (PATTERN (insn), record_hard_reg_sets, &prologue_clobbered);
687       }
688   CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
689   if (frame_pointer_needed)
690     CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
691 
692   /* Find out what registers are set up by the prologue; any use of these
693      cannot happen before the prologue.  */
694 
695   struct hard_reg_set_container set_up_by_prologue;
696   CLEAR_HARD_REG_SET (set_up_by_prologue.set);
697   add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM);
698   add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
699   if (frame_pointer_needed)
700     add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
701 			 HARD_FRAME_POINTER_REGNUM);
702   if (pic_offset_table_rtx
703       && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
704     add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
705 			 PIC_OFFSET_TABLE_REGNUM);
706   if (crtl->drap_reg)
707     add_to_hard_reg_set (&set_up_by_prologue.set,
708 			 GET_MODE (crtl->drap_reg),
709 			 REGNO (crtl->drap_reg));
710   if (targetm.set_up_by_prologue)
711     targetm.set_up_by_prologue (&set_up_by_prologue);
712 
713   /* We will insert the prologue before the basic block PRO.  PRO should
714      dominate all basic blocks that need the prologue to be executed
715      before them.  First, make PRO the "tightest wrap" possible.  */
716 
717   calculate_dominance_info (CDI_DOMINATORS);
718 
719   basic_block pro = 0;
720 
721   basic_block bb;
722   edge e;
723   edge_iterator ei;
724   FOR_EACH_BB_FN (bb, cfun)
725     {
726       rtx_insn *insn;
727       FOR_BB_INSNS (bb, insn)
728 	if (NONDEBUG_INSN_P (insn)
729 	    && requires_stack_frame_p (insn, prologue_used,
730 				       set_up_by_prologue.set))
731 	  {
732 	    if (dump_file)
733 	      fprintf (dump_file, "Block %d needs the prologue.\n", bb->index);
734 	    pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb);
735 	    break;
736 	  }
737     }
738 
739   /* If nothing needs a prologue, just put it at the start.  This really
740      shouldn't happen, but we cannot fix it here.  */
741 
742   if (pro == 0)
743     {
744       if (dump_file)
745 	fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; "
746 			   "putting it at the start.\n");
747       pro = entry;
748     }
749 
750   if (dump_file)
751     fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n",
752 	     pro->index);
753 
754   /* Now see if we can put the prologue at the start of PRO.  Putting it
755      there might require duplicating a block that cannot be duplicated,
756      or in some cases we cannot insert the prologue there at all.  If PRO
757      wont't do, try again with the immediate dominator of PRO, and so on.
758 
759      The blocks that need duplicating are those reachable from PRO but
760      not dominated by it.  We keep in BB_WITH a bitmap of the blocks
761      reachable from PRO that we already found, and in VEC a stack of
762      those we still need to consider (to find successors).  */
763 
764   bitmap bb_with = BITMAP_ALLOC (NULL);
765   bitmap_set_bit (bb_with, pro->index);
766 
767   vec<basic_block> vec;
768   vec.create (n_basic_blocks_for_fn (cfun));
769   vec.quick_push (pro);
770 
771   unsigned max_grow_size = get_uncond_jump_length ();
772   max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
773 
774   while (!vec.is_empty () && pro != entry)
775     {
776       while (pro != entry && !can_get_prologue (pro, prologue_clobbered))
777 	{
778 	  pro = get_immediate_dominator (CDI_DOMINATORS, pro);
779 
780 	  if (bitmap_set_bit (bb_with, pro->index))
781 	    vec.quick_push (pro);
782 	}
783 
784       basic_block bb = vec.pop ();
785       if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size))
786 	while (!dominated_by_p (CDI_DOMINATORS, bb, pro))
787 	  {
788 	    gcc_assert (pro != entry);
789 
790 	    pro = get_immediate_dominator (CDI_DOMINATORS, pro);
791 
792 	    if (bitmap_set_bit (bb_with, pro->index))
793 	      vec.quick_push (pro);
794 	  }
795 
796       FOR_EACH_EDGE (e, ei, bb->succs)
797 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
798 	    && bitmap_set_bit (bb_with, e->dest->index))
799 	  vec.quick_push (e->dest);
800     }
801 
802   if (dump_file)
803     fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n",
804 	     pro->index);
805 
806   /* If we can move PRO back without having to duplicate more blocks, do so.
807      We do this because putting the prologue earlier is better for scheduling.
808 
809      We can move back to a block PRE if every path from PRE will eventually
810      need a prologue, that is, PRO is a post-dominator of PRE.  PRE needs
811      to dominate every block reachable from itself.  We keep in BB_TMP a
812      bitmap of the blocks reachable from PRE that we already found, and in
813      VEC a stack of those we still need to consider.
814 
815      Any block reachable from PRE is also reachable from all predecessors
816      of PRE, so if we find we need to move PRE back further we can leave
817      everything not considered so far on the stack.  Any block dominated
818      by PRE is also dominated by all other dominators of PRE, so anything
819      found good for some PRE does not need to be reconsidered later.
820 
821      We don't need to update BB_WITH because none of the new blocks found
822      can jump to a block that does not need the prologue.  */
823 
824   if (pro != entry)
825     {
826       calculate_dominance_info (CDI_POST_DOMINATORS);
827 
828       bitmap bb_tmp = BITMAP_ALLOC (NULL);
829       bitmap_copy (bb_tmp, bb_with);
830       basic_block last_ok = pro;
831       vec.truncate (0);
832 
833       while (pro != entry)
834 	{
835 	  basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro);
836 	  if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro))
837 	    break;
838 
839 	  if (bitmap_set_bit (bb_tmp, pre->index))
840 	    vec.quick_push (pre);
841 
842 	  bool ok = true;
843 	  while (!vec.is_empty ())
844 	    {
845 	      if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre))
846 		{
847 		  ok = false;
848 		  break;
849 		}
850 
851 	      basic_block bb = vec.pop ();
852 	      FOR_EACH_EDGE (e, ei, bb->succs)
853 		if (bitmap_set_bit (bb_tmp, e->dest->index))
854 		  vec.quick_push (e->dest);
855 	    }
856 
857 	  if (ok && can_get_prologue (pre, prologue_clobbered))
858 	    last_ok = pre;
859 
860 	  pro = pre;
861 	}
862 
863       pro = last_ok;
864 
865       BITMAP_FREE (bb_tmp);
866       free_dominance_info (CDI_POST_DOMINATORS);
867     }
868 
869   vec.release ();
870 
871   if (dump_file)
872     fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n",
873 	     pro->index);
874 
875   if (pro == entry)
876     {
877       BITMAP_FREE (bb_with);
878       free_dominance_info (CDI_DOMINATORS);
879       return;
880     }
881 
882   /* Compute what fraction of the frequency and count of the blocks that run
883      both with and without prologue are for running with prologue.  This gives
884      the correct answer for reducible flow graphs; for irreducible flow graphs
885      our profile is messed up beyond repair anyway.  */
886 
887   gcov_type num = 0;
888   gcov_type den = 0;
889 
890   FOR_EACH_EDGE (e, ei, pro->preds)
891     if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
892       {
893 	num += EDGE_FREQUENCY (e);
894 	den += e->src->frequency;
895       }
896 
897   if (den == 0)
898     den = 1;
899 
900   /* All is okay, so do it.  */
901 
902   crtl->shrink_wrapped = true;
903   if (dump_file)
904     fprintf (dump_file, "Performing shrink-wrapping.\n");
905 
906   /* Copy the blocks that can run both with and without prologue.  The
907      originals run with prologue, the copies without.  Store a pointer to
908      the copy in the ->aux field of the original.  */
909 
910   FOR_EACH_BB_FN (bb, cfun)
911     if (bitmap_bit_p (bb_with, bb->index)
912 	&& !dominated_by_p (CDI_DOMINATORS, bb, pro))
913       {
914 	basic_block dup = duplicate_block (bb, 0, 0);
915 
916 	bb->aux = dup;
917 
918 	if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup)))
919 	  emit_barrier_after_bb (dup);
920 
921 	if (EDGE_COUNT (dup->succs) == 0)
922 	  emit_barrier_after_bb (dup);
923 
924 	if (dump_file)
925 	  fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);
926 
927 	bb->frequency = RDIV (num * bb->frequency, den);
928 	dup->frequency -= bb->frequency;
929 	bb->count = RDIV (num * bb->count, den);
930 	dup->count -= bb->count;
931       }
932 
933   /* Now change the edges to point to the copies, where appropriate.  */
934 
935   FOR_EACH_BB_FN (bb, cfun)
936     if (!dominated_by_p (CDI_DOMINATORS, bb, pro))
937       {
938 	basic_block src = bb;
939 	if (bitmap_bit_p (bb_with, bb->index))
940 	  src = (basic_block) bb->aux;
941 
942 	FOR_EACH_EDGE (e, ei, src->succs)
943 	  {
944 	    if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
945 	      continue;
946 
947 	    if (bitmap_bit_p (bb_with, e->dest->index)
948 		&& !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
949 	      {
950 		if (dump_file)
951 		  fprintf (dump_file, "Redirecting edge %d->%d to %d\n",
952 			   e->src->index, e->dest->index,
953 			   ((basic_block) e->dest->aux)->index);
954 		redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
955 	      }
956 	    else if (e->flags & EDGE_FALLTHRU
957 		     && bitmap_bit_p (bb_with, bb->index))
958 	      force_nonfallthru (e);
959 	  }
960       }
961 
962   /* Also redirect the function entry edge if necessary.  */
963 
964   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
965     if (bitmap_bit_p (bb_with, e->dest->index)
966 	&& !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
967       {
968 	basic_block split_bb = split_edge (e);
969 	e = single_succ_edge (split_bb);
970 	redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
971       }
972 
973   /* Make a simple_return for those exits that run without prologue.  */
974 
975   FOR_EACH_BB_REVERSE_FN (bb, cfun)
976     if (!bitmap_bit_p (bb_with, bb->index))
977       FOR_EACH_EDGE (e, ei, bb->succs)
978 	if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
979 	  handle_simple_exit (e);
980 
981   /* Finally, we want a single edge to put the prologue on.  Make a new
     block before the PRO block; the edge between them is the edge we want.
983      Then redirect those edges into PRO that come from blocks without the
984      prologue, to point to the new block instead.  The new prologue block
985      is put at the end of the insn chain.  */
986 
987   basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
988   BB_COPY_PARTITION (new_bb, pro);
989   if (dump_file)
990     fprintf (dump_file, "Made prologue block %d\n", new_bb->index);
991 
992   for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); )
993     {
994       if (bitmap_bit_p (bb_with, e->src->index)
995 	  || dominated_by_p (CDI_DOMINATORS, e->src, pro))
996 	{
997 	  ei_next (&ei);
998 	  continue;
999 	}
1000 
1001       new_bb->count += RDIV (e->src->count * e->probability, REG_BR_PROB_BASE);
1002       new_bb->frequency += EDGE_FREQUENCY (e);
1003 
1004       redirect_edge_and_branch_force (e, new_bb);
1005       if (dump_file)
1006 	fprintf (dump_file, "Redirected edge from %d\n", e->src->index);
1007     }
1008 
1009   *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU);
1010   force_nonfallthru (*entry_edge);
1011 
1012   BITMAP_FREE (bb_with);
1013   free_dominance_info (CDI_DOMINATORS);
1014 }
1015 
1016 /* Separate shrink-wrapping
1017 
1018    Instead of putting all of the prologue and epilogue in one spot, we
1019    can put parts of it in places where those components are executed less
1020    frequently.  The following code does this, for prologue and epilogue
1021    components that can be put in more than one location, and where those
1022    components can be executed more than once (the epilogue component will
1023    always be executed before the prologue component is executed a second
1024    time).
1025 
1026    What exactly is a component is target-dependent.  The more usual
1027    components are simple saves/restores to/from the frame of callee-saved
1028    registers.  This code treats components abstractly (as an sbitmap),
1029    letting the target handle all details.
1030 
1031    Prologue components are placed in such a way that for every component
1032    the prologue is executed as infrequently as possible.  We do this by
1033    walking the dominator tree, comparing the cost of placing a prologue
1034    component before a block to the sum of costs determined for all subtrees
1035    of that block.
1036 
1037    From this placement, we then determine for each component all blocks
1038    where at least one of this block's dominators (including itself) will
1039    get a prologue inserted.  That then is how the components are placed.
1040    We could place the epilogue components a bit smarter (we can save a
1041    bit of code size sometimes); this is a possible future improvement.
1042 
1043    Prologues and epilogues are preferably placed into a block, either at
1044    the beginning or end of it, if it is needed for all predecessor resp.
1045    successor edges; or placed on the edge otherwise.
1046 
1047    If the placement of any prologue/epilogue leads to a situation we cannot
1048    handle (for example, an abnormal edge would need to be split, or some
1049    targets want to use some specific registers that may not be available
1050    where we want to put them), separate shrink-wrapping for the components
1051    in that prologue/epilogue is aborted.  */
1052 
1053 
1054 /* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the
1055    label LABEL.  */
1056 static void
1057 dump_components (const char *label, sbitmap components)
1058 {
1059   if (bitmap_empty_p (components))
1060     return;
1061 
1062   fprintf (dump_file, " [%s", label);
1063 
1064   for (unsigned int j = 0; j < components->n_bits; j++)
1065     if (bitmap_bit_p (components, j))
1066       fprintf (dump_file, " %u", j);
1067 
1068   fprintf (dump_file, "]");
1069 }
1070 
/* The data we collect for each bb, stored in bb->aux for the duration of
   this subpass (see init_separate_shrink_wrap / fini_separate_shrink_wrap,
   accessed via the SW helper).  */
struct sw {
  /* What components does this BB need?  Filled in from the target hook
     components_for_bb and not changed afterwards.  */
  sbitmap needs_components;

  /* What components does this BB have?  This is the main decision this
     pass makes.  */
  sbitmap has_components;

  /* The components for which we placed code at the start of the BB (instead
     of on all incoming edges).  */
  sbitmap head_components;

  /* The components for which we placed code at the end of the BB (instead
     of on all outgoing edges).  */
  sbitmap tail_components;

  /* The frequency of executing the prologue for this BB, if a prologue is
     placed on this BB.  This is a pessimistic estimate (no prologue is
     needed for edges from blocks that have the component under consideration
     active already).  */
  gcov_type own_cost;

  /* The frequency of executing the prologue for this BB and all BBs
     dominated by it.  Accumulated during the dominator-tree walk in
     place_prologue_for_one_component.  */
  gcov_type total_cost;
};
1098 
1099 /* A helper function for accessing the pass-specific info.  */
1100 static inline struct sw *
1101 SW (basic_block bb)
1102 {
1103   gcc_assert (bb->aux);
1104   return (struct sw *) bb->aux;
1105 }
1106 
1107 /* Create the pass-specific data structures for separately shrink-wrapping
1108    with components COMPONENTS.  */
1109 static void
1110 init_separate_shrink_wrap (sbitmap components)
1111 {
1112   basic_block bb;
1113   FOR_ALL_BB_FN (bb, cfun)
1114     {
1115       bb->aux = xcalloc (1, sizeof (struct sw));
1116 
1117       SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb);
1118 
1119       /* Mark all basic blocks without successor as needing all components.
1120 	 This avoids problems in at least cfgcleanup, sel-sched, and
1121 	 regrename (largely to do with all paths to such a block still
1122 	 needing the same dwarf CFI info).  */
1123       if (EDGE_COUNT (bb->succs) == 0)
1124 	bitmap_copy (SW (bb)->needs_components, components);
1125 
1126       if (dump_file)
1127 	{
1128 	  fprintf (dump_file, "bb %d components:", bb->index);
1129 	  dump_components ("has", SW (bb)->needs_components);
1130 	  fprintf (dump_file, "\n");
1131 	}
1132 
1133       SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components));
1134       SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components));
1135       SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components));
1136       bitmap_clear (SW (bb)->has_components);
1137     }
1138 }
1139 
1140 /* Destroy the pass-specific data.  */
1141 static void
1142 fini_separate_shrink_wrap (void)
1143 {
1144   basic_block bb;
1145   FOR_ALL_BB_FN (bb, cfun)
1146     if (bb->aux)
1147       {
1148 	sbitmap_free (SW (bb)->needs_components);
1149 	sbitmap_free (SW (bb)->has_components);
1150 	sbitmap_free (SW (bb)->head_components);
1151 	sbitmap_free (SW (bb)->tail_components);
1152 	free (bb->aux);
1153 	bb->aux = 0;
1154       }
1155 }
1156 
/* Place the prologue for component WHICH, in the basic blocks dominated
   by HEAD.  Do a DFS over the dominator tree, and set bit WHICH in the
   HAS_COMPONENTS of a block if either the block has that bit set in
   NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all
   dominator subtrees separately.  */
static void
place_prologue_for_one_component (unsigned int which, basic_block head)
{
  /* The block we are currently dealing with.  */
  basic_block bb = head;
  /* Is this the first time we visit this block, i.e. have we just gone
     down the tree.  */
  bool first_visit = true;

  /* Walk the dominator tree, visit one block per iteration of this loop.
     Each basic block is visited twice: once before visiting any children
     of the block, and once after visiting all of them (leaf nodes are
     visited only once).  As an optimization, we do not visit subtrees
     that can no longer influence the prologue placement.  */
  for (;;)
    {
      /* First visit of a block: set the (children) cost accumulator to zero;
	 if the block does not have the component itself, walk down.  */
      if (first_visit)
	{
	  /* Initialize the cost.  The cost is the block execution frequency
	     that does not come from backedges.  Calculating this by simply
	     adding the cost of all edges that aren't backedges does not
	     work: this does not always add up to the block frequency at
	     all, and even if it does, rounding error makes for bad
	     decisions.  */
	  SW (bb)->own_cost = bb->frequency;

	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
	      {
		/* Saturate at zero: the edge frequency is an estimate and
		   may exceed what is left of the block frequency.  */
		if (SW (bb)->own_cost > EDGE_FREQUENCY (e))
		  SW (bb)->own_cost -= EDGE_FREQUENCY (e);
		else
		  SW (bb)->own_cost = 0;
	      }

	  SW (bb)->total_cost = 0;

	  /* Pre-order step: if BB does not itself need the component,
	     descend into its first dominator child (if it has one) before
	     deciding anything about BB.  */
	  if (!bitmap_bit_p (SW (bb)->needs_components, which)
	      && first_dom_son (CDI_DOMINATORS, bb))
	    {
	      bb = first_dom_son (CDI_DOMINATORS, bb);
	      continue;
	    }
	}

      /* If this block does need the component itself, or it is cheaper to
	 put the prologue here than in all the descendants that need it,
	 mark it so.  If this block's immediate post-dominator is dominated
	 by this block, and that needs the prologue, we can put it on this
	 block as well (earlier is better).  */
      if (bitmap_bit_p (SW (bb)->needs_components, which)
	  || SW (bb)->total_cost > SW (bb)->own_cost)
	{
	  SW (bb)->total_cost = SW (bb)->own_cost;
	  bitmap_set_bit (SW (bb)->has_components, which);
	}
      else
	{
	  basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
	  if (dominated_by_p (CDI_DOMINATORS, kid, bb)
	      && bitmap_bit_p (SW (kid)->has_components, which))
	    {
	      SW (bb)->total_cost = SW (bb)->own_cost;
	      bitmap_set_bit (SW (bb)->has_components, which);
	    }
	}

      /* We are back where we started, so we are done now.  */
      if (bb == head)
	return;

      /* We now know the cost of the subtree rooted at the current block.
	 Accumulate this cost in the parent.  */
      basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
      SW (parent)->total_cost += SW (bb)->total_cost;

      /* Don't walk the tree down unless necessary.  */
      if (next_dom_son (CDI_DOMINATORS, bb)
          && SW (parent)->total_cost <= SW (parent)->own_cost)
	{
	  bb = next_dom_son (CDI_DOMINATORS, bb);
	  first_visit = true;
	}
      else
	{
	  /* Post-order step: go back up to the parent.  */
	  bb = parent;
	  first_visit = false;
	}
    }
}
1256 
/* Set HAS_COMPONENTS in every block to the maximum it can be set to without
   setting it on any path from entry to exit where it was not already set
   somewhere (or, for blocks that have no path to the exit, consider only
   paths from the entry to the block itself).  */
static void
spread_components (sbitmap components)
{
  basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
  basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun);

  /* A stack of all blocks left to consider, and a bitmap of all blocks
     on that stack.  */
  vec<basic_block> todo;
  todo.create (n_basic_blocks_for_fn (cfun));
  bitmap seen = BITMAP_ALLOC (NULL);

  /* Scratch copy used to detect when a block's bitmap has reached a fixed
     point and its neighbours no longer need requeueing.  */
  sbitmap old = sbitmap_alloc (SBITMAP_SIZE (components));

  /* Find for every block the components that are *not* needed on some path
     from the entry to that block.  Do this with a flood fill from the entry
     block.  Every block can be visited at most as often as the number of
     components (plus one), and usually much less often.  */

  if (dump_file)
    fprintf (dump_file, "Spreading down...\n");

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->head_components);

  bitmap_copy (SW (entry_block)->head_components, components);

  edge e;
  edge_iterator ei;

  todo.quick_push (single_succ (entry_block));
  bitmap_set_bit (seen, single_succ (entry_block)->index);
  while (!todo.is_empty ())
    {
      bb = todo.pop ();

      bitmap_copy (old, SW (bb)->head_components);

      /* HEAD_COMPONENTS is reused here as scratch: the union of what is
	 not needed on some path to any predecessor, minus what this block
	 itself has placed.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	bitmap_ior (SW (bb)->head_components, SW (bb)->head_components,
		    SW (e->src)->head_components);

      bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components,
			SW (bb)->has_components);

      /* Requeue the successors only if something changed.  */
      if (!bitmap_equal_p (old, SW (bb)->head_components))
	FOR_EACH_EDGE (e, ei, bb->succs)
	  if (bitmap_set_bit (seen, e->dest->index))
	    todo.quick_push (e->dest);

      bitmap_clear_bit (seen, bb->index);
    }

  /* Find for every block the components that are *not* needed on some reverse
     path from the exit to that block.  */

  if (dump_file)
    fprintf (dump_file, "Spreading up...\n");

  /* First, mark all blocks not reachable from the exit block as not needing
     any component on any path to the exit.  Mark everything, and then clear
     again by a flood fill.  */

  FOR_ALL_BB_FN (bb, cfun)
    bitmap_copy (SW (bb)->tail_components, components);

  FOR_EACH_EDGE (e, ei, exit_block->preds)
    {
      todo.quick_push (e->src);
      bitmap_set_bit (seen, e->src->index);
    }

  while (!todo.is_empty ())
    {
      bb = todo.pop ();

      /* Only propagate further while there is something left to clear;
	 once a block's bitmap is empty its predecessors cannot change.  */
      if (!bitmap_empty_p (SW (bb)->tail_components))
	FOR_EACH_EDGE (e, ei, bb->preds)
	  if (bitmap_set_bit (seen, e->src->index))
	    todo.quick_push (e->src);

      bitmap_clear (SW (bb)->tail_components);

      bitmap_clear_bit (seen, bb->index);
    }

  /* And then, flood fill backwards to find for every block the components
     not needed on some path to the exit.  */

  bitmap_copy (SW (exit_block)->tail_components, components);

  FOR_EACH_EDGE (e, ei, exit_block->preds)
    {
      todo.quick_push (e->src);
      bitmap_set_bit (seen, e->src->index);
    }

  while (!todo.is_empty ())
    {
      bb = todo.pop ();

      bitmap_copy (old, SW (bb)->tail_components);

      /* Mirror image of the downward fill above, over successor edges.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components,
		    SW (e->dest)->tail_components);

      bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components,
			SW (bb)->has_components);

      if (!bitmap_equal_p (old, SW (bb)->tail_components))
	FOR_EACH_EDGE (e, ei, bb->preds)
	  if (bitmap_set_bit (seen, e->src->index))
	    todo.quick_push (e->src);

      bitmap_clear_bit (seen, bb->index);
    }

  todo.release ();

  /* Finally, combine the two directions: give every block all components
     except those that are both not needed on some path from the entry to it
     and not needed on some path from it to the exit.  */

  FOR_EACH_BB_FN (bb, cfun)
    {
      bitmap_and (SW (bb)->head_components, SW (bb)->head_components,
		  SW (bb)->tail_components);
      bitmap_and_compl (SW (bb)->has_components, components,
			SW (bb)->head_components);
    }

  FOR_ALL_BB_FN (bb, cfun)
    {
      if (dump_file)
	{
	  fprintf (dump_file, "bb %d components:", bb->index);
	  dump_components ("has", SW (bb)->has_components);
	  fprintf (dump_file, "\n");
	}
    }

  sbitmap_free (old);
  BITMAP_FREE (seen);
}
1405 
/* If we cannot handle placing some component's prologues or epilogues where
   we decided we should place them, unmark that component in COMPONENTS so
   that it is not wrapped separately.  A component can be disqualified either
   by the target hook, or because its code would have to go on an edge that
   cannot be split.  */
static void
disqualify_problematic_components (sbitmap components)
{
  sbitmap pro = sbitmap_alloc (SBITMAP_SIZE (components));
  sbitmap epi = sbitmap_alloc (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Find which components we want pro/epilogues for here.  */
	  bitmap_and_compl (epi, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (pro, SW (e->dest)->has_components,
			    SW (e->src)->has_components);

	  /* Ask the target what it thinks about things.  */
	  if (!bitmap_empty_p (epi))
	    targetm.shrink_wrap.disqualify_components (components, e, epi,
						       false);
	  if (!bitmap_empty_p (pro))
	    targetm.shrink_wrap.disqualify_components (components, e, pro,
						       true);

	  /* If this edge doesn't need splitting, we're fine.  */
	  if (single_pred_p (e->dest)
	      && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	    continue;

	  /* If the edge can be split, that is fine too.  */
	  if ((e->flags & EDGE_ABNORMAL) == 0)
	    continue;

	  /* We also can handle sibcalls.  */
	  if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	    {
	      gcc_assert (e->flags & EDGE_SIBCALL);
	      continue;
	    }

	  /* Remove from consideration those components we would need
	     pro/epilogues for on edges where we cannot insert them.  */
	  bitmap_and_compl (components, components, epi);
	  bitmap_and_compl (components, components, pro);

	  /* The removal above has already happened, so each subset test
	     below fails exactly when this edge disqualified something.  */
	  if (dump_file && !bitmap_subset_p (epi, components))
	    {
	      fprintf (dump_file, "  BAD epi %d->%d", e->src->index,
		       e->dest->index);
	      if (e->flags & EDGE_EH)
		fprintf (dump_file, " for EH");
	      dump_components ("epi", epi);
	      fprintf (dump_file, "\n");
	    }

	  if (dump_file && !bitmap_subset_p (pro, components))
	    {
	      fprintf (dump_file, "  BAD pro %d->%d", e->src->index,
		       e->dest->index);
	      if (e->flags & EDGE_EH)
		fprintf (dump_file, " for EH");
	      dump_components ("pro", pro);
	      fprintf (dump_file, "\n");
	    }
	}
    }

  sbitmap_free (pro);
  sbitmap_free (epi);
}
1482 
/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the start of basic blocks: components needed on every normal
   incoming edge of a block are emitted once after the block's BB note
   instead of on each edge.  What was placed this way is recorded in each
   block's HEAD_COMPONENTS so later phases do not emit it again.  */
static void
emit_common_heads_for_components (sbitmap components)
{
  sbitmap pro = sbitmap_alloc (SBITMAP_SIZE (components));
  sbitmap epi = sbitmap_alloc (SBITMAP_SIZE (components));
  sbitmap tmp = sbitmap_alloc (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->head_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
	 predecessor edges to this block.  */

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  /* Nothing can be inserted on an abnormal edge, so nothing can
	     be hoisted into this block's head.  */
	  if (e->flags & EDGE_ABNORMAL)
	    {
	      bitmap_clear (epi);
	      bitmap_clear (pro);
	      break;
	    }

	  /* Deselect those epilogue components that should not be inserted
	     for this edge.  */
	  bitmap_and_compl (tmp, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and (epi, epi, tmp);

	  /* Similarly, for the prologue.  */
	  bitmap_and_compl (tmp, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and (pro, pro, tmp);
	}

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "  bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
	dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
	dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "\n");

      /* Place code after the BB note.  */
      if (!bitmap_empty_p (pro))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_prologue_components (pro);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_prologue_seq (seq);

	  emit_insn_after (seq, bb_note (bb));

	  bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
	}

      if (!bitmap_empty_p (epi))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_epilogue_components (epi);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_epilogue_seq (seq);

	  emit_insn_after (seq, bb_note (bb));

	  bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
	}
    }

  sbitmap_free (pro);
  sbitmap_free (epi);
  sbitmap_free (tmp);
}
1571 
/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the end of basic blocks: components needed on every normal
   outgoing edge of a block are emitted once at the block's end (before a
   final simple jump, if there is one) instead of on each edge.  What was
   placed this way is recorded in each block's TAIL_COMPONENTS.  Must run
   after emit_common_heads_for_components, whose HEAD_COMPONENTS results
   are consulted to avoid emitting a component twice.  */
static void
emit_common_tails_for_components (sbitmap components)
{
  sbitmap pro = sbitmap_alloc (SBITMAP_SIZE (components));
  sbitmap epi = sbitmap_alloc (SBITMAP_SIZE (components));
  sbitmap tmp = sbitmap_alloc (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->tail_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
	 successor edges from this block.  */
      if (EDGE_COUNT (bb->succs) == 0)
	continue;

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Nothing can be inserted on an abnormal edge, so nothing can
	     be sunk into this block's tail.  */
	  if (e->flags & EDGE_ABNORMAL)
	    {
	      bitmap_clear (epi);
	      bitmap_clear (pro);
	      break;
	    }

	  /* Deselect those epilogue components that should not be inserted
	     for this edge, and also those that are already put at the head
	     of the successor block.  */
	  bitmap_and_compl (tmp, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
	  bitmap_and (epi, epi, tmp);

	  /* Similarly, for the prologue.  */
	  bitmap_and_compl (tmp, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
	  bitmap_and (pro, pro, tmp);
	}

      /* If the last insn of this block is a control flow insn we cannot
	 put anything after it.  We can put our code before it instead,
	 but only if that jump insn is a simple jump.  */
      rtx_insn *last_insn = BB_END (bb);
      if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
	{
	  bitmap_clear (epi);
	  bitmap_clear (pro);
	}

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "  bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
	dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
	dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "\n");

      /* Put the code at the end of the BB, but before any final jump.  */
      if (!bitmap_empty_p (epi))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_epilogue_components (epi);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_epilogue_seq (seq);

	  if (control_flow_insn_p (last_insn))
	    emit_insn_before (seq, last_insn);
	  else
	    emit_insn_after (seq, last_insn);

	  bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
	}

      if (!bitmap_empty_p (pro))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_prologue_components (pro);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_prologue_seq (seq);

	  if (control_flow_insn_p (last_insn))
	    emit_insn_before (seq, last_insn);
	  else
	    emit_insn_after (seq, last_insn);

	  bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
	}
    }

  sbitmap_free (pro);
  sbitmap_free (epi);
  sbitmap_free (tmp);
}
1681 
/* Place prologues and epilogues for COMPONENTS on edges, if we haven't already
   placed them inside blocks directly (i.e. everything not yet covered by
   HEAD_COMPONENTS/TAIL_COMPONENTS).  Must run after the common-heads and
   common-tails phases; commits all queued edge insertions at the end.  */
static void
insert_prologue_epilogue_for_components (sbitmap components)
{
  sbitmap pro = sbitmap_alloc (SBITMAP_SIZE (components));
  sbitmap epi = sbitmap_alloc (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Skip blocks that carry no pass data (e.g. blocks created by the
	 split_edge call below while this loop is running).  */
      if (!bb->aux)
	continue;

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Find which pro/epilogue components are needed on this edge.  */
	  bitmap_and_compl (epi, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (pro, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and (epi, epi, components);
	  bitmap_and (pro, pro, components);

	  /* Deselect those we already have put at the head or tail of the
	     edge's dest resp. src.  */
	  bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
	  bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
	  bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
	  bitmap_and_compl (pro, pro, SW (e->src)->tail_components);

	  if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "  %d->%d", e->src->index,
			   e->dest->index);
		  dump_components ("epi", epi);
		  dump_components ("pro", pro);
		  if (e->flags & EDGE_SIBCALL)
		    fprintf (dump_file, "  (SIBCALL)");
		  else if (e->flags & EDGE_ABNORMAL)
		    fprintf (dump_file, "  (ABNORMAL)");
		  fprintf (dump_file, "\n");
		}

	      /* Put the epilogue components in place.  */
	      start_sequence ();
	      targetm.shrink_wrap.emit_epilogue_components (epi);
	      rtx_insn *seq = get_insns ();
	      end_sequence ();
	      record_epilogue_seq (seq);

	      if (e->flags & EDGE_SIBCALL)
		{
		  /* A sibcall edge cannot be split; put the epilogue code
		     immediately before the sibling call itself.  */
		  gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));

		  rtx_insn *insn = BB_END (e->src);
		  gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
		  emit_insn_before (seq, insn);
		}
	      else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
		{
		  /* Edges to the exit block cannot take queued insns
		     either; split the edge and emit into the new block.  */
		  gcc_assert (e->flags & EDGE_FALLTHRU);
		  basic_block new_bb = split_edge (e);
		  emit_insn_after (seq, BB_END (new_bb));
		}
	      else
		insert_insn_on_edge (seq, e);

	      /* Put the prologue components in place.  */
	      start_sequence ();
	      targetm.shrink_wrap.emit_prologue_components (pro);
	      seq = get_insns ();
	      end_sequence ();
	      record_prologue_seq (seq);

	      insert_insn_on_edge (seq, e);
	    }
	}
    }

  sbitmap_free (pro);
  sbitmap_free (epi);

  commit_edge_insertions ();
}
1771 
/* The main entry point to this subpass.  FIRST_BB is where the prologue
   would be normally put.  Decides per-component placement, lets the target
   emit the actual code, and records the handled components; does nothing
   if separate shrink-wrapping is disabled or cannot be applied.  */
void
try_shrink_wrapping_separate (basic_block first_bb)
{
  /* cc0 targets cannot tolerate extra insns being inserted between a cc0
     setter and its user.  */
  if (HAVE_cc0)
    return;

  if (!(SHRINK_WRAPPING_ENABLED
	&& flag_shrink_wrap_separate
	&& optimize_function_for_speed_p (cfun)
	&& targetm.shrink_wrap.get_separate_components))
    return;

  /* We don't handle "strange" functions.  */
  if (cfun->calls_alloca
      || cfun->calls_setjmp
      || cfun->can_throw_non_call_exceptions
      || crtl->calls_eh_return
      || crtl->has_nonlocal_goto
      || crtl->saves_all_registers)
    return;

  /* Ask the target what components there are.  If it returns NULL, don't
     do anything.  */
  sbitmap components = targetm.shrink_wrap.get_separate_components ();
  if (!components)
    return;

  /* We need LIVE info, not defining anything in the entry block and not
     using anything in the exit block.  A block then needs a component if
     the register for that component is in the IN or GEN or KILL set for
     that block.  */
  df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT;
  df_update_entry_exit_and_calls ();
  df_live_add_problem ();
  df_live_set_all_dirty ();
  df_analyze ();

  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  init_separate_shrink_wrap (components);

  /* Phase 1: decide a placement for each component independently.  */
  sbitmap_iterator sbi;
  unsigned int j;
  EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
    place_prologue_for_one_component (j, first_bb);

  /* Phase 2: maximize HAS_COMPONENTS without changing the chosen paths.  */
  spread_components (components);

  /* Phase 3: drop components whose placement the target or the CFG cannot
     accommodate.  */
  disqualify_problematic_components (components);

  /* Don't separately shrink-wrap anything where the "main" prologue will
     go; the target code can often optimize things if it is presented with
     all components together (say, if it generates store-multiple insns).  */
  bitmap_and_compl (components, components, SW (first_bb)->has_components);

  if (bitmap_empty_p (components))
    {
      if (dump_file)
	fprintf (dump_file, "Not wrapping anything separately.\n");
    }
  else
    {
      if (dump_file)
	{
	  fprintf (dump_file, "The components we wrap separately are");
	  dump_components ("sep", components);
	  fprintf (dump_file, "\n");

	  fprintf (dump_file, "... Inserting common heads...\n");
	}

      emit_common_heads_for_components (components);

      if (dump_file)
	fprintf (dump_file, "... Inserting common tails...\n");

      emit_common_tails_for_components (components);

      if (dump_file)
	fprintf (dump_file, "... Inserting the more difficult ones...\n");

      insert_prologue_epilogue_for_components (components);

      if (dump_file)
	fprintf (dump_file, "... Done.\n");

      targetm.shrink_wrap.set_handled_components (components);

      crtl->shrink_wrapped_separate = true;
    }

  fini_separate_shrink_wrap ();

  sbitmap_free (components);
  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);

  /* All done.  Undo the DF_SCAN_EMPTY_ENTRY_EXIT setting from above and
     recompute the df info the rest of the compiler expects.  */
  df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT;
  df_update_entry_exit_and_calls ();
  df_live_set_all_dirty ();
  df_analyze ();
}
1878