xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/shrink-wrap.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /* Shrink-wrapping related optimizations.
2    Copyright (C) 1987-2015 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This file handles shrink-wrapping related optimizations.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl-error.h"
27 #include "hash-set.h"
28 #include "machmode.h"
29 #include "vec.h"
30 #include "double-int.h"
31 #include "input.h"
32 #include "alias.h"
33 #include "symtab.h"
34 #include "wide-int.h"
35 #include "inchash.h"
36 #include "tree.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "varasm.h"
40 #include "stringpool.h"
41 #include "flags.h"
42 #include "except.h"
43 #include "hard-reg-set.h"
44 #include "function.h"
45 #include "hashtab.h"
46 #include "rtl.h"
47 #include "statistics.h"
48 #include "real.h"
49 #include "fixed-value.h"
50 #include "insn-config.h"
51 #include "expmed.h"
52 #include "dojump.h"
53 #include "explow.h"
54 #include "calls.h"
55 #include "emit-rtl.h"
56 #include "stmt.h"
57 #include "expr.h"
58 #include "insn-codes.h"
59 #include "optabs.h"
60 #include "libfuncs.h"
61 #include "regs.h"
62 #include "recog.h"
63 #include "output.h"
64 #include "tm_p.h"
65 #include "langhooks.h"
66 #include "target.h"
67 #include "common/common-target.h"
68 #include "gimple-expr.h"
69 #include "gimplify.h"
70 #include "tree-pass.h"
71 #include "predict.h"
72 #include "dominance.h"
73 #include "cfg.h"
74 #include "cfgrtl.h"
75 #include "basic-block.h"
76 #include "df.h"
77 #include "params.h"
78 #include "bb-reorder.h"
79 #include "shrink-wrap.h"
80 #include "regcprop.h"
81 #include "rtl-iter.h"
82 #include "valtrack.h"
83 
84 #ifdef HAVE_simple_return
85 
86 /* Return true if INSN requires the stack frame to be set up.
87    PROLOGUE_USED contains the hard registers used in the function
88    prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
89    prologue to set up for the function.  */
90 bool
91 requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
92 			HARD_REG_SET set_up_by_prologue)
93 {
94   df_ref def, use;
95   HARD_REG_SET hardregs;
96   unsigned regno;
97 
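  /* Any call other than a sibling call is assumed to need the frame.  */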
98   if (CALL_P (insn))
99     return !SIBLING_CALL_P (insn);
100 
101   /* We need a frame to get the unique CFA expected by the unwinder.  */
102   if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
103     return true;
104 
105   CLEAR_HARD_REG_SET (hardregs);
106   FOR_EACH_INSN_DEF (def, insn)
107     {
108       rtx dreg = DF_REF_REG (def);
109 
110       if (!REG_P (dreg))
111 	continue;
112 
113       add_to_hard_reg_set (&hardregs, GET_MODE (dreg),
114 			   REGNO (dreg));
115     }
116   if (hard_reg_set_intersect_p (hardregs, prologue_used))
117     return true;
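  /* Clobbering a call-saved register that is live anywhere in the
     function means the prologue would have to save and restore it,
     which requires a frame.  */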
118   AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
119   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
120     if (TEST_HARD_REG_BIT (hardregs, regno)
121 	&& df_regs_ever_live_p (regno))
122       return true;
123 
124   FOR_EACH_INSN_USE (use, insn)
125     {
126       rtx reg = DF_REF_REG (use);
127 
128       if (!REG_P (reg))
129 	continue;
130 
131       add_to_hard_reg_set (&hardregs, GET_MODE (reg),
132 			   REGNO (reg));
133     }
134   if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
135     return true;
136 
137   return false;
138 }
139 
140 /* See whether BB has a single live successor edge whose destination uses
141    any of [REGNO, END_REGNO).  Return that edge if its destination block
142    has one or two predecessors; otherwise return NULL.  */
143 
144 static edge
145 live_edge_for_reg (basic_block bb, int regno, int end_regno)
146 {
147   edge e, live_edge;
148   edge_iterator ei;
149   bitmap live;
150   int i;
151 
152   live_edge = NULL;
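  /* Find the unique successor whose live-in set mentions any of
     [REGNO, END_REGNO); give up if the range is live into more than
     one successor.  */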
153   FOR_EACH_EDGE (e, ei, bb->succs)
154     {
155       live = df_get_live_in (e->dest);
156       for (i = regno; i < end_regno; i++)
157 	if (REGNO_REG_SET_P (live, i))
158 	  {
159 	    if (live_edge && live_edge != e)
160 	      return NULL;
161 	    live_edge = e;
162 	  }
163     }
164 
165   /* We can sometimes encounter dead code.  Don't try to move it
166      into the exit block.  */
167   if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
168     return NULL;
169 
170   /* Reject targets of abnormal edges.  This is needed for correctness
171      on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
172      exception edges even though it is generally treated as call-saved
173      for the majority of the compilation.  Moving across abnormal edges
174      isn't going to be interesting for shrink-wrap usage anyway.  */
175   if (live_edge->flags & EDGE_ABNORMAL)
176     return NULL;
177 
178   /* When live_edge->dest->preds == 2, we can create a new block on
179      the edge to make it meet the requirement.  */
180   if (EDGE_COUNT (live_edge->dest->preds) > 2)
181     return NULL;
182 
183   return live_edge;
184 }
185 
186 /* Try to move INSN from BB to a successor.  Return true on success.
187    USES and DEFS are the sets of registers that are used and defined
188    after INSN in BB.  SPLIT_P indicates whether a live edge from BB
189    has been split.  */
190 
191 static bool
192 move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
193 			   const HARD_REG_SET uses,
194 			   const HARD_REG_SET defs,
195 			   bool *split_p,
196 			   struct dead_debug_local *debug)
197 {
198   rtx set, src, dest;
199   bitmap live_out, live_in, bb_uses, bb_defs;
200   unsigned int i, dregno, end_dregno;
201   unsigned int sregno = FIRST_PSEUDO_REGISTER;
202   unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
203   basic_block next_block;
204   edge live_edge;
205   rtx_insn *dinsn;
206   df_ref def;
207 
208   /* Look for a simple register assignment.  We don't use single_set here
209      because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
210      destinations.  */
211   if (!INSN_P (insn))
212     return false;
213   set = PATTERN (insn);
214   if (GET_CODE (set) != SET)
215     return false;
216   src = SET_SRC (set);
217   dest = SET_DEST (set);
218 
219   /* For the destination, we want only a register.  Also disallow STACK
220      or FRAME related adjustments.  They are likely part of the prologue,
221      so keep them in the entry block.  */
222   if (!REG_P (dest)
223       || dest == stack_pointer_rtx
224       || dest == frame_pointer_rtx
225       || dest == hard_frame_pointer_rtx)
226     return false;
227 
228   /* For the source, we want one of:
229       (1) A (non-overlapping) register
230       (2) A constant,
231       (3) An expression involving no more than one register.
232 
233      That last point comes from the code following, which was originally
234      written to handle only register move operations, and still only handles
235      a single source register when checking for overlaps.  Happily, the
236      same checks can be applied to expressions like (plus reg const).  */
237 
238   if (CONSTANT_P (src))
239     ;
240   else if (!REG_P (src))
241     {
242       rtx src_inner = NULL_RTX;
243 
244       if (can_throw_internal (insn))
245 	return false;
246 
247       subrtx_var_iterator::array_type array;
248       FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
249 	{
250 	  rtx x = *iter;
251 	  switch (GET_RTX_CLASS (GET_CODE (x)))
252 	    {
253 	    case RTX_CONST_OBJ:
254 	    case RTX_COMPARE:
255 	    case RTX_COMM_COMPARE:
256 	    case RTX_BIN_ARITH:
257 	    case RTX_COMM_ARITH:
258 	    case RTX_UNARY:
259 	    case RTX_TERNARY:
260 	      /* Constant or expression.  Continue.  */
261 	      break;
262 
263 	    case RTX_OBJ:
264 	    case RTX_EXTRA:
265 	      switch (GET_CODE (x))
266 		{
267 		case UNSPEC:
268 		case SUBREG:
269 		case STRICT_LOW_PART:
270 		case PC:
271 		case LO_SUM:
272 		  /* Ok.  Continue.  */
273 		  break;
274 
275 		case REG:
276 		  /* Fail if we see a second inner register.  */
277 		  if (src_inner != NULL)
278 		    return false;
279 		  src_inner = x;
280 		  break;
281 
282 		default:
283 		  return false;
284 		}
285 	      break;
286 
287 	    default:
288 	      return false;
289 	    }
290 	}
291 
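      /* If the expression contained exactly one register, use that
	 register as the effective source for the overlap checks below.  */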
292       if (src_inner != NULL)
293 	src = src_inner;
294     }
295 
296   /* Make sure that the source register isn't defined later in BB.  */
297   if (REG_P (src))
298     {
299       sregno = REGNO (src);
300       end_sregno = END_REGNO (src);
301       if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
302 	return false;
303     }
304 
305   /* Make sure that the destination register isn't referenced later in BB.  */
306   dregno = REGNO (dest);
307   end_dregno = END_REGNO (dest);
308   if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
309       || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
310     return false;
311 
312   /* See whether there is a successor block to which we could move INSN.  */
313   live_edge = live_edge_for_reg (bb, dregno, end_dregno);
314   if (!live_edge)
315     return false;
316 
317   next_block = live_edge->dest;
318   /* Create a new basic block on the edge.  */
319   if (EDGE_COUNT (next_block->preds) == 2)
320     {
321       /* split_edge for a block with only one successor is meaningless.  */
322       if (EDGE_COUNT (bb->succs) == 1)
323 	return false;
324 
325       /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
326       if (!df_live)
327 	return false;
328 
329       basic_block old_dest = live_edge->dest;
330       next_block = split_edge (live_edge);
331 
332       /* We create a new basic block.  Call df_grow_bb_info to make sure
333 	 all data structures are allocated.  */
334       df_grow_bb_info (df_live);
335 
336       bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
337 		  df_get_live_in (old_dest));
338       df_set_bb_dirty (next_block);
339 
340       /* We should not split more than once for a function.  */
341       if (*split_p)
342 	return false;
343 
344       *split_p = true;
345     }
346 
347   /* At this point we are committed to moving INSN, but let's try to
348      move it as far as we can.  */
349   do
350     {
351       if (MAY_HAVE_DEBUG_INSNS)
352 	{
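	  /* Record the uses of DEST in any debug insns here, so that the
	     debug info can be preserved via debug temps once INSN has
	     been moved (see dead_debug_insert_temp below).  */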
353 	  FOR_BB_INSNS_REVERSE (bb, dinsn)
354 	    if (DEBUG_INSN_P (dinsn))
355 	      {
356 		df_ref use;
357 		FOR_EACH_INSN_USE (use, dinsn)
358 		  if (refers_to_regno_p (dregno, end_dregno,
359 					 DF_REF_REG (use), (rtx *) NULL))
360 		    dead_debug_add (debug, use, DF_REF_REGNO (use));
361 	      }
362 	    else if (dinsn == insn)
363 	      break;
364 	}
365       live_out = df_get_live_out (bb);
366       live_in = df_get_live_in (next_block);
367       bb = next_block;
368 
369       /* Check whether BB uses DEST or clobbers DEST.  We need to add
370 	 INSN to BB if so.  Either way, DEST is no longer live on entry,
371 	 except for any part that overlaps SRC (next loop).  */
372       bb_uses = &DF_LR_BB_INFO (bb)->use;
373       bb_defs = &DF_LR_BB_INFO (bb)->def;
374       if (df_live)
375 	{
376 	  for (i = dregno; i < end_dregno; i++)
377 	    {
378 	      if (*split_p
379 		  || REGNO_REG_SET_P (bb_uses, i)
380 		  || REGNO_REG_SET_P (bb_defs, i)
381 		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
382 		next_block = NULL;
383 	      CLEAR_REGNO_REG_SET (live_out, i);
384 	      CLEAR_REGNO_REG_SET (live_in, i);
385 	    }
386 
387 	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
388 	     Either way, SRC is now live on entry.  */
389 	  for (i = sregno; i < end_sregno; i++)
390 	    {
391 	      if (*split_p
392 		  || REGNO_REG_SET_P (bb_defs, i)
393 		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
394 		next_block = NULL;
395 	      SET_REGNO_REG_SET (live_out, i);
396 	      SET_REGNO_REG_SET (live_in, i);
397 	    }
398 	}
399       else
400 	{
401 	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
402 	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
403 	     at -O1, just give up searching NEXT_BLOCK.  */
404 	  next_block = NULL;
405 	  for (i = dregno; i < end_dregno; i++)
406 	    {
407 	      CLEAR_REGNO_REG_SET (live_out, i);
408 	      CLEAR_REGNO_REG_SET (live_in, i);
409 	    }
410 
411 	  for (i = sregno; i < end_sregno; i++)
412 	    {
413 	      SET_REGNO_REG_SET (live_out, i);
414 	      SET_REGNO_REG_SET (live_in, i);
415 	    }
416 	}
417 
418       /* If we don't need to add the move to BB, look for a single
419 	 successor block.  */
420       if (next_block)
421 	{
422 	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
423 	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
424 	    break;
425 	  next_block = live_edge->dest;
426 	}
427     }
428   while (next_block);
429 
430   /* The newly created basic block has no dataflow info at all, so skip
431      the following dataflow update and check.  */
432   if (!(*split_p))
433     {
434       /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
435 	 (next loop).  */
436       for (i = dregno; i < end_dregno; i++)
437 	{
438 	  CLEAR_REGNO_REG_SET (bb_uses, i);
439 	  SET_REGNO_REG_SET (bb_defs, i);
440 	}
441 
442       /* BB now uses SRC.  */
443       for (i = sregno; i < end_sregno; i++)
444 	SET_REGNO_REG_SET (bb_uses, i);
445     }
446 
447   /* Insert debug temps for dead REGs used in subsequent debug insns.  */
448   if (debug->used && !bitmap_empty_p (debug->used))
449     FOR_EACH_INSN_DEF (def, insn)
450       dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
451 			      DEBUG_TEMP_BEFORE_WITH_VALUE);
452 
453   emit_insn_after (PATTERN (insn), bb_note (bb));
454   delete_insn (insn);
455   return true;
456 }
457 
458 /* Look for register copies in the first block of the function, and move
459    them down into successor blocks if the register is used only on one
460    path.  This exposes more opportunities for shrink-wrapping.  These
461    kinds of sets often occur when incoming argument registers are moved
462    to call-saved registers because their values are live across one or
463    more calls during the function.  */
464 
465 void
466 prepare_shrink_wrap (basic_block entry_block)
467 {
468   rtx_insn *insn, *curr;
469   rtx x;
470   HARD_REG_SET uses, defs;
471   df_ref def, use;
472   bool split_p = false;
473   unsigned int i;
474   struct dead_debug_local debug;
475 
476   if (JUMP_P (BB_END (entry_block)))
477     {
478       /* To expose more shrink-wrapping opportunities, prepare_shrink_wrap
479 	 tries to sink copies from parameter registers to callee-saved registers
480 	 out of the entry block.  copyprop_hardreg_forward_bb_without_debug_insn
481 	 is called first to release some of the dependences.  */
482       copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
483     }
484 
485   dead_debug_local_init (&debug, NULL, NULL);
486   CLEAR_HARD_REG_SET (uses);
487   CLEAR_HARD_REG_SET (defs);
488 
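  /* Walk the entry block backwards.  Whenever an insn cannot be sunk,
     accumulate its defs and uses so that no earlier insn is moved past
     a conflicting reference.  */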
489   FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
490     if (NONDEBUG_INSN_P (insn)
491 	&& !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
492 				       &split_p, &debug))
493       {
494 	/* Add all defined registers to DEFS.  */
495 	FOR_EACH_INSN_DEF (def, insn)
496 	  {
497 	    x = DF_REF_REG (def);
498 	    if (REG_P (x) && HARD_REGISTER_P (x))
499 	      for (i = REGNO (x); i < END_REGNO (x); i++)
500 		SET_HARD_REG_BIT (defs, i);
501 	  }
502 
503 	/* Add all used registers to USES.  */
504 	FOR_EACH_INSN_USE (use, insn)
505 	  {
506 	    x = DF_REF_REG (use);
507 	    if (REG_P (x) && HARD_REGISTER_P (x))
508 	      for (i = REGNO (x); i < END_REGNO (x); i++)
509 		SET_HARD_REG_BIT (uses, i);
510 	  }
511       }
512 
513   dead_debug_local_finish (&debug, NULL);
514 }
515 
516 /* Create a copy of the instructions in BB and insert them at BEFORE.
517    Redirect preds of BB to COPY_BB if they don't appear in NEED_PROLOGUE.  */
518 void
519 dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx_insn *before,
520 			bitmap_head *need_prologue)
521 {
522   edge_iterator ei;
523   edge e;
524   rtx_insn *insn = BB_END (bb);
525 
526   /* We know BB has a single successor, so there is no need to copy a
527      simple jump at the end of BB.  */
528   if (simplejump_p (insn))
529     insn = PREV_INSN (insn);
530 
531   start_sequence ();
532   duplicate_insn_chain (BB_HEAD (bb), insn);
533   if (dump_file)
534     {
535       unsigned count = 0;
536       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
537 	if (active_insn_p (insn))
538 	  ++count;
539       fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n",
540 	       bb->index, copy_bb->index, count);
541     }
542   insn = get_insns ();
543   end_sequence ();
544   emit_insn_before (insn, before);
545 
546   /* Redirect all the paths that need no prologue into copy_bb.  */
547   for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
548     if (!bitmap_bit_p (need_prologue, e->src->index))
549       {
550 	int freq = EDGE_FREQUENCY (e);
551 	copy_bb->count += e->count;
552 	copy_bb->frequency += EDGE_FREQUENCY (e);
553 	e->dest->count -= e->count;
554 	if (e->dest->count < 0)
555 	  e->dest->count = 0;
556 	e->dest->frequency -= freq;
557 	if (e->dest->frequency < 0)
558 	  e->dest->frequency = 0;
559 	redirect_edge_and_branch_force (e, copy_bb);
560 	continue;
561       }
562     else
563       ei_next (&ei);
564 }
565 
566 
567 /* Try to perform a kind of shrink-wrapping, making sure the
568    prologue/epilogue is emitted only around those parts of the
569    function that require it.  */
570 
571 void
572 try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge,
573 		     bitmap_head *bb_flags, rtx_insn *prologue_seq)
574 {
575   edge e;
576   edge_iterator ei;
577   bool nonempty_prologue = false;
578   unsigned max_grow_size;
579   rtx_insn *seq;
580 
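  /* A prologue sequence that contains nothing but its
     NOTE_INSN_PROLOGUE_END note is considered empty.  */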
581   for (seq = prologue_seq; seq; seq = NEXT_INSN (seq))
582     if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END)
583       {
584 	nonempty_prologue = true;
585 	break;
586       }
587 
588   if (flag_shrink_wrap && HAVE_simple_return
589       && (targetm.profile_before_prologue () || !crtl->profile)
590       && nonempty_prologue && !crtl->calls_eh_return)
591     {
592       HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
593       struct hard_reg_set_container set_up_by_prologue;
594       rtx_insn *p_insn;
595       vec<basic_block> vec;
596       basic_block bb;
597       bitmap_head bb_antic_flags;
598       bitmap_head bb_on_list;
599       bitmap_head bb_tail;
600 
601       if (dump_file)
602 	fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
603 
604       /* Compute the registers set and used in the prologue.  */
605       CLEAR_HARD_REG_SET (prologue_clobbered);
606       CLEAR_HARD_REG_SET (prologue_used);
607       for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
608 	{
609 	  HARD_REG_SET this_used;
610 	  if (!NONDEBUG_INSN_P (p_insn))
611 	    continue;
612 
613 	  CLEAR_HARD_REG_SET (this_used);
614 	  note_uses (&PATTERN (p_insn), record_hard_reg_uses,
615 		     &this_used);
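	  /* Count a register as used by the prologue only if it is read
	     before the prologue has clobbered it.  */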
616 	  AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
617 	  IOR_HARD_REG_SET (prologue_used, this_used);
618 	  note_stores (PATTERN (p_insn), record_hard_reg_sets,
619 		       &prologue_clobbered);
620 	}
621 
622       prepare_shrink_wrap ((*entry_edge)->dest);
623 
624       bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
625       bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
626       bitmap_initialize (&bb_tail, &bitmap_default_obstack);
627 
628       /* Find the set of basic blocks that require a stack frame,
629 	 and blocks that are too big to be duplicated.  */
630 
631       vec.create (n_basic_blocks_for_fn (cfun));
632 
633       CLEAR_HARD_REG_SET (set_up_by_prologue.set);
634       add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
635 			   STACK_POINTER_REGNUM);
636       add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
637       if (frame_pointer_needed)
638 	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
639 			     HARD_FRAME_POINTER_REGNUM);
640       if (pic_offset_table_rtx
641 	  && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
642 	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
643 			     PIC_OFFSET_TABLE_REGNUM);
644       if (crtl->drap_reg)
645 	add_to_hard_reg_set (&set_up_by_prologue.set,
646 			     GET_MODE (crtl->drap_reg),
647 			     REGNO (crtl->drap_reg));
648       if (targetm.set_up_by_prologue)
649 	targetm.set_up_by_prologue (&set_up_by_prologue);
650 
651       /* We don't use a different max size depending on
652 	 optimize_bb_for_speed_p because increasing shrink-wrapping
653 	 opportunities by duplicating tail blocks can actually result
654 	 in an overall decrease in code size.  */
655       max_grow_size = get_uncond_jump_length ();
656       max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
657 
658       FOR_EACH_BB_FN (bb, cfun)
659 	{
660 	  rtx_insn *insn;
661 	  unsigned size = 0;
662 
663 	  FOR_BB_INSNS (bb, insn)
664 	    if (NONDEBUG_INSN_P (insn))
665 	      {
666 		if (requires_stack_frame_p (insn, prologue_used,
667 					    set_up_by_prologue.set))
668 		  {
669 		    if (bb == (*entry_edge)->dest)
670 		      goto fail_shrinkwrap;
671 		    bitmap_set_bit (bb_flags, bb->index);
672 		    vec.quick_push (bb);
673 		    break;
674 		  }
675 		else if (size <= max_grow_size)
676 		  {
677 		    size += get_attr_min_length (insn);
678 		    if (size > max_grow_size)
679 		      bitmap_set_bit (&bb_on_list, bb->index);
680 		  }
681 	      }
682 	}
683 
684       /* Blocks that really need a prologue, or are too big for tails.  */
685       bitmap_ior_into (&bb_on_list, bb_flags);
686 
687       /* For every basic block that needs a prologue, mark all blocks
688 	 reachable from it, so as to ensure they are also seen as
689 	 requiring a prologue.  */
690       while (!vec.is_empty ())
691 	{
692 	  basic_block tmp_bb = vec.pop ();
693 
694 	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
695 	    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
696 		&& bitmap_set_bit (bb_flags, e->dest->index))
697 	      vec.quick_push (e->dest);
698 	}
699 
700       /* Find the set of basic blocks that need no prologue, have a
701 	 single successor, can be duplicated, meet a max size
702 	 requirement, and go to the exit via like blocks.  */
703       vec.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun));
704       while (!vec.is_empty ())
705 	{
706 	  basic_block tmp_bb = vec.pop ();
707 
708 	  FOR_EACH_EDGE (e, ei, tmp_bb->preds)
709 	    if (single_succ_p (e->src)
710 		&& !bitmap_bit_p (&bb_on_list, e->src->index)
711 		&& can_duplicate_block_p (e->src))
712 	      {
713 		edge pe;
714 		edge_iterator pei;
715 
716 		/* If there is a predecessor of e->src that does not
717 		   need a prologue and reaches e->src via a complex edge,
718 		   we might not be able to redirect the branch
719 		   to a copy of e->src.  */
720 		FOR_EACH_EDGE (pe, pei, e->src->preds)
721 		  if ((pe->flags & EDGE_COMPLEX) != 0
722 		      && !bitmap_bit_p (bb_flags, pe->src->index))
723 		    break;
724 		if (pe == NULL && bitmap_set_bit (&bb_tail, e->src->index))
725 		  vec.quick_push (e->src);
726 	      }
727 	}
728 
729       /* Now walk backwards from every block that is marked as needing
730 	 a prologue to compute the bb_antic_flags bitmap.  Exclude
731 	 tail blocks; they can be duplicated to be used on paths not
732 	 needing a prologue.  */
733       bitmap_clear (&bb_on_list);
734       bitmap_and_compl (&bb_antic_flags, bb_flags, &bb_tail);
735       FOR_EACH_BB_FN (bb, cfun)
736 	{
737 	  if (!bitmap_bit_p (&bb_antic_flags, bb->index))
738 	    continue;
739 	  FOR_EACH_EDGE (e, ei, bb->preds)
740 	    if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
741 		&& bitmap_set_bit (&bb_on_list, e->src->index))
742 	      vec.quick_push (e->src);
743 	}
744       while (!vec.is_empty ())
745 	{
746 	  basic_block tmp_bb = vec.pop ();
747 	  bool all_set = true;
748 
749 	  bitmap_clear_bit (&bb_on_list, tmp_bb->index);
750 	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
751 	    if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
752 	      {
753 		all_set = false;
754 		break;
755 	      }
756 
757 	  if (all_set)
758 	    {
759 	      bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
760 	      FOR_EACH_EDGE (e, ei, tmp_bb->preds)
761 		if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
762 		    && bitmap_set_bit (&bb_on_list, e->src->index))
763 		  vec.quick_push (e->src);
764 	    }
765 	}
766       /* Find exactly one edge that leads to a block in ANTIC from
767 	 a block that isn't.  */
768       if (!bitmap_bit_p (&bb_antic_flags, (*entry_edge)->dest->index))
769 	FOR_EACH_BB_FN (bb, cfun)
770 	  {
771 	    if (!bitmap_bit_p (&bb_antic_flags, bb->index))
772 	      continue;
773 	    FOR_EACH_EDGE (e, ei, bb->preds)
774 	      if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
775 		{
776 		  if (*entry_edge != orig_entry_edge)
777 		    {
778 		      *entry_edge = orig_entry_edge;
779 		      if (dump_file)
780 			fprintf (dump_file, "More than one candidate edge.\n");
781 		      goto fail_shrinkwrap;
782 		    }
783 		  if (dump_file)
784 		    fprintf (dump_file, "Found candidate edge for "
785 			     "shrink-wrapping, %d->%d.\n", e->src->index,
786 			     e->dest->index);
787 		  *entry_edge = e;
788 		}
789 	  }
790 
791       if (*entry_edge != orig_entry_edge)
792 	{
793 	  /* Test whether the prologue is known to clobber any register
794 	     (other than FP or SP) that is live on the edge.  */
795 	  CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
796 	  if (frame_pointer_needed)
797 	    CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
798 	  REG_SET_TO_HARD_REG_SET (live_on_edge,
799 				   df_get_live_in ((*entry_edge)->dest));
800 	  if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
801 	    {
802 	      *entry_edge = orig_entry_edge;
803 	      if (dump_file)
804 		fprintf (dump_file,
805 			 "Shrink-wrapping aborted due to clobber.\n");
806 	    }
807 	}
808       if (*entry_edge != orig_entry_edge)
809 	{
810 	  crtl->shrink_wrapped = true;
811 	  if (dump_file)
812 	    fprintf (dump_file, "Performing shrink-wrapping.\n");
813 
814 	  /* Find tail blocks reachable from both blocks needing a
815 	     prologue and blocks not needing a prologue.  */
816 	  if (!bitmap_empty_p (&bb_tail))
817 	    FOR_EACH_BB_FN (bb, cfun)
818 	      {
819 		bool some_pro, some_no_pro;
820 		if (!bitmap_bit_p (&bb_tail, bb->index))
821 		  continue;
822 		some_pro = some_no_pro = false;
823 		FOR_EACH_EDGE (e, ei, bb->preds)
824 		  {
825 		    if (bitmap_bit_p (bb_flags, e->src->index))
826 		      some_pro = true;
827 		    else
828 		      some_no_pro = true;
829 		  }
830 		if (some_pro && some_no_pro)
831 		  vec.quick_push (bb);
832 		else
833 		  bitmap_clear_bit (&bb_tail, bb->index);
834 	      }
835 	  /* Find the head of each tail.  */
836 	  while (!vec.is_empty ())
837 	    {
838 	      basic_block tbb = vec.pop ();
839 
840 	      if (!bitmap_bit_p (&bb_tail, tbb->index))
841 		continue;
842 
843 	      while (single_succ_p (tbb))
844 		{
845 		  tbb = single_succ (tbb);
846 		  bitmap_clear_bit (&bb_tail, tbb->index);
847 		}
848 	    }
849 	  /* Now duplicate the tails.  */
850 	  if (!bitmap_empty_p (&bb_tail))
851 	    FOR_EACH_BB_REVERSE_FN (bb, cfun)
852 	      {
853 		basic_block copy_bb, tbb;
854 		rtx_insn *insert_point;
855 		int eflags;
856 
857 		if (!bitmap_clear_bit (&bb_tail, bb->index))
858 		  continue;
859 
860 		/* Create a copy of BB, instructions and all, for
861 		   use on paths that don't need a prologue.
862 		   Ideal placement of the copy is on a fall-thru edge
863 		   or after a block that would jump to the copy.  */
864 		FOR_EACH_EDGE (e, ei, bb->preds)
865 		  if (!bitmap_bit_p (bb_flags, e->src->index)
866 		      && single_succ_p (e->src))
867 		    break;
868 		if (e)
869 		  {
870                     /* Make sure we insert after any barriers.  */
871                     rtx_insn *end = get_last_bb_insn (e->src);
872                     copy_bb = create_basic_block (NEXT_INSN (end),
873                                                   NULL_RTX, e->src);
874 		    BB_COPY_PARTITION (copy_bb, e->src);
875 		  }
876 		else
877 		  {
878 		    /* Otherwise put the copy at the end of the function.  */
879 		    copy_bb = create_basic_block (NULL_RTX, NULL_RTX,
880 						  EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
881 		    BB_COPY_PARTITION (copy_bb, bb);
882 		  }
883 
884 		insert_point = emit_note_after (NOTE_INSN_DELETED,
885 						BB_END (copy_bb));
886 		emit_barrier_after (BB_END (copy_bb));
887 
888 		tbb = bb;
889 		while (1)
890 		  {
891 		    dup_block_and_redirect (tbb, copy_bb, insert_point,
892 					    bb_flags);
893 		    tbb = single_succ (tbb);
894 		    if (tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
895 		      break;
896 		    e = split_block (copy_bb, PREV_INSN (insert_point));
897 		    copy_bb = e->dest;
898 		  }
899 
900 		/* Quiet verify_flow_info by (ab)using EDGE_FAKE.
901 		   We have yet to add a simple_return to the tails,
902 		   as we'd like to first convert_jumps_to_returns in
903 		   case the block is no longer used after that.  */
904 		eflags = EDGE_FAKE;
905 		if (CALL_P (PREV_INSN (insert_point))
906 		    && SIBLING_CALL_P (PREV_INSN (insert_point)))
907 		  eflags = EDGE_SIBCALL | EDGE_ABNORMAL;
908 		make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR_FOR_FN (cfun),
909 				       eflags);
910 
911 		/* verify_flow_info doesn't like a note after a
912 		   sibling call.  */
913 		delete_insn (insert_point);
914 		if (bitmap_empty_p (&bb_tail))
915 		  break;
916 	      }
917 	}
918 
919     fail_shrinkwrap:
920       bitmap_clear (&bb_tail);
921       bitmap_clear (&bb_antic_flags);
922       bitmap_clear (&bb_on_list);
923       vec.release ();
924     }
925 }
926 
927 /* If we're allowed to generate a simple return instruction, then by
928    definition we don't need a full epilogue.  If the last basic
929    block before the exit block does not contain active instructions,
930    examine its predecessors and try to emit (conditional) return
931    instructions.  */
932 
933 edge
934 get_unconverted_simple_return (edge exit_fallthru_edge, bitmap_head bb_flags,
935 			       vec<edge> *unconverted_simple_returns,
936 			       rtx_insn **returnjump)
937 {
938   if (optimize)
939     {
940       unsigned i, last;
941 
942       /* convert_jumps_to_returns may add to preds of the exit block
943          (but won't remove).  Stop at end of current preds.  */
944       last = EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds);
945       for (i = 0; i < last; i++)
946 	{
947 	  edge e = EDGE_I (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, i);
948 	  if (LABEL_P (BB_HEAD (e->src))
949 	      && !bitmap_bit_p (&bb_flags, e->src->index)
950 	      && !active_insn_between (BB_HEAD (e->src), BB_END (e->src)))
951 	    *unconverted_simple_returns
952 		  = convert_jumps_to_returns (e->src, true,
953 					      *unconverted_simple_returns);
954 	}
955     }
956 
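  /* If the source block of the exit fall-through edge needs no
     prologue, emit a return there and drop the fall-through edge.  */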
957   if (exit_fallthru_edge != NULL
958       && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0
959       && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index))
960     {
961       basic_block last_bb;
962 
963       last_bb = emit_return_for_exit (exit_fallthru_edge, true);
964       *returnjump = BB_END (last_bb);
965       exit_fallthru_edge = NULL;
966     }
967   return exit_fallthru_edge;
968 }
969 
970 /* If there were branches to an empty LAST_BB which we tried to
971    convert to conditional simple_returns, but couldn't for some
972    reason, create a block to hold a simple_return insn and redirect
973    those remaining edges.  */
974 
975 void
976 convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
977 			  bitmap_head bb_flags, rtx_insn *returnjump,
978 			  vec<edge> unconverted_simple_returns)
979 {
980   edge e;
981   edge_iterator ei;
982 
983   if (!unconverted_simple_returns.is_empty ())
984     {
985       basic_block simple_return_block_hot = NULL;
986       basic_block simple_return_block_cold = NULL;
987       edge pending_edge_hot = NULL;
988       edge pending_edge_cold = NULL;
989       basic_block exit_pred;
990       int i;
991 
992       gcc_assert (entry_edge != orig_entry_edge);
993 
994       /* See if we can reuse the last insn that was emitted for the
995 	 epilogue.  */
996       if (returnjump != NULL_RTX
997 	  && JUMP_LABEL (returnjump) == simple_return_rtx)
998 	{
999 	  e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump));
1000 	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
1001 	    simple_return_block_hot = e->dest;
1002 	  else
1003 	    simple_return_block_cold = e->dest;
1004 	}
1005 
1006       /* Also check returns we might need to add to tail blocks.  */
1007       FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1008 	if (EDGE_COUNT (e->src->preds) != 0
1009 	    && (e->flags & EDGE_FAKE) != 0
1010 	    && !bitmap_bit_p (&bb_flags, e->src->index))
1011 	  {
1012 	    if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
1013 	      pending_edge_hot = e;
1014 	    else
1015 	      pending_edge_cold = e;
1016 	  }
1017 
1018       /* Save a pointer to the exit's predecessor BB for use in
1019          inserting new BBs at the end of the function.  Do this
1020          after the call to split_block above which may split
1021          the original exit pred.  */
1022       exit_pred = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
1023 
1024       FOR_EACH_VEC_ELT (unconverted_simple_returns, i, e)
1025 	{
1026 	  basic_block *pdest_bb;
1027 	  edge pending;
1028 
1029 	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
1030 	    {
1031 	      pdest_bb = &simple_return_block_hot;
1032 	      pending = pending_edge_hot;
1033 	    }
1034 	  else
1035 	    {
1036 	      pdest_bb = &simple_return_block_cold;
1037 	      pending = pending_edge_cold;
1038 	    }
1039 
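	  /* Reuse a pending fake-edge block in the right partition to hold
	     the simple return if possible; otherwise create a fresh block
	     containing just a simple_return insn.  */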
1040 	  if (*pdest_bb == NULL && pending != NULL)
1041 	    {
1042 	      emit_return_into_block (true, pending->src);
1043 	      pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
1044 	      *pdest_bb = pending->src;
1045 	    }
1046 	  else if (*pdest_bb == NULL)
1047 	    {
1048 	      basic_block bb;
1049 	      rtx_insn *start;
1050 
1051 	      bb = create_basic_block (NULL, NULL, exit_pred);
1052 	      BB_COPY_PARTITION (bb, e->src);
1053 	      start = emit_jump_insn_after (gen_simple_return (),
1054 					    BB_END (bb));
1055 	      JUMP_LABEL (start) = simple_return_rtx;
1056 	      emit_barrier_after (start);
1057 
1058 	      *pdest_bb = bb;
1059 	      make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
1060 	    }
1061 	  redirect_edge_and_branch_force (e, *pdest_bb);
1062 	}
1063       unconverted_simple_returns.release ();
1064     }
1065 
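  /* Give every remaining fake exit edge whose source needs no
     prologue a real return.  */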
1066   if (entry_edge != orig_entry_edge)
1067     {
1068       FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1069 	if (EDGE_COUNT (e->src->preds) != 0
1070 	    && (e->flags & EDGE_FAKE) != 0
1071 	    && !bitmap_bit_p (&bb_flags, e->src->index))
1072 	  {
1073 	    emit_return_into_block (true, e->src);
1074 	    e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
1075 	  }
1076     }
1077 }
1078 
1079 #endif
1080