1 /* Basic block reordering routines for the GNU compiler.
2    Copyright (C) 2000-2015 Free Software Foundation, Inc.
3 
4    This file is part of GCC.
5 
6    GCC is free software; you can redistribute it and/or modify it
7    under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    GCC is distributed in the hope that it will be useful, but WITHOUT
12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14    License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with GCC; see the file COPYING3.  If not see
18    <http://www.gnu.org/licenses/>.  */
19 
20 /* This (greedy) algorithm constructs traces in several rounds.
21    The construction starts from "seeds".  The seed for the first round
22    is the entry point of the function.  When there is more than one seed,
23    the one with the lowest key in the heap is selected first (see bb_to_key).
24    Then the algorithm repeatedly adds the most probable successor to the end
25    of a trace.  Finally it connects the traces.
26 
27    There are two parameters: Branch Threshold and Exec Threshold.
28    If the probability of an edge to a successor of the current basic block is
29    lower than Branch Threshold or its frequency is lower than Exec Threshold,
30    then the successor will be the seed in one of the next rounds.
31    Each round has these parameters lower than the previous one.
32    The last round has to have these parameters set to zero so that the
33    remaining blocks are picked up.
34 
35    The algorithm selects the most probable successor from all unvisited
36    successors and successors that have been added to this trace.
37    The other successors (those that have not been "sent" to the next round)
38    will be the other seeds for this round, and the secondary traces will start from them.
39    If the successor has not been visited in this trace, it is added to the
40    trace (however, there is some heuristic for simple branches).
41    If the successor has been visited in this trace, a loop has been found.
42    If the loop has many iterations, the loop is rotated so that the source
43    block of the most probable edge going out of the loop is the last block
44    of the trace.
45    If the loop has few iterations and there is no edge from the last block of
46    the loop going out of the loop, the loop header is duplicated.
47 
48    When connecting traces, the algorithm first checks whether there is an edge
49    from the last block of a trace to the first block of another trace.
50    When there are still some unconnected traces, it checks whether there exists
51    a basic block BB such that BB is a successor of the last block of a trace
52    and BB is a predecessor of the first block of another trace.  In this case,
53    BB is duplicated, added at the end of the first trace and the traces are
54    connected through it.
55    The rest of traces are simply connected so there will be a jump to the
56    beginning of the rest of traces.
57 
58    The above description is for the full algorithm, which is used when the
59    function is optimized for speed.  When the function is optimized for size,
60    in order to reduce long jumps and connect more fallthru edges, the
61    algorithm is modified as follows:
62    (1) Break long traces into short ones.  A trace is broken at a block that has
63    multiple predecessors/successors during trace discovery.  When connecting
64    traces, only connect Trace n with Trace n + 1.  This change eliminates most
65    of the long jumps compared with the above algorithm.
66    (2) Ignore the edge probability and frequency for fallthru edges.
67    (3) Keep the original order of blocks when there is no chance to fall
68    through.  We rely on the results of cfg_cleanup.
69 
70    To implement the change for code size optimization, the block's index is
71    used as the key and all traces are found in one round.
72 
73    References:
74 
75    "Software Trace Cache"
76    A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
77    http://citeseer.nj.nec.com/15361.html
78 
79 */
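
/* A rough, illustrative sketch of the overall flow (the actual driver code
   lives further down in this file):

     find_traces (&n_traces, traces);     - greedy trace discovery, by rounds
     connect_traces (n_traces, traces);   - chain the traces into one order

   find_traces seeds a heap with the successors of the entry block and calls
   find_traces_1_round once per round with that round's Branch/Exec
   thresholds; blocks that are not "hot enough" for a round are re-inserted
   as seeds for the next round.  */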
80 
81 #include "config.h"
82 #include "system.h"
83 #include "coretypes.h"
84 #include "tm.h"
85 #include "hash-set.h"
86 #include "machmode.h"
87 #include "vec.h"
88 #include "double-int.h"
89 #include "input.h"
90 #include "alias.h"
91 #include "symtab.h"
92 #include "wide-int.h"
93 #include "inchash.h"
94 #include "tree.h"
95 #include "rtl.h"
96 #include "regs.h"
97 #include "flags.h"
98 #include "output.h"
99 #include "target.h"
100 #include "hashtab.h"
101 #include "hard-reg-set.h"
102 #include "function.h"
103 #include "tm_p.h"
104 #include "obstack.h"
105 #include "statistics.h"
106 #include "real.h"
107 #include "fixed-value.h"
108 #include "insn-config.h"
109 #include "expmed.h"
110 #include "dojump.h"
111 #include "explow.h"
112 #include "calls.h"
113 #include "emit-rtl.h"
114 #include "varasm.h"
115 #include "stmt.h"
116 #include "expr.h"
117 #include "optabs.h"
118 #include "params.h"
119 #include "diagnostic-core.h"
120 #include "toplev.h" /* user_defined_section_attribute */
121 #include "tree-pass.h"
122 #include "dominance.h"
123 #include "cfg.h"
124 #include "cfgrtl.h"
125 #include "cfganal.h"
126 #include "cfgbuild.h"
127 #include "cfgcleanup.h"
128 #include "predict.h"
129 #include "basic-block.h"
130 #include "df.h"
131 #include "bb-reorder.h"
132 #include "hash-map.h"
133 #include "is-a.h"
134 #include "plugin-api.h"
135 #include "ipa-ref.h"
136 #include "cgraph.h"
137 #include "except.h"
138 #include "fibonacci_heap.h"
139 
140 /* The number of rounds.  In most cases there will only be 4 rounds, but
141    when partitioning hot and cold basic blocks into separate sections of
142    the object file there will be an extra round.  */
143 #define N_ROUNDS 5
144 
145 /* Stubs in case we don't have a return insn.
146    We have to check at run time too, not only compile time.  */
147 
148 #ifndef HAVE_return
149 #define HAVE_return 0
150 #define gen_return() NULL_RTX
151 #endif
152 
153 
154 struct target_bb_reorder default_target_bb_reorder;
155 #if SWITCHABLE_TARGET
156 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
157 #endif
158 
159 #define uncond_jump_length \
160   (this_target_bb_reorder->x_uncond_jump_length)
161 
162 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
163 static const int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
164 
165 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
166 static const int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
167 
168 /* If an edge's frequency is lower than DUPLICATION_THRESHOLD per mille of the
169    entry block's frequency, its destination is not duplicated while connecting traces.  */
170 #define DUPLICATION_THRESHOLD 100
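
/* Worked example of how the thresholds above are applied (assuming
   REG_BR_PROB_BASE == 10000, its usual value, and a max_entry_frequency of,
   say, 1000):

     round 0: branch_th = 10000 * 400 / 1000 = 4000  (probability 40%)
              exec_th   =  1000 * 500 / 1000 =  500  (50% of the entry freq)
     round 3: branch_th = 0 and exec_th = 0, so every remaining block is taken.

   An edge whose probability is below branch_th, or whose destination's
   frequency is below exec_th, does not extend the current trace; its
   destination becomes a seed for this or a later round instead.  Similarly,
   while connecting traces a larger block is only duplicated when the
   connecting edge's frequency reaches
   max_entry_frequency * DUPLICATION_THRESHOLD / 1000.  */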
171 
172 typedef fibonacci_heap <long, basic_block_def> bb_heap_t;
173 typedef fibonacci_node <long, basic_block_def> bb_heap_node_t;
174 
175 /* Structure to hold needed information for each basic block.  */
176 typedef struct bbro_basic_block_data_def
177 {
178   /* Which trace is the bb start of (-1 means it is not a start of any).  */
179   int start_of_trace;
180 
181   /* Which trace is the bb end of (-1 means it is not an end of any).  */
182   int end_of_trace;
183 
184   /* Which trace is the bb in?  */
185   int in_trace;
186 
187   /* Which trace was this bb visited in?  */
188   int visited;
189 
190   /* Cached maximum frequency of interesting incoming edges.
191      Minus one means not yet computed.  */
192   int priority;
193 
194   /* Which heap is BB in (if any)?  */
195   bb_heap_t *heap;
196 
197   /* Which heap node is BB in (if any)?  */
198   bb_heap_node_t *node;
199 } bbro_basic_block_data;
200 
201 /* The current size of the following dynamic array.  */
202 static int array_size;
203 
204 /* The array which holds needed information for basic blocks.  */
205 static bbro_basic_block_data *bbd;
206 
207 /* To avoid frequent reallocation the size of the array is greater than needed;
208    the number of elements is (not less than) 1.25 * size_wanted.  */
209 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
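
/* For example, GET_ARRAY_SIZE (100) == ((100 / 4) + 1) * 5 == 130, i.e.
   roughly 1.3 * size_wanted; the "+ 1" keeps the result at or above the
   1.25 * size_wanted bound even when X is not a multiple of 4.  */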
210 
211 /* Free the memory and set the pointer to NULL.  */
212 #define FREE(P) (gcc_assert (P), free (P), P = 0)
213 
214 /* Structure for holding information about a trace.  */
215 struct trace
216 {
217   /* First and last basic block of the trace.  */
218   basic_block first, last;
219 
220   /* The round of the STC creation which this trace was found in.  */
221   int round;
222 
223   /* The length (i.e. the number of basic blocks) of the trace.  */
224   int length;
225 };
226 
227 /* Maximum frequency and count of one of the entry blocks.  */
228 static int max_entry_frequency;
229 static gcov_type max_entry_count;
230 
231 /* Local function prototypes.  */
232 static void find_traces (int *, struct trace *);
233 static basic_block rotate_loop (edge, struct trace *, int);
234 static void mark_bb_visited (basic_block, int);
235 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
236 				 int, bb_heap_t **, int);
237 static basic_block copy_bb (basic_block, edge, basic_block, int);
238 static long bb_to_key (basic_block);
239 static bool better_edge_p (const_basic_block, const_edge, int, int, int, int,
240 			   const_edge);
241 static bool connect_better_edge_p (const_edge, bool, int, const_edge,
242 				   struct trace *);
243 static void connect_traces (int, struct trace *);
244 static bool copy_bb_p (const_basic_block, int);
245 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
246 
247 /* Return the trace number in which BB was visited.  */
248 
249 static int
250 bb_visited_trace (const_basic_block bb)
251 {
252   gcc_assert (bb->index < array_size);
253   return bbd[bb->index].visited;
254 }
255 
256 /* Mark BB as visited in trace number TRACE.  */
257 
258 static void
259 mark_bb_visited (basic_block bb, int trace)
260 {
261   bbd[bb->index].visited = trace;
262   if (bbd[bb->index].heap)
263     {
264       bbd[bb->index].heap->delete_node (bbd[bb->index].node);
265       bbd[bb->index].heap = NULL;
266       bbd[bb->index].node = NULL;
267     }
268 }
269 
270 /* Check whether BB should be pushed into the next round of trace
271    collection or not.  Reasons for pushing the block forward are: 1)
272    the block is cold, we are doing partitioning, and there will be
273    another round (cold partition blocks are not supposed to be
274    collected into traces until the very last round); or 2) there will
275    be another round, and the basic block is not "hot enough" for the
276    current round of trace collection.  */
277 
278 static bool
279 push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
280 		      int exec_th, gcov_type count_th)
281 {
282   bool there_exists_another_round;
283   bool block_not_hot_enough;
284 
285   there_exists_another_round = round < number_of_rounds - 1;
286 
287   block_not_hot_enough = (bb->frequency < exec_th
288 			  || bb->count < count_th
289 			  || probably_never_executed_bb_p (cfun, bb));
290 
291   if (there_exists_another_round
292       && block_not_hot_enough)
293     return true;
294   else
295     return false;
296 }
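
/* For example, in round 0 of a four-round collection with exec_th == 500 and
   count_th == 0, a block with frequency 300 is "not hot enough" and is
   pushed forward to round 1; in the last round there is no further round,
   so every remaining block is kept no matter how cold it is.  */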
297 
298 /* Find the traces for Software Trace Cache.  Chain the basic blocks of each
299    trace through their aux fields.  Store the number of traces in *N_TRACES
300    and the descriptions of the traces in TRACES.  */
301 
302 static void
303 find_traces (int *n_traces, struct trace *traces)
304 {
305   int i;
306   int number_of_rounds;
307   edge e;
308   edge_iterator ei;
309   bb_heap_t *heap = new bb_heap_t (LONG_MIN);
310 
311   /* Add one extra round of trace collection when partitioning hot/cold
312      basic blocks into separate sections.  The last round is for all the
313      cold blocks (and ONLY the cold blocks).  */
314 
315   number_of_rounds = N_ROUNDS - 1;
316 
317   /* Insert entry points of function into heap.  */
318   max_entry_frequency = 0;
319   max_entry_count = 0;
320   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
321     {
322       bbd[e->dest->index].heap = heap;
323       bbd[e->dest->index].node = heap->insert (bb_to_key (e->dest), e->dest);
324       if (e->dest->frequency > max_entry_frequency)
325 	max_entry_frequency = e->dest->frequency;
326       if (e->dest->count > max_entry_count)
327 	max_entry_count = e->dest->count;
328     }
329 
330   /* Find the traces.  */
331   for (i = 0; i < number_of_rounds; i++)
332     {
333       gcov_type count_threshold;
334 
335       if (dump_file)
336 	fprintf (dump_file, "STC - round %d\n", i + 1);
337 
338       if (max_entry_count < INT_MAX / 1000)
339 	count_threshold = max_entry_count * exec_threshold[i] / 1000;
340       else
341 	count_threshold = max_entry_count / 1000 * exec_threshold[i];
342 
343       find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
344 			   max_entry_frequency * exec_threshold[i] / 1000,
345 			   count_threshold, traces, n_traces, i, &heap,
346 			   number_of_rounds);
347     }
348   delete heap;
349 
350   if (dump_file)
351     {
352       for (i = 0; i < *n_traces; i++)
353 	{
354 	  basic_block bb;
355 	  fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
356 		   traces[i].round + 1);
357 	  for (bb = traces[i].first;
358 	       bb != traces[i].last;
359 	       bb = (basic_block) bb->aux)
360 	    fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
361 	  fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
362 	}
363       fflush (dump_file);
364     }
365 }
366 
367 /* Rotate the loop whose back edge is BACK_EDGE, in the tail of trace TRACE
368    (with sequential number TRACE_N), and return its new last block.  */
369 
370 static basic_block
371 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
372 {
373   basic_block bb;
374 
375   /* Information about the best end (end after rotation) of the loop.  */
376   basic_block best_bb = NULL;
377   edge best_edge = NULL;
378   int best_freq = -1;
379   gcov_type best_count = -1;
380   /* The best edge is preferred when its destination is not visited yet
381      or is a start block of some trace.  */
382   bool is_preferred = false;
383 
384   /* Find the most frequent edge that goes out of the current trace.  */
385   bb = back_edge->dest;
386   do
387     {
388       edge e;
389       edge_iterator ei;
390 
391       FOR_EACH_EDGE (e, ei, bb->succs)
392 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
393 	    && bb_visited_trace (e->dest) != trace_n
394 	    && (e->flags & EDGE_CAN_FALLTHRU)
395 	    && !(e->flags & EDGE_COMPLEX))
396 	{
397 	  if (is_preferred)
398 	    {
399 	      /* The best edge is preferred.  */
400 	      if (!bb_visited_trace (e->dest)
401 		  || bbd[e->dest->index].start_of_trace >= 0)
402 		{
403 		  /* The current edge E is also preferred.  */
404 		  int freq = EDGE_FREQUENCY (e);
405 		  if (freq > best_freq || e->count > best_count)
406 		    {
407 		      best_freq = freq;
408 		      best_count = e->count;
409 		      best_edge = e;
410 		      best_bb = bb;
411 		    }
412 		}
413 	    }
414 	  else
415 	    {
416 	      if (!bb_visited_trace (e->dest)
417 		  || bbd[e->dest->index].start_of_trace >= 0)
418 		{
419 		  /* The current edge E is preferred.  */
420 		  is_preferred = true;
421 		  best_freq = EDGE_FREQUENCY (e);
422 		  best_count = e->count;
423 		  best_edge = e;
424 		  best_bb = bb;
425 		}
426 	      else
427 		{
428 		  int freq = EDGE_FREQUENCY (e);
429 		  if (!best_edge || freq > best_freq || e->count > best_count)
430 		    {
431 		      best_freq = freq;
432 		      best_count = e->count;
433 		      best_edge = e;
434 		      best_bb = bb;
435 		    }
436 		}
437 	    }
438 	}
439       bb = (basic_block) bb->aux;
440     }
441   while (bb != back_edge->dest);
442 
443   if (best_bb)
444     {
445       /* Rotate the loop so that the BEST_EDGE goes out from the last block of
446 	 the trace.  */
447       if (back_edge->dest == trace->first)
448 	{
449 	  trace->first = (basic_block) best_bb->aux;
450 	}
451       else
452 	{
453 	  basic_block prev_bb;
454 
455 	  for (prev_bb = trace->first;
456 	       prev_bb->aux != back_edge->dest;
457 	       prev_bb = (basic_block) prev_bb->aux)
458 	    ;
459 	  prev_bb->aux = best_bb->aux;
460 
461 	  /* Try to get rid of uncond jump to cond jump.  */
462 	  if (single_succ_p (prev_bb))
463 	    {
464 	      basic_block header = single_succ (prev_bb);
465 
466 	      /* Duplicate HEADER if it is a small block containing a cond jump
467 		 at the end.  */
468 	      if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
469 		  && !CROSSING_JUMP_P (BB_END (header)))
470 		copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
471 	    }
472 	}
473     }
474   else
475     {
476       /* We have not found a suitable loop tail, so do no rotation.  */
477       best_bb = back_edge->src;
478     }
479   best_bb->aux = NULL;
480   return best_bb;
481 }
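
/* Illustration: suppose the trace was built as H -> A -> B for a loop with
   back edge B -> H, and the most frequent edge leaving the loop goes out of
   A.  The chain is rewired to B -> H -> A, so A, the source of the exit
   edge, becomes the last block of the trace and is returned as the new
   trace tail.  */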
482 
483 /* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH, i.e. do
484    not include in traces basic blocks whose probability is lower than BRANCH_TH,
485    whose frequency is lower than EXEC_TH, or whose count is lower than
486    COUNT_TH.  Store the new traces in TRACES and update the number of
487    traces *N_TRACES.  Set the round (which the trace belongs to) to ROUND.
488    The function expects the starting basic blocks to be in *HEAP; it deletes
489    *HEAP and stores the starting points for the next round in a new *HEAP.  */
490 
491 static void
492 find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
493 		     struct trace *traces, int *n_traces, int round,
494 		     bb_heap_t **heap, int number_of_rounds)
495 {
496   /* Heap for discarded basic blocks which are possible starting points for
497      the next round.  */
498   bb_heap_t *new_heap = new bb_heap_t (LONG_MIN);
499   bool for_size = optimize_function_for_size_p (cfun);
500 
501   while (!(*heap)->empty ())
502     {
503       basic_block bb;
504       struct trace *trace;
505       edge best_edge, e;
506       long key;
507       edge_iterator ei;
508 
509       bb = (*heap)->extract_min ();
510       bbd[bb->index].heap = NULL;
511       bbd[bb->index].node = NULL;
512 
513       if (dump_file)
514 	fprintf (dump_file, "Getting bb %d\n", bb->index);
515 
516       /* If the BB's frequency is too low, send BB to the next round.  When
517 	 partitioning hot/cold blocks into separate sections, make sure all
518 	 the cold blocks (and ONLY the cold blocks) go into the (extra) final
519 	 round.  When optimizing for size, do not push to next round.  */
520 
521       if (!for_size
522 	  && push_to_next_round_p (bb, round, number_of_rounds, exec_th,
523 				   count_th))
524 	{
525 	  int key = bb_to_key (bb);
526 	  bbd[bb->index].heap = new_heap;
527 	  bbd[bb->index].node = new_heap->insert (key, bb);
528 
529 	  if (dump_file)
530 	    fprintf (dump_file,
531 		     "  Possible start point of next round: %d (key: %d)\n",
532 		     bb->index, key);
533 	  continue;
534 	}
535 
536       trace = traces + *n_traces;
537       trace->first = bb;
538       trace->round = round;
539       trace->length = 0;
540       bbd[bb->index].in_trace = *n_traces;
541       (*n_traces)++;
542 
543       do
544 	{
545 	  int prob, freq;
546 	  bool ends_in_call;
547 
548 	  /* The probability and frequency of the best edge.  */
549 	  int best_prob = INT_MIN / 2;
550 	  int best_freq = INT_MIN / 2;
551 
552 	  best_edge = NULL;
553 	  mark_bb_visited (bb, *n_traces);
554 	  trace->length++;
555 
556 	  if (dump_file)
557 	    fprintf (dump_file, "Basic block %d was visited in trace %d\n",
558 		     bb->index, *n_traces - 1);
559 
560 	  ends_in_call = block_ends_with_call_p (bb);
561 
562 	  /* Select the successor that will be placed after BB.  */
563 	  FOR_EACH_EDGE (e, ei, bb->succs)
564 	    {
565 	      gcc_assert (!(e->flags & EDGE_FAKE));
566 
567 	      if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
568 		continue;
569 
570 	      if (bb_visited_trace (e->dest)
571 		  && bb_visited_trace (e->dest) != *n_traces)
572 		continue;
573 
574 	      if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
575 		continue;
576 
577 	      prob = e->probability;
578 	      freq = e->dest->frequency;
579 
580 	      /* The only sensible preference for a call instruction is the
581 		 fallthru edge.  Don't bother selecting anything else.  */
582 	      if (ends_in_call)
583 		{
584 		  if (e->flags & EDGE_CAN_FALLTHRU)
585 		    {
586 		      best_edge = e;
587 		      best_prob = prob;
588 		      best_freq = freq;
589 		    }
590 		  continue;
591 		}
592 
593 	      /* Skip an edge that cannot be a fallthru edge, or whose successor
594 		 is improbable or infrequent (i.e. an unsuitable successor).  When
595 		 optimizing for size, ignore the probability and frequency.  */
596 	      if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
597 		  || ((prob < branch_th || EDGE_FREQUENCY (e) < exec_th
598 		      || e->count < count_th) && (!for_size)))
599 		continue;
600 
601 	      /* If partitioning hot/cold basic blocks, don't consider edges
602 		 that cross section boundaries.  */
603 
604 	      if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
605 				 best_edge))
606 		{
607 		  best_edge = e;
608 		  best_prob = prob;
609 		  best_freq = freq;
610 		}
611 	    }
612 
613 	  /* If the best destination has multiple predecessors, and can be
614 	     duplicated cheaper than a jump, don't allow it to be added
615 	     to a trace.  We'll duplicate it when connecting traces.  */
616 	  if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
617 	      && copy_bb_p (best_edge->dest, 0))
618 	    best_edge = NULL;
619 
620 	  /* If the best destination has multiple successors or predecessors,
621 	     don't allow it to be added when optimizing for size.  This makes
622 	     sure predecessors with smaller indices are handled before the best
623 	     destination.  It breaks long traces and reduces long jumps.
624 
625 	     Take if-then-else as an example.
626 		A
627 	       / \
628 	      B   C
629 	       \ /
630 		D
631 	     If we do not remove the best edge B->D/C->D, the final order might
632 	     be A B D ... C.  C is at the end of the program.  If D's successors
633 	     and D are complicated, long jumps may be needed for A->C and C->D.
634 	     There is a similar issue for the order A C D ... B.
635 
636 	     After removing the best edge, the final result will be ABCD/ACBD.
637 	     It does not add a jump compared with the previous order, but it
638 	     reduces the possibility of long jumps.  */
639 	  if (best_edge && for_size
640 	      && (EDGE_COUNT (best_edge->dest->succs) > 1
641 		 || EDGE_COUNT (best_edge->dest->preds) > 1))
642 	    best_edge = NULL;
643 
644 	  /* Add all non-selected successors to the heaps.  */
645 	  FOR_EACH_EDGE (e, ei, bb->succs)
646 	    {
647 	      if (e == best_edge
648 		  || e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
649 		  || bb_visited_trace (e->dest))
650 		continue;
651 
652 	      key = bb_to_key (e->dest);
653 
654 	      if (bbd[e->dest->index].heap)
655 		{
656 		  /* E->DEST is already in some heap.  */
657 		  if (key != bbd[e->dest->index].node->get_key ())
658 		    {
659 		      if (dump_file)
660 			{
661 			  fprintf (dump_file,
662 				   "Changing key for bb %d from %ld to %ld.\n",
663 				   e->dest->index,
664 				   (long) bbd[e->dest->index].node->get_key (),
665 				   key);
666 			}
667 		      bbd[e->dest->index].heap->replace_key
668 		        (bbd[e->dest->index].node, key);
669 		    }
670 		}
671 	      else
672 		{
673 		  bb_heap_t *which_heap = *heap;
674 
675 		  prob = e->probability;
676 		  freq = EDGE_FREQUENCY (e);
677 
678 		  if (!(e->flags & EDGE_CAN_FALLTHRU)
679 		      || (e->flags & EDGE_COMPLEX)
680 		      || prob < branch_th || freq < exec_th
681 		      || e->count < count_th)
682 		    {
683 		      /* When partitioning hot/cold basic blocks, make sure
684 			 the cold blocks (and only the cold blocks) all get
685 			 pushed to the last round of trace collection.  When
686 			 optimizing for size, do not push to next round.  */
687 
688 		      if (!for_size && push_to_next_round_p (e->dest, round,
689 							     number_of_rounds,
690 							     exec_th, count_th))
691 			which_heap = new_heap;
692 		    }
693 
694 		  bbd[e->dest->index].heap = which_heap;
695 		  bbd[e->dest->index].node = which_heap->insert (key, e->dest);
696 
697 		  if (dump_file)
698 		    {
699 		      fprintf (dump_file,
700 			       "  Possible start of %s round: %d (key: %ld)\n",
701 			       (which_heap == new_heap) ? "next" : "this",
702 			       e->dest->index, (long) key);
703 		    }
704 
705 		}
706 	    }
707 
708 	  if (best_edge) /* Suitable successor was found.  */
709 	    {
710 	      if (bb_visited_trace (best_edge->dest) == *n_traces)
711 		{
712 		  /* We do nothing with one-basic-block loops.  */
713 		  if (best_edge->dest != bb)
714 		    {
715 		      if (EDGE_FREQUENCY (best_edge)
716 			  > 4 * best_edge->dest->frequency / 5)
717 			{
718 			  /* The loop has at least 4 iterations.  If the loop
719 			     header is not the first block of the function
720 			     we can rotate the loop.  */
721 
722 			  if (best_edge->dest
723 			      != ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
724 			    {
725 			      if (dump_file)
726 				{
727 				  fprintf (dump_file,
728 					   "Rotating loop %d - %d\n",
729 					   best_edge->dest->index, bb->index);
730 				}
731 			      bb->aux = best_edge->dest;
732 			      bbd[best_edge->dest->index].in_trace =
733 							     (*n_traces) - 1;
734 			      bb = rotate_loop (best_edge, trace, *n_traces);
735 			    }
736 			}
737 		      else
738 			{
739 			  /* The loop has less than 4 iterations.  */
740 
741 			  if (single_succ_p (bb)
742 			      && copy_bb_p (best_edge->dest,
743 			      		    optimize_edge_for_speed_p
744 			      		    (best_edge)))
745 			    {
746 			      bb = copy_bb (best_edge->dest, best_edge, bb,
747 					    *n_traces);
748 			      trace->length++;
749 			    }
750 			}
751 		    }
752 
753 		  /* Terminate the trace.  */
754 		  break;
755 		}
756 	      else
757 		{
758 		  /* Check for a situation
759 
760 		    A
761 		   /|
762 		  B |
763 		   \|
764 		    C
765 
766 		  where
767 		  EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
768 		    >= EDGE_FREQUENCY (AC).
769 		  (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
770 		  Best ordering is then A B C.
771 
772 		  When optimizing for size, A B C is always the best order.
773 
774 		  This situation is created for example by:
775 
776 		  if (A) B;
777 		  C;
778 
779 		  */
780 
781 		  FOR_EACH_EDGE (e, ei, bb->succs)
782 		    if (e != best_edge
783 			&& (e->flags & EDGE_CAN_FALLTHRU)
784 			&& !(e->flags & EDGE_COMPLEX)
785 			&& !bb_visited_trace (e->dest)
786 			&& single_pred_p (e->dest)
787 			&& !(e->flags & EDGE_CROSSING)
788 			&& single_succ_p (e->dest)
789 			&& (single_succ_edge (e->dest)->flags
790 			    & EDGE_CAN_FALLTHRU)
791 			&& !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
792 			&& single_succ (e->dest) == best_edge->dest
793 			&& (2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge)
794 			    || for_size))
795 		      {
796 			best_edge = e;
797 			if (dump_file)
798 			  fprintf (dump_file, "Selecting BB %d\n",
799 				   best_edge->dest->index);
800 			break;
801 		      }
802 
803 		  bb->aux = best_edge->dest;
804 		  bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
805 		  bb = best_edge->dest;
806 		}
807 	    }
808 	}
809       while (best_edge);
810       trace->last = bb;
811       bbd[trace->first->index].start_of_trace = *n_traces - 1;
812       if (bbd[trace->last->index].end_of_trace != *n_traces - 1)
813 	{
814 	  bbd[trace->last->index].end_of_trace = *n_traces - 1;
815 	  /* Update the cached maximum frequency for interesting predecessor
816 	     edges for successors of the new trace end.  */
817 	  FOR_EACH_EDGE (e, ei, trace->last->succs)
818 	    if (EDGE_FREQUENCY (e) > bbd[e->dest->index].priority)
819 	      bbd[e->dest->index].priority = EDGE_FREQUENCY (e);
820 	}
821 
822       /* The trace is terminated, so we have to recompute the keys in the heap
823 	 (some blocks can now have a lower key because one of their predecessors
824 	 is an end of the trace).  */
825       FOR_EACH_EDGE (e, ei, bb->succs)
826 	{
827 	  if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
828 	      || bb_visited_trace (e->dest))
829 	    continue;
830 
831 	  if (bbd[e->dest->index].heap)
832 	    {
833 	      key = bb_to_key (e->dest);
834 	      if (key != bbd[e->dest->index].node->get_key ())
835 		{
836 		  if (dump_file)
837 		    {
838 		      fprintf (dump_file,
839 			       "Changing key for bb %d from %ld to %ld.\n",
840 			       e->dest->index,
841 			       (long) bbd[e->dest->index].node->get_key (), key);
842 		    }
843 		  bbd[e->dest->index].heap->replace_key
844 		    (bbd[e->dest->index].node, key);
845 		}
846 	    }
847 	}
848     }
849 
850   delete (*heap);
851 
852   /* "Return" the new heap.  */
853   *heap = new_heap;
854 }
855 
856 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
857    it to the trace after BB, mark the duplicate as visited, and update the pass's
858    data structures (TRACE is the number of the trace OLD_BB is duplicated into).  */
859 
860 static basic_block
861 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
862 {
863   basic_block new_bb;
864 
865   new_bb = duplicate_block (old_bb, e, bb);
866   BB_COPY_PARTITION (new_bb, old_bb);
867 
868   gcc_assert (e->dest == new_bb);
869 
870   if (dump_file)
871     fprintf (dump_file,
872 	     "Duplicated bb %d (created bb %d)\n",
873 	     old_bb->index, new_bb->index);
874 
875   if (new_bb->index >= array_size
876       || last_basic_block_for_fn (cfun) > array_size)
877     {
878       int i;
879       int new_size;
880 
881       new_size = MAX (last_basic_block_for_fn (cfun), new_bb->index + 1);
882       new_size = GET_ARRAY_SIZE (new_size);
883       bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
884       for (i = array_size; i < new_size; i++)
885 	{
886 	  bbd[i].start_of_trace = -1;
887 	  bbd[i].end_of_trace = -1;
888 	  bbd[i].in_trace = -1;
889 	  bbd[i].visited = 0;
890 	  bbd[i].priority = -1;
891 	  bbd[i].heap = NULL;
892 	  bbd[i].node = NULL;
893 	}
894       array_size = new_size;
895 
896       if (dump_file)
897 	{
898 	  fprintf (dump_file,
899 		   "Growing the dynamic array to %d elements.\n",
900 		   array_size);
901 	}
902     }
903 
904   gcc_assert (!bb_visited_trace (e->dest));
905   mark_bb_visited (new_bb, trace);
906   new_bb->aux = bb->aux;
907   bb->aux = new_bb;
908 
909   bbd[new_bb->index].in_trace = trace;
910 
911   return new_bb;
912 }
913 
914 /* Compute and return the key (for the heap) of the basic block BB.  */
915 
916 static long
917 bb_to_key (basic_block bb)
918 {
919   edge e;
920   edge_iterator ei;
921 
922   /* Use index as key to align with its original order.  */
923   if (optimize_function_for_size_p (cfun))
924     return bb->index;
925 
926   /* Do not start in probably never executed blocks.  */
927 
928   if (BB_PARTITION (bb) == BB_COLD_PARTITION
929       || probably_never_executed_bb_p (cfun, bb))
930     return BB_FREQ_MAX;
931 
932   /* Prefer blocks whose predecessor is an end of some trace
933      or whose predecessor edge is EDGE_DFS_BACK.  */
934   int priority = bbd[bb->index].priority;
935   if (priority == -1)
936     {
937       priority = 0;
938       FOR_EACH_EDGE (e, ei, bb->preds)
939 	{
940 	  if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
941 	       && bbd[e->src->index].end_of_trace >= 0)
942 	      || (e->flags & EDGE_DFS_BACK))
943 	    {
944 	      int edge_freq = EDGE_FREQUENCY (e);
945 
946 	      if (edge_freq > priority)
947 		priority = edge_freq;
948 	    }
949 	}
950       bbd[bb->index].priority = priority;
951     }
952 
953   if (priority)
954     /* A block with priority should have a significantly lower key.  */
955     return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
956 
957   return -bb->frequency;
958 }
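
/* Worked example (BB_FREQ_MAX is 10000): a cold-partition block gets the
   worst key, BB_FREQ_MAX.  A block with frequency 800 and no interesting
   predecessor edges gets key -800.  If its most frequent interesting
   predecessor edge has EDGE_FREQUENCY 600, the key becomes
   -(100 * 10000 + 100 * 600 + 800) == -1060800, which is far smaller, so
   the block is extracted from the min-heap much earlier.  */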
959 
960 /* Return true when the edge E from basic block BB is better than the temporary
961    best edge (details are in the function).  The probability of edge E is PROB.
962    The frequency of the successor is FREQ.  The current best probability is
963    BEST_PROB, the best frequency is BEST_FREQ.
964    The edge is considered equivalent when PROB does not differ much from
965    BEST_PROB; similarly for frequency.  */
966 
967 static bool
968 better_edge_p (const_basic_block bb, const_edge e, int prob, int freq,
969 	       int best_prob, int best_freq, const_edge cur_best_edge)
970 {
971   bool is_better_edge;
972 
973   /* The BEST_* values do not have to be best, but can be a bit smaller than
974      maximum values.  */
975   int diff_prob = best_prob / 10;
976   int diff_freq = best_freq / 10;
977 
978   /* The smaller one is better to keep the original order.  */
979   if (optimize_function_for_size_p (cfun))
980     return !cur_best_edge
981 	   || cur_best_edge->dest->index > e->dest->index;
982 
983   if (prob > best_prob + diff_prob)
984     /* The edge has higher probability than the temporary best edge.  */
985     is_better_edge = true;
986   else if (prob < best_prob - diff_prob)
987     /* The edge has lower probability than the temporary best edge.  */
988     is_better_edge = false;
989   else if (freq < best_freq - diff_freq)
990     /* The edge and the temporary best edge have almost equivalent
991        probabilities.  The higher frequency of a successor now means
992        that there is another edge going into that successor.
993        This successor has lower frequency so it is better.  */
994     is_better_edge = true;
995   else if (freq > best_freq + diff_freq)
996     /* This successor has higher frequency so it is worse.  */
997     is_better_edge = false;
998   else if (e->dest->prev_bb == bb)
999     /* The edges have equivalent probabilities and the successors
1000        have equivalent frequencies.  Select the previous successor.  */
1001     is_better_edge = true;
1002   else
1003     is_better_edge = false;
1004 
1005   /* If we are doing hot/cold partitioning, make sure that we always favor
1006      non-crossing edges over crossing edges.  */
1007 
1008   if (!is_better_edge
1009       && flag_reorder_blocks_and_partition
1010       && cur_best_edge
1011       && (cur_best_edge->flags & EDGE_CROSSING)
1012       && !(e->flags & EDGE_CROSSING))
1013     is_better_edge = true;
1014 
1015   return is_better_edge;
1016 }
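
/* Worked example: with best_prob == 6000, diff_prob is 600, so a candidate
   edge with prob 6400 is treated as equivalent in probability and the
   successor frequencies decide; a candidate with prob 6700 wins outright
   and one with prob 5300 loses outright.  Only when both probability and
   frequency fall inside their 10% bands do the prev_bb and non-crossing
   tie-breaks above apply.  */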
1017 
1018 /* Return true when the edge E is better than the temporary best edge
1019    CUR_BEST_EDGE.  If SRC_INDEX_P is true, the function compares the src bb of
1020    E and CUR_BEST_EDGE; otherwise it will compare the dest bb.
1021    BEST_LEN is the trace length of src (or dest) bb in CUR_BEST_EDGE.
1022    TRACES record the information about traces.
1023    When optimizing for size, the edge with smaller index is better.
1024    When optimizing for speed, the edge with bigger probability or longer trace
1025    is better.  */
1026 
1027 static bool
1028 connect_better_edge_p (const_edge e, bool src_index_p, int best_len,
1029 		       const_edge cur_best_edge, struct trace *traces)
1030 {
1031   int e_index;
1032   int b_index;
1033   bool is_better_edge;
1034 
1035   if (!cur_best_edge)
1036     return true;
1037 
1038   if (optimize_function_for_size_p (cfun))
1039     {
1040       e_index = src_index_p ? e->src->index : e->dest->index;
1041       b_index = src_index_p ? cur_best_edge->src->index
1042 			      : cur_best_edge->dest->index;
1043       /* The smaller one is better to keep the original order.  */
1044       return b_index > e_index;
1045     }
1046 
1047   if (src_index_p)
1048     {
1049       e_index = e->src->index;
1050 
1051       if (e->probability > cur_best_edge->probability)
1052 	/* The edge has higher probability than the temporary best edge.  */
1053 	is_better_edge = true;
1054       else if (e->probability < cur_best_edge->probability)
1055 	/* The edge has lower probability than the temporary best edge.  */
1056 	is_better_edge = false;
1057       else if (traces[bbd[e_index].end_of_trace].length > best_len)
1058 	/* The edge and the temporary best edge have equivalent probabilities.
1059 	   The edge with longer trace is better.  */
1060 	is_better_edge = true;
1061       else
1062 	is_better_edge = false;
1063     }
1064   else
1065     {
1066       e_index = e->dest->index;
1067 
1068       if (e->probability > cur_best_edge->probability)
1069 	/* The edge has higher probability than the temporary best edge.  */
1070 	is_better_edge = true;
1071       else if (e->probability < cur_best_edge->probability)
1072 	/* The edge has lower probability than the temporary best edge.  */
1073 	is_better_edge = false;
1074       else if (traces[bbd[e_index].start_of_trace].length > best_len)
1075 	/* The edge and the temporary best edge have equivalent probabilities.
1076 	   The edge with longer trace is better.  */
1077 	is_better_edge = true;
1078       else
1079 	is_better_edge = false;
1080     }
1081 
1082   return is_better_edge;
1083 }
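
/* For example, when optimizing for speed an edge with probability 7000
   beats a current best of 6000 regardless of trace lengths; at equal
   probabilities, the edge whose trace is longer than BEST_LEN wins.  When
   optimizing for size only the block index matters, so the edge leading to
   (or from) the lower-numbered block is preferred, keeping the layout close
   to the original block order.  */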
1084 
1085 /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
1086 
1087 static void
1088 connect_traces (int n_traces, struct trace *traces)
1089 {
1090   int i;
1091   bool *connected;
1092   bool two_passes;
1093   int last_trace;
1094   int current_pass;
1095   int current_partition;
1096   int freq_threshold;
1097   gcov_type count_threshold;
1098   bool for_size = optimize_function_for_size_p (cfun);
1099 
1100   freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
1101   if (max_entry_count < INT_MAX / 1000)
1102     count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
1103   else
1104     count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
1105 
1106   connected = XCNEWVEC (bool, n_traces);
1107   last_trace = -1;
1108   current_pass = 1;
1109   current_partition = BB_PARTITION (traces[0].first);
1110   two_passes = false;
1111 
1112   if (crtl->has_bb_partition)
1113     for (i = 0; i < n_traces && !two_passes; i++)
1114       if (BB_PARTITION (traces[0].first)
1115 	  != BB_PARTITION (traces[i].first))
1116 	two_passes = true;
1117 
1118   for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
1119     {
1120       int t = i;
1121       int t2;
1122       edge e, best;
1123       int best_len;
1124 
1125       if (i >= n_traces)
1126 	{
1127 	  gcc_assert (two_passes && current_pass == 1);
1128 	  i = 0;
1129 	  t = i;
1130 	  current_pass = 2;
1131 	  if (current_partition == BB_HOT_PARTITION)
1132 	    current_partition = BB_COLD_PARTITION;
1133 	  else
1134 	    current_partition = BB_HOT_PARTITION;
1135 	}
1136 
1137       if (connected[t])
1138 	continue;
1139 
1140       if (two_passes
1141 	  && BB_PARTITION (traces[t].first) != current_partition)
1142 	continue;
1143 
1144       connected[t] = true;
1145 
1146       /* Find the predecessor traces.  */
1147       for (t2 = t; t2 > 0;)
1148 	{
1149 	  edge_iterator ei;
1150 	  best = NULL;
1151 	  best_len = 0;
1152 	  FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
1153 	    {
1154 	      int si = e->src->index;
1155 
1156 	      if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1157 		  && (e->flags & EDGE_CAN_FALLTHRU)
1158 		  && !(e->flags & EDGE_COMPLEX)
1159 		  && bbd[si].end_of_trace >= 0
1160 		  && !connected[bbd[si].end_of_trace]
1161 		  && (BB_PARTITION (e->src) == current_partition)
1162 		  && connect_better_edge_p (e, true, best_len, best, traces))
1163 		{
1164 		  best = e;
1165 		  best_len = traces[bbd[si].end_of_trace].length;
1166 		}
1167 	    }
1168 	  if (best)
1169 	    {
1170 	      best->src->aux = best->dest;
1171 	      t2 = bbd[best->src->index].end_of_trace;
1172 	      connected[t2] = true;
1173 
1174 	      if (dump_file)
1175 		{
1176 		  fprintf (dump_file, "Connection: %d %d\n",
1177 			   best->src->index, best->dest->index);
1178 		}
1179 	    }
1180 	  else
1181 	    break;
1182 	}
1183 
1184       if (last_trace >= 0)
1185 	traces[last_trace].last->aux = traces[t2].first;
1186       last_trace = t;
1187 
1188       /* Find the successor traces.  */
1189       while (1)
1190 	{
1191 	  /* Find the continuation of the chain.  */
1192 	  edge_iterator ei;
1193 	  best = NULL;
1194 	  best_len = 0;
1195 	  FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1196 	    {
1197 	      int di = e->dest->index;
1198 
1199 	      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1200 		  && (e->flags & EDGE_CAN_FALLTHRU)
1201 		  && !(e->flags & EDGE_COMPLEX)
1202 		  && bbd[di].start_of_trace >= 0
1203 		  && !connected[bbd[di].start_of_trace]
1204 		  && (BB_PARTITION (e->dest) == current_partition)
1205 		  && connect_better_edge_p (e, false, best_len, best, traces))
1206 		{
1207 		  best = e;
1208 		  best_len = traces[bbd[di].start_of_trace].length;
1209 		}
1210 	    }
1211 
1212 	  if (for_size)
1213 	    {
1214 	      if (!best)
1215 		/* Stop finding the successor traces.  */
1216 		break;
1217 
1218 	      /* It is OK to connect block n with block n + 1 or a block
1219 		 before n.  For others, only connect to the loop header.  */
1220 	      if (best->dest->index > (traces[t].last->index + 1))
1221 		{
1222 		  int count = EDGE_COUNT (best->dest->preds);
1223 
1224 		  FOR_EACH_EDGE (e, ei, best->dest->preds)
1225 		    if (e->flags & EDGE_DFS_BACK)
1226 		      count--;
1227 
1228 		  /* If dest has multiple predecessors, skip it.  We expect
1229 		     that a predecessor with a smaller index connects with it
1230 		     later.  */
1231 		  if (count != 1)
1232 		    break;
1233 		}
1234 
1235 	      /* Only connect Trace n with Trace n + 1.  It is conservative
1236 		 to keep the order as close as possible to the original order.
1237 		 It also helps to reduce long jumps.  */
1238 	      if (last_trace != bbd[best->dest->index].start_of_trace - 1)
1239 		break;
1240 
1241 	      if (dump_file)
1242 		fprintf (dump_file, "Connection: %d %d\n",
1243 			 best->src->index, best->dest->index);
1244 
1245 	      t = bbd[best->dest->index].start_of_trace;
1246 	      traces[last_trace].last->aux = traces[t].first;
1247 	      connected[t] = true;
1248 	      last_trace = t;
1249 	    }
1250 	  else if (best)
1251 	    {
1252 	      if (dump_file)
1253 		{
1254 		  fprintf (dump_file, "Connection: %d %d\n",
1255 			   best->src->index, best->dest->index);
1256 		}
1257 	      t = bbd[best->dest->index].start_of_trace;
1258 	      traces[last_trace].last->aux = traces[t].first;
1259 	      connected[t] = true;
1260 	      last_trace = t;
1261 	    }
1262 	  else
1263 	    {
1264 	      /* Try to connect the traces by duplication of 1 block.  */
1265 	      edge e2;
1266 	      basic_block next_bb = NULL;
1267 	      bool try_copy = false;
1268 
1269 	      FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1270 		if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1271 		    && (e->flags & EDGE_CAN_FALLTHRU)
1272 		    && !(e->flags & EDGE_COMPLEX)
1273 		    && (!best || e->probability > best->probability))
1274 		  {
1275 		    edge_iterator ei;
1276 		    edge best2 = NULL;
1277 		    int best2_len = 0;
1278 
1279 		    /* If the destination is a start of a trace which is only
1280 		       one block long, then there is no need to search the successor
1281 		       blocks of the trace.  Accept it.  */
1282 		    if (bbd[e->dest->index].start_of_trace >= 0
1283 			&& traces[bbd[e->dest->index].start_of_trace].length
1284 			   == 1)
1285 		      {
1286 			best = e;
1287 			try_copy = true;
1288 			continue;
1289 		      }
1290 
1291 		    FOR_EACH_EDGE (e2, ei, e->dest->succs)
1292 		      {
1293 			int di = e2->dest->index;
1294 
1295 			if (e2->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
1296 			    || ((e2->flags & EDGE_CAN_FALLTHRU)
1297 				&& !(e2->flags & EDGE_COMPLEX)
1298 				&& bbd[di].start_of_trace >= 0
1299 				&& !connected[bbd[di].start_of_trace]
1300 				&& BB_PARTITION (e2->dest) == current_partition
1301 				&& EDGE_FREQUENCY (e2) >= freq_threshold
1302 				&& e2->count >= count_threshold
1303 				&& (!best2
1304 				    || e2->probability > best2->probability
1305 				    || (e2->probability == best2->probability
1306 					&& traces[bbd[di].start_of_trace].length
1307 					   > best2_len))))
1308 			  {
1309 			    best = e;
1310 			    best2 = e2;
1311 			    if (e2->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1312 			      best2_len = traces[bbd[di].start_of_trace].length;
1313 			    else
1314 			      best2_len = INT_MAX;
1315 			    next_bb = e2->dest;
1316 			    try_copy = true;
1317 			  }
1318 		      }
1319 		  }
1320 
1321 	      if (crtl->has_bb_partition)
1322 		try_copy = false;
1323 
1324 	      /* Copy tiny blocks always; copy larger blocks only when the
1325 		 edge is traversed frequently enough.  */
1326 	      if (try_copy
1327 		  && copy_bb_p (best->dest,
1328 				optimize_edge_for_speed_p (best)
1329 				&& EDGE_FREQUENCY (best) >= freq_threshold
1330 				&& best->count >= count_threshold))
1331 		{
1332 		  basic_block new_bb;
1333 
1334 		  if (dump_file)
1335 		    {
1336 		      fprintf (dump_file, "Connection: %d %d ",
1337 			       traces[t].last->index, best->dest->index);
1338 		      if (!next_bb)
1339 			fputc ('\n', dump_file);
1340 		      else if (next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1341 			fprintf (dump_file, "exit\n");
1342 		      else
1343 			fprintf (dump_file, "%d\n", next_bb->index);
1344 		    }
1345 
1346 		  new_bb = copy_bb (best->dest, best, traces[t].last, t);
1347 		  traces[t].last = new_bb;
1348 		  if (next_bb && next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1349 		    {
1350 		      t = bbd[next_bb->index].start_of_trace;
1351 		      traces[last_trace].last->aux = traces[t].first;
1352 		      connected[t] = true;
1353 		      last_trace = t;
1354 		    }
1355 		  else
1356 		    break;	/* Stop finding the successor traces.  */
1357 		}
1358 	      else
1359 		break;	/* Stop finding the successor traces.  */
1360 	    }
1361 	}
1362     }
1363 
1364   if (dump_file)
1365     {
1366       basic_block bb;
1367 
1368       fprintf (dump_file, "Final order:\n");
1369       for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1370 	fprintf (dump_file, "%d ", bb->index);
1371       fprintf (dump_file, "\n");
1372       fflush (dump_file);
1373     }
1374 
1375   FREE (connected);
1376 }
1377 
1378 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1379    when code size is allowed to grow by duplication.  */
1380 
1381 static bool
1382 copy_bb_p (const_basic_block bb, int code_may_grow)
1383 {
1384   int size = 0;
1385   int max_size = uncond_jump_length;
1386   rtx_insn *insn;
1387 
1388   if (!bb->frequency)
1389     return false;
1390   if (EDGE_COUNT (bb->preds) < 2)
1391     return false;
1392   if (!can_duplicate_block_p (bb))
1393     return false;
1394 
1395   /* Avoid duplicating blocks which have many successors (PR/13430).  */
1396   if (EDGE_COUNT (bb->succs) > 8)
1397     return false;
1398 
1399   if (code_may_grow && optimize_bb_for_speed_p (bb))
1400     max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1401 
1402   FOR_BB_INSNS (bb, insn)
1403     {
1404       if (INSN_P (insn))
1405 	size += get_attr_min_length (insn);
1406     }
1407 
1408   if (size <= max_size)
1409     return true;
1410 
1411   if (dump_file)
1412     {
1413       fprintf (dump_file,
1414 	       "Block %d can't be copied because its size = %d.\n",
1415 	       bb->index, size);
1416     }
1417 
1418   return false;
1419 }
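
/* Illustrative numbers only: if the unconditional jump length on the target
   is 5 (in get_attr_min_length units) and PARAM_MAX_GROW_COPY_BB_INSNS has
   its default value of 8, a block optimized for speed may be duplicated
   when its instructions sum to at most 40 units if CODE_MAY_GROW, and to at
   most 5 units otherwise; the actual limits are target- and
   parameter-dependent.  */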
1420 
1421 /* Return the length of an unconditional jump instruction.  */
1422 
1423 int
1424 get_uncond_jump_length (void)
1425 {
1426   rtx_insn *label, *jump;
1427   int length;
1428 
1429   start_sequence ();
1430   label = emit_label (gen_label_rtx ());
1431   jump = emit_jump_insn (gen_jump (label));
1432   length = get_attr_min_length (jump);
1433   end_sequence ();
1434 
1435   return length;
1436 }
1437 
1438 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1439    Duplicate the landing pad and split the edges so that no EH edge
1440    crosses partitions.  */
1441 
1442 static void
1443 fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1444 {
1445   eh_landing_pad new_lp;
1446   basic_block new_bb, last_bb, post_bb;
1447   rtx_insn *new_label, *jump;
1448   rtx post_label;
1449   unsigned new_partition;
1450   edge_iterator ei;
1451   edge e;
1452 
1453   /* Generate the new landing-pad structure.  */
1454   new_lp = gen_eh_landing_pad (old_lp->region);
1455   new_lp->post_landing_pad = old_lp->post_landing_pad;
1456   new_lp->landing_pad = gen_label_rtx ();
1457   LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1458 
1459   /* Put appropriate instructions in new bb.  */
1460   new_label = emit_label (new_lp->landing_pad);
1461 
1462   expand_dw2_landing_pad_for_region (old_lp->region);
1463 
1464   post_bb = BLOCK_FOR_INSN (old_lp->landing_pad);
1465   post_bb = single_succ (post_bb);
1466   post_label = block_label (post_bb);
1467   jump = emit_jump_insn (gen_jump (post_label));
1468   JUMP_LABEL (jump) = post_label;
1469 
1470   /* Create new basic block to be dest for lp.  */
1471   last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
1472   new_bb = create_basic_block (new_label, jump, last_bb);
1473   new_bb->aux = last_bb->aux;
1474   last_bb->aux = new_bb;
1475 
1476   emit_barrier_after_bb (new_bb);
1477 
1478   make_edge (new_bb, post_bb, 0);
1479 
1480   /* Make sure new bb is in the other partition.  */
1481   new_partition = BB_PARTITION (old_bb);
1482   new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1483   BB_SET_PARTITION (new_bb, new_partition);
1484 
1485   /* Fix up the edges.  */
1486   for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1487     if (BB_PARTITION (e->src) == new_partition)
1488       {
1489 	rtx_insn *insn = BB_END (e->src);
1490 	rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1491 
1492 	gcc_assert (note != NULL);
1493 	gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1494 	XEXP (note, 0) = GEN_INT (new_lp->index);
1495 
1496 	/* Adjust the edge to the new destination.  */
1497 	redirect_edge_succ (e, new_bb);
1498       }
1499     else
1500       ei_next (&ei);
1501 }
1502 
1503 
1504 /* Ensure that all hot bbs are included in a hot path through the
1505    procedure. This is done by calling this function twice, once
1506    with WALK_UP true (to look for paths from the entry to hot bbs) and
1507    once with WALK_UP false (to look for paths from hot bbs to the exit).
1508    Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1509    to BBS_IN_HOT_PARTITION.  */
1510 
1511 static unsigned int
1512 sanitize_hot_paths (bool walk_up, unsigned int cold_bb_count,
1513                     vec<basic_block> *bbs_in_hot_partition)
1514 {
1515   /* Callers check this.  */
1516   gcc_checking_assert (cold_bb_count);
1517 
1518   /* Keep examining hot bbs while we still have some left to check
1519      and there are remaining cold bbs.  */
1520   vec<basic_block> hot_bbs_to_check = bbs_in_hot_partition->copy ();
1521   while (! hot_bbs_to_check.is_empty ()
1522          && cold_bb_count)
1523     {
1524       basic_block bb = hot_bbs_to_check.pop ();
1525       vec<edge, va_gc> *edges = walk_up ? bb->preds : bb->succs;
1526       edge e;
1527       edge_iterator ei;
1528       int highest_probability = 0;
1529       int highest_freq = 0;
1530       gcov_type highest_count = 0;
1531       bool found = false;
1532 
1533       /* Walk the preds/succs and check if there is at least one already
1534          marked hot. Keep track of the most frequent pred/succ so that we
1535          can mark it hot if we don't find one.  */
1536       FOR_EACH_EDGE (e, ei, edges)
1537         {
1538           basic_block reach_bb = walk_up ? e->src : e->dest;
1539 
1540           if (e->flags & EDGE_DFS_BACK)
1541             continue;
1542 
1543           if (BB_PARTITION (reach_bb) != BB_COLD_PARTITION)
1544           {
1545             found = true;
1546             break;
1547           }
1548           /* The following loop looks for the hottest edge via
1549              the edge count if it is non-zero, then falls back to the edge
1550              frequency and finally to the edge probability.  */
1551           if (e->count > highest_count)
1552             highest_count = e->count;
1553           int edge_freq = EDGE_FREQUENCY (e);
1554           if (edge_freq > highest_freq)
1555             highest_freq = edge_freq;
1556           if (e->probability > highest_probability)
1557             highest_probability = e->probability;
1558         }
1559 
1560       /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1561          block (or unpartitioned, e.g. the entry block) then it is ok. If not,
1562          then the most frequent pred (or succ) needs to be adjusted.  In the
1563          case where multiple preds/succs have the same frequency (e.g. a
1564          50-50 branch), then both will be adjusted.  */
1565       if (found)
1566         continue;
1567 
1568       FOR_EACH_EDGE (e, ei, edges)
1569         {
1570           if (e->flags & EDGE_DFS_BACK)
1571             continue;
1572           /* Select the hottest edge using the edge count, if it is non-zero,
1573              then fallback to the edge frequency and finally the edge
1574              probability.  */
1575           if (highest_count)
1576             {
1577               if (e->count < highest_count)
1578                 continue;
1579             }
1580           else if (highest_freq)
1581             {
1582               if (EDGE_FREQUENCY (e) < highest_freq)
1583                 continue;
1584             }
1585           else if (e->probability < highest_probability)
1586             continue;
1587 
1588           basic_block reach_bb = walk_up ? e->src : e->dest;
1589 
1590           /* We have a hot bb with an immediate dominator that is cold.
1591              The dominator needs to be re-marked hot.  */
1592           BB_SET_PARTITION (reach_bb, BB_HOT_PARTITION);
1593           cold_bb_count--;
1594 
1595           /* Now we need to examine newly-hot reach_bb to see if it is also
1596              dominated by a cold bb.  */
1597           bbs_in_hot_partition->safe_push (reach_bb);
1598           hot_bbs_to_check.safe_push (reach_bb);
1599         }
1600     }
1601 
1602   return cold_bb_count;
1603 }
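
/* Example scenario: a loop body is marked hot by the profile, but the
   function entry and the loop preheader are marked cold (the function is
   entered rarely while the loop iterates many times).  The WALK_UP pass
   pops the hot loop body, finds that all of its non-back-edge predecessors
   are cold, re-marks the most frequent one (the preheader) hot, and then
   re-examines the preheader, repeating until the hot blocks lie on a hot
   path reachable from the entry.  */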
1604 
1605 
1606 /* Find the basic blocks that are rarely executed and need to be moved to
1607    a separate section of the .o file (to cut down on paging and improve
1608    cache locality).  Return a vector of all edges that cross between the sections.  */
1609 
1610 static vec<edge>
1611 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1612 {
1613   vec<edge> crossing_edges = vNULL;
1614   basic_block bb;
1615   edge e;
1616   edge_iterator ei;
1617   unsigned int cold_bb_count = 0;
1618   auto_vec<basic_block> bbs_in_hot_partition;
1619 
1620   /* Mark which partition (hot/cold) each basic block belongs in.  */
1621   FOR_EACH_BB_FN (bb, cfun)
1622     {
1623       bool cold_bb = false;
1624 
1625       if (probably_never_executed_bb_p (cfun, bb))
1626         {
1627           /* Handle profile insanities created by upstream optimizations
1628              by also checking the incoming edge weights. If there is a non-cold
1629              incoming edge, conservatively prevent this block from being split
1630              into the cold section.  */
1631           cold_bb = true;
1632           FOR_EACH_EDGE (e, ei, bb->preds)
1633             if (!probably_never_executed_edge_p (cfun, e))
1634               {
1635                 cold_bb = false;
1636                 break;
1637               }
1638         }
1639       if (cold_bb)
1640         {
1641           BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1642           cold_bb_count++;
1643         }
1644       else
1645         {
1646           BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1647           bbs_in_hot_partition.safe_push (bb);
1648         }
1649     }
1650 
1651   /* Ensure that hot bbs are included along a hot path from the entry to exit.
1652      There are several reasons why all paths to/from a hot bb may otherwise
1653      include cold bbs.  One is that there are edge weight insanities
1654      due to optimization phases that do not properly update basic block profile
1655      counts.  Another is that the entry of the function may not be hot, because
1656      it is entered fewer times than the number of profile training runs, but there
1657      is a loop inside the function that causes blocks within the function to be
1658      above the threshold for hotness.  This is fixed by walking up from hot bbs
1659      to the entry block, and then down from hot bbs to the exit, performing
1660      partitioning fixups as necessary.  */
1661   if (cold_bb_count)
1662     {
1663       mark_dfs_back_edges ();
1664       cold_bb_count = sanitize_hot_paths (true, cold_bb_count,
1665                                           &bbs_in_hot_partition);
1666       if (cold_bb_count)
1667         sanitize_hot_paths (false, cold_bb_count, &bbs_in_hot_partition);
1668     }
1669 
1670   /* The format of .gcc_except_table does not allow landing pads to
1671      be in a different partition than the throw.  Fix this by either
1672      moving or duplicating the landing pads.  */
1673   if (cfun->eh->lp_array)
1674     {
1675       unsigned i;
1676       eh_landing_pad lp;
1677 
1678       FOR_EACH_VEC_ELT (*cfun->eh->lp_array, i, lp)
1679 	{
1680 	  bool all_same, all_diff;
1681 
1682 	  if (lp == NULL
1683 	      || lp->landing_pad == NULL_RTX
1684 	      || !LABEL_P (lp->landing_pad))
1685 	    continue;
1686 
1687 	  all_same = all_diff = true;
1688 	  bb = BLOCK_FOR_INSN (lp->landing_pad);
1689 	  FOR_EACH_EDGE (e, ei, bb->preds)
1690 	    {
1691 	      gcc_assert (e->flags & EDGE_EH);
1692 	      if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1693 		all_diff = false;
1694 	      else
1695 		all_same = false;
1696 	    }
1697 
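	  /* If every EH predecessor is in the same partition as the landing
	     pad, there is nothing to do.  If every predecessor is in the
	     other partition, flip the landing pad's partition: the XOR below
	     toggles between BB_HOT_PARTITION and BB_COLD_PARTITION.  With a
	     mix of both, the landing pad has to be moved or duplicated.  */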
1698 	  if (all_same)
1699 	    ;
1700 	  else if (all_diff)
1701 	    {
1702 	      int which = BB_PARTITION (bb);
1703 	      which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1704 	      BB_SET_PARTITION (bb, which);
1705 	    }
1706 	  else
1707 	    fix_up_crossing_landing_pad (lp, bb);
1708 	}
1709     }
1710 
1711   /* Mark every edge that crosses between sections.  */
1712 
1713   FOR_EACH_BB_FN (bb, cfun)
1714     FOR_EACH_EDGE (e, ei, bb->succs)
1715       {
1716 	unsigned int flags = e->flags;
1717 
1718         /* We should never have EDGE_CROSSING set yet.  */
1719 	gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1720 
1721 	if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1722 	    && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1723 	    && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1724 	  {
1725 	    crossing_edges.safe_push (e);
1726 	    flags |= EDGE_CROSSING;
1727 	  }
1728 
1729 	/* Now that we've split eh edges as appropriate, allow landing pads
1730 	   to be merged with the post-landing pads.  */
1731 	flags &= ~EDGE_PRESERVE;
1732 
1733 	e->flags = flags;
1734       }
1735 
1736   return crossing_edges;
1737 }
1738 
1739 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru.  */
1740 
1741 static void
1742 set_edge_can_fallthru_flag (void)
1743 {
1744   basic_block bb;
1745 
1746   FOR_EACH_BB_FN (bb, cfun)
1747     {
1748       edge e;
1749       edge_iterator ei;
1750 
1751       FOR_EACH_EDGE (e, ei, bb->succs)
1752 	{
1753 	  e->flags &= ~EDGE_CAN_FALLTHRU;
1754 
1755 	  /* A FALLTHRU edge is also a CAN_FALLTHRU edge.  */
1756 	  if (e->flags & EDGE_FALLTHRU)
1757 	    e->flags |= EDGE_CAN_FALLTHRU;
1758 	}
1759 
1760       /* If the BB ends with an invertible condjump, both of its (2) edges
1761 	 are CAN_FALLTHRU edges.  */
1762       if (EDGE_COUNT (bb->succs) != 2)
1763 	continue;
1764       if (!any_condjump_p (BB_END (bb)))
1765 	continue;
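      /* Verify that the jump really is invertible by inverting it, then
	 invert it again to restore the original condition.  */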
1766       if (!invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0))
1767 	continue;
1768       invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0);
1769       EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1770       EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
1771     }
1772 }
1773 
1774 /* If any destination of a crossing edge does not have a label, add a label;
1775    convert any easy fall-through crossing edges to unconditional jumps.  */
1776 
1777 static void
1778 add_labels_and_missing_jumps (vec<edge> crossing_edges)
1779 {
1780   size_t i;
1781   edge e;
1782 
1783   FOR_EACH_VEC_ELT (crossing_edges, i, e)
1784     {
1785       basic_block src = e->src;
1786       basic_block dest = e->dest;
1787       rtx label;
1788       rtx_insn *new_jump;
1789 
1790       if (dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1791 	continue;
1792 
1793       /* Make sure dest has a label.  */
1794       label = block_label (dest);
1795 
1796       /* Nothing to do for non-fallthru edges.  */
1797       if (src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1798 	continue;
1799       if ((e->flags & EDGE_FALLTHRU) == 0)
1800 	continue;
1801 
1802       /* If the block does not end with a control flow insn, then we
1803 	 can trivially add a jump to the end to fix up the crossing.
1804 	 Otherwise the jump will have to go in a new bb, which will
1805 	 be handled by the fix_up_fall_thru_edges function.  */
1806       if (control_flow_insn_p (BB_END (src)))
1807 	continue;
1808 
1809       /* Make sure there's only one successor.  */
1810       gcc_assert (single_succ_p (src));
1811 
1812       new_jump = emit_jump_insn_after (gen_jump (label), BB_END (src));
1813       BB_END (src) = new_jump;
1814       JUMP_LABEL (new_jump) = label;
1815       LABEL_NUSES (label) += 1;
1816 
1817       emit_barrier_after_bb (src);
1818 
1819       /* Mark edge as non-fallthru.  */
1820       e->flags &= ~EDGE_FALLTHRU;
1821     }
1822 }
1823 
1824 /* Find any bb's where the fall-through edge is a crossing edge (note that
1825    these bb's must also contain a conditional jump or end with a call
1826    instruction; we've already dealt with fall-through edges for blocks
1827    that didn't have a conditional jump or didn't end with a call instruction
1828    in the call to add_labels_and_missing_jumps).  Convert the fall-through
1829    edge to a non-crossing edge by inserting a new bb to fall through into.
1830    The new bb will contain an unconditional jump (crossing edge) to the
1831    original fall-through destination.  */
1832 
1833 static void
1834 fix_up_fall_thru_edges (void)
1835 {
1836   basic_block cur_bb;
1837   basic_block new_bb;
1838   edge succ1;
1839   edge succ2;
1840   edge fall_thru;
1841   edge cond_jump = NULL;
1842   edge e;
1843   bool cond_jump_crosses;
1844   int invert_worked;
1845   rtx_insn *old_jump;
1846   rtx fall_thru_label;
1847 
1848   FOR_EACH_BB_FN (cur_bb, cfun)
1849     {
1850       fall_thru = NULL;
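      /* Cache the first two successors.  A block ending in a conditional
	 jump has exactly two; blocks ending in a call may have more and are
	 handled separately below.  */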
1851       if (EDGE_COUNT (cur_bb->succs) > 0)
1852 	succ1 = EDGE_SUCC (cur_bb, 0);
1853       else
1854 	succ1 = NULL;
1855 
1856       if (EDGE_COUNT (cur_bb->succs) > 1)
1857 	succ2 = EDGE_SUCC (cur_bb, 1);
1858       else
1859 	succ2 = NULL;
1860 
1861       /* Find the fall-through edge.  */
1862 
1863       if (succ1
1864 	  && (succ1->flags & EDGE_FALLTHRU))
1865 	{
1866 	  fall_thru = succ1;
1867 	  cond_jump = succ2;
1868 	}
1869       else if (succ2
1870 	       && (succ2->flags & EDGE_FALLTHRU))
1871 	{
1872 	  fall_thru = succ2;
1873 	  cond_jump = succ1;
1874 	}
1875       else if (succ1
1876 	       && (block_ends_with_call_p (cur_bb)
1877 		   || can_throw_internal (BB_END (cur_bb))))
1878 	{
1879 	  edge e;
1880 	  edge_iterator ei;
1881 
1882 	  FOR_EACH_EDGE (e, ei, cur_bb->succs)
1883 	    if (e->flags & EDGE_FALLTHRU)
1884 	      {
1885 		fall_thru = e;
1886 		break;
1887 	      }
1888 	}
1889 
1890       if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)))
1891 	{
1892 	  /* Check to see if the fall-thru edge is a crossing edge.  */
1893 
1894 	  if (fall_thru->flags & EDGE_CROSSING)
1895 	    {
1896 	      /* The fall_thru edge crosses; now check the cond jump edge, if
1897 		 it exists.  */
1898 
1899 	      cond_jump_crosses = true;
1900 	      invert_worked  = 0;
1901 	      old_jump = BB_END (cur_bb);
1902 
1903 	      /* Find the jump instruction, if there is one.  */
1904 
1905 	      if (cond_jump)
1906 		{
1907 		  if (!(cond_jump->flags & EDGE_CROSSING))
1908 		    cond_jump_crosses = false;
1909 
1910 		  /* We know the fall-thru edge crosses; if the cond
1911 		     jump edge does NOT cross, and its destination is the
1912 		     next block in the bb order, invert the jump
1913 		     (i.e. fix it so the fall through does not cross and
1914 		     the cond jump does).  */
1915 
1916 		  if (!cond_jump_crosses)
1917 		    {
1918 		      /* Find the label in the fall_thru block.  We've already
1919 			 added any missing labels, so there must be one.  */
1920 
1921 		      fall_thru_label = block_label (fall_thru->dest);
1922 
1923 		      if (old_jump && JUMP_P (old_jump) && fall_thru_label)
1924 			invert_worked = invert_jump (old_jump,
1925 						     fall_thru_label,0);
1926 		      if (invert_worked)
1927 			{
1928 			  fall_thru->flags &= ~EDGE_FALLTHRU;
1929 			  cond_jump->flags |= EDGE_FALLTHRU;
1930 			  update_br_prob_note (cur_bb);
1931 			  e = fall_thru;
1932 			  fall_thru = cond_jump;
1933 			  cond_jump = e;
1934 			  cond_jump->flags |= EDGE_CROSSING;
1935 			  fall_thru->flags &= ~EDGE_CROSSING;
1936 			}
1937 		    }
1938 		}
1939 
1940 	      if (cond_jump_crosses || !invert_worked)
1941 		{
1942 		  /* This is the case where both edges out of the basic
1943 		     block are crossing edges.  Here we will fix up the
1944 		     fall-through edge.  The jump edge will be taken care
1945 		     of later.  The EDGE_CROSSING flag of the fall_thru edge
1946 		     is unset before the call to the force_nonfallthru
1947 		     function because, if a new basic block is created,
1948 		     this edge stays within the current section
1949 		     while the edge between new_bb and fall_thru->dest
1950 		     becomes EDGE_CROSSING.  */
1951 
1952 		  fall_thru->flags &= ~EDGE_CROSSING;
1953 		  new_bb = force_nonfallthru (fall_thru);
1954 
1955 		  if (new_bb)
1956 		    {
1957 		      new_bb->aux = cur_bb->aux;
1958 		      cur_bb->aux = new_bb;
1959 
1960                       /* This is done by force_nonfallthru_and_redirect.  */
1961 		      gcc_assert (BB_PARTITION (new_bb)
1962                                   == BB_PARTITION (cur_bb));
1963 
1964 		      single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1965 		    }
1966 		  else
1967 		    {
1968 		      /* If a new basic block was not created, restore
1969 			 the EDGE_CROSSING flag.  */
1970 		      fall_thru->flags |= EDGE_CROSSING;
1971 		    }
1972 
1973 		  /* Add a barrier after the new jump.  */
1974 		  emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1975 		}
1976 	    }
1977 	}
1978     }
1979 }
1980 
1981 /* This function checks the destination block of a "crossing jump" to
1982    see if it has any crossing predecessors that begin with a code label
1983    and end with an unconditional jump.  If so, it returns that predecessor
1984    block.  (This is to avoid creating lots of new basic blocks that all
1985    contain unconditional jumps to the same destination).  */
1986 
1987 static basic_block
1988 find_jump_block (basic_block jump_dest)
1989 {
1990   basic_block source_bb = NULL;
1991   edge e;
1992   rtx_insn *insn;
1993   edge_iterator ei;
1994 
1995   FOR_EACH_EDGE (e, ei, jump_dest->preds)
1996     if (e->flags & EDGE_CROSSING)
1997       {
1998 	basic_block src = e->src;
1999 
2000 	/* Check each predecessor to see if it has a label, and contains
2001 	   only one executable instruction, which is an unconditional jump.
2002 	   If so, we can use it.  */
2003 
2004 	if (LABEL_P (BB_HEAD (src)))
2005 	  for (insn = BB_HEAD (src);
2006 	       !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
2007 	       insn = NEXT_INSN (insn))
2008 	    {
2009 	      if (INSN_P (insn)
2010 		  && insn == BB_END (src)
2011 		  && JUMP_P (insn)
2012 		  && !any_condjump_p (insn))
2013 		{
2014 		  source_bb = src;
2015 		  break;
2016 		}
2017 	    }
2018 
2019 	if (source_bb)
2020 	  break;
2021       }
2022 
2023   return source_bb;
2024 }
2025 
2026 /* Find all BB's with conditional jumps that are crossing edges;
2027    insert a new bb and make the conditional jump branch to the new
2028    bb instead (make the new bb the same color so the conditional branch
2029    won't be a 'crossing' edge).  Insert an unconditional jump from the
2030    new bb to the original destination of the conditional jump.  */
2031 
2032 static void
2033 fix_crossing_conditional_branches (void)
2034 {
2035   basic_block cur_bb;
2036   basic_block new_bb;
2037   basic_block dest;
2038   edge succ1;
2039   edge succ2;
2040   edge crossing_edge;
2041   edge new_edge;
2042   rtx_insn *old_jump;
2043   rtx set_src;
2044   rtx old_label = NULL_RTX;
2045   rtx new_label;
2046 
2047   FOR_EACH_BB_FN (cur_bb, cfun)
2048     {
2049       crossing_edge = NULL;
2050       if (EDGE_COUNT (cur_bb->succs) > 0)
2051 	succ1 = EDGE_SUCC (cur_bb, 0);
2052       else
2053 	succ1 = NULL;
2054 
2055       if (EDGE_COUNT (cur_bb->succs) > 1)
2056 	succ2 = EDGE_SUCC (cur_bb, 1);
2057       else
2058 	succ2 = NULL;
2059 
2060       /* We already took care of fall-through edges, so only one successor
2061 	 can be a crossing edge.  */
2062 
2063       if (succ1 && (succ1->flags & EDGE_CROSSING))
2064 	crossing_edge = succ1;
2065       else if (succ2 && (succ2->flags & EDGE_CROSSING))
2066 	crossing_edge = succ2;
2067 
2068       if (crossing_edge)
2069 	{
2070 	  old_jump = BB_END (cur_bb);
2071 
2072 	  /* Check to make sure the jump instruction is a
2073 	     conditional jump.  */
2074 
2075 	  set_src = NULL_RTX;
2076 
2077 	  if (any_condjump_p (old_jump))
2078 	    {
2079 	      if (GET_CODE (PATTERN (old_jump)) == SET)
2080 		set_src = SET_SRC (PATTERN (old_jump));
2081 	      else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
2082 		{
2083 		  set_src = XVECEXP (PATTERN (old_jump), 0,0);
2084 		  if (GET_CODE (set_src) == SET)
2085 		    set_src = SET_SRC (set_src);
2086 		  else
2087 		    set_src = NULL_RTX;
2088 		}
2089 	    }
2090 
2091 	  if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
2092 	    {
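	      /* The branch target is whichever arm of the IF_THEN_ELSE is
		 not the fall-through PC.  */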
2093 	      if (GET_CODE (XEXP (set_src, 1)) == PC)
2094 		old_label = XEXP (set_src, 2);
2095 	      else if (GET_CODE (XEXP (set_src, 2)) == PC)
2096 		old_label = XEXP (set_src, 1);
2097 
2098 	      /* Check to see if new bb for jumping to that dest has
2099 		 already been created; if so, use it; if not, create
2100 		 a new one.  */
2101 
2102 	      new_bb = find_jump_block (crossing_edge->dest);
2103 
2104 	      if (new_bb)
2105 		new_label = block_label (new_bb);
2106 	      else
2107 		{
2108 		  basic_block last_bb;
2109 		  rtx_insn *new_jump;
2110 
2111 		  /* Create new basic block to be dest for
2112 		     conditional jump.  */
2113 
2114 		  /* Put appropriate instructions in new bb.  */
2115 
2116 		  new_label = gen_label_rtx ();
2117 		  emit_label (new_label);
2118 
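		  /* OLD_LABEL currently holds the LABEL_REF taken from the
		     jump pattern; replace it with the CODE_LABEL itself,
		     which is what gen_jump expects.  */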
2119 		  gcc_assert (GET_CODE (old_label) == LABEL_REF);
2120 		  old_label = JUMP_LABEL (old_jump);
2121 		  new_jump = emit_jump_insn (gen_jump (old_label));
2122 		  JUMP_LABEL (new_jump) = old_label;
2123 
2124 		  last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
2125 		  new_bb = create_basic_block (new_label, new_jump, last_bb);
2126 		  new_bb->aux = last_bb->aux;
2127 		  last_bb->aux = new_bb;
2128 
2129 		  emit_barrier_after_bb (new_bb);
2130 
2131 		  /* Make sure new bb is in same partition as source
2132 		     of conditional branch.  */
2133 		  BB_COPY_PARTITION (new_bb, cur_bb);
2134 		}
2135 
2136 	      /* Make old jump branch to new bb.  */
2137 
2138 	      redirect_jump (old_jump, new_label, 0);
2139 
2140 	      /* Remove crossing_edge as predecessor of 'dest'.  */
2141 
2142 	      dest = crossing_edge->dest;
2143 
2144 	      redirect_edge_succ (crossing_edge, new_bb);
2145 
2146 	      /* Make a new edge from new_bb to old dest; new edge
2147 		 will be a successor for new_bb and a predecessor
2148 		 for 'dest'.  */
2149 
2150 	      if (EDGE_COUNT (new_bb->succs) == 0)
2151 		new_edge = make_edge (new_bb, dest, 0);
2152 	      else
2153 		new_edge = EDGE_SUCC (new_bb, 0);
2154 
2155 	      crossing_edge->flags &= ~EDGE_CROSSING;
2156 	      new_edge->flags |= EDGE_CROSSING;
2157 	    }
2158 	}
2159     }
2160 }
2161 
2162 /* Find any unconditional branches that cross between hot and cold
2163    sections.  Convert them into indirect jumps instead.  */
2164 
2165 static void
2166 fix_crossing_unconditional_branches (void)
2167 {
2168   basic_block cur_bb;
2169   rtx_insn *last_insn;
2170   rtx label;
2171   rtx label_addr;
2172   rtx_insn *indirect_jump_sequence;
2173   rtx_insn *jump_insn = NULL;
2174   rtx new_reg;
2175   rtx_insn *cur_insn;
2176   edge succ;
2177 
2178   FOR_EACH_BB_FN (cur_bb, cfun)
2179     {
2180       last_insn = BB_END (cur_bb);
2181 
2182       if (EDGE_COUNT (cur_bb->succs) < 1)
2183 	continue;
2184 
2185       succ = EDGE_SUCC (cur_bb, 0);
2186 
2187       /* Check to see if bb ends in a crossing (unconditional) jump.  At
2188 	 this point, no crossing jumps should be conditional.  */
2189 
2190       if (JUMP_P (last_insn)
2191 	  && (succ->flags & EDGE_CROSSING))
2192 	{
2193 	  gcc_assert (!any_condjump_p (last_insn));
2194 
2195 	  /* Make sure the jump is not already an indirect or table jump.  */
2196 
2197 	  if (!computed_jump_p (last_insn)
2198 	      && !tablejump_p (last_insn, NULL, NULL))
2199 	    {
2200 	      /* We have found a "crossing" unconditional branch.  Now
2201 		 we must convert it to an indirect jump.  First create a
2202 		 reference to the label, as the target for the jump.  */
2203 
2204 	      label = JUMP_LABEL (last_insn);
2205 	      label_addr = gen_rtx_LABEL_REF (Pmode, label);
2206 	      LABEL_NUSES (label) += 1;
2207 
2208 	      /* Get a register to use for the indirect jump.  */
2209 
2210 	      new_reg = gen_reg_rtx (Pmode);
2211 
2212 	      /* Generate the indirect jump sequence.  */
2213 
2214 	      start_sequence ();
2215 	      emit_move_insn (new_reg, label_addr);
2216 	      emit_indirect_jump (new_reg);
2217 	      indirect_jump_sequence = get_insns ();
2218 	      end_sequence ();
2219 
2220 	      /* Make sure every instruction in the new jump sequence has
2221 		 its basic block set to be cur_bb.  */
2222 
2223 	      for (cur_insn = indirect_jump_sequence; cur_insn;
2224 		   cur_insn = NEXT_INSN (cur_insn))
2225 		{
2226 		  if (!BARRIER_P (cur_insn))
2227 		    BLOCK_FOR_INSN (cur_insn) = cur_bb;
2228 		  if (JUMP_P (cur_insn))
2229 		    jump_insn = cur_insn;
2230 		}
2231 
2232 	      /* Insert the new (indirect) jump sequence immediately before
2233 		 the unconditional jump, then delete the unconditional jump.  */
2234 
2235 	      emit_insn_before (indirect_jump_sequence, last_insn);
2236 	      delete_insn (last_insn);
2237 
2238 	      JUMP_LABEL (jump_insn) = label;
2239 	      LABEL_NUSES (label)++;
2240 
2241 	      /* Make BB_END for cur_bb be the jump instruction (NOT the
2242 		 barrier instruction at the end of the sequence...).  */
2243 
2244 	      BB_END (cur_bb) = jump_insn;
2245 	    }
2246 	}
2247     }
2248 }
2249 
2250 /* Update CROSSING_JUMP_P flags on all jump insns.  */
2251 
2252 static void
2253 update_crossing_jump_flags (void)
2254 {
2255   basic_block bb;
2256   edge e;
2257   edge_iterator ei;
2258 
2259   FOR_EACH_BB_FN (bb, cfun)
2260     FOR_EACH_EDGE (e, ei, bb->succs)
2261       if (e->flags & EDGE_CROSSING)
2262 	{
2263 	  if (JUMP_P (BB_END (bb))
2264 	      /* Some flags were added during fix_up_fall_thru_edges, via
2265 		 force_nonfallthru_and_redirect.  */
2266 	      && !CROSSING_JUMP_P (BB_END (bb)))
2267 	    CROSSING_JUMP_P (BB_END (bb)) = 1;
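	  /* Once one crossing successor has been seen, there is nothing
	     more to do for this block.  */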
2268 	  break;
2269 	}
2270 }
2271 
2272 /* Reorder basic blocks.  The main entry point to this file.  */
2274 
2275 static void
2276 reorder_basic_blocks (void)
2277 {
2278   int n_traces;
2279   int i;
2280   struct trace *traces;
2281 
2282   gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
2283 
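  /* Nothing to reorder when the function has at most one real basic block
     besides the fixed entry and exit blocks.  */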
2284   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2285     return;
2286 
2287   set_edge_can_fallthru_flag ();
2288   mark_dfs_back_edges ();
2289 
2290   /* We estimate the length of the uncond jump insn only once since the code
2291      for getting the insn length always returns the minimal length now.  */
2292   if (uncond_jump_length == 0)
2293     uncond_jump_length = get_uncond_jump_length ();
2294 
2295   /* We need to know some information for each basic block.  */
2296   array_size = GET_ARRAY_SIZE (last_basic_block_for_fn (cfun));
2297   bbd = XNEWVEC (bbro_basic_block_data, array_size);
2298   for (i = 0; i < array_size; i++)
2299     {
2300       bbd[i].start_of_trace = -1;
2301       bbd[i].end_of_trace = -1;
2302       bbd[i].in_trace = -1;
2303       bbd[i].visited = 0;
2304       bbd[i].priority = -1;
2305       bbd[i].heap = NULL;
2306       bbd[i].node = NULL;
2307     }
2308 
2309   traces = XNEWVEC (struct trace, n_basic_blocks_for_fn (cfun));
2310   n_traces = 0;
2311   find_traces (&n_traces, traces);
2312   connect_traces (n_traces, traces);
2313   FREE (traces);
2314   FREE (bbd);
2315 
2316   relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2317 
2318   if (dump_file)
2319     {
2320       if (dump_flags & TDF_DETAILS)
2321 	dump_reg_info (dump_file);
2322       dump_flow_info (dump_file, dump_flags);
2323     }
2324 
2325   /* Signal that rtl_verify_flow_info_1 can now verify that there
2326      is at most one switch between hot/cold sections.  */
2327   crtl->bb_reorder_complete = true;
2328 }
2329 
2330 /* Determine which partition the first basic block in the function
2331    belongs to, then find the first basic block in the current function
2332    that belongs to a different section, and insert a
2333    NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2334    instruction stream.  When writing out the assembly code,
2335    encountering this note will make the compiler switch between the
2336    hot and cold text sections.  */
2337 
2338 void
2339 insert_section_boundary_note (void)
2340 {
2341   basic_block bb;
2342   bool switched_sections = false;
2343   int current_partition = 0;
2344 
2345   if (!crtl->has_bb_partition)
2346     return;
2347 
2348   FOR_EACH_BB_FN (bb, cfun)
2349     {
2350       if (!current_partition)
2351 	current_partition = BB_PARTITION (bb);
2352       if (BB_PARTITION (bb) != current_partition)
2353 	{
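	  /* After reordering, the blocks of each partition are contiguous,
	     so we should switch sections at most once.  */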
2354 	  gcc_assert (!switched_sections);
2355           switched_sections = true;
2356           emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, BB_HEAD (bb));
2357           current_partition = BB_PARTITION (bb);
2358 	}
2359     }
2360 }
2361 
2362 namespace {
2363 
2364 const pass_data pass_data_reorder_blocks =
2365 {
2366   RTL_PASS, /* type */
2367   "bbro", /* name */
2368   OPTGROUP_NONE, /* optinfo_flags */
2369   TV_REORDER_BLOCKS, /* tv_id */
2370   0, /* properties_required */
2371   0, /* properties_provided */
2372   0, /* properties_destroyed */
2373   0, /* todo_flags_start */
2374   0, /* todo_flags_finish */
2375 };
2376 
2377 class pass_reorder_blocks : public rtl_opt_pass
2378 {
2379 public:
2380   pass_reorder_blocks (gcc::context *ctxt)
2381     : rtl_opt_pass (pass_data_reorder_blocks, ctxt)
2382   {}
2383 
2384   /* opt_pass methods: */
2385   virtual bool gate (function *)
2386     {
2387       if (targetm.cannot_modify_jumps_p ())
2388 	return false;
2389       return (optimize > 0
2390 	      && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2391     }
2392 
2393   virtual unsigned int execute (function *);
2394 
2395 }; // class pass_reorder_blocks
2396 
2397 unsigned int
2398 pass_reorder_blocks::execute (function *fun)
2399 {
2400   basic_block bb;
2401 
2402   /* Last attempt to optimize the CFG, as scheduling, peepholing and insn
2403      splitting may have introduced more crossjumping opportunities.  */
2404   cfg_layout_initialize (CLEANUP_EXPENSIVE);
2405 
2406   reorder_basic_blocks ();
2407   cleanup_cfg (CLEANUP_EXPENSIVE);
2408 
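  /* Link the blocks in their current (reordered) order through the aux
     fields so that cfg_layout_finalize preserves this order.  */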
2409   FOR_EACH_BB_FN (bb, fun)
2410     if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2411       bb->aux = bb->next_bb;
2412   cfg_layout_finalize ();
2413 
2414   return 0;
2415 }
2416 
2417 } // anon namespace
2418 
2419 rtl_opt_pass *
2420 make_pass_reorder_blocks (gcc::context *ctxt)
2421 {
2422   return new pass_reorder_blocks (ctxt);
2423 }
2424 
2425 /* Duplicate the blocks containing computed gotos.  This basically unfactors
2426    computed gotos that were factored early on in the compilation process to
2427    speed up edge-based data flow.  We used to not unfactor them again,
2428    which can seriously pessimize code with many computed jumps in the source
2429    code, such as interpreters.  See e.g. PR15242.  */
2430 
2431 namespace {
2432 
2433 const pass_data pass_data_duplicate_computed_gotos =
2434 {
2435   RTL_PASS, /* type */
2436   "compgotos", /* name */
2437   OPTGROUP_NONE, /* optinfo_flags */
2438   TV_REORDER_BLOCKS, /* tv_id */
2439   0, /* properties_required */
2440   0, /* properties_provided */
2441   0, /* properties_destroyed */
2442   0, /* todo_flags_start */
2443   0, /* todo_flags_finish */
2444 };
2445 
2446 class pass_duplicate_computed_gotos : public rtl_opt_pass
2447 {
2448 public:
2449   pass_duplicate_computed_gotos (gcc::context *ctxt)
2450     : rtl_opt_pass (pass_data_duplicate_computed_gotos, ctxt)
2451   {}
2452 
2453   /* opt_pass methods: */
2454   virtual bool gate (function *);
2455   virtual unsigned int execute (function *);
2456 
2457 }; // class pass_duplicate_computed_gotos
2458 
2459 bool
2460 pass_duplicate_computed_gotos::gate (function *fun)
2461 {
2462   if (targetm.cannot_modify_jumps_p ())
2463     return false;
2464   return (optimize > 0
2465 	  && flag_expensive_optimizations
2466 	  && ! optimize_function_for_size_p (fun));
2467 }
2468 
2469 unsigned int
2470 pass_duplicate_computed_gotos::execute (function *fun)
2471 {
2472   basic_block bb, new_bb;
2473   bitmap candidates;
2474   int max_size;
2475   bool changed = false;
2476 
2477   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2478     return 0;
2479 
2480   clear_bb_flags ();
2481   cfg_layout_initialize (0);
2482 
2483   /* We estimate the length of the uncond jump insn only once
2484      since the code for getting the insn length always returns
2485      the minimal length now.  */
2486   if (uncond_jump_length == 0)
2487     uncond_jump_length = get_uncond_jump_length ();
2488 
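  /* The largest block (measured in minimal insn lengths) that we are
     willing to duplicate, expressed as a multiple of the length of an
     unconditional jump.  */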
2489   max_size
2490     = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2491   candidates = BITMAP_ALLOC (NULL);
2492 
2493   /* Look for blocks that end in a computed jump, and see if such blocks
2494      are suitable for unfactoring.  If a block is a candidate for unfactoring,
2495      mark it in the candidates bitmap.  */
2496   FOR_EACH_BB_FN (bb, fun)
2497     {
2498       rtx_insn *insn;
2499       edge e;
2500       edge_iterator ei;
2501       int size, all_flags;
2502 
2503       /* Build the reorder chain for the original order of blocks.  */
2504       if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2505 	bb->aux = bb->next_bb;
2506 
2507       /* Obviously the block has to end in a computed jump.  */
2508       if (!computed_jump_p (BB_END (bb)))
2509 	continue;
2510 
2511       /* Only consider blocks that can be duplicated.  */
2512       if (CROSSING_JUMP_P (BB_END (bb))
2513 	  || !can_duplicate_block_p (bb))
2514 	continue;
2515 
2516       /* Make sure that the block is small enough.  */
2517       size = 0;
2518       FOR_BB_INSNS (bb, insn)
2519 	if (INSN_P (insn))
2520 	  {
2521 	    size += get_attr_min_length (insn);
2522 	    if (size > max_size)
2523 	       break;
2524 	  }
2525       if (size > max_size)
2526 	continue;
2527 
2528       /* Final check: there must not be any incoming abnormal edges.  */
2529       all_flags = 0;
2530       FOR_EACH_EDGE (e, ei, bb->preds)
2531 	all_flags |= e->flags;
2532       if (all_flags & EDGE_COMPLEX)
2533 	continue;
2534 
2535       bitmap_set_bit (candidates, bb->index);
2536     }
2537 
2538   /* Nothing to do if there is no computed jump here.  */
2539   if (bitmap_empty_p (candidates))
2540     goto done;
2541 
2542   /* Duplicate computed gotos.  */
2543   FOR_EACH_BB_FN (bb, fun)
2544     {
2545       if (bb->flags & BB_VISITED)
2546 	continue;
2547 
2548       bb->flags |= BB_VISITED;
2549 
2550       /* BB must have one outgoing edge.  That edge must not lead to
2551 	 the exit block or the next block.
2552 	 The destination must have more than one predecessor.  */
2553       if (!single_succ_p (bb)
2554 	  || single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (fun)
2555 	  || single_succ (bb) == bb->next_bb
2556 	  || single_pred_p (single_succ (bb)))
2557 	continue;
2558 
2559       /* The successor block has to be a duplication candidate.  */
2560       if (!bitmap_bit_p (candidates, single_succ (bb)->index))
2561 	continue;
2562 
2563       /* Don't duplicate a partition crossing edge, which requires difficult
2564          fixup.  */
2565       if (JUMP_P (BB_END (bb)) && CROSSING_JUMP_P (BB_END (bb)))
2566 	continue;
2567 
2568       new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
2569       new_bb->aux = bb->aux;
2570       bb->aux = new_bb;
2571       new_bb->flags |= BB_VISITED;
2572       changed = true;
2573     }
2574 
2575  done:
2576   if (changed)
2577     {
2578       /* Duplicating blocks above will redirect edges and may cause hot
2579 	 blocks previously reached by both hot and cold blocks to become
2580 	 dominated only by cold blocks.  */
2581       fixup_partitions ();
2582 
2583       /* Merge the duplicated blocks into predecessors, when possible.  */
2584       cfg_layout_finalize ();
2585       cleanup_cfg (0);
2586     }
2587   else
2588     cfg_layout_finalize ();
2589 
2590   BITMAP_FREE (candidates);
2591   return 0;
2592 }
2593 
2594 } // anon namespace
2595 
2596 rtl_opt_pass *
2597 make_pass_duplicate_computed_gotos (gcc::context *ctxt)
2598 {
2599   return new pass_duplicate_computed_gotos (ctxt);
2600 }
2601 
2602 /* This function is the main 'entrance' for the optimization that
2603    partitions hot and cold basic blocks into separate sections of the
2604    .o file (to improve performance and cache locality).  Ideally it
2605    would be called after all optimizations that rearrange the CFG have
2606    been called.  However part of this optimization may introduce new
2607    register usage, so it must be called before register allocation has
2608    occurred.  This means that this optimization is actually called
2609    well before the optimization that reorders basic blocks (see
2610    function above).
2611 
2612    This optimization checks the feedback information to determine
2613    which basic blocks are hot/cold, and updates flags on the basic blocks
2614    to indicate which section they belong in.  This information is
2615    later used for writing out sections in the .o file.  Because hot
2616    and cold sections can be arbitrarily large (within the bounds of
2617    memory), far beyond the size of a single function, it is necessary
2618    to fix up all edges that cross section boundaries, to make sure the
2619    instructions used can actually span the required distance.  The
2620    fixes are described below.
2621 
2622    Fall-through edges must be changed into jumps; it is not safe or
2623    legal to fall through across a section boundary.  Whenever a
2624    fall-through edge crossing a section boundary is encountered, a new
2625    basic block is inserted (in the same section as the fall-through
2626    source), and the fall through edge is redirected to the new basic
2627    block.  The new basic block contains an unconditional jump to the
2628    original fall-through target.  (If the unconditional jump is
2629    insufficient to cross section boundaries, that is dealt with a
2630    little later, see below).
2631 
2632    In order to deal with architectures that have short conditional
2633    branches (which cannot span all of memory) we take any conditional
2634    jump that attempts to cross a section boundary and add a level of
2635    indirection: it becomes a conditional jump to a new basic block, in
2636    the same section.  The new basic block contains an unconditional
2637    jump to the original target, in the other section.
2638 
2639    For those architectures whose unconditional branch is also
2640    incapable of reaching all of memory, those unconditional jumps are
2641    converted into indirect jumps, through a register.
2642 
2643    IMPORTANT NOTE: This optimization causes some messy interactions
2644    with the cfg cleanup optimizations; those optimizations want to
2645    merge blocks wherever possible, and to collapse indirect jump
2646    sequences (change "A jumps to B jumps to C" directly into "A jumps
2647    to C").  Those optimizations can undo the jump fixes that
2648    partitioning is required to make (see above), in order to ensure
2649    that jumps attempting to cross section boundaries are really able
2650    to cover whatever distance the jump requires (on many architectures
2651    conditional or unconditional jumps are not able to reach all of
2652    memory).  Therefore tests have to be inserted into each such
2653    optimization to make sure that it does not undo stuff necessary to
2654    cross partition boundaries.  This would be much less of a problem
2655    if we could perform this optimization later in the compilation, but
2656    unfortunately the fact that we may need to create indirect jumps
2657    (through registers) requires that this optimization be performed
2658    before register allocation.
2659 
2660    Hot and cold basic blocks are partitioned and put in separate
2661    sections of the .o file, to reduce paging and improve cache
2662    performance (hopefully).  This can result in bits of code from the
2663    same function being widely separated in the .o file.  However this
2664    is not apparent from the current bb structure.  Therefore we must take
2665    care to ensure that: 1). There are no fall_thru edges that cross
2666    between sections; 2). For those architectures which have "short"
2667    conditional branches, all conditional branches that attempt to
2668    cross between sections are converted to unconditional branches;
2669    and, 3). For those architectures which have "short" unconditional
2670    branches, all unconditional branches that attempt to cross between
2671    sections are converted to indirect jumps.
2672 
2673    The code for fixing up fall_thru edges that cross between hot and
2674    cold basic blocks does so by creating new basic blocks containing
2675    unconditional branches to the appropriate label in the "other"
2676    section.  The new basic block is then put in the same (hot or cold)
2677    section as the original conditional branch, and the fall_thru edge
2678    is modified to fall into the new basic block instead.  By adding
2679    this level of indirection we end up with only unconditional branches
2680    crossing between hot and cold sections.
2681 
2682    Conditional branches are dealt with by adding a level of indirection.
2683    A new basic block is added in the same (hot/cold) section as the
2684    conditional branch, and the conditional branch is retargeted to the
2685    new basic block.  The new basic block contains an unconditional branch
2686    to the original target of the conditional branch (in the other section).
2687 
2688    Unconditional branches are dealt with by converting them into
2689    indirect jumps.  */
2690 
2691 namespace {
2692 
2693 const pass_data pass_data_partition_blocks =
2694 {
2695   RTL_PASS, /* type */
2696   "bbpart", /* name */
2697   OPTGROUP_NONE, /* optinfo_flags */
2698   TV_REORDER_BLOCKS, /* tv_id */
2699   PROP_cfglayout, /* properties_required */
2700   0, /* properties_provided */
2701   0, /* properties_destroyed */
2702   0, /* todo_flags_start */
2703   0, /* todo_flags_finish */
2704 };
2705 
2706 class pass_partition_blocks : public rtl_opt_pass
2707 {
2708 public:
2709   pass_partition_blocks (gcc::context *ctxt)
2710     : rtl_opt_pass (pass_data_partition_blocks, ctxt)
2711   {}
2712 
2713   /* opt_pass methods: */
2714   virtual bool gate (function *);
2715   virtual unsigned int execute (function *);
2716 
2717 }; // class pass_partition_blocks
2718 
2719 bool
2720 pass_partition_blocks::gate (function *fun)
2721 {
2722   /* The optimization to partition hot/cold basic blocks into separate
2723      sections of the .o file does not work well with linkonce or with
2724      user defined section attributes.  Don't call it if either case
2725      arises.  */
2726   return (flag_reorder_blocks_and_partition
2727 	  && optimize
2728 	  /* See gate_handle_reorder_blocks.  We should not partition if
2729 	     we are going to omit the reordering.  */
2730 	  && optimize_function_for_speed_p (fun)
2731 	  && !DECL_COMDAT_GROUP (current_function_decl)
2732 	  && !user_defined_section_attribute);
2733 }
2734 
2735 unsigned
2736 pass_partition_blocks::execute (function *fun)
2737 {
2738   vec<edge> crossing_edges;
2739 
2740   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2741     return 0;
2742 
2743   df_set_flags (DF_DEFER_INSN_RESCAN);
2744 
2745   crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2746   if (!crossing_edges.exists ())
2747     /* Make sure to process deferred rescans and clear changeable df flags.  */
2748     return TODO_df_finish;
2749 
2750   crtl->has_bb_partition = true;
2751 
2752   /* Make sure the source of any crossing edge ends in a jump and the
2753      destination of any crossing edge has a label.  */
2754   add_labels_and_missing_jumps (crossing_edges);
2755 
2756   /* Convert all crossing fall_thru edges to non-crossing fall
2757      thrus to unconditional jumps (that jump to the original fall
2758      through dest).  */
2759   fix_up_fall_thru_edges ();
2760 
2761   /* If the architecture does not have conditional branches that can
2762      span all of memory, convert crossing conditional branches into
2763      crossing unconditional branches.  */
2764   if (!HAS_LONG_COND_BRANCH)
2765     fix_crossing_conditional_branches ();
2766 
2767   /* If the architecture does not have unconditional branches that
2768      can span all of memory, convert crossing unconditional branches
2769      into indirect jumps.  Since adding an indirect jump also adds
2770      a new register usage, update the register usage information as
2771      well.  */
2772   if (!HAS_LONG_UNCOND_BRANCH)
2773     fix_crossing_unconditional_branches ();
2774 
2775   update_crossing_jump_flags ();
2776 
2777   /* Clear bb->aux fields that the above routines were using.  */
2778   clear_aux_for_blocks ();
2779 
2780   crossing_edges.release ();
2781 
2782   /* ??? FIXME: DF generates the bb info for a block immediately.
2783      And by immediately, I mean *during* creation of the block.
2784 
2785 	#0  df_bb_refs_collect
2786 	#1  in df_bb_refs_record
2787 	#2  in create_basic_block_structure
2788 
2789      Which means that the bb_has_eh_pred test in df_bb_refs_collect
2790      will *always* fail, because no edges can have been added to the
2791      block yet.  Which of course means we don't add the right
2792      artificial refs, which means we fail df_verify (much) later.
2793 
2794      Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2795      that we also shouldn't grab data from the new blocks those new
2796      insns are in either.  In this way one can create the block, link
2797      it up properly, and have everything Just Work later, when deferred
2798      insns are processed.
2799 
2800      In the meantime, we have no other option but to throw away all
2801      of the DF data and recompute it all.  */
2802   if (fun->eh->lp_array)
2803     {
2804       df_finish_pass (true);
2805       df_scan_alloc (NULL);
2806       df_scan_blocks ();
2807       /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2808 	 data.  We blindly generated all of them when creating the new
2809 	 landing pad.  Delete those assignments we don't use.  */
2810       df_set_flags (DF_LR_RUN_DCE);
2811       df_analyze ();
2812     }
2813 
2814   /* Make sure to process deferred rescans and clear changeable df flags.  */
2815   return TODO_df_finish;
2816 }
2817 
2818 } // anon namespace
2819 
2820 rtl_opt_pass *
2821 make_pass_partition_blocks (gcc::context *ctxt)
2822 {
2823   return new pass_partition_blocks (ctxt);
2824 }
2825