/* Instruction scheduling pass.
   Copyright (C) 1992-2018 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com).  Enhanced by,
   and currently maintained by, Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not, see
<http://www.gnu.org/licenses/>.  */

/* This pass implements list scheduling within basic blocks.  It is
   run twice: (1) after flow analysis, but before register allocation,
   and (2) after register allocation.

   The first run performs interblock scheduling, moving insns between
   different blocks in the same "region", and the second runs only
   basic block scheduling.

   Interblock motions performed are useful motions and speculative
   motions, including speculative loads.  Motions requiring code
   duplication are not supported.  The identification of motion type
   and the check for validity of speculative motions requires
   construction and analysis of the function's control flow graph.

   The main entry point for this pass is schedule_insns(), called for
   each function.  The work of the scheduler is organized in three
   levels: (1) function level: insns are subject to splitting, the
   control-flow graph is constructed, and regions are computed (after
   reload, each region is of one block); (2) region level: control
   flow graph attributes required for interblock scheduling are
   computed (dominators, reachability, etc.), and data dependences and
   priorities are computed; and (3) block level: insns in the block
   are actually scheduled.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "profile.h"
#include "insn-attr.h"
#include "except.h"
#include "params.h"
#include "cfganal.h"
#include "sched-int.h"
#include "sel-sched.h"
#include "tree-pass.h"
#include "dbgcnt.h"
#include "pretty-print.h"
#include "print-rtl.h"

#ifdef INSN_SCHEDULING

/* Some accessor macros for h_i_d members only used within this file.  */
#define FED_BY_SPEC_LOAD(INSN) (HID (INSN)->fed_by_spec_load)
#define IS_LOAD_INSN(INSN) (HID (INSN)->is_load_insn)

/* nr_inter/spec counts interblock/speculative motion for the function.  */
static int nr_inter, nr_spec;

static int is_cfg_nonregular (void);

/* Number of regions in the procedure.  */
int nr_regions = 0;

/* Same as above before adding any new regions.  */
static int nr_regions_initial = 0;

/* Table of region descriptions.  */
region *rgn_table = NULL;

/* Array of lists of regions' blocks.  */
int *rgn_bb_table = NULL;

/* Topological order of blocks in the region (if b2 is reachable from
   b1, block_to_bb[b2] > block_to_bb[b1]).  Note: A basic block is
   always referred to by either block or b, while its topological
   order name (in the region) is referred to by bb.  */
int *block_to_bb = NULL;

/* The number of the region containing a block.  */
int *containing_rgn = NULL;

/* ebb_head[i] is the index in rgn_bb_table of the head basic block of
   the i'th ebb.  Currently we can get an ebb only through splitting of
   the currently scheduled block; therefore, we don't need an ebb_head
   array for every region, and it's sufficient to hold it for the
   current one only.  */
int *ebb_head = NULL;

/* The minimum probability of reaching a source block so that it will be
   considered for speculative scheduling.  */
static int min_spec_prob;
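
/* (Assumed detail, for orientation: sched_rgn_init later in this file
   derives min_spec_prob from the --param min-spec-prob percentage,
   scaling it into the REG_BR_PROB_BASE fixed-point range also used by
   the prob[] array below.)  */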

static void find_single_block_region (bool);
static void find_rgns (void);
static bool too_large (int, int *, int *);

/* Blocks of the current region being scheduled.  */
int current_nr_blocks;
int current_blocks;

/* A speculative motion requires checking live information on the path
   from 'source' to 'target'.  The split blocks are those to be checked.
   After a speculative motion, live information should be modified in
   the 'update' blocks.

   Lists of split and update blocks for each candidate of the current
   target are in array bblst_table.  */
static basic_block *bblst_table;
static int bblst_size, bblst_last;

/* Arrays that hold the DFA state at the end of a basic block, to re-use
   as the initial state at the start of successor blocks.  The
   BB_STATE_ARRAY buffer holds the actual DFA states, and BB_STATE[I] is
   a pointer into BB_STATE_ARRAY for basic block I.  FIXME: This should
   be a vec.  */
static char *bb_state_array = NULL;
static state_t *bb_state = NULL;

/* Target info declarations.

   The block currently being scheduled is referred to as the "target" block,
   while other blocks in the region from which insns can be moved to the
   target are called "source" blocks.  The candidate structure holds info
   about such sources: are they valid?  Speculative?  Etc.  */
struct bblst
{
  basic_block *first_member;
  int nr_members;
};

struct candidate
{
  char is_valid;
  char is_speculative;
  int src_prob;
  bblst split_bbs;
  bblst update_bbs;
};

static candidate *candidate_table;
#define IS_VALID(src) (candidate_table[src].is_valid)
#define IS_SPECULATIVE(src) (candidate_table[src].is_speculative)
#define IS_SPECULATIVE_INSN(INSN)			\
  (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
#define SRC_PROB(src) (candidate_table[src].src_prob)

/* The bb being currently scheduled.  */
int target_bb;
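
/* A minimal illustrative sketch (not part of the original file) of how
   the candidate accessors above are consulted; the real tests live in
   new_ready () and friends later in the file, and this simplified
   helper is an assumption for exposition only.  */
static bool
may_move_insn_to_target_sketch (rtx_insn *insn)
{
  int src = BLOCK_TO_BB (BLOCK_NUM (insn));

  if (src == target_bb)
    return true;		/* Already in the target block.  */
  if (!IS_VALID (src))
    return false;		/* Not a valid source candidate.  */
  if (IS_SPECULATIVE (src) && SRC_PROB (src) < min_spec_prob)
    return false;		/* Too unlikely to be worth speculating.  */
  return true;
}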

/* List of edges.  */
struct edgelst
{
  edge *first_member;
  int nr_members;
};

static edge *edgelst_table;
static int edgelst_last;

static void extract_edgelst (sbitmap, edgelst *);

/* Target info functions.  */
static void split_edges (int, int, edgelst *);
static void compute_trg_info (int);
void debug_candidate (int);
void debug_candidates (int);

/* Dominators array: dom[i] contains the sbitmap of dominators of
   bb i in the region.  */
static sbitmap *dom;

/* bb 0 is the only region entry.  */
#define IS_RGN_ENTRY(bb) (!bb)

/* Is bb_src dominated by bb_trg?  */
#define IS_DOMINATED(bb_src, bb_trg)                                 \
( bitmap_bit_p (dom[bb_src], bb_trg) )

/* Probability: Prob[i] is an int in [0, REG_BR_PROB_BASE] which is
   the probability of bb i relative to the region entry.  */
static int *prob;

/* Bit-set of edges, where bit i stands for edge i.  */
typedef sbitmap edgeset;

/* Number of edges in the region.  */
static int rgn_nr_edges;

/* Array of size rgn_nr_edges.  */
static edge *rgn_edges;

/* Mapping from each edge in the graph to its number in the rgn.  */
#define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux)
#define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr))
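
/* Illustrative sketch (an assumption, condensed from the region setup
   done in schedule_region () later in this file): the edge <-> bit
   mapping above is established by walking every edge whose source lies
   in the region, recording the edge in rgn_edges[] and stashing its
   index in the edge's aux field.  */
static void
number_region_edges_sketch (int rgn)
{
  basic_block block;
  edge e;
  edge_iterator ei;
  int nr = 0;

  FOR_EACH_BB_FN (block, cfun)
    {
      if (CONTAINING_RGN (block->index) != rgn)
	continue;
      FOR_EACH_EDGE (e, ei, block->succs)
	{
	  SET_EDGE_TO_BIT (e, nr);
	  rgn_edges[nr++] = e;	/* rgn_edges assumed pre-allocated.  */
	}
    }
  gcc_assert (nr == rgn_nr_edges);
}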

/* The split edges of a source bb are different for each target
   bb.  In order to compute this efficiently, the 'potential-split edges'
   are computed for each bb prior to scheduling a region.  This is actually
   the split edges of each bb relative to the region entry.

   pot_split[bb] is the set of potential split edges of bb.  */
static edgeset *pot_split;

/* For every bb, a set of its ancestor edges.  */
static edgeset *ancestor_edges;

#define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))

/* Speculative scheduling functions.  */
static int check_live_1 (int, rtx);
static void update_live_1 (int, rtx);
static int is_pfree (rtx, int, int);
static int find_conditional_protection (rtx_insn *, int);
static int is_conditionally_protected (rtx, int, int);
static int is_prisky (rtx, int, int);
static int is_exception_free (rtx_insn *, int, int);

static bool sets_likely_spilled (rtx);
static void sets_likely_spilled_1 (rtx, const_rtx, void *);
static void add_branch_dependences (rtx_insn *, rtx_insn *);
static void compute_block_dependences (int);

static void schedule_region (int);
static void concat_insn_mem_list (rtx_insn_list *, rtx_expr_list *,
				  rtx_insn_list **, rtx_expr_list **);
static void propagate_deps (int, struct deps_desc *);
static void free_pending_lists (void);

/* Functions for construction of the control flow graph.  */

/* Return 1 if the control flow graph should not be constructed, 0 otherwise.

   We decide not to build the control flow graph if there is possibly more
   than one entry to the function, if computed branches exist, if we
   have nonlocal gotos, or if we have an unreachable loop.  */

static int
is_cfg_nonregular (void)
{
  basic_block b;
  rtx_insn *insn;

  /* If we have a label that could be the target of a nonlocal goto, then
     the cfg is not well structured.  */
  if (nonlocal_goto_handler_labels)
    return 1;

  /* If we have any forced labels, then the cfg is not well structured.  */
  if (forced_labels)
    return 1;

  /* If we have exception handlers, then we consider the cfg not well
     structured.  ?!?  We should be able to handle this now that we
     compute an accurate cfg for EH.  */
  if (current_function_has_exception_handlers ())
    return 1;

  /* If we have insns which refer to labels as non-jumped-to operands,
     then we consider the cfg not well structured.  */
  FOR_EACH_BB_FN (b, cfun)
    FOR_BB_INSNS (b, insn)
      {
	rtx note, set, dest;
	rtx_insn *next;

	/* If this function has a computed jump, then we consider the cfg
	   not well structured.  */
	if (JUMP_P (insn) && computed_jump_p (insn))
	  return 1;

	if (!INSN_P (insn))
	  continue;

	note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
	if (note == NULL_RTX)
	  continue;

	/* For that label not to be seen as a referred-to label, this
	   must be a single-set which is feeding a jump *only*.  This
	   could be a conditional jump with the label split off for
	   machine-specific reasons or a casesi/tablejump.  */
	next = next_nonnote_insn (insn);
	if (next == NULL_RTX
	    || !JUMP_P (next)
	    || (JUMP_LABEL (next) != XEXP (note, 0)
		&& find_reg_note (next, REG_LABEL_TARGET,
				  XEXP (note, 0)) == NULL_RTX)
	    || BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (next))
	  return 1;

	set = single_set (insn);
	if (set == NULL_RTX)
	  return 1;

	dest = SET_DEST (set);
	if (!REG_P (dest) || !dead_or_set_p (next, dest))
	  return 1;
      }

  /* Unreachable loops with more than one basic block are detected
     during the DFS traversal in find_rgns.

     Unreachable loops with a single block are detected here.  This
     test is redundant with the one in find_rgns, but it's much
     cheaper to go ahead and catch the trivial case here.  */
  FOR_EACH_BB_FN (b, cfun)
    {
      if (EDGE_COUNT (b->preds) == 0
	  || (single_pred_p (b)
	      && single_pred (b) == b))
	return 1;
    }

  /* All the tests passed.  Consider the cfg well structured.  */
  return 0;
}
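
/* A condensed sketch (an assumption for exposition; the real logic lives
   in sched_rgn_init later in this file) of how the predicate above is
   consumed: region formation falls back to single-block regions whenever
   interblock scheduling is disabled, pointless, or unsafe.  */
static void
choose_region_strategy_sketch (void)
{
  if (!flag_schedule_interblock
      || n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS + 1
      || is_cfg_nonregular ())
    find_single_block_region (false);
  else
    find_rgns ();
}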

/* Extract a list of edges from a bitmap containing EDGE_TO_BIT bits.  */

static void
extract_edgelst (sbitmap set, edgelst *el)
{
  unsigned int i = 0;
  sbitmap_iterator sbi;

  /* edgelst table space is reused in each call to extract_edgelst.  */
  edgelst_last = 0;

  el->first_member = &edgelst_table[edgelst_last];
  el->nr_members = 0;

  /* Iterate over each set bit in the bitset.  */
  EXECUTE_IF_SET_IN_BITMAP (set, 0, i, sbi)
    {
      edgelst_table[edgelst_last++] = rgn_edges[i];
      el->nr_members++;
    }
}
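
/* For illustration, a sketch of how extract_edgelst is consumed:
   split_edges (declared above, defined later in the file) computes the
   split edges of source block BB_SRC relative to target BB_TRG as the
   potential-split edges of the source minus those of the target, then
   flattens that bitset into an edge list.  Condensed here under the
   assumption that pot_split[] has already been computed.  */
static void
split_edges_sketch (int bb_src, int bb_trg, edgelst *el)
{
  auto_sbitmap src (SBITMAP_SIZE (pot_split[bb_src]));

  bitmap_copy (src, pot_split[bb_src]);
  bitmap_and_compl (src, src, pot_split[bb_trg]);
  extract_edgelst (src, el);
}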

/* Functions for the construction of regions.  */

/* Print the regions, for debugging purposes.  Callable from debugger.  */

DEBUG_FUNCTION void
debug_regions (void)
{
  int rgn, bb;

  fprintf (sched_dump, "\n;;   ------------ REGIONS ----------\n\n");
  for (rgn = 0; rgn < nr_regions; rgn++)
    {
      fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
	       rgn_table[rgn].rgn_nr_blocks);
      fprintf (sched_dump, ";;\tbb/block: ");

      /* We don't have ebb_head initialized yet, so we can't use
	 BB_TO_BLOCK ().  */
      current_blocks = RGN_BLOCKS (rgn);

      for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
	fprintf (sched_dump, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);

      fprintf (sched_dump, "\n\n");
    }
}

/* Print the region's basic blocks.  */

DEBUG_FUNCTION void
debug_region (int rgn)
{
  int bb;

  fprintf (stderr, "\n;;   ------------ REGION %d ----------\n\n", rgn);
  fprintf (stderr, ";;\trgn %d nr_blocks %d:\n", rgn,
	   rgn_table[rgn].rgn_nr_blocks);
  fprintf (stderr, ";;\tbb/block: ");

  /* We don't have ebb_head initialized yet, so we can't use
     BB_TO_BLOCK ().  */
  current_blocks = RGN_BLOCKS (rgn);

  for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
    fprintf (stderr, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);

  fprintf (stderr, "\n\n");

  for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
    {
      dump_bb (stderr,
	       BASIC_BLOCK_FOR_FN (cfun, rgn_bb_table[current_blocks + bb]),
	       0, TDF_SLIM | TDF_BLOCKS);
      fprintf (stderr, "\n");
    }

  fprintf (stderr, "\n");
}

/* True when the bb with index BB_INDEX is contained in region RGN.  */
static bool
bb_in_region_p (int bb_index, int rgn)
{
  int i;

  for (i = 0; i < rgn_table[rgn].rgn_nr_blocks; i++)
    if (rgn_bb_table[current_blocks + i] == bb_index)
      return true;

  return false;
}

/* Dump region RGN to file F using dot syntax.  */
void
dump_region_dot (FILE *f, int rgn)
{
  int i;

  fprintf (f, "digraph Region_%d {\n", rgn);

  /* We don't have ebb_head initialized yet, so we can't use
     BB_TO_BLOCK ().  */
  current_blocks = RGN_BLOCKS (rgn);

  for (i = 0; i < rgn_table[rgn].rgn_nr_blocks; i++)
    {
      edge e;
      edge_iterator ei;
      int src_bb_num = rgn_bb_table[current_blocks + i];
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, src_bb_num);

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (bb_in_region_p (e->dest->index, rgn))
	  fprintf (f, "\t%d -> %d\n", src_bb_num, e->dest->index);
    }
  fprintf (f, "}\n");
}

/* The same, but first open a file specified by FNAME.  */
void
dump_region_dot_file (const char *fname, int rgn)
{
  FILE *f = fopen (fname, "wt");
  if (!f)
    return;
  dump_region_dot (f, rgn);
  fclose (f);
}
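
/* Hypothetical convenience wrapper (not in the original file): write one
   .dot file per region, e.g. invoked from a debugger as
   "call dump_all_regions_dot ()".  The file-name scheme is an
   assumption.  */
DEBUG_FUNCTION void
dump_all_regions_dot (void)
{
  for (int rgn = 0; rgn < nr_regions; rgn++)
    {
      char fname[32];
      snprintf (fname, sizeof fname, "region-%d.dot", rgn);
      dump_region_dot_file (fname, rgn);
    }
}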

/* Build a single block region for each basic block in the function.
   This allows for using the same code for interblock and basic block
   scheduling.  */

static void
find_single_block_region (bool ebbs_p)
{
  basic_block bb, ebb_start;
  int i = 0;

  nr_regions = 0;

  if (ebbs_p) {
    int probability_cutoff;
    if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
      probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK);
    else
      probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY);
    probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;

    FOR_EACH_BB_FN (ebb_start, cfun)
      {
        RGN_NR_BLOCKS (nr_regions) = 0;
        RGN_BLOCKS (nr_regions) = i;
        RGN_DONT_CALC_DEPS (nr_regions) = 0;
        RGN_HAS_REAL_EBB (nr_regions) = 0;

        for (bb = ebb_start; ; bb = bb->next_bb)
          {
            edge e;

            rgn_bb_table[i] = bb->index;
            RGN_NR_BLOCKS (nr_regions)++;
            CONTAINING_RGN (bb->index) = nr_regions;
            BLOCK_TO_BB (bb->index) = i - RGN_BLOCKS (nr_regions);
            i++;

	    if (bb->next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
                || LABEL_P (BB_HEAD (bb->next_bb)))
              break;

	    e = find_fallthru_edge (bb->succs);
            if (! e)
              break;
            if (e->probability.initialized_p ()
		&& e->probability.to_reg_br_prob_base () <= probability_cutoff)
              break;
          }

        ebb_start = bb;
        nr_regions++;
      }
  }
  else
    FOR_EACH_BB_FN (bb, cfun)
      {
        rgn_bb_table[nr_regions] = bb->index;
        RGN_NR_BLOCKS (nr_regions) = 1;
        RGN_BLOCKS (nr_regions) = nr_regions;
        RGN_DONT_CALC_DEPS (nr_regions) = 0;
        RGN_HAS_REAL_EBB (nr_regions) = 0;

        CONTAINING_RGN (bb->index) = nr_regions;
        BLOCK_TO_BB (bb->index) = 0;
        nr_regions++;
      }
}

/* Estimate the number of insns in BB.  */
static int
rgn_estimate_number_of_insns (basic_block bb)
{
  int count;

  count = INSN_LUID (BB_END (bb)) - INSN_LUID (BB_HEAD (bb));

  if (MAY_HAVE_DEBUG_INSNS)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	if (DEBUG_INSN_P (insn))
	  count--;
    }

  return count;
}

/* Update the number of blocks and the estimate of the number of insns
   in the region.  Return true if the region is "too large" for interblock
   scheduling (compile time considerations).  */

static bool
too_large (int block, int *num_bbs, int *num_insns)
{
  (*num_bbs)++;
  (*num_insns) += (common_sched_info->estimate_number_of_insns
                   (BASIC_BLOCK_FOR_FN (cfun, block)));

  return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
	  || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
}
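
/* The two limits above come from the --param options
   max-sched-region-blocks and max-sched-region-insns; in GCC 8 the
   defaults are 10 blocks and 100 insns (stated here for orientation;
   see params.def for the authoritative values).  */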

/* UPDATE_LOOP_RELATIONS (blk, hdr): Check if the loop headed by max_hdr[blk]
   is still an inner loop.  Put in max_hdr[blk] the header of the innermost
   loop containing blk.  */
#define UPDATE_LOOP_RELATIONS(blk, hdr)		\
{						\
  if (max_hdr[blk] == -1)			\
    max_hdr[blk] = hdr;				\
  else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])	\
    bitmap_clear_bit (inner, hdr);		\
  else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])	\
    {						\
      bitmap_clear_bit (inner, max_hdr[blk]);	\
      max_hdr[blk] = hdr;			\
    }						\
}
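
/* Worked example (illustrative): suppose BLK is currently attributed to
   header H1 and UPDATE_LOOP_RELATIONS is invoked again with header H2.
   The header with the larger DFS number was entered later and is
   therefore the more deeply nested one; it is kept in max_hdr[blk],
   while the enclosing header's bit in INNER is cleared, since a loop
   that contains another loop is no longer an inner loop.  */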

/* Find regions for interblock scheduling.

   A region for scheduling can be:

     * A loop-free procedure, or

     * A reducible inner loop, or

     * A basic block not contained in any other region.

   ?!? In theory we could build other regions based on extended basic
   blocks or reverse extended basic blocks.  Is it worth the trouble?

   Loop blocks that form a region are put into the region's block list
   in topological order.

   This procedure stores its results into the following global (ick) variables

     * nr_regions
     * rgn_table
     * rgn_bb_table
     * block_to_bb
     * containing_rgn

   We use dominator relationships to avoid making regions out of non-reducible
   loops.

   This procedure needs to be converted to work on pred/succ lists instead
   of edge tables.  That would simplify it somewhat.  */

static void
haifa_find_rgns (void)
{
  int *max_hdr, *dfs_nr, *degree;
  char no_loops = 1;
  int node, child, loop_head, i, head, tail;
  int count = 0, sp, idx = 0;
  edge_iterator current_edge;
  edge_iterator *stack;
  int num_bbs, num_insns, unreachable;
  int too_large_failure;
  basic_block bb;

  /* Perform a DFS traversal of the cfg.  Identify loop headers, inner loops
     and a mapping from block to its loop header (if the block is contained
     in a loop, else -1).

     Store results in HEADER, INNER, and MAX_HDR respectively; these will
     be used as inputs to the second traversal.

     STACK, SP and DFS_NR are only used during the first traversal.  */

  /* Allocate and initialize variables for the first traversal.  */
  max_hdr = XNEWVEC (int, last_basic_block_for_fn (cfun));
  dfs_nr = XCNEWVEC (int, last_basic_block_for_fn (cfun));
  stack = XNEWVEC (edge_iterator, n_edges_for_fn (cfun));

  /* Note if a block is a natural inner loop header.  */
  auto_sbitmap inner (last_basic_block_for_fn (cfun));
  bitmap_ones (inner);

  /* Note if a block is a natural loop header.  */
  auto_sbitmap header (last_basic_block_for_fn (cfun));
  bitmap_clear (header);

  /* Note if a block is in the block queue.  */
  auto_sbitmap in_queue (last_basic_block_for_fn (cfun));
  bitmap_clear (in_queue);

  /* Note if a block is on the DFS stack.  */
  auto_sbitmap in_stack (last_basic_block_for_fn (cfun));
  bitmap_clear (in_stack);

  for (i = 0; i < last_basic_block_for_fn (cfun); i++)
    max_hdr[i] = -1;

  #define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux)
  #define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E)))
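
  /* Both macros piggyback on edge->aux: an edge counts as "passed" once
     its aux field points at the edge itself; aux is reset to NULL in the
     cleanup loop after the traversal below.  */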

  /* DFS traversal to find inner loops in the cfg.  */

  current_edge = ei_start (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))->succs);
  sp = -1;

  while (1)
    {
      if (EDGE_PASSED (current_edge))
	{
	  /* We have reached a leaf node or a node that was already
	     processed.  Pop edges off the stack until we find
	     an edge that has not yet been processed.  */
	  while (sp >= 0 && EDGE_PASSED (current_edge))
	    {
	      /* Pop entry off the stack.  */
	      current_edge = stack[sp--];
	      node = ei_edge (current_edge)->src->index;
	      gcc_assert (node != ENTRY_BLOCK);
	      child = ei_edge (current_edge)->dest->index;
	      gcc_assert (child != EXIT_BLOCK);
	      bitmap_clear_bit (in_stack, child);
	      if (max_hdr[child] >= 0 && bitmap_bit_p (in_stack, max_hdr[child]))
		UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
	      ei_next (&current_edge);
	    }

	  /* See if we have finished the DFS tree traversal.  */
	  if (sp < 0 && EDGE_PASSED (current_edge))
	    break;

	  /* Nope, continue the traversal with the popped node.  */
	  continue;
	}

      /* Process a node.  */
      node = ei_edge (current_edge)->src->index;
      gcc_assert (node != ENTRY_BLOCK);
      bitmap_set_bit (in_stack, node);
      dfs_nr[node] = ++count;

      /* We don't traverse to the exit block.  */
      child = ei_edge (current_edge)->dest->index;
      if (child == EXIT_BLOCK)
	{
	  SET_EDGE_PASSED (current_edge);
	  ei_next (&current_edge);
	  continue;
	}

      /* If the successor is in the stack, then we've found a loop.
	 Mark the loop; if it is not a natural loop, it will be
	 rejected during the second traversal.  */
      if (bitmap_bit_p (in_stack, child))
	{
	  no_loops = 0;
	  bitmap_set_bit (header, child);
	  UPDATE_LOOP_RELATIONS (node, child);
	  SET_EDGE_PASSED (current_edge);
	  ei_next (&current_edge);
	  continue;
	}

      /* If the child was already visited, then there is no need to visit
	 it again.  Just update the loop relationships and restart
	 with a new edge.  */
      if (dfs_nr[child])
	{
	  if (max_hdr[child] >= 0 && bitmap_bit_p (in_stack, max_hdr[child]))
	    UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
	  SET_EDGE_PASSED (current_edge);
	  ei_next (&current_edge);
	  continue;
	}

      /* Push an entry on the stack and continue DFS traversal.  */
      stack[++sp] = current_edge;
      SET_EDGE_PASSED (current_edge);
      current_edge = ei_start (ei_edge (current_edge)->dest->succs);
    }

  /* Reset the ->aux field used by EDGE_PASSED.  */
  FOR_ALL_BB_FN (bb, cfun)
    {
      edge_iterator ei;
      edge e;
      FOR_EACH_EDGE (e, ei, bb->succs)
	e->aux = NULL;
    }


  /* Another check for unreachable blocks.  The earlier test in
     is_cfg_nonregular only finds unreachable blocks that do not
     form a loop.

     The DFS traversal will mark every block that is reachable from
     the entry node by placing a nonzero value in dfs_nr.  Thus if
     dfs_nr is zero for any block, then it must be unreachable.  */
  unreachable = 0;
  FOR_EACH_BB_FN (bb, cfun)
    if (dfs_nr[bb->index] == 0)
      {
	unreachable = 1;
	break;
      }

  /* Gross.  To avoid wasting memory, the second pass uses the dfs_nr array
     to hold degree counts.  */
  degree = dfs_nr;

  FOR_EACH_BB_FN (bb, cfun)
    degree[bb->index] = EDGE_COUNT (bb->preds);

  /* Do not perform region scheduling if there are any unreachable
     blocks.  */
  if (!unreachable)
    {
      int *queue, *degree1 = NULL;
      /* We use EXTENDED_RGN_HEADER as an addition to HEADER and put
	 there the basic blocks that are forced to be region heads.
	 This is done to try to assemble a few smaller regions
	 from a too_large region.  */
      sbitmap extended_rgn_header = NULL;
      bool extend_regions_p;

      if (no_loops)
	bitmap_set_bit (header, 0);

      /* Second traversal: find reducible inner loops and topologically sort
	 the blocks of each region.  */

      queue = XNEWVEC (int, n_basic_blocks_for_fn (cfun));

      extend_regions_p = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS) > 0;
      if (extend_regions_p)
        {
          degree1 = XNEWVEC (int, last_basic_block_for_fn (cfun));
          extended_rgn_header =
	    sbitmap_alloc (last_basic_block_for_fn (cfun));
          bitmap_clear (extended_rgn_header);
	}

      /* Find blocks which are inner loop headers.  We still have non-reducible
	 loops to consider at this point.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  if (bitmap_bit_p (header, bb->index) && bitmap_bit_p (inner, bb->index))
	    {
	      edge e;
	      edge_iterator ei;
	      basic_block jbb;

	      /* Now check that the loop is reducible.  We do this separate
		 from finding inner loops so that we do not find a reducible
		 loop which contains an inner non-reducible loop.

		 A simple way to find reducible/natural loops is to verify
		 that each block in the loop is dominated by the loop
		 header.

		 If there exists a block that is not dominated by the loop
		 header, then the block is reachable from outside the loop
		 and thus the loop is not a natural loop.  */
	      FOR_EACH_BB_FN (jbb, cfun)
		{
		  /* First identify blocks in the loop, except for the loop
		     entry block.  */
		  if (bb->index == max_hdr[jbb->index] && bb != jbb)
		    {
		      /* Now verify that the block is dominated by the loop
			 header.  */
		      if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
			break;
		    }
		}

	      /* If we exited the loop early, then BB is the header of
		 a non-reducible loop and we should quit processing it
		 now.  */
	      if (jbb != EXIT_BLOCK_PTR_FOR_FN (cfun))
		continue;

	      /* BB is a header of an inner loop, or block 0 in a subroutine
		 with no loops at all.  */
	      head = tail = -1;
	      too_large_failure = 0;
	      loop_head = max_hdr[bb->index];

              if (extend_regions_p)
                /* We save the degree in case we meet a too_large region
		   and cancel it.  We need a correct degree later when
                   calling extend_rgns.  */
                memcpy (degree1, degree,
			last_basic_block_for_fn (cfun) * sizeof (int));

	      /* Decrease the degree of all BB's successors for topological
		 ordering.  */
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
		  --degree[e->dest->index];

	      /* Estimate # insns, and count # blocks in the region.  */
	      num_bbs = 1;
	      num_insns = common_sched_info->estimate_number_of_insns (bb);

	      /* Find all loop latches (blocks with back edges to the loop
		 header) or, if the cfg has no loops, all the leaf blocks.

		 Place those blocks into the queue.  */
	      if (no_loops)
		{
		  FOR_EACH_BB_FN (jbb, cfun)
		    /* Leaf nodes have only a single successor which must
		       be EXIT_BLOCK.  */
		    if (single_succ_p (jbb)
			&& single_succ (jbb) == EXIT_BLOCK_PTR_FOR_FN (cfun))
		      {
			queue[++tail] = jbb->index;
			bitmap_set_bit (in_queue, jbb->index);

			if (too_large (jbb->index, &num_bbs, &num_insns))
			  {
			    too_large_failure = 1;
			    break;
			  }
		      }
		}
	      else
		{
		  edge e;

		  FOR_EACH_EDGE (e, ei, bb->preds)
		    {
		      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
			continue;

		      node = e->src->index;

		      if (max_hdr[node] == loop_head && node != bb->index)
			{
			  /* This is a loop latch.  */
			  queue[++tail] = node;
			  bitmap_set_bit (in_queue, node);

			  if (too_large (node, &num_bbs, &num_insns))
			    {
			      too_large_failure = 1;
			      break;
			    }
			}
		    }
		}

	      /* Now add all the blocks in the loop to the queue.

	     We know the loop is a natural loop; however the algorithm
	     above will not always mark certain blocks as being in the
	     loop.  Consider:
		node   children
		 a	  b,c
		 b	  c
		 c	  a,d
		 d	  b

	     The algorithm in the DFS traversal may not mark B & D as part
	     of the loop (i.e. they will not have max_hdr set to A).

	     We know they cannot be loop latches (else they would have
	     had max_hdr set since they'd have a backedge to a dominator
	     block).  So we don't need them on the initial queue.

	     We know they are part of the loop because they are dominated
	     by the loop header and can be reached by a backwards walk of
	     the edges starting with nodes on the initial queue.

	     It is safe and desirable to include those nodes in the
	     loop/scheduling region.  To do so we would need to decrease
	     the degree of a node if it is the target of a backedge
	     within the loop itself as the node is placed in the queue.

	     We do not do this because I'm not sure that the actual
	     scheduling code will properly handle this case. ?!? */

	      while (head < tail && !too_large_failure)
		{
		  edge e;
		  child = queue[++head];

		  FOR_EACH_EDGE (e, ei,
				 BASIC_BLOCK_FOR_FN (cfun, child)->preds)
		    {
		      node = e->src->index;

		      /* See discussion above about nodes not marked as in
			 this loop during the initial DFS traversal.  */
		      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
			  || max_hdr[node] != loop_head)
			{
			  tail = -1;
			  break;
			}
		      else if (!bitmap_bit_p (in_queue, node) && node != bb->index)
			{
			  queue[++tail] = node;
			  bitmap_set_bit (in_queue, node);

			  if (too_large (node, &num_bbs, &num_insns))
			    {
			      too_large_failure = 1;
			      break;
			    }
			}
		    }
		}

	      if (tail >= 0 && !too_large_failure)
		{
		  /* Place the loop header into the list of region blocks.  */
		  degree[bb->index] = -1;
		  rgn_bb_table[idx] = bb->index;
		  RGN_NR_BLOCKS (nr_regions) = num_bbs;
		  RGN_BLOCKS (nr_regions) = idx++;
                  RGN_DONT_CALC_DEPS (nr_regions) = 0;
		  RGN_HAS_REAL_EBB (nr_regions) = 0;
		  CONTAINING_RGN (bb->index) = nr_regions;
		  BLOCK_TO_BB (bb->index) = count = 0;

		  /* Remove blocks from queue[] when their in degree
		     becomes zero.  Repeat until no blocks are left on the
		     list.  This produces a topological list of blocks in
		     the region.  */
		  while (tail >= 0)
		    {
		      if (head < 0)
			head = tail;
		      child = queue[head];
		      if (degree[child] == 0)
			{
			  edge e;

			  degree[child] = -1;
			  rgn_bb_table[idx++] = child;
			  BLOCK_TO_BB (child) = ++count;
			  CONTAINING_RGN (child) = nr_regions;
			  queue[head] = queue[tail--];

			  FOR_EACH_EDGE (e, ei,
					 BASIC_BLOCK_FOR_FN (cfun,
							     child)->succs)
			    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
			      --degree[e->dest->index];
			}
		      else
			--head;
		    }
		  ++nr_regions;
		}
              else if (extend_regions_p)
                {
                  /* Restore DEGREE.  */
                  int *t = degree;

                  degree = degree1;
                  degree1 = t;

                  /* And force the successors of BB to be region heads.
		     This may provide several smaller regions instead
		     of one too_large region.  */
                  FOR_EACH_EDGE (e, ei, bb->succs)
		    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
                      bitmap_set_bit (extended_rgn_header, e->dest->index);
                }
	    }
	}
      free (queue);

      if (extend_regions_p)
        {
          free (degree1);

          bitmap_ior (header, header, extended_rgn_header);
          sbitmap_free (extended_rgn_header);

          extend_rgns (degree, &idx, header, max_hdr);
        }
    }

  /* Any block that did not end up in a region is placed into a region
     by itself.  */
  FOR_EACH_BB_FN (bb, cfun)
    if (degree[bb->index] >= 0)
      {
	rgn_bb_table[idx] = bb->index;
	RGN_NR_BLOCKS (nr_regions) = 1;
	RGN_BLOCKS (nr_regions) = idx++;
        RGN_DONT_CALC_DEPS (nr_regions) = 0;
	RGN_HAS_REAL_EBB (nr_regions) = 0;
	CONTAINING_RGN (bb->index) = nr_regions++;
	BLOCK_TO_BB (bb->index) = 0;
      }

  free (max_hdr);
  free (degree);
  free (stack);
}


/* Wrapper function.
   If FLAG_SEL_SCHED_PIPELINING is set, then use a custom function to form
   regions.  Otherwise just call haifa_find_rgns.  */
static void
find_rgns (void)
{
  if (sel_sched_p () && flag_sel_sched_pipelining)
    sel_find_rgns ();
  else
    haifa_find_rgns ();
}

static int gather_region_statistics (int **);
static void print_region_statistics (int *, int, int *, int);

/* Calculate the histogram that shows the number of regions having the
   given number of basic blocks, and store it in the RSP array.  Return
   the size of this array.  */
static int
gather_region_statistics (int **rsp)
{
  int i, *a = 0, a_sz = 0;

  /* a[i] is the number of regions that have (i + 1) basic blocks.  */
  for (i = 0; i < nr_regions; i++)
    {
      int nr_blocks = RGN_NR_BLOCKS (i);

      gcc_assert (nr_blocks >= 1);

      if (nr_blocks > a_sz)
	{
	  a = XRESIZEVEC (int, a, nr_blocks);
	  do
	    a[a_sz++] = 0;
	  while (a_sz != nr_blocks);
	}

      a[nr_blocks - 1]++;
    }

  *rsp = a;
  return a_sz;
}
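
/* Illustrative helper (an assumption, not in the original file): dump the
   histogram produced by gather_region_statistics in human-readable form.
   The real consumers, just below, diff two such histograms around a call
   to extend_rgns when sched_verbose is high enough.  */
static void
dump_region_size_histogram_sketch (void)
{
  int *hist = NULL;
  int sz = gather_region_statistics (&hist);

  /* hist[i] counts the regions that contain (i + 1) basic blocks.  */
  for (int i = 0; i < sz; i++)
    if (hist[i])
      fprintf (sched_dump, ";; %d region(s) of %d block(s)\n",
	       hist[i], i + 1);
  free (hist);
}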

/* Print region statistics.  S1 and S2 denote the data before and after
   calling extend_rgns, respectively.  */
static void
print_region_statistics (int *s1, int s1_sz, int *s2, int s2_sz)
{
  int i;

  /* We iterate until s2_sz because extend_rgns does not decrease
     the maximal region size.  */
  for (i = 1; i < s2_sz; i++)
    {
      int n1, n2;

      n2 = s2[i];

      if (n2 == 0)
	continue;

      if (i >= s1_sz)
	n1 = 0;
      else
	n1 = s1[i];

      fprintf (sched_dump, ";; Region extension statistics: size %d: "
	       "was %d + %d more\n", i + 1, n1, n2 - n1);
    }
}

/* Extend regions.
   DEGREE - Array of incoming edge counts, considering only
   the edges whose sources are not yet in formed regions.
   IDXP - pointer to the next available index in rgn_bb_table.
   HEADER - set of all region heads.
   LOOP_HDR - mapping from block to the containing loop
   (two blocks can reside within one region if they have
   the same loop header).  */
void
extend_rgns (int *degree, int *idxp, sbitmap header, int *loop_hdr)
{
  int *order, i, rescan = 0, idx = *idxp, iter = 0, max_iter, *max_hdr;
  int nblocks = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  max_iter = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS);

  max_hdr = XNEWVEC (int, last_basic_block_for_fn (cfun));

  order = XNEWVEC (int, last_basic_block_for_fn (cfun));
  post_order_compute (order, false, false);

  for (i = nblocks - 1; i >= 0; i--)
    {
      int bbn = order[i];
      if (degree[bbn] >= 0)
	{
	  max_hdr[bbn] = bbn;
	  rescan = 1;
	}
      else
        /* This block was already processed in find_rgns.  */
        max_hdr[bbn] = -1;
    }

  /* The idea is to topologically walk through the CFG in top-down order.
     During the traversal, if all the predecessors of a node are
     marked to be in the same region (they all have the same max_hdr),
     then the current node is also marked to be a part of that region.
     Otherwise the node starts its own region.
     The CFG should be traversed until no further changes are made.  On each
     iteration the set of the region heads is extended (the set of those
     blocks that have max_hdr[bbi] == bbi).  This set is upper bounded by the
     set of all basic blocks, thus the algorithm is guaranteed to
     terminate.  */
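
  /* Illustrative example (not from the original comment): in a diamond
     A -> {B, C} -> D where A is already a region head, the first sweep
     gives both B and C max_hdr == A; D then sees two predecessors with
     the same max_hdr and joins A's region as well, so the whole diamond
     forms a single region.  */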
1187*38fd1498Szrj 
1188*38fd1498Szrj   while (rescan && iter < max_iter)
1189*38fd1498Szrj     {
1190*38fd1498Szrj       rescan = 0;
1191*38fd1498Szrj 
1192*38fd1498Szrj       for (i = nblocks - 1; i >= 0; i--)
1193*38fd1498Szrj 	{
1194*38fd1498Szrj 	  edge e;
1195*38fd1498Szrj 	  edge_iterator ei;
1196*38fd1498Szrj 	  int bbn = order[i];
1197*38fd1498Szrj 
1198*38fd1498Szrj 	  if (max_hdr[bbn] != -1 && !bitmap_bit_p (header, bbn))
1199*38fd1498Szrj 	    {
1200*38fd1498Szrj 	      int hdr = -1;
1201*38fd1498Szrj 
1202*38fd1498Szrj 	      FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, bbn)->preds)
1203*38fd1498Szrj 		{
1204*38fd1498Szrj 		  int predn = e->src->index;
1205*38fd1498Szrj 
1206*38fd1498Szrj 		  if (predn != ENTRY_BLOCK
1207*38fd1498Szrj 		      /* If pred wasn't processed in find_rgns.  */
1208*38fd1498Szrj 		      && max_hdr[predn] != -1
1209*38fd1498Szrj 		      /* And pred and bb reside in the same loop.
1210*38fd1498Szrj 			 (Or out of any loop).  */
1211*38fd1498Szrj 		      && loop_hdr[bbn] == loop_hdr[predn])
1212*38fd1498Szrj 		    {
1213*38fd1498Szrj 		      if (hdr == -1)
1214*38fd1498Szrj 			/* Then bb extends the containing region of pred.  */
1215*38fd1498Szrj 			hdr = max_hdr[predn];
1216*38fd1498Szrj 		      else if (hdr != max_hdr[predn])
1217*38fd1498Szrj 			/* Too bad, there are at least two predecessors
1218*38fd1498Szrj 			   that reside in different regions.  Thus, BB should
1219*38fd1498Szrj 			   begin its own region.  */
1220*38fd1498Szrj 			{
1221*38fd1498Szrj 			  hdr = bbn;
1222*38fd1498Szrj 			  break;
1223*38fd1498Szrj 			}
1224*38fd1498Szrj 		    }
1225*38fd1498Szrj 		  else
1226*38fd1498Szrj 		    /* BB starts its own region.  */
1227*38fd1498Szrj 		    {
1228*38fd1498Szrj 		      hdr = bbn;
1229*38fd1498Szrj 		      break;
1230*38fd1498Szrj 		    }
1231*38fd1498Szrj 		}
1232*38fd1498Szrj 
1233*38fd1498Szrj 	      if (hdr == bbn)
1234*38fd1498Szrj 		{
1235*38fd1498Szrj 		  /* If BB starts its own region,
1236*38fd1498Szrj 		     update the set of headers with BB.  */
1237*38fd1498Szrj 		  bitmap_set_bit (header, bbn);
1238*38fd1498Szrj 		  rescan = 1;
1239*38fd1498Szrj 		}
1240*38fd1498Szrj 	      else
1241*38fd1498Szrj 		gcc_assert (hdr != -1);
1242*38fd1498Szrj 
1243*38fd1498Szrj 	      max_hdr[bbn] = hdr;
1244*38fd1498Szrj 	    }
1245*38fd1498Szrj 	}
1246*38fd1498Szrj 
1247*38fd1498Szrj       iter++;
1248*38fd1498Szrj     }
1249*38fd1498Szrj 
1250*38fd1498Szrj   /* Statistics were gathered on the SPEC2000 package of tests with
1251*38fd1498Szrj      mainline weekly snapshot gcc-4.1-20051015 on ia64.
1252*38fd1498Szrj 
1253*38fd1498Szrj      Statistics for SPECint:
1254*38fd1498Szrj      1 iteration : 1751 cases (38.7%)
1255*38fd1498Szrj      2 iterations: 2770 cases (61.3%)
1256*38fd1498Szrj      Blocks wrapped in regions by find_rgns without extension: 18295 blocks
1257*38fd1498Szrj      Blocks wrapped in regions by 2 iterations in extend_rgns: 23821 blocks
1258*38fd1498Szrj      (We don't count single block regions here).
1259*38fd1498Szrj 
1260*38fd1498Szrj      Statistics for SPECfp:
1261*38fd1498Szrj      1 iteration : 621 cases (35.9%)
1262*38fd1498Szrj      2 iterations: 1110 cases (64.1%)
1263*38fd1498Szrj      Blocks wrapped in regions by find_rgns without extension: 6476 blocks
1264*38fd1498Szrj      Blocks wrapped in regions by 2 iterations in extend_rgns: 11155 blocks
1265*38fd1498Szrj      (We don't count single block regions here).
1266*38fd1498Szrj 
1267*38fd1498Szrj      By default we do at most 2 iterations.
1268*38fd1498Szrj      This can be overridden with the max-sched-extend-regions-iters parameter:
1269*38fd1498Szrj      0 - disable region extension,
1270*38fd1498Szrj      N > 0 - do at most N iterations.  */
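
  /* Illustrative usage (command-line form inferred from the parameter
     name above):

       gcc -O2 -fschedule-insns --param max-sched-extend-regions-iters=0

     disables region extension entirely, while a larger value allows
     more fixed-point iterations of the loop above.  */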
1271*38fd1498Szrj 
1272*38fd1498Szrj   if (sched_verbose && iter != 0)
1273*38fd1498Szrj     fprintf (sched_dump, ";; Region extension iterations: %d%s\n", iter,
1274*38fd1498Szrj 	     rescan ? "... failed" : "");
1275*38fd1498Szrj 
1276*38fd1498Szrj   if (!rescan && iter != 0)
1277*38fd1498Szrj     {
1278*38fd1498Szrj       int *s1 = NULL, s1_sz = 0;
1279*38fd1498Szrj 
1280*38fd1498Szrj       /* Save the old statistics for later printout.  */
1281*38fd1498Szrj       if (sched_verbose >= 6)
1282*38fd1498Szrj 	s1_sz = gather_region_statistics (&s1);
1283*38fd1498Szrj 
1284*38fd1498Szrj       /* We have succeeded.  Now assemble the regions.  */
1285*38fd1498Szrj       for (i = nblocks - 1; i >= 0; i--)
1286*38fd1498Szrj 	{
1287*38fd1498Szrj 	  int bbn = order[i];
1288*38fd1498Szrj 
1289*38fd1498Szrj 	  if (max_hdr[bbn] == bbn)
1290*38fd1498Szrj 	    /* BBN is a region head.  */
1291*38fd1498Szrj 	    {
1292*38fd1498Szrj 	      edge e;
1293*38fd1498Szrj 	      edge_iterator ei;
1294*38fd1498Szrj 	      int num_bbs = 0, j, num_insns = 0, large;
1295*38fd1498Szrj 
1296*38fd1498Szrj 	      large = too_large (bbn, &num_bbs, &num_insns);
1297*38fd1498Szrj 
1298*38fd1498Szrj 	      degree[bbn] = -1;
1299*38fd1498Szrj 	      rgn_bb_table[idx] = bbn;
1300*38fd1498Szrj 	      RGN_BLOCKS (nr_regions) = idx++;
1301*38fd1498Szrj 	      RGN_DONT_CALC_DEPS (nr_regions) = 0;
1302*38fd1498Szrj 	      RGN_HAS_REAL_EBB (nr_regions) = 0;
1303*38fd1498Szrj 	      CONTAINING_RGN (bbn) = nr_regions;
1304*38fd1498Szrj 	      BLOCK_TO_BB (bbn) = 0;
1305*38fd1498Szrj 
1306*38fd1498Szrj 	      FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, bbn)->succs)
1307*38fd1498Szrj 		if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1308*38fd1498Szrj 		  degree[e->dest->index]--;
1309*38fd1498Szrj 
1310*38fd1498Szrj 	      if (!large)
1311*38fd1498Szrj 		/* Check whether the rest of the region is too_large.  */
1312*38fd1498Szrj 		for (j = i - 1; j >= 0; j--)
1313*38fd1498Szrj 		  {
1314*38fd1498Szrj 		    int succn = order[j];
1315*38fd1498Szrj 		    if (max_hdr[succn] == bbn)
1316*38fd1498Szrj 		      {
1317*38fd1498Szrj 			if ((large = too_large (succn, &num_bbs, &num_insns)))
1318*38fd1498Szrj 			  break;
1319*38fd1498Szrj 		      }
1320*38fd1498Szrj 		  }
1321*38fd1498Szrj 
1322*38fd1498Szrj 	      if (large)
1323*38fd1498Szrj 		/* If the region is too_large, then wrap every block of
1324*38fd1498Szrj 		   the region into a single block region.
1325*38fd1498Szrj 		   Here we wrap the region head only.  Other blocks are
1326*38fd1498Szrj 		   processed in the loop below.  */
1327*38fd1498Szrj 		{
1328*38fd1498Szrj 		  RGN_NR_BLOCKS (nr_regions) = 1;
1329*38fd1498Szrj 		  nr_regions++;
1330*38fd1498Szrj 		}
1331*38fd1498Szrj 
1332*38fd1498Szrj 	      num_bbs = 1;
1333*38fd1498Szrj 
1334*38fd1498Szrj 	      for (j = i - 1; j >= 0; j--)
1335*38fd1498Szrj 		{
1336*38fd1498Szrj 		  int succn = order[j];
1337*38fd1498Szrj 
1338*38fd1498Szrj 		  if (max_hdr[succn] == bbn)
1339*38fd1498Szrj 		    /* This loop iterates over all basic blocks that
1340*38fd1498Szrj 		       are supposed to be in the region with head BBN,
1341*38fd1498Szrj 		       and wraps them into that region (or into single
1342*38fd1498Szrj 		       block regions).  */
1343*38fd1498Szrj 		    {
1344*38fd1498Szrj 		      gcc_assert (degree[succn] == 0);
1345*38fd1498Szrj 
1346*38fd1498Szrj 		      degree[succn] = -1;
1347*38fd1498Szrj 		      rgn_bb_table[idx] = succn;
1348*38fd1498Szrj 		      BLOCK_TO_BB (succn) = large ? 0 : num_bbs++;
1349*38fd1498Szrj 		      CONTAINING_RGN (succn) = nr_regions;
1350*38fd1498Szrj 
1351*38fd1498Szrj 		      if (large)
1352*38fd1498Szrj 			/* Wrap SUCCN into single block region.  */
1353*38fd1498Szrj 			{
1354*38fd1498Szrj 			  RGN_BLOCKS (nr_regions) = idx;
1355*38fd1498Szrj 			  RGN_NR_BLOCKS (nr_regions) = 1;
1356*38fd1498Szrj 			  RGN_DONT_CALC_DEPS (nr_regions) = 0;
1357*38fd1498Szrj 			  RGN_HAS_REAL_EBB (nr_regions) = 0;
1358*38fd1498Szrj 			  nr_regions++;
1359*38fd1498Szrj 			}
1360*38fd1498Szrj 
1361*38fd1498Szrj 		      idx++;
1362*38fd1498Szrj 
1363*38fd1498Szrj 		      FOR_EACH_EDGE (e, ei,
1364*38fd1498Szrj 				     BASIC_BLOCK_FOR_FN (cfun, succn)->succs)
1365*38fd1498Szrj 			if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1366*38fd1498Szrj 			  degree[e->dest->index]--;
1367*38fd1498Szrj 		    }
1368*38fd1498Szrj 		}
1369*38fd1498Szrj 
1370*38fd1498Szrj 	      if (!large)
1371*38fd1498Szrj 		{
1372*38fd1498Szrj 		  RGN_NR_BLOCKS (nr_regions) = num_bbs;
1373*38fd1498Szrj 		  nr_regions++;
1374*38fd1498Szrj 		}
1375*38fd1498Szrj 	    }
1376*38fd1498Szrj 	}
1377*38fd1498Szrj 
1378*38fd1498Szrj       if (sched_verbose >= 6)
1379*38fd1498Szrj 	{
1380*38fd1498Szrj 	  int *s2, s2_sz;
1381*38fd1498Szrj 
1382*38fd1498Szrj           /* Get the new statistics and print the comparison with the
1383*38fd1498Szrj              one before calling this function.  */
1384*38fd1498Szrj 	  s2_sz = gather_region_statistics (&s2);
1385*38fd1498Szrj 	  print_region_statistics (s1, s1_sz, s2, s2_sz);
1386*38fd1498Szrj 	  free (s1);
1387*38fd1498Szrj 	  free (s2);
1388*38fd1498Szrj 	}
1389*38fd1498Szrj     }
1390*38fd1498Szrj 
1391*38fd1498Szrj   free (order);
1392*38fd1498Szrj   free (max_hdr);
1393*38fd1498Szrj 
1394*38fd1498Szrj   *idxp = idx;
1395*38fd1498Szrj }
1396*38fd1498Szrj 
1397*38fd1498Szrj /* Functions for regions scheduling information.  */
1398*38fd1498Szrj 
1399*38fd1498Szrj /* Compute dominators, probability, and potential-split-edges of bb.
1400*38fd1498Szrj    Assume that these values were already computed for bb's predecessors.  */
1401*38fd1498Szrj 
1402*38fd1498Szrj static void
1403*38fd1498Szrj compute_dom_prob_ps (int bb)
1404*38fd1498Szrj {
1405*38fd1498Szrj   edge_iterator in_ei;
1406*38fd1498Szrj   edge in_edge;
1407*38fd1498Szrj 
1408*38fd1498Szrj   /* We shouldn't have any real ebbs yet.  */
1409*38fd1498Szrj   gcc_assert (ebb_head [bb] == bb + current_blocks);
1410*38fd1498Szrj 
1411*38fd1498Szrj   if (IS_RGN_ENTRY (bb))
1412*38fd1498Szrj     {
1413*38fd1498Szrj       bitmap_set_bit (dom[bb], 0);
1414*38fd1498Szrj       prob[bb] = REG_BR_PROB_BASE;
1415*38fd1498Szrj       return;
1416*38fd1498Szrj     }
1417*38fd1498Szrj 
1418*38fd1498Szrj   prob[bb] = 0;
1419*38fd1498Szrj 
1420*38fd1498Szrj   /* Initialize dom[bb] to '111..1'.  */
1421*38fd1498Szrj   bitmap_ones (dom[bb]);
1422*38fd1498Szrj 
1423*38fd1498Szrj   FOR_EACH_EDGE (in_edge, in_ei,
1424*38fd1498Szrj 		 BASIC_BLOCK_FOR_FN (cfun, BB_TO_BLOCK (bb))->preds)
1425*38fd1498Szrj     {
1426*38fd1498Szrj       int pred_bb;
1427*38fd1498Szrj       edge out_edge;
1428*38fd1498Szrj       edge_iterator out_ei;
1429*38fd1498Szrj 
1430*38fd1498Szrj       if (in_edge->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1431*38fd1498Szrj 	continue;
1432*38fd1498Szrj 
1433*38fd1498Szrj       pred_bb = BLOCK_TO_BB (in_edge->src->index);
1434*38fd1498Szrj       bitmap_and (dom[bb], dom[bb], dom[pred_bb]);
1435*38fd1498Szrj       bitmap_ior (ancestor_edges[bb],
1436*38fd1498Szrj 		      ancestor_edges[bb], ancestor_edges[pred_bb]);
1437*38fd1498Szrj 
1438*38fd1498Szrj       bitmap_set_bit (ancestor_edges[bb], EDGE_TO_BIT (in_edge));
1439*38fd1498Szrj 
1440*38fd1498Szrj       bitmap_ior (pot_split[bb], pot_split[bb], pot_split[pred_bb]);
1441*38fd1498Szrj 
1442*38fd1498Szrj       FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs)
1443*38fd1498Szrj 	bitmap_set_bit (pot_split[bb], EDGE_TO_BIT (out_edge));
1444*38fd1498Szrj 
1445*38fd1498Szrj       prob[bb] += combine_probabilities
1446*38fd1498Szrj 		 (prob[pred_bb],
1447*38fd1498Szrj 		  in_edge->probability.initialized_p ()
1448*38fd1498Szrj 		  ? in_edge->probability.to_reg_br_prob_base ()
1449*38fd1498Szrj 		  : 0);
1450*38fd1498Szrj       // The rounding divide in combine_probabilities can result in an extra
1451*38fd1498Szrj       // probability increment propagating along 50-50 edges. Eventually when
1452*38fd1498Szrj       // the edges re-merge, the accumulated probability can go slightly above
1453*38fd1498Szrj       // REG_BR_PROB_BASE.
1454*38fd1498Szrj       if (prob[bb] > REG_BR_PROB_BASE)
1455*38fd1498Szrj         prob[bb] = REG_BR_PROB_BASE;
1456*38fd1498Szrj     }
1457*38fd1498Szrj 
1458*38fd1498Szrj   bitmap_set_bit (dom[bb], bb);
1459*38fd1498Szrj   bitmap_and_compl (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
1460*38fd1498Szrj 
1461*38fd1498Szrj   if (sched_verbose >= 2)
1462*38fd1498Szrj     fprintf (sched_dump, ";;  bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1463*38fd1498Szrj 	     (100 * prob[bb]) / REG_BR_PROB_BASE);
1464*38fd1498Szrj }
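
/* A numeric sketch (assuming REG_BR_PROB_BASE of 10000, its usual
   value, and illustrative inputs): if BB has two incoming edges, one
   from a predecessor with prob 10000 taken with probability 5000 and
   one from a predecessor with prob 5000 taken with probability 10000,
   the loop above accumulates
     combine_probabilities (10000, 5000)  == 5000
   + combine_probabilities (5000, 10000) == 5000
   so prob[bb] == 10000.  The clamp guards against the rounding divide
   pushing such sums slightly past REG_BR_PROB_BASE.  */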
1465*38fd1498Szrj 
1466*38fd1498Szrj /* Functions for target info.  */
1467*38fd1498Szrj 
1468*38fd1498Szrj /* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
1469*38fd1498Szrj    Note that bb_trg dominates bb_src.  */
1470*38fd1498Szrj 
1471*38fd1498Szrj static void
1472*38fd1498Szrj split_edges (int bb_src, int bb_trg, edgelst *bl)
1473*38fd1498Szrj {
1474*38fd1498Szrj   auto_sbitmap src (SBITMAP_SIZE (pot_split[bb_src]));
1475*38fd1498Szrj   bitmap_copy (src, pot_split[bb_src]);
1476*38fd1498Szrj 
1477*38fd1498Szrj   bitmap_and_compl (src, src, pot_split[bb_trg]);
1478*38fd1498Szrj   extract_edgelst (src, bl);
1479*38fd1498Szrj }
1480*38fd1498Szrj 
1481*38fd1498Szrj /* Find the valid candidate-source-blocks for the target block TRG, compute
1482*38fd1498Szrj    their probability, and check if they are speculative or not.
1483*38fd1498Szrj    For speculative sources, compute their update-blocks and split-blocks.  */
1484*38fd1498Szrj 
1485*38fd1498Szrj static void
1486*38fd1498Szrj compute_trg_info (int trg)
1487*38fd1498Szrj {
1488*38fd1498Szrj   candidate *sp;
1489*38fd1498Szrj   edgelst el = { NULL, 0 };
1490*38fd1498Szrj   int i, j, k, update_idx;
1491*38fd1498Szrj   basic_block block;
1492*38fd1498Szrj   edge_iterator ei;
1493*38fd1498Szrj   edge e;
1494*38fd1498Szrj 
1495*38fd1498Szrj   candidate_table = XNEWVEC (candidate, current_nr_blocks);
1496*38fd1498Szrj 
1497*38fd1498Szrj   bblst_last = 0;
1498*38fd1498Szrj   /* bblst_table holds split blocks and update blocks for each block after
1499*38fd1498Szrj      the current one in the region.  Split blocks and update blocks are
1500*38fd1498Szrj      the TO blocks of region edges, so there can be at most rgn_nr_edges
1501*38fd1498Szrj      of them.  */
1502*38fd1498Szrj   bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1503*38fd1498Szrj   bblst_table = XNEWVEC (basic_block, bblst_size);
1504*38fd1498Szrj 
1505*38fd1498Szrj   edgelst_last = 0;
1506*38fd1498Szrj   edgelst_table = XNEWVEC (edge, rgn_nr_edges);
1507*38fd1498Szrj 
1508*38fd1498Szrj   /* Define some of the fields for the target bb as well.  */
1509*38fd1498Szrj   sp = candidate_table + trg;
1510*38fd1498Szrj   sp->is_valid = 1;
1511*38fd1498Szrj   sp->is_speculative = 0;
1512*38fd1498Szrj   sp->src_prob = REG_BR_PROB_BASE;
1513*38fd1498Szrj 
1514*38fd1498Szrj   auto_sbitmap visited (last_basic_block_for_fn (cfun));
1515*38fd1498Szrj 
1516*38fd1498Szrj   for (i = trg + 1; i < current_nr_blocks; i++)
1517*38fd1498Szrj     {
1518*38fd1498Szrj       sp = candidate_table + i;
1519*38fd1498Szrj 
1520*38fd1498Szrj       sp->is_valid = IS_DOMINATED (i, trg);
1521*38fd1498Szrj       if (sp->is_valid)
1522*38fd1498Szrj 	{
1523*38fd1498Szrj 	  int tf = prob[trg], cf = prob[i];
1524*38fd1498Szrj 
1525*38fd1498Szrj 	  /* In CFGs with low-probability edges TF can possibly be zero.  */
1526*38fd1498Szrj 	  sp->src_prob = (tf ? GCOV_COMPUTE_SCALE (cf, tf) : 0);
1527*38fd1498Szrj 	  sp->is_valid = (sp->src_prob >= min_spec_prob);
1528*38fd1498Szrj 	}
1529*38fd1498Szrj 
1530*38fd1498Szrj       if (sp->is_valid)
1531*38fd1498Szrj 	{
1532*38fd1498Szrj 	  split_edges (i, trg, &el);
1533*38fd1498Szrj 	  sp->is_speculative = (el.nr_members) ? 1 : 0;
1534*38fd1498Szrj 	  if (sp->is_speculative && !flag_schedule_speculative)
1535*38fd1498Szrj 	    sp->is_valid = 0;
1536*38fd1498Szrj 	}
1537*38fd1498Szrj 
1538*38fd1498Szrj       if (sp->is_valid)
1539*38fd1498Szrj 	{
1540*38fd1498Szrj 	  /* Compute split blocks and store them in bblst_table.
1541*38fd1498Szrj 	     The TO block of every split edge is a split block.  */
1542*38fd1498Szrj 	  sp->split_bbs.first_member = &bblst_table[bblst_last];
1543*38fd1498Szrj 	  sp->split_bbs.nr_members = el.nr_members;
1544*38fd1498Szrj 	  for (j = 0; j < el.nr_members; bblst_last++, j++)
1545*38fd1498Szrj 	    bblst_table[bblst_last] = el.first_member[j]->dest;
1546*38fd1498Szrj 	  sp->update_bbs.first_member = &bblst_table[bblst_last];
1547*38fd1498Szrj 
1548*38fd1498Szrj 	  /* Compute update blocks and store them in bblst_table.
1549*38fd1498Szrj 	     For every split edge, look at the FROM block, and check
1550*38fd1498Szrj 	     all out edges.  For each out edge that is not a split edge,
1551*38fd1498Szrj 	     add the TO block to the update block list.  This list can end
1552*38fd1498Szrj 	     up with a lot of duplicates.  We need to weed them out to avoid
1553*38fd1498Szrj 	     overrunning the end of the bblst_table.  */
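	  /* Illustrative example (not from the original source): if the
	     only split edge is B1->B3 and B1 also branches to B2, then
	     B2 is recorded as an update block; an insn moved
	     speculatively past the split must have its destination
	     marked live at the start of B2 as well (see
	     update_live_1).  */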
1554*38fd1498Szrj 
1555*38fd1498Szrj 	  update_idx = 0;
1556*38fd1498Szrj 	  bitmap_clear (visited);
1557*38fd1498Szrj 	  for (j = 0; j < el.nr_members; j++)
1558*38fd1498Szrj 	    {
1559*38fd1498Szrj 	      block = el.first_member[j]->src;
1560*38fd1498Szrj 	      FOR_EACH_EDGE (e, ei, block->succs)
1561*38fd1498Szrj 		{
1562*38fd1498Szrj 		  if (!bitmap_bit_p (visited, e->dest->index))
1563*38fd1498Szrj 		    {
1564*38fd1498Szrj 		      for (k = 0; k < el.nr_members; k++)
1565*38fd1498Szrj 			if (e == el.first_member[k])
1566*38fd1498Szrj 			  break;
1567*38fd1498Szrj 
1568*38fd1498Szrj 		      if (k >= el.nr_members)
1569*38fd1498Szrj 			{
1570*38fd1498Szrj 			  bblst_table[bblst_last++] = e->dest;
1571*38fd1498Szrj 			  bitmap_set_bit (visited, e->dest->index);
1572*38fd1498Szrj 			  update_idx++;
1573*38fd1498Szrj 			}
1574*38fd1498Szrj 		    }
1575*38fd1498Szrj 		}
1576*38fd1498Szrj 	    }
1577*38fd1498Szrj 	  sp->update_bbs.nr_members = update_idx;
1578*38fd1498Szrj 
1579*38fd1498Szrj 	  /* Make sure we didn't overrun the end of bblst_table.  */
1580*38fd1498Szrj 	  gcc_assert (bblst_last <= bblst_size);
1581*38fd1498Szrj 	}
1582*38fd1498Szrj       else
1583*38fd1498Szrj 	{
1584*38fd1498Szrj 	  sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1585*38fd1498Szrj 
1586*38fd1498Szrj 	  sp->is_speculative = 0;
1587*38fd1498Szrj 	  sp->src_prob = 0;
1588*38fd1498Szrj 	}
1589*38fd1498Szrj     }
1590*38fd1498Szrj }
1591*38fd1498Szrj 
1592*38fd1498Szrj /* Free the computed target info.  */
1593*38fd1498Szrj static void
1594*38fd1498Szrj free_trg_info (void)
1595*38fd1498Szrj {
1596*38fd1498Szrj   free (candidate_table);
1597*38fd1498Szrj   free (bblst_table);
1598*38fd1498Szrj   free (edgelst_table);
1599*38fd1498Szrj }
1600*38fd1498Szrj 
1601*38fd1498Szrj /* Print candidates info, for debugging purposes.  Callable from debugger.  */
1602*38fd1498Szrj 
1603*38fd1498Szrj DEBUG_FUNCTION void
1604*38fd1498Szrj debug_candidate (int i)
1605*38fd1498Szrj {
1606*38fd1498Szrj   if (!candidate_table[i].is_valid)
1607*38fd1498Szrj     return;
1608*38fd1498Szrj 
1609*38fd1498Szrj   if (candidate_table[i].is_speculative)
1610*38fd1498Szrj     {
1611*38fd1498Szrj       int j;
1612*38fd1498Szrj       fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1613*38fd1498Szrj 
1614*38fd1498Szrj       fprintf (sched_dump, "split path: ");
1615*38fd1498Szrj       for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1616*38fd1498Szrj 	{
1617*38fd1498Szrj 	  int b = candidate_table[i].split_bbs.first_member[j]->index;
1618*38fd1498Szrj 
1619*38fd1498Szrj 	  fprintf (sched_dump, " %d ", b);
1620*38fd1498Szrj 	}
1621*38fd1498Szrj       fprintf (sched_dump, "\n");
1622*38fd1498Szrj 
1623*38fd1498Szrj       fprintf (sched_dump, "update path: ");
1624*38fd1498Szrj       for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1625*38fd1498Szrj 	{
1626*38fd1498Szrj 	  int b = candidate_table[i].update_bbs.first_member[j]->index;
1627*38fd1498Szrj 
1628*38fd1498Szrj 	  fprintf (sched_dump, " %d ", b);
1629*38fd1498Szrj 	}
1630*38fd1498Szrj       fprintf (sched_dump, "\n");
1631*38fd1498Szrj     }
1632*38fd1498Szrj   else
1633*38fd1498Szrj     {
1634*38fd1498Szrj       fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1635*38fd1498Szrj     }
1636*38fd1498Szrj }
1637*38fd1498Szrj 
1638*38fd1498Szrj /* Print candidates info, for debugging purposes.  Callable from debugger.  */
1639*38fd1498Szrj 
1640*38fd1498Szrj DEBUG_FUNCTION void
1641*38fd1498Szrj debug_candidates (int trg)
1642*38fd1498Szrj {
1643*38fd1498Szrj   int i;
1644*38fd1498Szrj 
1645*38fd1498Szrj   fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1646*38fd1498Szrj 	   BB_TO_BLOCK (trg), trg);
1647*38fd1498Szrj   for (i = trg + 1; i < current_nr_blocks; i++)
1648*38fd1498Szrj     debug_candidate (i);
1649*38fd1498Szrj }
1650*38fd1498Szrj 
1651*38fd1498Szrj /* Functions for speculative scheduling.  */
1652*38fd1498Szrj 
1653*38fd1498Szrj static bitmap_head not_in_df;
1654*38fd1498Szrj 
1655*38fd1498Szrj /* Return 0 if X sets a register that is live at the beginning of one
1656*38fd1498Szrj    of the split-blocks of SRC, otherwise return 1.  */
1657*38fd1498Szrj 
1658*38fd1498Szrj static int
1659*38fd1498Szrj check_live_1 (int src, rtx x)
1660*38fd1498Szrj {
1661*38fd1498Szrj   int i;
1662*38fd1498Szrj   int regno;
1663*38fd1498Szrj   rtx reg = SET_DEST (x);
1664*38fd1498Szrj 
1665*38fd1498Szrj   if (reg == 0)
1666*38fd1498Szrj     return 1;
1667*38fd1498Szrj 
1668*38fd1498Szrj   while (GET_CODE (reg) == SUBREG
1669*38fd1498Szrj 	 || GET_CODE (reg) == ZERO_EXTRACT
1670*38fd1498Szrj 	 || GET_CODE (reg) == STRICT_LOW_PART)
1671*38fd1498Szrj     reg = XEXP (reg, 0);
1672*38fd1498Szrj 
1673*38fd1498Szrj   if (GET_CODE (reg) == PARALLEL)
1674*38fd1498Szrj     {
1675*38fd1498Szrj       int i;
1676*38fd1498Szrj 
1677*38fd1498Szrj       for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1678*38fd1498Szrj 	if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1679*38fd1498Szrj 	  if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1680*38fd1498Szrj 	    return 1;
1681*38fd1498Szrj 
1682*38fd1498Szrj       return 0;
1683*38fd1498Szrj     }
1684*38fd1498Szrj 
1685*38fd1498Szrj   if (!REG_P (reg))
1686*38fd1498Szrj     return 1;
1687*38fd1498Szrj 
1688*38fd1498Szrj   regno = REGNO (reg);
1689*38fd1498Szrj 
1690*38fd1498Szrj   if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1691*38fd1498Szrj     {
1692*38fd1498Szrj       /* Global registers are assumed live.  */
1693*38fd1498Szrj       return 0;
1694*38fd1498Szrj     }
1695*38fd1498Szrj   else
1696*38fd1498Szrj     {
1697*38fd1498Szrj       if (regno < FIRST_PSEUDO_REGISTER)
1698*38fd1498Szrj 	{
1699*38fd1498Szrj 	  /* Check for hard registers.  */
1700*38fd1498Szrj 	  int j = REG_NREGS (reg);
1701*38fd1498Szrj 	  while (--j >= 0)
1702*38fd1498Szrj 	    {
1703*38fd1498Szrj 	      for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1704*38fd1498Szrj 		{
1705*38fd1498Szrj 		  basic_block b = candidate_table[src].split_bbs.first_member[i];
1706*38fd1498Szrj 		  int t = bitmap_bit_p (&not_in_df, b->index);
1707*38fd1498Szrj 
1708*38fd1498Szrj 		  /* We can have split blocks that were recently generated.
1709*38fd1498Szrj 		     Such blocks are always outside the current region.  */
1710*38fd1498Szrj 		  gcc_assert (!t || (CONTAINING_RGN (b->index)
1711*38fd1498Szrj 				     != CONTAINING_RGN (BB_TO_BLOCK (src))));
1712*38fd1498Szrj 
1713*38fd1498Szrj 		  if (t || REGNO_REG_SET_P (df_get_live_in (b), regno + j))
1714*38fd1498Szrj 		    return 0;
1715*38fd1498Szrj 		}
1716*38fd1498Szrj 	    }
1717*38fd1498Szrj 	}
1718*38fd1498Szrj       else
1719*38fd1498Szrj 	{
1720*38fd1498Szrj 	  /* Check for pseudo registers.  */
1721*38fd1498Szrj 	  for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1722*38fd1498Szrj 	    {
1723*38fd1498Szrj 	      basic_block b = candidate_table[src].split_bbs.first_member[i];
1724*38fd1498Szrj 	      int t = bitmap_bit_p (&not_in_df, b->index);
1725*38fd1498Szrj 
1726*38fd1498Szrj 	      gcc_assert (!t || (CONTAINING_RGN (b->index)
1727*38fd1498Szrj 				 != CONTAINING_RGN (BB_TO_BLOCK (src))));
1728*38fd1498Szrj 
1729*38fd1498Szrj 	      if (t || REGNO_REG_SET_P (df_get_live_in (b), regno))
1730*38fd1498Szrj 		return 0;
1731*38fd1498Szrj 	    }
1732*38fd1498Szrj 	}
1733*38fd1498Szrj     }
1734*38fd1498Szrj 
1735*38fd1498Szrj   return 1;
1736*38fd1498Szrj }
1737*38fd1498Szrj 
1738*38fd1498Szrj /* If X sets a register R, mark R as live at the beginning
1739*38fd1498Szrj    of every update-block of SRC.  */
1740*38fd1498Szrj 
1741*38fd1498Szrj static void
1742*38fd1498Szrj update_live_1 (int src, rtx x)
1743*38fd1498Szrj {
1744*38fd1498Szrj   int i;
1745*38fd1498Szrj   int regno;
1746*38fd1498Szrj   rtx reg = SET_DEST (x);
1747*38fd1498Szrj 
1748*38fd1498Szrj   if (reg == 0)
1749*38fd1498Szrj     return;
1750*38fd1498Szrj 
1751*38fd1498Szrj   while (GET_CODE (reg) == SUBREG
1752*38fd1498Szrj 	 || GET_CODE (reg) == ZERO_EXTRACT
1753*38fd1498Szrj 	 || GET_CODE (reg) == STRICT_LOW_PART)
1754*38fd1498Szrj     reg = XEXP (reg, 0);
1755*38fd1498Szrj 
1756*38fd1498Szrj   if (GET_CODE (reg) == PARALLEL)
1757*38fd1498Szrj     {
1758*38fd1498Szrj       int i;
1759*38fd1498Szrj 
1760*38fd1498Szrj       for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1761*38fd1498Szrj 	if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1762*38fd1498Szrj 	  update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1763*38fd1498Szrj 
1764*38fd1498Szrj       return;
1765*38fd1498Szrj     }
1766*38fd1498Szrj 
1767*38fd1498Szrj   if (!REG_P (reg))
1768*38fd1498Szrj     return;
1769*38fd1498Szrj 
1770*38fd1498Szrj   /* Global registers are always live, so the code below does not apply
1771*38fd1498Szrj      to them.  */
1772*38fd1498Szrj 
1773*38fd1498Szrj   regno = REGNO (reg);
1774*38fd1498Szrj 
1775*38fd1498Szrj   if (! HARD_REGISTER_NUM_P (regno)
1776*38fd1498Szrj       || !global_regs[regno])
1777*38fd1498Szrj     {
1778*38fd1498Szrj       for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1779*38fd1498Szrj 	{
1780*38fd1498Szrj 	  basic_block b = candidate_table[src].update_bbs.first_member[i];
1781*38fd1498Szrj 	  bitmap_set_range (df_get_live_in (b), regno, REG_NREGS (reg));
1782*38fd1498Szrj 	}
1783*38fd1498Szrj     }
1784*38fd1498Szrj }
1785*38fd1498Szrj 
1786*38fd1498Szrj /* Return 1 if insn can be speculatively moved from block src to trg,
1787*38fd1498Szrj    otherwise return 0.  Called before first insertion of insn to
1788*38fd1498Szrj    ready-list or before the scheduling.  */
1789*38fd1498Szrj 
1790*38fd1498Szrj static int
1791*38fd1498Szrj check_live (rtx_insn *insn, int src)
1792*38fd1498Szrj {
1793*38fd1498Szrj   /* Find the registers set by instruction.  */
1794*38fd1498Szrj   if (GET_CODE (PATTERN (insn)) == SET
1795*38fd1498Szrj       || GET_CODE (PATTERN (insn)) == CLOBBER)
1796*38fd1498Szrj     return check_live_1 (src, PATTERN (insn));
1797*38fd1498Szrj   else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1798*38fd1498Szrj     {
1799*38fd1498Szrj       int j;
1800*38fd1498Szrj       for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1801*38fd1498Szrj 	if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1802*38fd1498Szrj 	     || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1803*38fd1498Szrj 	    && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1804*38fd1498Szrj 	  return 0;
1805*38fd1498Szrj 
1806*38fd1498Szrj       return 1;
1807*38fd1498Szrj     }
1808*38fd1498Szrj 
1809*38fd1498Szrj   return 1;
1810*38fd1498Szrj }
1811*38fd1498Szrj 
1812*38fd1498Szrj /* Update the live registers info after insn was moved speculatively from
1813*38fd1498Szrj    block src to trg.  */
1814*38fd1498Szrj 
1815*38fd1498Szrj static void
1816*38fd1498Szrj update_live (rtx_insn *insn, int src)
1817*38fd1498Szrj {
1818*38fd1498Szrj   /* Find the registers set by instruction.  */
1819*38fd1498Szrj   if (GET_CODE (PATTERN (insn)) == SET
1820*38fd1498Szrj       || GET_CODE (PATTERN (insn)) == CLOBBER)
1821*38fd1498Szrj     update_live_1 (src, PATTERN (insn));
1822*38fd1498Szrj   else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1823*38fd1498Szrj     {
1824*38fd1498Szrj       int j;
1825*38fd1498Szrj       for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1826*38fd1498Szrj 	if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1827*38fd1498Szrj 	    || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1828*38fd1498Szrj 	  update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1829*38fd1498Szrj     }
1830*38fd1498Szrj }
1831*38fd1498Szrj 
1832*38fd1498Szrj /* Nonzero if block bb_to is equal to, or reachable from block bb_from.  */
1833*38fd1498Szrj #define IS_REACHABLE(bb_from, bb_to)					\
1834*38fd1498Szrj   (bb_from == bb_to							\
1835*38fd1498Szrj    || IS_RGN_ENTRY (bb_from)						\
1836*38fd1498Szrj    || (bitmap_bit_p (ancestor_edges[bb_to],					\
1837*38fd1498Szrj 	 EDGE_TO_BIT (single_pred_edge (BASIC_BLOCK_FOR_FN (cfun, \
1838*38fd1498Szrj 							    BB_TO_BLOCK (bb_from)))))))
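
/* An illustrative reading (not from the original source):
   IS_REACHABLE (bb, bb) is trivially nonzero; for a non-entry BB_FROM
   the macro asks whether BB_FROM's unique incoming edge is among the
   ancestor edges recorded for BB_TO by compute_dom_prob_ps, i.e.
   whether that edge lies on some path from the region entry down to
   BB_TO.  */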
1839*38fd1498Szrj 
1840*38fd1498Szrj /* Turns on the fed_by_spec_load flag for insns fed by load_insn.  */
1841*38fd1498Szrj 
1842*38fd1498Szrj static void
1843*38fd1498Szrj set_spec_fed (rtx load_insn)
1844*38fd1498Szrj {
1845*38fd1498Szrj   sd_iterator_def sd_it;
1846*38fd1498Szrj   dep_t dep;
1847*38fd1498Szrj 
1848*38fd1498Szrj   FOR_EACH_DEP (load_insn, SD_LIST_FORW, sd_it, dep)
1849*38fd1498Szrj     if (DEP_TYPE (dep) == REG_DEP_TRUE)
1850*38fd1498Szrj       FED_BY_SPEC_LOAD (DEP_CON (dep)) = 1;
1851*38fd1498Szrj }
1852*38fd1498Szrj 
1853*38fd1498Szrj /* On the path from the insn to load_insn_bb, find a conditional
1854*38fd1498Szrj    branch depending on insn that guards the speculative load.  */
1855*38fd1498Szrj 
1856*38fd1498Szrj static int
1857*38fd1498Szrj find_conditional_protection (rtx_insn *insn, int load_insn_bb)
1858*38fd1498Szrj {
1859*38fd1498Szrj   sd_iterator_def sd_it;
1860*38fd1498Szrj   dep_t dep;
1861*38fd1498Szrj 
1862*38fd1498Szrj   /* Iterate through DEF-USE forward dependences.  */
1863*38fd1498Szrj   FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
1864*38fd1498Szrj     {
1865*38fd1498Szrj       rtx_insn *next = DEP_CON (dep);
1866*38fd1498Szrj 
1867*38fd1498Szrj       if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1868*38fd1498Szrj 	   CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1869*38fd1498Szrj 	  && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1870*38fd1498Szrj 	  && load_insn_bb != INSN_BB (next)
1871*38fd1498Szrj 	  && DEP_TYPE (dep) == REG_DEP_TRUE
1872*38fd1498Szrj 	  && (JUMP_P (next)
1873*38fd1498Szrj 	      || find_conditional_protection (next, load_insn_bb)))
1874*38fd1498Szrj 	return 1;
1875*38fd1498Szrj     }
1876*38fd1498Szrj   return 0;
1877*38fd1498Szrj }				/* find_conditional_protection */
1878*38fd1498Szrj 
1879*38fd1498Szrj /* Returns 1 if some insn1 that participates in the computation
1880*38fd1498Szrj    of load_insn's address also feeds a conditional branch that
1881*38fd1498Szrj    guards load_insn.  This is true if we find two DEF-USE
1882*38fd1498Szrj    chains:
1883*38fd1498Szrj    insn1 -> ... -> conditional-branch
1884*38fd1498Szrj    insn1 -> ... -> load_insn,
1885*38fd1498Szrj    and if a flow path exists:
1886*38fd1498Szrj    insn1 -> ... -> conditional-branch -> ... -> load_insn,
1887*38fd1498Szrj    and if insn1 is on the path
1888*38fd1498Szrj    region-entry -> ... -> bb_trg -> ... load_insn.
1889*38fd1498Szrj 
1890*38fd1498Szrj    Locate insn1 by climbing on INSN_BACK_DEPS from load_insn.
1891*38fd1498Szrj    Locate the branch by following INSN_FORW_DEPS from insn1.  */
1892*38fd1498Szrj 
1893*38fd1498Szrj static int
1894*38fd1498Szrj is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1895*38fd1498Szrj {
1896*38fd1498Szrj   sd_iterator_def sd_it;
1897*38fd1498Szrj   dep_t dep;
1898*38fd1498Szrj 
1899*38fd1498Szrj   FOR_EACH_DEP (load_insn, SD_LIST_BACK, sd_it, dep)
1900*38fd1498Szrj     {
1901*38fd1498Szrj       rtx_insn *insn1 = DEP_PRO (dep);
1902*38fd1498Szrj 
1903*38fd1498Szrj       /* Must be a DEF-USE dependence upon a non-branch.  */
1904*38fd1498Szrj       if (DEP_TYPE (dep) != REG_DEP_TRUE
1905*38fd1498Szrj 	  || JUMP_P (insn1))
1906*38fd1498Szrj 	continue;
1907*38fd1498Szrj 
1908*38fd1498Szrj       /* A path must exist: region-entry -> ... -> bb_trg -> ... load_insn.  */
1909*38fd1498Szrj       if (INSN_BB (insn1) == bb_src
1910*38fd1498Szrj 	  || (CONTAINING_RGN (BLOCK_NUM (insn1))
1911*38fd1498Szrj 	      != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1912*38fd1498Szrj 	  || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1913*38fd1498Szrj 	      && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1914*38fd1498Szrj 	continue;
1915*38fd1498Szrj 
1916*38fd1498Szrj       /* Now search for the conditional-branch.  */
1917*38fd1498Szrj       if (find_conditional_protection (insn1, bb_src))
1918*38fd1498Szrj 	return 1;
1919*38fd1498Szrj 
1920*38fd1498Szrj       /* Recursive step: search another insn1, "above" current insn1.  */
1921*38fd1498Szrj       return is_conditionally_protected (insn1, bb_src, bb_trg);
1922*38fd1498Szrj     }
1923*38fd1498Szrj 
1924*38fd1498Szrj   /* The chain does not exist.  */
1925*38fd1498Szrj   return 0;
1926*38fd1498Szrj }				/* is_conditionally_protected */
1927*38fd1498Szrj 
1928*38fd1498Szrj /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1929*38fd1498Szrj    load_insn can move speculatively from bb_src to bb_trg.  All the
1930*38fd1498Szrj    following must hold:
1931*38fd1498Szrj 
1932*38fd1498Szrj    (1) both loads have 1 base register (PFREE_CANDIDATEs).
1933*38fd1498Szrj    (2) load_insn and load2 have a def-use dependence upon
1934*38fd1498Szrj    the same insn 'insn1'.
1935*38fd1498Szrj    (3) either load2 is in bb_trg, or:
1936*38fd1498Szrj    - there's only one split-block, and
1937*38fd1498Szrj    - load2 is on the escape path.
1938*38fd1498Szrj 
1939*38fd1498Szrj    From all these we can conclude that the two loads access memory
1940*38fd1498Szrj    addresses that differ at most by a constant, and hence if moving
1941*38fd1498Szrj    load_insn would cause an exception, it would have been caused by
1942*38fd1498Szrj    load2 anyhow.  */
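
/* An illustrative scenario (not from the original comment):

     insn1:     r1 = r2 + 8
     load2:     r3 = [r1]       in the lone split block (escape path)
     load_insn: r4 = [r1 + 4]   in bb_src

   Both loads are PFREE_CANDIDATEs fed by insn1, so their addresses
   differ only by a constant; if speculating load_insn into bb_trg
   could fault, load2 would have faulted on the escape path anyway.  */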
1943*38fd1498Szrj 
1944*38fd1498Szrj static int
1945*38fd1498Szrj is_pfree (rtx load_insn, int bb_src, int bb_trg)
1946*38fd1498Szrj {
1947*38fd1498Szrj   sd_iterator_def back_sd_it;
1948*38fd1498Szrj   dep_t back_dep;
1949*38fd1498Szrj   candidate *candp = candidate_table + bb_src;
1950*38fd1498Szrj 
1951*38fd1498Szrj   if (candp->split_bbs.nr_members != 1)
1952*38fd1498Szrj     /* Must have exactly one escape block.  */
1953*38fd1498Szrj     return 0;
1954*38fd1498Szrj 
1955*38fd1498Szrj   FOR_EACH_DEP (load_insn, SD_LIST_BACK, back_sd_it, back_dep)
1956*38fd1498Szrj     {
1957*38fd1498Szrj       rtx_insn *insn1 = DEP_PRO (back_dep);
1958*38fd1498Szrj 
1959*38fd1498Szrj       if (DEP_TYPE (back_dep) == REG_DEP_TRUE)
1960*38fd1498Szrj 	/* Found a DEF-USE dependence (insn1, load_insn).  */
1961*38fd1498Szrj 	{
1962*38fd1498Szrj 	  sd_iterator_def fore_sd_it;
1963*38fd1498Szrj 	  dep_t fore_dep;
1964*38fd1498Szrj 
1965*38fd1498Szrj 	  FOR_EACH_DEP (insn1, SD_LIST_FORW, fore_sd_it, fore_dep)
1966*38fd1498Szrj 	    {
1967*38fd1498Szrj 	      rtx_insn *insn2 = DEP_CON (fore_dep);
1968*38fd1498Szrj 
1969*38fd1498Szrj 	      if (DEP_TYPE (fore_dep) == REG_DEP_TRUE)
1970*38fd1498Szrj 		{
1971*38fd1498Szrj 		  /* Found a DEF-USE dependence (insn1, insn2).  */
1972*38fd1498Szrj 		  if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1973*38fd1498Szrj 		    /* insn2 not guaranteed to be a 1 base reg load.  */
1974*38fd1498Szrj 		    continue;
1975*38fd1498Szrj 
1976*38fd1498Szrj 		  if (INSN_BB (insn2) == bb_trg)
1977*38fd1498Szrj 		    /* insn2 is the similar load, in the target block.  */
1978*38fd1498Szrj 		    return 1;
1979*38fd1498Szrj 
1980*38fd1498Szrj 		  if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2))
1981*38fd1498Szrj 		    /* insn2 is a similar load, in a split-block.  */
1982*38fd1498Szrj 		    return 1;
1983*38fd1498Szrj 		}
1984*38fd1498Szrj 	    }
1985*38fd1498Szrj 	}
1986*38fd1498Szrj     }
1987*38fd1498Szrj 
1988*38fd1498Szrj   /* Couldn't find a similar load.  */
1989*38fd1498Szrj   return 0;
1990*38fd1498Szrj }				/* is_pfree */
1991*38fd1498Szrj 
1992*38fd1498Szrj /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1993*38fd1498Szrj    a load moved speculatively, or if load_insn is protected by
1994*38fd1498Szrj    a compare on load_insn's address).  */
1995*38fd1498Szrj 
1996*38fd1498Szrj static int
1997*38fd1498Szrj is_prisky (rtx load_insn, int bb_src, int bb_trg)
1998*38fd1498Szrj {
1999*38fd1498Szrj   if (FED_BY_SPEC_LOAD (load_insn))
2000*38fd1498Szrj     return 1;
2001*38fd1498Szrj 
2002*38fd1498Szrj   if (sd_lists_empty_p (load_insn, SD_LIST_BACK))
2003*38fd1498Szrj     /* Dependence may 'hide' out of the region.  */
2004*38fd1498Szrj     return 1;
2005*38fd1498Szrj 
2006*38fd1498Szrj   if (is_conditionally_protected (load_insn, bb_src, bb_trg))
2007*38fd1498Szrj     return 1;
2008*38fd1498Szrj 
2009*38fd1498Szrj   return 0;
2010*38fd1498Szrj }
2011*38fd1498Szrj 
2012*38fd1498Szrj /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
2013*38fd1498Szrj    Return 1 if insn is exception-free (and the motion is valid)
2014*38fd1498Szrj    and 0 otherwise.  */
2015*38fd1498Szrj 
2016*38fd1498Szrj static int
2017*38fd1498Szrj is_exception_free (rtx_insn *insn, int bb_src, int bb_trg)
2018*38fd1498Szrj {
2019*38fd1498Szrj   int insn_class = haifa_classify_insn (insn);
2020*38fd1498Szrj 
2021*38fd1498Szrj   /* Handle non-load insns.  */
2022*38fd1498Szrj   switch (insn_class)
2023*38fd1498Szrj     {
2024*38fd1498Szrj     case TRAP_FREE:
2025*38fd1498Szrj       return 1;
2026*38fd1498Szrj     case TRAP_RISKY:
2027*38fd1498Szrj       return 0;
2028*38fd1498Szrj     default:;
2029*38fd1498Szrj     }
2030*38fd1498Szrj 
2031*38fd1498Szrj   /* Handle loads.  */
2032*38fd1498Szrj   if (!flag_schedule_speculative_load)
2033*38fd1498Szrj     return 0;
2034*38fd1498Szrj   IS_LOAD_INSN (insn) = 1;
2035*38fd1498Szrj   switch (insn_class)
2036*38fd1498Szrj     {
2037*38fd1498Szrj     case IFREE:
2038*38fd1498Szrj       return 1;
2039*38fd1498Szrj     case IRISKY:
2040*38fd1498Szrj       return 0;
2041*38fd1498Szrj     case PFREE_CANDIDATE:
2042*38fd1498Szrj       if (is_pfree (insn, bb_src, bb_trg))
2043*38fd1498Szrj 	return 1;
2044*38fd1498Szrj       /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate.  */
2045*38fd1498Szrj       /* FALLTHRU */
2046*38fd1498Szrj     case PRISKY_CANDIDATE:
2047*38fd1498Szrj       if (!flag_schedule_speculative_load_dangerous
2048*38fd1498Szrj 	  || is_prisky (insn, bb_src, bb_trg))
2049*38fd1498Szrj 	return 0;
2050*38fd1498Szrj       break;
2051*38fd1498Szrj     default:;
2052*38fd1498Szrj     }
2053*38fd1498Szrj 
2054*38fd1498Szrj   return flag_schedule_speculative_load_dangerous;
2055*38fd1498Szrj }
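
/* Summary sketch of the classification above (flag spellings assumed
   to be the usual -fsched-spec-load and -fsched-spec-load-dangerous):

     TRAP_FREE         -> 1
     TRAP_RISKY        -> 0
     any load without -fsched-spec-load -> 0
     IFREE             -> 1
     IRISKY            -> 0
     PFREE_CANDIDATE   -> 1 if is_pfree, else handled as PRISKY_CANDIDATE
     PRISKY_CANDIDATE  -> 1 only with the dangerous flag and !is_prisky  */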
2056*38fd1498Szrj 
2057*38fd1498Szrj /* The number of insns from the current block scheduled so far.  */
2058*38fd1498Szrj static int sched_target_n_insns;
2059*38fd1498Szrj /* The number of insns from the current block to be scheduled in total.  */
2060*38fd1498Szrj static int target_n_insns;
2061*38fd1498Szrj /* The number of insns from the entire region scheduled so far.  */
2062*38fd1498Szrj static int sched_n_insns;
2063*38fd1498Szrj 
2064*38fd1498Szrj /* Implementations of the sched_info functions for region scheduling.  */
2065*38fd1498Szrj static void init_ready_list (void);
2066*38fd1498Szrj static int can_schedule_ready_p (rtx_insn *);
2067*38fd1498Szrj static void begin_schedule_ready (rtx_insn *);
2068*38fd1498Szrj static ds_t new_ready (rtx_insn *, ds_t);
2069*38fd1498Szrj static int schedule_more_p (void);
2070*38fd1498Szrj static const char *rgn_print_insn (const rtx_insn *, int);
2071*38fd1498Szrj static int rgn_rank (rtx_insn *, rtx_insn *);
2072*38fd1498Szrj static void compute_jump_reg_dependencies (rtx, regset);
2073*38fd1498Szrj 
2074*38fd1498Szrj /* Functions for speculative scheduling.  */
2075*38fd1498Szrj static void rgn_add_remove_insn (rtx_insn *, int);
2076*38fd1498Szrj static void rgn_add_block (basic_block, basic_block);
2077*38fd1498Szrj static void rgn_fix_recovery_cfg (int, int, int);
2078*38fd1498Szrj static basic_block advance_target_bb (basic_block, rtx_insn *);
2079*38fd1498Szrj 
2080*38fd1498Szrj /* Return nonzero if there are more insns that should be scheduled.  */
2081*38fd1498Szrj 
2082*38fd1498Szrj static int
2083*38fd1498Szrj schedule_more_p (void)
2084*38fd1498Szrj {
2085*38fd1498Szrj   return sched_target_n_insns < target_n_insns;
2086*38fd1498Szrj }
2087*38fd1498Szrj 
2088*38fd1498Szrj /* Add all insns that are initially ready to the ready list READY.  Called
2089*38fd1498Szrj    once before scheduling a set of insns.  */
2090*38fd1498Szrj 
2091*38fd1498Szrj static void
2092*38fd1498Szrj init_ready_list (void)
2093*38fd1498Szrj {
2094*38fd1498Szrj   rtx_insn *prev_head = current_sched_info->prev_head;
2095*38fd1498Szrj   rtx_insn *next_tail = current_sched_info->next_tail;
2096*38fd1498Szrj   int bb_src;
2097*38fd1498Szrj   rtx_insn *insn;
2098*38fd1498Szrj 
2099*38fd1498Szrj   target_n_insns = 0;
2100*38fd1498Szrj   sched_target_n_insns = 0;
2101*38fd1498Szrj   sched_n_insns = 0;
2102*38fd1498Szrj 
2103*38fd1498Szrj   /* Print debugging information.  */
2104*38fd1498Szrj   if (sched_verbose >= 5)
2105*38fd1498Szrj     debug_rgn_dependencies (target_bb);
2106*38fd1498Szrj 
2107*38fd1498Szrj   /* Prepare current target block info.  */
2108*38fd1498Szrj   if (current_nr_blocks > 1)
2109*38fd1498Szrj     compute_trg_info (target_bb);
2110*38fd1498Szrj 
2111*38fd1498Szrj   /* Initialize ready list with all 'ready' insns in target block.
2112*38fd1498Szrj      Count number of insns in the target block being scheduled.  */
2113*38fd1498Szrj   for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
2114*38fd1498Szrj     {
2115*38fd1498Szrj       gcc_assert (TODO_SPEC (insn) == HARD_DEP || TODO_SPEC (insn) == DEP_POSTPONED);
2116*38fd1498Szrj       TODO_SPEC (insn) = HARD_DEP;
2117*38fd1498Szrj       try_ready (insn);
2118*38fd1498Szrj       target_n_insns++;
2119*38fd1498Szrj 
2120*38fd1498Szrj       gcc_assert (!(TODO_SPEC (insn) & BEGIN_CONTROL));
2121*38fd1498Szrj     }
2122*38fd1498Szrj 
2123*38fd1498Szrj   /* Add to ready list all 'ready' insns in valid source blocks.
2124*38fd1498Szrj      For speculative insns, check-live, exception-free, and
2125*38fd1498Szrj      issue-delay.  */
2126*38fd1498Szrj   for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
2127*38fd1498Szrj     if (IS_VALID (bb_src))
2128*38fd1498Szrj       {
2129*38fd1498Szrj 	rtx_insn *src_head;
2130*38fd1498Szrj 	rtx_insn *src_next_tail;
2131*38fd1498Szrj 	rtx_insn *tail, *head;
2132*38fd1498Szrj 
2133*38fd1498Szrj 	get_ebb_head_tail (EBB_FIRST_BB (bb_src), EBB_LAST_BB (bb_src),
2134*38fd1498Szrj 			   &head, &tail);
2135*38fd1498Szrj 	src_next_tail = NEXT_INSN (tail);
2136*38fd1498Szrj 	src_head = head;
2137*38fd1498Szrj 
2138*38fd1498Szrj 	for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
2139*38fd1498Szrj 	  if (INSN_P (insn))
2140*38fd1498Szrj 	    {
2141*38fd1498Szrj 	      gcc_assert (TODO_SPEC (insn) == HARD_DEP || TODO_SPEC (insn) == DEP_POSTPONED);
2142*38fd1498Szrj 	      TODO_SPEC (insn) = HARD_DEP;
2143*38fd1498Szrj 	      try_ready (insn);
2144*38fd1498Szrj 	    }
2145*38fd1498Szrj       }
2146*38fd1498Szrj }
2147*38fd1498Szrj 
2148*38fd1498Szrj /* Called after taking INSN from the ready list.  Returns nonzero if this
2149*38fd1498Szrj    insn can be scheduled, zero if we should silently discard it.  */
2150*38fd1498Szrj 
2151*38fd1498Szrj static int
2152*38fd1498Szrj can_schedule_ready_p (rtx_insn *insn)
2153*38fd1498Szrj {
2154*38fd1498Szrj   /* An interblock motion?  */
2155*38fd1498Szrj   if (INSN_BB (insn) != target_bb && IS_SPECULATIVE_INSN (insn))
2156*38fd1498Szrj     {
2157*38fd1498Szrj       /* Cannot schedule this insn unless all operands are live.  */
2158*38fd1498Szrj       if (!check_live (insn, INSN_BB (insn)))
2159*38fd1498Szrj 	return 0;
2160*38fd1498Szrj 
2161*38fd1498Szrj       /* Should not move expensive instructions speculatively.  */
2162*38fd1498Szrj       if (GET_CODE (PATTERN (insn)) != CLOBBER
2163*38fd1498Szrj 	  && !targetm.sched.can_speculate_insn (insn))
2164*38fd1498Szrj 	return 0;
2165*38fd1498Szrj     }
2166*38fd1498Szrj 
2167*38fd1498Szrj   return 1;
2168*38fd1498Szrj }
2169*38fd1498Szrj 
2170*38fd1498Szrj /* Updates counters and other information.  Split from can_schedule_ready_p ()
2171*38fd1498Szrj    because when we schedule an insn speculatively, the insn passed to
2172*38fd1498Szrj    can_schedule_ready_p () differs from the one passed to
2173*38fd1498Szrj    begin_schedule_ready ().  */
2174*38fd1498Szrj static void
2175*38fd1498Szrj begin_schedule_ready (rtx_insn *insn)
2176*38fd1498Szrj {
2177*38fd1498Szrj   /* An interblock motion?  */
2178*38fd1498Szrj   if (INSN_BB (insn) != target_bb)
2179*38fd1498Szrj     {
2180*38fd1498Szrj       if (IS_SPECULATIVE_INSN (insn))
2181*38fd1498Szrj 	{
2182*38fd1498Szrj 	  gcc_assert (check_live (insn, INSN_BB (insn)));
2183*38fd1498Szrj 
2184*38fd1498Szrj 	  update_live (insn, INSN_BB (insn));
2185*38fd1498Szrj 
2186*38fd1498Szrj 	  /* For speculative load, mark insns fed by it.  */
2187*38fd1498Szrj 	  if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
2188*38fd1498Szrj 	    set_spec_fed (insn);
2189*38fd1498Szrj 
2190*38fd1498Szrj 	  nr_spec++;
2191*38fd1498Szrj 	}
2192*38fd1498Szrj       nr_inter++;
2193*38fd1498Szrj     }
2194*38fd1498Szrj   else
2195*38fd1498Szrj     {
2196*38fd1498Szrj       /* In block motion.  */
2197*38fd1498Szrj       sched_target_n_insns++;
2198*38fd1498Szrj     }
2199*38fd1498Szrj   sched_n_insns++;
2200*38fd1498Szrj }
2201*38fd1498Szrj 
2202*38fd1498Szrj /* Called after INSN has all its hard dependencies resolved and the speculation
2203*38fd1498Szrj    of type TS is enough to overcome them all.
2204*38fd1498Szrj    Return nonzero if it should be moved to the ready list or the queue, or zero
2205*38fd1498Szrj    if we should silently discard it.  */
2206*38fd1498Szrj static ds_t
2207*38fd1498Szrj new_ready (rtx_insn *next, ds_t ts)
2208*38fd1498Szrj {
2209*38fd1498Szrj   if (INSN_BB (next) != target_bb)
2210*38fd1498Szrj     {
2211*38fd1498Szrj       int not_ex_free = 0;
2212*38fd1498Szrj 
2213*38fd1498Szrj       /* For speculative insns, before inserting to ready/queue,
2214*38fd1498Szrj 	 check live, exception-free, and issue-delay.  */
2215*38fd1498Szrj       if (!IS_VALID (INSN_BB (next))
2216*38fd1498Szrj 	  || CANT_MOVE (next)
2217*38fd1498Szrj 	  || (IS_SPECULATIVE_INSN (next)
2218*38fd1498Szrj 	      && ((recog_memoized (next) >= 0
2219*38fd1498Szrj 		   && min_insn_conflict_delay (curr_state, next, next)
2220*38fd1498Szrj                    > PARAM_VALUE (PARAM_MAX_SCHED_INSN_CONFLICT_DELAY))
2221*38fd1498Szrj                   || IS_SPECULATION_CHECK_P (next)
2222*38fd1498Szrj 		  || !check_live (next, INSN_BB (next))
2223*38fd1498Szrj 		  || (not_ex_free = !is_exception_free (next, INSN_BB (next),
2224*38fd1498Szrj 							target_bb)))))
2225*38fd1498Szrj 	{
2226*38fd1498Szrj 	  if (not_ex_free
2227*38fd1498Szrj 	      /* We are here because is_exception_free () == false.
2228*38fd1498Szrj 		 But we possibly can handle that with control speculation.  */
2229*38fd1498Szrj 	      && sched_deps_info->generate_spec_deps
2230*38fd1498Szrj 	      && spec_info->mask & BEGIN_CONTROL)
2231*38fd1498Szrj 	    {
2232*38fd1498Szrj 	      ds_t new_ds;
2233*38fd1498Szrj 
2234*38fd1498Szrj 	      /* Add control speculation to NEXT's dependency type.  */
2235*38fd1498Szrj 	      new_ds = set_dep_weak (ts, BEGIN_CONTROL, MAX_DEP_WEAK);
2236*38fd1498Szrj 
2237*38fd1498Szrj 	      /* Check if NEXT can be speculated with new dependency type.  */
2238*38fd1498Szrj 	      if (sched_insn_is_legitimate_for_speculation_p (next, new_ds))
2239*38fd1498Szrj 		/* Here we got a new control-speculative instruction.  */
2240*38fd1498Szrj 		ts = new_ds;
2241*38fd1498Szrj 	      else
2242*38fd1498Szrj 		/* NEXT isn't ready yet.  */
2243*38fd1498Szrj 		ts = DEP_POSTPONED;
2244*38fd1498Szrj 	    }
2245*38fd1498Szrj 	  else
2246*38fd1498Szrj 	    /* NEXT isn't ready yet.  */
2247*38fd1498Szrj             ts = DEP_POSTPONED;
2248*38fd1498Szrj 	}
2249*38fd1498Szrj     }
2250*38fd1498Szrj 
2251*38fd1498Szrj   return ts;
2252*38fd1498Szrj }
2253*38fd1498Szrj 
2254*38fd1498Szrj /* Return a string that contains the insn uid and optionally anything else
2255*38fd1498Szrj    necessary to identify this insn in an output.  It's valid to use a
2256*38fd1498Szrj    static buffer for this.  The ALIGNED parameter should cause the string
2257*38fd1498Szrj    to be formatted so that multiple output lines will line up nicely.  */
2258*38fd1498Szrj 
2259*38fd1498Szrj static const char *
2260*38fd1498Szrj rgn_print_insn (const rtx_insn *insn, int aligned)
2261*38fd1498Szrj {
2262*38fd1498Szrj   static char tmp[80];
2263*38fd1498Szrj 
2264*38fd1498Szrj   if (aligned)
2265*38fd1498Szrj     sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
2266*38fd1498Szrj   else
2267*38fd1498Szrj     {
2268*38fd1498Szrj       if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
2269*38fd1498Szrj 	sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
2270*38fd1498Szrj       else
2271*38fd1498Szrj 	sprintf (tmp, "%d", INSN_UID (insn));
2272*38fd1498Szrj     }
2273*38fd1498Szrj   return tmp;
2274*38fd1498Szrj }
2275*38fd1498Szrj 
2276*38fd1498Szrj /* Compare priority of two insns.  Return a positive number if the second
2277*38fd1498Szrj    insn is to be preferred for scheduling, and a negative one if the first
2278*38fd1498Szrj    is to be preferred.  Zero if they are equally good.  */
2279*38fd1498Szrj 
2280*38fd1498Szrj static int
2281*38fd1498Szrj rgn_rank (rtx_insn *insn1, rtx_insn *insn2)
2282*38fd1498Szrj {
2283*38fd1498Szrj   /* Some comparisons make sense in interblock scheduling only.  */
2284*38fd1498Szrj   if (INSN_BB (insn1) != INSN_BB (insn2))
2285*38fd1498Szrj     {
2286*38fd1498Szrj       int spec_val, prob_val;
2287*38fd1498Szrj 
2288*38fd1498Szrj       /* Prefer an inblock motion over an interblock motion.  */
2289*38fd1498Szrj       if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2290*38fd1498Szrj 	return 1;
2291*38fd1498Szrj       if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2292*38fd1498Szrj 	return -1;
2293*38fd1498Szrj 
2294*38fd1498Szrj       /* Prefer a useful motion over a speculative one.  */
2295*38fd1498Szrj       spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2296*38fd1498Szrj       if (spec_val)
2297*38fd1498Szrj 	return spec_val;
2298*38fd1498Szrj 
2299*38fd1498Szrj       /* Prefer a more probable (speculative) insn.  */
2300*38fd1498Szrj       prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
2301*38fd1498Szrj       if (prob_val)
2302*38fd1498Szrj 	return prob_val;
2303*38fd1498Szrj     }
2304*38fd1498Szrj   return 0;
2305*38fd1498Szrj }
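
/* Example (illustrative): if insn1 is in the target block and insn2
   is a speculative candidate from a later block, the first test
   returns -1 and insn1 is preferred; between two speculative
   candidates, the one with the higher INSN_PROBABILITY wins.  */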
2306*38fd1498Szrj 
2307*38fd1498Szrj /* NEXT is an instruction that depends on INSN (a backward dependence);
2308*38fd1498Szrj    return nonzero if we should include this dependence in priority
2309*38fd1498Szrj    calculations.  */
2310*38fd1498Szrj 
2311*38fd1498Szrj int
2312*38fd1498Szrj contributes_to_priority (rtx_insn *next, rtx_insn *insn)
2313*38fd1498Szrj {
2314*38fd1498Szrj   /* NEXT and INSN reside in one ebb.  */
2315*38fd1498Szrj   return BLOCK_TO_BB (BLOCK_NUM (next)) == BLOCK_TO_BB (BLOCK_NUM (insn));
2316*38fd1498Szrj }
2317*38fd1498Szrj 
2318*38fd1498Szrj /* INSN is a JUMP_INSN.  Store the set of registers that must be
2319*38fd1498Szrj    considered as used by this jump in USED.  */
2320*38fd1498Szrj 
2321*38fd1498Szrj static void
2322*38fd1498Szrj compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
2323*38fd1498Szrj 			       regset used ATTRIBUTE_UNUSED)
2324*38fd1498Szrj {
2325*38fd1498Szrj   /* Nothing to do here, since we postprocess jumps in
2326*38fd1498Szrj      add_branch_dependences.  */
2327*38fd1498Szrj }
2328*38fd1498Szrj 
2329*38fd1498Szrj /* This variable holds common_sched_info hooks and data relevant to
2330*38fd1498Szrj    the interblock scheduler.  */
2331*38fd1498Szrj static struct common_sched_info_def rgn_common_sched_info;
2332*38fd1498Szrj 
2333*38fd1498Szrj 
2334*38fd1498Szrj /* This holds data for the dependence analysis relevant to
2335*38fd1498Szrj    the interblock scheduler.  */
2336*38fd1498Szrj static struct sched_deps_info_def rgn_sched_deps_info;
2337*38fd1498Szrj 
2338*38fd1498Szrj /* This holds constant data used for initializing the above structure
2339*38fd1498Szrj    for the Haifa scheduler.  */
2340*38fd1498Szrj static const struct sched_deps_info_def rgn_const_sched_deps_info =
2341*38fd1498Szrj   {
2342*38fd1498Szrj     compute_jump_reg_dependencies,
2343*38fd1498Szrj     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2344*38fd1498Szrj     0, 0, 0
2345*38fd1498Szrj   };
2346*38fd1498Szrj 
2347*38fd1498Szrj /* Same as above, but for the selective scheduler.  */
2348*38fd1498Szrj static const struct sched_deps_info_def rgn_const_sel_sched_deps_info =
2349*38fd1498Szrj   {
2350*38fd1498Szrj     compute_jump_reg_dependencies,
2351*38fd1498Szrj     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2352*38fd1498Szrj     0, 0, 0
2353*38fd1498Szrj   };
2354*38fd1498Szrj 
2355*38fd1498Szrj /* Return true if scheduling INSN will trigger finish of scheduling
2356*38fd1498Szrj    current block.  */
2357*38fd1498Szrj static bool
2358*38fd1498Szrj rgn_insn_finishes_block_p (rtx_insn *insn)
2359*38fd1498Szrj {
2360*38fd1498Szrj   if (INSN_BB (insn) == target_bb
2361*38fd1498Szrj       && sched_target_n_insns + 1 == target_n_insns)
2362*38fd1498Szrj     /* INSN is the last unscheduled instruction in the current block.  */
2363*38fd1498Szrj     return true;
2364*38fd1498Szrj 
2365*38fd1498Szrj   return false;
2366*38fd1498Szrj }
2367*38fd1498Szrj 
2368*38fd1498Szrj /* Used in schedule_insns to initialize current_sched_info for scheduling
2369*38fd1498Szrj    regions (or single basic blocks).  */
2370*38fd1498Szrj 
2371*38fd1498Szrj static const struct haifa_sched_info rgn_const_sched_info =
2372*38fd1498Szrj {
2373*38fd1498Szrj   init_ready_list,
2374*38fd1498Szrj   can_schedule_ready_p,
2375*38fd1498Szrj   schedule_more_p,
2376*38fd1498Szrj   new_ready,
2377*38fd1498Szrj   rgn_rank,
2378*38fd1498Szrj   rgn_print_insn,
2379*38fd1498Szrj   contributes_to_priority,
2380*38fd1498Szrj   rgn_insn_finishes_block_p,
2381*38fd1498Szrj 
2382*38fd1498Szrj   NULL, NULL,
2383*38fd1498Szrj   NULL, NULL,
2384*38fd1498Szrj   0, 0,
2385*38fd1498Szrj 
2386*38fd1498Szrj   rgn_add_remove_insn,
2387*38fd1498Szrj   begin_schedule_ready,
2388*38fd1498Szrj   NULL,
2389*38fd1498Szrj   advance_target_bb,
2390*38fd1498Szrj   NULL, NULL,
2391*38fd1498Szrj   SCHED_RGN
2392*38fd1498Szrj };
2393*38fd1498Szrj 
2394*38fd1498Szrj /* This variable holds the data and hooks needed to the Haifa scheduler backend
2395*38fd1498Szrj    for the interblock scheduler frontend.  */
2396*38fd1498Szrj static struct haifa_sched_info rgn_sched_info;
2397*38fd1498Szrj 
2398*38fd1498Szrj /* Returns maximum priority that an insn was assigned to.  */
2399*38fd1498Szrj 
2400*38fd1498Szrj int
2401*38fd1498Szrj get_rgn_sched_max_insns_priority (void)
2402*38fd1498Szrj {
2403*38fd1498Szrj   return rgn_sched_info.sched_max_insns_priority;
2404*38fd1498Szrj }
2405*38fd1498Szrj 
2406*38fd1498Szrj /* Determine if PAT sets a TARGET_CLASS_LIKELY_SPILLED_P register.  */
2407*38fd1498Szrj 
2408*38fd1498Szrj static bool
2409*38fd1498Szrj sets_likely_spilled (rtx pat)
2410*38fd1498Szrj {
2411*38fd1498Szrj   bool ret = false;
2412*38fd1498Szrj   note_stores (pat, sets_likely_spilled_1, &ret);
2413*38fd1498Szrj   return ret;
2414*38fd1498Szrj }
2415*38fd1498Szrj 
2416*38fd1498Szrj static void
2417*38fd1498Szrj sets_likely_spilled_1 (rtx x, const_rtx pat, void *data)
2418*38fd1498Szrj {
2419*38fd1498Szrj   bool *ret = (bool *) data;
2420*38fd1498Szrj 
2421*38fd1498Szrj   if (GET_CODE (pat) == SET
2422*38fd1498Szrj       && REG_P (x)
2423*38fd1498Szrj       && HARD_REGISTER_P (x)
2424*38fd1498Szrj       && targetm.class_likely_spilled_p (REGNO_REG_CLASS (REGNO (x))))
2425*38fd1498Szrj     *ret = true;
2426*38fd1498Szrj }
2427*38fd1498Szrj 
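/* Illustrative sketch (not part of this pass): the note_stores /
   sets_likely_spilled_1 pairing above is the classic C idiom of a
   walker invoking a callback with a user cookie, here used to thread a
   bool accumulator through a void * parameter.  A minimal standalone
   model, with hypothetical names:  */
#if 0
#include <stdbool.h>
#include <stddef.h>

/* Walk an array, invoking CB on every element with the user DATA.  */
static void
walk_ints (const int *v, size_t n, void (*cb) (int, void *), void *data)
{
  for (size_t i = 0; i < n; i++)
    cb (v[i], data);
}

/* Callback: latch *(bool *) DATA to true if X is negative.  */
static void
note_negative (int x, void *data)
{
  bool *ret = (bool *) data;
  if (x < 0)
    *ret = true;
}

/* Analogue of sets_likely_spilled: does any element match?  */
static bool
any_negative (const int *v, size_t n)
{
  bool ret = false;
  walk_ints (v, n, note_negative, &ret);
  return ret;
}
#endif
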
2428*38fd1498Szrj /* A bitmap to note insns that participate in any dependency.  Used in
2429*38fd1498Szrj    add_branch_dependences.  */
2430*38fd1498Szrj static sbitmap insn_referenced;
2431*38fd1498Szrj 
2432*38fd1498Szrj /* Add dependences so that branches are scheduled to run last in their
2433*38fd1498Szrj    block.  */
2434*38fd1498Szrj static void
2435*38fd1498Szrj add_branch_dependences (rtx_insn *head, rtx_insn *tail)
2436*38fd1498Szrj {
2437*38fd1498Szrj   rtx_insn *insn, *last;
2438*38fd1498Szrj 
2439*38fd1498Szrj   /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
2440*38fd1498Szrj      that can throw exceptions, force them to remain in order at the end of
2441*38fd1498Szrj      the block by adding dependencies and giving the last a high priority.
2442*38fd1498Szrj      There may be notes present, and prev_head may also be a note.
2443*38fd1498Szrj 
2444*38fd1498Szrj      Branches must obviously remain at the end.  Calls should remain at the
2445*38fd1498Szrj      end since moving them results in worse register allocation.  Uses remain
2446*38fd1498Szrj      at the end to ensure proper register allocation.
2447*38fd1498Szrj 
2448*38fd1498Szrj      cc0 setters remain at the end because they can't be moved away from
2449*38fd1498Szrj      their cc0 user.
2450*38fd1498Szrj 
2451*38fd1498Szrj      Predecessors of SCHED_GROUP_P instructions at the end remain at the end.
2452*38fd1498Szrj 
2453*38fd1498Szrj      COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).
2454*38fd1498Szrj 
2455*38fd1498Szrj      Insns setting TARGET_CLASS_LIKELY_SPILLED_P registers (usually return
2456*38fd1498Szrj      values) are not moved before reload because we can wind up with register
2457*38fd1498Szrj      allocation failures.  */
2458*38fd1498Szrj 
2459*38fd1498Szrj   while (tail != head && DEBUG_INSN_P (tail))
2460*38fd1498Szrj     tail = PREV_INSN (tail);
2461*38fd1498Szrj 
2462*38fd1498Szrj   insn = tail;
2463*38fd1498Szrj   last = 0;
2464*38fd1498Szrj   while (CALL_P (insn)
2465*38fd1498Szrj 	 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
2466*38fd1498Szrj 	 || (NONJUMP_INSN_P (insn)
2467*38fd1498Szrj 	     && (GET_CODE (PATTERN (insn)) == USE
2468*38fd1498Szrj 		 || GET_CODE (PATTERN (insn)) == CLOBBER
2469*38fd1498Szrj 		 || can_throw_internal (insn)
2470*38fd1498Szrj 		 || (HAVE_cc0 && sets_cc0_p (PATTERN (insn)))
2471*38fd1498Szrj 		 || (!reload_completed
2472*38fd1498Szrj 		     && sets_likely_spilled (PATTERN (insn)))))
2473*38fd1498Szrj 	 || NOTE_P (insn)
2474*38fd1498Szrj 	 || (last != 0 && SCHED_GROUP_P (last)))
2475*38fd1498Szrj     {
2476*38fd1498Szrj       if (!NOTE_P (insn))
2477*38fd1498Szrj 	{
2478*38fd1498Szrj 	  if (last != 0
2479*38fd1498Szrj 	      && sd_find_dep_between (insn, last, false) == NULL)
2480*38fd1498Szrj 	    {
2481*38fd1498Szrj 	      if (! sched_insns_conditions_mutex_p (last, insn))
2482*38fd1498Szrj 		add_dependence (last, insn, REG_DEP_ANTI);
2483*38fd1498Szrj 	      bitmap_set_bit (insn_referenced, INSN_LUID (insn));
2484*38fd1498Szrj 	    }
2485*38fd1498Szrj 
2486*38fd1498Szrj 	  CANT_MOVE (insn) = 1;
2487*38fd1498Szrj 
2488*38fd1498Szrj 	  last = insn;
2489*38fd1498Szrj 	}
2490*38fd1498Szrj 
2491*38fd1498Szrj       /* Don't overrun the bounds of the basic block.  */
2492*38fd1498Szrj       if (insn == head)
2493*38fd1498Szrj 	break;
2494*38fd1498Szrj 
2495*38fd1498Szrj       do
2496*38fd1498Szrj 	insn = PREV_INSN (insn);
2497*38fd1498Szrj       while (insn != head && DEBUG_INSN_P (insn));
2498*38fd1498Szrj     }
2499*38fd1498Szrj 
2500*38fd1498Szrj   /* Selective scheduling handles control dependencies by itself, and
2501*38fd1498Szrj      CANT_MOVE flags ensure that other insns will be kept in place.  */
2502*38fd1498Szrj   if (sel_sched_p ())
2503*38fd1498Szrj     return;
2504*38fd1498Szrj 
2505*38fd1498Szrj   /* Make sure these insns are scheduled last in their block.  */
2506*38fd1498Szrj   insn = last;
2507*38fd1498Szrj   if (insn != 0)
2508*38fd1498Szrj     while (insn != head)
2509*38fd1498Szrj       {
2510*38fd1498Szrj 	insn = prev_nonnote_insn (insn);
2511*38fd1498Szrj 
2512*38fd1498Szrj 	if (bitmap_bit_p (insn_referenced, INSN_LUID (insn))
2513*38fd1498Szrj 	    || DEBUG_INSN_P (insn))
2514*38fd1498Szrj 	  continue;
2515*38fd1498Szrj 
2516*38fd1498Szrj 	if (! sched_insns_conditions_mutex_p (last, insn))
2517*38fd1498Szrj 	  add_dependence (last, insn, REG_DEP_ANTI);
2518*38fd1498Szrj       }
2519*38fd1498Szrj 
2520*38fd1498Szrj   if (!targetm.have_conditional_execution ())
2521*38fd1498Szrj     return;
2522*38fd1498Szrj 
2523*38fd1498Szrj   /* Finally, if the block ends in a jump, and we are doing intra-block
2524*38fd1498Szrj      scheduling, make sure that the branch depends on any COND_EXEC insns
2525*38fd1498Szrj      inside the block to avoid moving the COND_EXECs past the branch insn.
2526*38fd1498Szrj 
2527*38fd1498Szrj      We only have to do this after reload, because (1) before reload there
2528*38fd1498Szrj      are no COND_EXEC insns, and (2) the region scheduler is an intra-block
2529*38fd1498Szrj      scheduler after reload.
2530*38fd1498Szrj 
2531*38fd1498Szrj      FIXME: We could in some cases move COND_EXEC insns past the branch if
2532*38fd1498Szrj      this scheduler would be a little smarter.  Consider this code:
2533*38fd1498Szrj 
2534*38fd1498Szrj 		T = [addr]
2535*38fd1498Szrj 	C  ?	addr += 4
2536*38fd1498Szrj 	!C ?	X += 12
2537*38fd1498Szrj 	C  ?	T += 1
2538*38fd1498Szrj 	C  ?	jump foo
2539*38fd1498Szrj 
2540*38fd1498Szrj      On a target with a one cycle stall on a memory access the optimal
2541*38fd1498Szrj      sequence would be:
2542*38fd1498Szrj 
2543*38fd1498Szrj 		T = [addr]
2544*38fd1498Szrj 	C  ?	addr += 4
2545*38fd1498Szrj 	C  ?	T += 1
2546*38fd1498Szrj 	C  ?	jump foo
2547*38fd1498Szrj 	!C ?	X += 12
2548*38fd1498Szrj 
2549*38fd1498Szrj      We don't want to put the 'X += 12' before the branch because it just
2550*38fd1498Szrj      wastes a cycle of execution time when the branch is taken.
2551*38fd1498Szrj 
2552*38fd1498Szrj      Note that in the example "!C" will always be true.  That is another
2553*38fd1498Szrj      possible improvement for handling COND_EXECs in this scheduler: it
2554*38fd1498Szrj      could remove always-true predicates.  */
2555*38fd1498Szrj 
2556*38fd1498Szrj   if (!reload_completed || ! (JUMP_P (tail) || JUMP_TABLE_DATA_P (tail)))
2557*38fd1498Szrj     return;
2558*38fd1498Szrj 
2559*38fd1498Szrj   insn = tail;
2560*38fd1498Szrj   while (insn != head)
2561*38fd1498Szrj     {
2562*38fd1498Szrj       insn = PREV_INSN (insn);
2563*38fd1498Szrj 
2564*38fd1498Szrj       /* Note that we want to add this dependency even when
2565*38fd1498Szrj 	 sched_insns_conditions_mutex_p returns true.  The whole point
2566*38fd1498Szrj 	 is that we _want_ this dependency, even if these insns really
2567*38fd1498Szrj 	 are independent.  */
2568*38fd1498Szrj       if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
2569*38fd1498Szrj 	add_dependence (tail, insn, REG_DEP_ANTI);
2570*38fd1498Szrj     }
2571*38fd1498Szrj }
2572*38fd1498Szrj 
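/* Illustrative sketch (not part of this pass): the first loop above
   walks backward from TAIL, linking each "must stay at the end" insn to
   the one after it with an anti-dependence.  For a block tail of the
   form

	insn1		(ordinary insn)
	use r0		(USE)
	jump L		(branch)

   the walk visits the jump first, then the USE, and the recorded
   anti-dependence makes the jump depend on the USE, so list scheduling
   cannot reorder them.  A standalone model over a doubly-linked insn
   list, with hypothetical names:  */
#if 0
struct toy_insn
{
  struct toy_insn *prev;
  int must_stay_at_end;		/* branch/call/use/clobber/...  */
  struct toy_insn *anti_dep;	/* insn that must be scheduled first  */
};

/* Chain the trailing must-stay insns of the block HEAD..TAIL.  */
static void
chain_block_tail (struct toy_insn *head, struct toy_insn *tail)
{
  struct toy_insn *insn = tail, *last = 0;

  while (insn && insn->must_stay_at_end)
    {
      /* LAST (the later insn) must wait for INSN, cf.
	 add_dependence (last, insn, REG_DEP_ANTI).  */
      if (last)
	last->anti_dep = insn;
      last = insn;
      if (insn == head)
	break;
      insn = insn->prev;
    }
}
#endif
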
2573*38fd1498Szrj /* Data structures for the computation of data dependences in a region.  We
2574*38fd1498Szrj    keep one `deps' structure for every basic block.  Before analyzing the
2575*38fd1498Szrj    data dependences for a bb, its variables are initialized as a function of
2576*38fd1498Szrj    the variables of its predecessors.  When the analysis for a bb completes,
2577*38fd1498Szrj    we save the contents to the corresponding bb_deps[bb] variable.  */
2578*38fd1498Szrj 
2579*38fd1498Szrj static struct deps_desc *bb_deps;
2580*38fd1498Szrj 
2581*38fd1498Szrj static void
2582*38fd1498Szrj concat_insn_mem_list (rtx_insn_list *copy_insns,
2583*38fd1498Szrj 		      rtx_expr_list *copy_mems,
2584*38fd1498Szrj 		      rtx_insn_list **old_insns_p,
2585*38fd1498Szrj 		      rtx_expr_list **old_mems_p)
2586*38fd1498Szrj {
2587*38fd1498Szrj   rtx_insn_list *new_insns = *old_insns_p;
2588*38fd1498Szrj   rtx_expr_list *new_mems = *old_mems_p;
2589*38fd1498Szrj 
2590*38fd1498Szrj   while (copy_insns)
2591*38fd1498Szrj     {
2592*38fd1498Szrj       new_insns = alloc_INSN_LIST (copy_insns->insn (), new_insns);
2593*38fd1498Szrj       new_mems = alloc_EXPR_LIST (VOIDmode, copy_mems->element (), new_mems);
2594*38fd1498Szrj       copy_insns = copy_insns->next ();
2595*38fd1498Szrj       copy_mems = copy_mems->next ();
2596*38fd1498Szrj     }
2597*38fd1498Szrj 
2598*38fd1498Szrj   *old_insns_p = new_insns;
2599*38fd1498Szrj   *old_mems_p = new_mems;
2600*38fd1498Szrj }
2601*38fd1498Szrj 
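/* Illustrative sketch (not part of this pass): concat_insn_mem_list
   copies two parallel lists by prepending each copied element onto the
   old heads, which reverses the copied prefix but keeps the insn and
   mem lists in lock step.  A standalone model for a single list, with
   hypothetical names:  */
#if 0
#include <stdlib.h>

struct node { int val; struct node *next; };

/* Prepend a copy of every element of COPY onto *OLD_P.  */
static void
concat_list (const struct node *copy, struct node **old_p)
{
  struct node *head = *old_p;

  for (; copy; copy = copy->next)
    {
      struct node *n = malloc (sizeof *n);
      n->val = copy->val;
      n->next = head;		/* prepend, like alloc_INSN_LIST  */
      head = n;
    }
  *old_p = head;
}
#endif
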
2602*38fd1498Szrj /* Join PRED_DEPS into SUCC_DEPS.  */
2603*38fd1498Szrj void
2604*38fd1498Szrj deps_join (struct deps_desc *succ_deps, struct deps_desc *pred_deps)
2605*38fd1498Szrj {
2606*38fd1498Szrj   unsigned reg;
2607*38fd1498Szrj   reg_set_iterator rsi;
2608*38fd1498Szrj 
2609*38fd1498Szrj   /* The reg_last lists are inherited by successor.  */
2610*38fd1498Szrj   EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi)
2611*38fd1498Szrj     {
2612*38fd1498Szrj       struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2613*38fd1498Szrj       struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2614*38fd1498Szrj 
2615*38fd1498Szrj       succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2616*38fd1498Szrj       succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2617*38fd1498Szrj       succ_rl->implicit_sets
2618*38fd1498Szrj 	= concat_INSN_LIST (pred_rl->implicit_sets, succ_rl->implicit_sets);
2619*38fd1498Szrj       succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2620*38fd1498Szrj                                             succ_rl->clobbers);
2621*38fd1498Szrj       succ_rl->uses_length += pred_rl->uses_length;
2622*38fd1498Szrj       succ_rl->clobbers_length += pred_rl->clobbers_length;
2623*38fd1498Szrj     }
2624*38fd1498Szrj   IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2625*38fd1498Szrj 
2626*38fd1498Szrj   /* Mem read/write lists are inherited by successor.  */
2627*38fd1498Szrj   concat_insn_mem_list (pred_deps->pending_read_insns,
2628*38fd1498Szrj                         pred_deps->pending_read_mems,
2629*38fd1498Szrj                         &succ_deps->pending_read_insns,
2630*38fd1498Szrj                         &succ_deps->pending_read_mems);
2631*38fd1498Szrj   concat_insn_mem_list (pred_deps->pending_write_insns,
2632*38fd1498Szrj                         pred_deps->pending_write_mems,
2633*38fd1498Szrj                         &succ_deps->pending_write_insns,
2634*38fd1498Szrj                         &succ_deps->pending_write_mems);
2635*38fd1498Szrj 
2636*38fd1498Szrj   succ_deps->pending_jump_insns
2637*38fd1498Szrj     = concat_INSN_LIST (pred_deps->pending_jump_insns,
2638*38fd1498Szrj                         succ_deps->pending_jump_insns);
2639*38fd1498Szrj   succ_deps->last_pending_memory_flush
2640*38fd1498Szrj     = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2641*38fd1498Szrj                         succ_deps->last_pending_memory_flush);
2642*38fd1498Szrj 
2643*38fd1498Szrj   succ_deps->pending_read_list_length += pred_deps->pending_read_list_length;
2644*38fd1498Szrj   succ_deps->pending_write_list_length += pred_deps->pending_write_list_length;
2645*38fd1498Szrj   succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2646*38fd1498Szrj 
2647*38fd1498Szrj   /* last_function_call is inherited by successor.  */
2648*38fd1498Szrj   succ_deps->last_function_call
2649*38fd1498Szrj     = concat_INSN_LIST (pred_deps->last_function_call,
2650*38fd1498Szrj                         succ_deps->last_function_call);
2651*38fd1498Szrj 
2652*38fd1498Szrj   /* last_function_call_may_noreturn is inherited by successor.  */
2653*38fd1498Szrj   succ_deps->last_function_call_may_noreturn
2654*38fd1498Szrj     = concat_INSN_LIST (pred_deps->last_function_call_may_noreturn,
2655*38fd1498Szrj                         succ_deps->last_function_call_may_noreturn);
2656*38fd1498Szrj 
2657*38fd1498Szrj   /* sched_before_next_call is inherited by successor.  */
2658*38fd1498Szrj   succ_deps->sched_before_next_call
2659*38fd1498Szrj     = concat_INSN_LIST (pred_deps->sched_before_next_call,
2660*38fd1498Szrj                         succ_deps->sched_before_next_call);
2661*38fd1498Szrj }
2662*38fd1498Szrj 
2663*38fd1498Szrj /* After computing the dependencies for block BB, propagate the dependencies
2664*38fd1498Szrj    found in PRED_DEPS to the successors of the block.  */
2665*38fd1498Szrj static void
2666*38fd1498Szrj propagate_deps (int bb, struct deps_desc *pred_deps)
2667*38fd1498Szrj {
2668*38fd1498Szrj   basic_block block = BASIC_BLOCK_FOR_FN (cfun, BB_TO_BLOCK (bb));
2669*38fd1498Szrj   edge_iterator ei;
2670*38fd1498Szrj   edge e;
2671*38fd1498Szrj 
2672*38fd1498Szrj   /* bb's structures are inherited by its successors.  */
2673*38fd1498Szrj   FOR_EACH_EDGE (e, ei, block->succs)
2674*38fd1498Szrj     {
2675*38fd1498Szrj       /* Only bbs "below" bb, in the same region, are interesting.  */
2676*38fd1498Szrj       if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2677*38fd1498Szrj 	  || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index)
2678*38fd1498Szrj 	  || BLOCK_TO_BB (e->dest->index) <= bb)
2679*38fd1498Szrj 	continue;
2680*38fd1498Szrj 
2681*38fd1498Szrj       deps_join (bb_deps + BLOCK_TO_BB (e->dest->index), pred_deps);
2682*38fd1498Szrj     }
2683*38fd1498Szrj 
2684*38fd1498Szrj   /* These lists should point to the right place, for correct
2685*38fd1498Szrj      freeing later.  */
2686*38fd1498Szrj   bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2687*38fd1498Szrj   bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2688*38fd1498Szrj   bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2689*38fd1498Szrj   bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2690*38fd1498Szrj   bb_deps[bb].pending_jump_insns = pred_deps->pending_jump_insns;
2691*38fd1498Szrj 
2692*38fd1498Szrj   /* Can't allow these to be freed twice.  */
2693*38fd1498Szrj   pred_deps->pending_read_insns = 0;
2694*38fd1498Szrj   pred_deps->pending_read_mems = 0;
2695*38fd1498Szrj   pred_deps->pending_write_insns = 0;
2696*38fd1498Szrj   pred_deps->pending_write_mems = 0;
2697*38fd1498Szrj   pred_deps->pending_jump_insns = 0;
2698*38fd1498Szrj }
2699*38fd1498Szrj 
2700*38fd1498Szrj /* Compute dependences inside bb.  In a multi-block region:
2701*38fd1498Szrj    (1) a bb is analyzed after its predecessors, and (2) the lists in
2702*38fd1498Szrj    effect at the end of bb (after analyzing for bb) are inherited by
2703*38fd1498Szrj    bb's successors.
2704*38fd1498Szrj 
2705*38fd1498Szrj    Specifically for reg-reg data dependences, the block insns are
2706*38fd1498Szrj    scanned by sched_analyze () top-to-bottom.  Three lists are
2707*38fd1498Szrj    maintained by sched_analyze (): reg_last[].sets for register DEFs,
2708*38fd1498Szrj    reg_last[].implicit_sets for implicit hard register DEFs, and
2709*38fd1498Szrj    reg_last[].uses for register USEs.
2710*38fd1498Szrj 
2711*38fd1498Szrj    When analysis is completed for bb, we update for its successors:
2712*38fd1498Szrj    ;  - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2713*38fd1498Szrj    ;  - IMPLICIT_DEFS[succ] = Union (IMPLICIT_DEFS [succ], IMPLICIT_DEFS [bb])
2714*38fd1498Szrj    ;  - USES[succ] = Union (USES [succ], USES [bb])
2715*38fd1498Szrj 
2716*38fd1498Szrj    The mechanism for computing mem-mem data dependence is very
2717*38fd1498Szrj    similar, and the result is interblock dependences in the region.  */
2718*38fd1498Szrj 
2719*38fd1498Szrj static void
2720*38fd1498Szrj compute_block_dependences (int bb)
2721*38fd1498Szrj {
2722*38fd1498Szrj   rtx_insn *head, *tail;
2723*38fd1498Szrj   struct deps_desc tmp_deps;
2724*38fd1498Szrj 
2725*38fd1498Szrj   tmp_deps = bb_deps[bb];
2726*38fd1498Szrj 
2727*38fd1498Szrj   /* Do the analysis for this block.  */
2728*38fd1498Szrj   gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2729*38fd1498Szrj   get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2730*38fd1498Szrj 
2731*38fd1498Szrj   sched_analyze (&tmp_deps, head, tail);
2732*38fd1498Szrj 
2733*38fd1498Szrj   add_branch_dependences (head, tail);
2734*38fd1498Szrj 
2735*38fd1498Szrj   if (current_nr_blocks > 1)
2736*38fd1498Szrj     propagate_deps (bb, &tmp_deps);
2737*38fd1498Szrj 
2738*38fd1498Szrj   /* Free up the INSN_LISTs.  */
2739*38fd1498Szrj   free_deps (&tmp_deps);
2740*38fd1498Szrj 
2741*38fd1498Szrj   if (targetm.sched.dependencies_evaluation_hook)
2742*38fd1498Szrj     targetm.sched.dependencies_evaluation_hook (head, tail);
2743*38fd1498Szrj }
2744*38fd1498Szrj 
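/* Illustrative sketch (not part of this pass): the propagation above is
   a forward union over the region's blocks, which are already numbered
   so that each block follows its in-region predecessors.  Modeling the
   DEFS/USES lists as bit masks, with hypothetical names:  */
#if 0
/* One analysis record per block.  */
struct toy_deps { unsigned defs; unsigned uses; };

/* The successor inherits the predecessor's lists, cf. deps_join.  */
static void
toy_deps_join (struct toy_deps *succ, const struct toy_deps *pred)
{
  succ->defs |= pred->defs;
  succ->uses |= pred->uses;
}

/* Analyze each block after its predecessors and push its final state
   to every in-region successor "below" it, cf. propagate_deps.
   EDGES[e][0] -> EDGES[e][1] are the region's edges.  */
static void
toy_compute_region (struct toy_deps *bb_deps, int nblocks,
		    const int (*edges)[2], int nedges)
{
  for (int bb = 0; bb < nblocks; bb++)
    for (int e = 0; e < nedges; e++)
      if (edges[e][0] == bb && edges[e][1] > bb)
	toy_deps_join (&bb_deps[edges[e][1]], &bb_deps[bb]);
}
#endif
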
2745*38fd1498Szrj /* Free dependencies of instructions inside BB.  */
2746*38fd1498Szrj static void
2747*38fd1498Szrj free_block_dependencies (int bb)
2748*38fd1498Szrj {
2749*38fd1498Szrj   rtx_insn *head;
2750*38fd1498Szrj   rtx_insn *tail;
2751*38fd1498Szrj 
2752*38fd1498Szrj   get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2753*38fd1498Szrj 
2754*38fd1498Szrj   if (no_real_insns_p (head, tail))
2755*38fd1498Szrj     return;
2756*38fd1498Szrj 
2757*38fd1498Szrj   sched_free_deps (head, tail, true);
2758*38fd1498Szrj }
2759*38fd1498Szrj 
2760*38fd1498Szrj /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2761*38fd1498Szrj    them to the unused_*_list variables, so that they can be reused.  */
2762*38fd1498Szrj 
2763*38fd1498Szrj static void
2764*38fd1498Szrj free_pending_lists (void)
2765*38fd1498Szrj {
2766*38fd1498Szrj   int bb;
2767*38fd1498Szrj 
2768*38fd1498Szrj   for (bb = 0; bb < current_nr_blocks; bb++)
2769*38fd1498Szrj     {
2770*38fd1498Szrj       free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2771*38fd1498Szrj       free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2772*38fd1498Szrj       free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2773*38fd1498Szrj       free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2774*38fd1498Szrj       free_INSN_LIST_list (&bb_deps[bb].pending_jump_insns);
2775*38fd1498Szrj     }
2776*38fd1498Szrj }
2777*38fd1498Szrj 
2778*38fd1498Szrj /* Print dependences for debugging starting from FROM_BB.
2779*38fd1498Szrj    Callable from debugger.  */
2782*38fd1498Szrj DEBUG_FUNCTION void
2783*38fd1498Szrj debug_rgn_dependencies (int from_bb)
2784*38fd1498Szrj {
2785*38fd1498Szrj   int bb;
2786*38fd1498Szrj 
2787*38fd1498Szrj   fprintf (sched_dump,
2788*38fd1498Szrj 	   ";;   --------------- forward dependences: ------------ \n");
2789*38fd1498Szrj 
2790*38fd1498Szrj   for (bb = from_bb; bb < current_nr_blocks; bb++)
2791*38fd1498Szrj     {
2792*38fd1498Szrj       rtx_insn *head, *tail;
2793*38fd1498Szrj 
2794*38fd1498Szrj       get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2795*38fd1498Szrj       fprintf (sched_dump, "\n;;   --- Region Dependences --- b %d bb %d \n",
2796*38fd1498Szrj 	       BB_TO_BLOCK (bb), bb);
2797*38fd1498Szrj 
2798*38fd1498Szrj       debug_dependencies (head, tail);
2799*38fd1498Szrj     }
2800*38fd1498Szrj }
2801*38fd1498Szrj 
2802*38fd1498Szrj /* Print dependencies information for instructions between HEAD and TAIL.
2803*38fd1498Szrj    ??? This function would probably fit best in haifa-sched.c.  */
2804*38fd1498Szrj void debug_dependencies (rtx_insn *head, rtx_insn *tail)
2805*38fd1498Szrj {
2806*38fd1498Szrj   rtx_insn *insn;
2807*38fd1498Szrj   rtx_insn *next_tail = NEXT_INSN (tail);
2808*38fd1498Szrj 
2809*38fd1498Szrj   fprintf (sched_dump, ";;   %7s%6s%6s%6s%6s%6s%14s\n",
2810*38fd1498Szrj 	   "insn", "code", "bb", "dep", "prio", "cost",
2811*38fd1498Szrj 	   "reservation");
2812*38fd1498Szrj   fprintf (sched_dump, ";;   %7s%6s%6s%6s%6s%6s%14s\n",
2813*38fd1498Szrj 	   "----", "----", "--", "---", "----", "----",
2814*38fd1498Szrj 	   "-----------");
2815*38fd1498Szrj 
2816*38fd1498Szrj   for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2817*38fd1498Szrj     {
2818*38fd1498Szrj       if (! INSN_P (insn))
2819*38fd1498Szrj 	{
2820*38fd1498Szrj 	  int n;
2821*38fd1498Szrj 	  fprintf (sched_dump, ";;   %6d ", INSN_UID (insn));
2822*38fd1498Szrj 	  if (NOTE_P (insn))
2823*38fd1498Szrj 	    {
2824*38fd1498Szrj 	      n = NOTE_KIND (insn);
2825*38fd1498Szrj 	      fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2826*38fd1498Szrj 	    }
2827*38fd1498Szrj 	  else
2828*38fd1498Szrj 	    fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2829*38fd1498Szrj 	  continue;
2830*38fd1498Szrj 	}
2831*38fd1498Szrj 
2832*38fd1498Szrj       fprintf (sched_dump,
2833*38fd1498Szrj 	       ";;   %s%5d%6d%6d%6d%6d%6d   ",
2834*38fd1498Szrj 	       (SCHED_GROUP_P (insn) ? "+" : " "),
2835*38fd1498Szrj 	       INSN_UID (insn),
2836*38fd1498Szrj 	       INSN_CODE (insn),
2837*38fd1498Szrj 	       BLOCK_NUM (insn),
2838*38fd1498Szrj 	       sched_emulate_haifa_p ? -1 : sd_lists_size (insn, SD_LIST_BACK),
2839*38fd1498Szrj 	       (sel_sched_p () ? (sched_emulate_haifa_p ? -1
2840*38fd1498Szrj 			       : INSN_PRIORITY (insn))
2841*38fd1498Szrj 		: INSN_PRIORITY (insn)),
2842*38fd1498Szrj 	       (sel_sched_p () ? (sched_emulate_haifa_p ? -1
2843*38fd1498Szrj 			       : insn_sched_cost (insn))
2844*38fd1498Szrj 		: insn_sched_cost (insn)));
2845*38fd1498Szrj 
2846*38fd1498Szrj       if (recog_memoized (insn) < 0)
2847*38fd1498Szrj 	fprintf (sched_dump, "nothing");
2848*38fd1498Szrj       else
2849*38fd1498Szrj 	print_reservation (sched_dump, insn);
2850*38fd1498Szrj 
2851*38fd1498Szrj       fprintf (sched_dump, "\t: ");
2852*38fd1498Szrj       {
2853*38fd1498Szrj 	sd_iterator_def sd_it;
2854*38fd1498Szrj 	dep_t dep;
2855*38fd1498Szrj 
2856*38fd1498Szrj 	FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
2857*38fd1498Szrj 	  fprintf (sched_dump, "%d%s%s ", INSN_UID (DEP_CON (dep)),
2858*38fd1498Szrj 		   DEP_NONREG (dep) ? "n" : "",
2859*38fd1498Szrj 		   DEP_MULTIPLE (dep) ? "m" : "");
2860*38fd1498Szrj       }
2861*38fd1498Szrj       fprintf (sched_dump, "\n");
2862*38fd1498Szrj     }
2863*38fd1498Szrj 
2864*38fd1498Szrj   fprintf (sched_dump, "\n");
2865*38fd1498Szrj }
2866*38fd1498Szrj 
2867*38fd1498Szrj /* Dump dependency graph for the current region to a file using dot syntax.  */
2868*38fd1498Szrj 
2869*38fd1498Szrj void
2870*38fd1498Szrj dump_rgn_dependencies_dot (FILE *file)
2871*38fd1498Szrj {
2872*38fd1498Szrj   rtx_insn *head, *tail, *con, *pro;
2873*38fd1498Szrj   sd_iterator_def sd_it;
2874*38fd1498Szrj   dep_t dep;
2875*38fd1498Szrj   int bb;
2876*38fd1498Szrj   pretty_printer pp;
2877*38fd1498Szrj 
2878*38fd1498Szrj   pp.buffer->stream = file;
2879*38fd1498Szrj   pp_printf (&pp, "digraph SchedDG {\n");
2880*38fd1498Szrj 
2881*38fd1498Szrj   for (bb = 0; bb < current_nr_blocks; ++bb)
2882*38fd1498Szrj     {
2883*38fd1498Szrj       /* Begin subgraph (basic block).  */
2884*38fd1498Szrj       pp_printf (&pp, "subgraph cluster_block_%d {\n", bb);
2885*38fd1498Szrj       pp_printf (&pp, "\t" "color=blue;" "\n");
2886*38fd1498Szrj       pp_printf (&pp, "\t" "style=bold;" "\n");
2887*38fd1498Szrj       pp_printf (&pp, "\t" "label=\"BB #%d\";\n", BB_TO_BLOCK (bb));
2888*38fd1498Szrj 
2889*38fd1498Szrj       /* Set up head and tail (no support for EBBs).  */
2890*38fd1498Szrj       gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2891*38fd1498Szrj       get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2892*38fd1498Szrj       tail = NEXT_INSN (tail);
2893*38fd1498Szrj 
2894*38fd1498Szrj       /* Dump all insns.  */
2895*38fd1498Szrj       for (con = head; con != tail; con = NEXT_INSN (con))
2896*38fd1498Szrj 	{
2897*38fd1498Szrj 	  if (!INSN_P (con))
2898*38fd1498Szrj 	    continue;
2899*38fd1498Szrj 
2900*38fd1498Szrj 	  /* Pretty print the insn.  */
2901*38fd1498Szrj 	  pp_printf (&pp, "\t%d [label=\"{", INSN_UID (con));
2902*38fd1498Szrj 	  pp_write_text_to_stream (&pp);
2903*38fd1498Szrj 	  print_insn (&pp, con, /*verbose=*/false);
2904*38fd1498Szrj 	  pp_write_text_as_dot_label_to_stream (&pp, /*for_record=*/true);
2905*38fd1498Szrj 	  pp_write_text_to_stream (&pp);
2906*38fd1498Szrj 
2907*38fd1498Szrj 	  /* Dump instruction attributes.  */
2908*38fd1498Szrj 	  pp_printf (&pp, "|{ uid:%d | luid:%d | prio:%d }}\",shape=record]\n",
2909*38fd1498Szrj 		     INSN_UID (con), INSN_LUID (con), INSN_PRIORITY (con));
2910*38fd1498Szrj 
2911*38fd1498Szrj 	  /* Dump all deps.  */
2912*38fd1498Szrj 	  FOR_EACH_DEP (con, SD_LIST_BACK, sd_it, dep)
2913*38fd1498Szrj 	    {
2914*38fd1498Szrj 	      int weight = 0;
2915*38fd1498Szrj 	      const char *color;
2916*38fd1498Szrj 	      pro = DEP_PRO (dep);
2917*38fd1498Szrj 
2918*38fd1498Szrj 	      switch (DEP_TYPE (dep))
2919*38fd1498Szrj 		{
2920*38fd1498Szrj 		case REG_DEP_TRUE:
2921*38fd1498Szrj 		  color = "black";
2922*38fd1498Szrj 		  weight = 1;
2923*38fd1498Szrj 		  break;
2924*38fd1498Szrj 		case REG_DEP_OUTPUT:
2925*38fd1498Szrj 		case REG_DEP_ANTI:
2926*38fd1498Szrj 		  color = "orange";
2927*38fd1498Szrj 		  break;
2928*38fd1498Szrj 		case REG_DEP_CONTROL:
2929*38fd1498Szrj 		  color = "blue";
2930*38fd1498Szrj 		  break;
2931*38fd1498Szrj 		default:
2932*38fd1498Szrj 		  gcc_unreachable ();
2933*38fd1498Szrj 		}
2934*38fd1498Szrj 
2935*38fd1498Szrj 	      pp_printf (&pp, "\t%d -> %d [color=%s",
2936*38fd1498Szrj 			 INSN_UID (pro), INSN_UID (con), color);
2937*38fd1498Szrj 	      if (int cost = dep_cost (dep))
2938*38fd1498Szrj 		pp_printf (&pp, ",label=%d", cost);
2939*38fd1498Szrj 	      pp_printf (&pp, ",weight=%d", weight);
2940*38fd1498Szrj 	      pp_printf (&pp, "];\n");
2941*38fd1498Szrj 	    }
2942*38fd1498Szrj 	}
2943*38fd1498Szrj       pp_printf (&pp, "}\n");
2944*38fd1498Szrj     }
2945*38fd1498Szrj 
2946*38fd1498Szrj   pp_printf (&pp, "}\n");
2947*38fd1498Szrj   pp_flush (&pp);
2948*38fd1498Szrj }
2949*38fd1498Szrj 
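/* For reference, the text emitted above for a one-block region holding
   two dependent insns has roughly this shape (uids, insn labels and the
   cost are hypothetical):

   digraph SchedDG {
   subgraph cluster_block_0 {
	color=blue;
	style=bold;
	label="BB #2";
	10 [label="{r0=r1+r2|{ uid:10 | luid:1 | prio:2 }}",shape=record]
	11 [label="{r3=r0+r0|{ uid:11 | luid:2 | prio:1 }}",shape=record]
	10 -> 11 [color=black,label=1,weight=1];
   }
   }  */
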
2950*38fd1498Szrj /* Dump dependency graph for the current region to a file using dot syntax.  */
2951*38fd1498Szrj 
2952*38fd1498Szrj DEBUG_FUNCTION void
2953*38fd1498Szrj dump_rgn_dependencies_dot (const char *fname)
2954*38fd1498Szrj {
2955*38fd1498Szrj   FILE *fp;
2956*38fd1498Szrj 
2957*38fd1498Szrj   fp = fopen (fname, "w");
2958*38fd1498Szrj   if (!fp)
2959*38fd1498Szrj     {
2960*38fd1498Szrj       perror ("fopen");
2961*38fd1498Szrj       return;
2962*38fd1498Szrj     }
2963*38fd1498Szrj 
2964*38fd1498Szrj   dump_rgn_dependencies_dot (fp);
2965*38fd1498Szrj   fclose (fp);
2966*38fd1498Szrj }
2967*38fd1498Szrj 
2968*38fd1498Szrj 
2969*38fd1498Szrj /* Return true if every basic block of the current region has the
2970*38fd1498Szrj    BB_DISABLE_SCHEDULE flag set, i.e. the region must not be scheduled.  */
2971*38fd1498Szrj bool
2972*38fd1498Szrj sched_is_disabled_for_current_region_p (void)
2973*38fd1498Szrj {
2974*38fd1498Szrj   int bb;
2975*38fd1498Szrj 
2976*38fd1498Szrj   for (bb = 0; bb < current_nr_blocks; bb++)
2977*38fd1498Szrj     if (!(BASIC_BLOCK_FOR_FN (cfun,
2978*38fd1498Szrj 			      BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
2979*38fd1498Szrj       return false;
2980*38fd1498Szrj 
2981*38fd1498Szrj   return true;
2982*38fd1498Szrj }
2983*38fd1498Szrj 
2984*38fd1498Szrj /* Free all region dependencies saved in INSN_BACK_DEPS and
2985*38fd1498Szrj    INSN_RESOLVED_BACK_DEPS.  The Haifa scheduler does this on the fly
2986*38fd1498Szrj    when scheduling, so this function is supposed to be called from
2987*38fd1498Szrj    the selective scheduling only.  */
2988*38fd1498Szrj void
2989*38fd1498Szrj free_rgn_deps (void)
2990*38fd1498Szrj {
2991*38fd1498Szrj   int bb;
2992*38fd1498Szrj 
2993*38fd1498Szrj   for (bb = 0; bb < current_nr_blocks; bb++)
2994*38fd1498Szrj     {
2995*38fd1498Szrj       rtx_insn *head, *tail;
2996*38fd1498Szrj 
2997*38fd1498Szrj       gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2998*38fd1498Szrj       get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2999*38fd1498Szrj 
3000*38fd1498Szrj       sched_free_deps (head, tail, false);
3001*38fd1498Szrj     }
3002*38fd1498Szrj }
3003*38fd1498Szrj 
3004*38fd1498Szrj static int rgn_n_insns;
3005*38fd1498Szrj 
3006*38fd1498Szrj /* Compute insn priority for a current region.  */
3007*38fd1498Szrj void
3008*38fd1498Szrj compute_priorities (void)
3009*38fd1498Szrj {
3010*38fd1498Szrj   int bb;
3011*38fd1498Szrj 
3012*38fd1498Szrj   current_sched_info->sched_max_insns_priority = 0;
3013*38fd1498Szrj   for (bb = 0; bb < current_nr_blocks; bb++)
3014*38fd1498Szrj     {
3015*38fd1498Szrj       rtx_insn *head, *tail;
3016*38fd1498Szrj 
3017*38fd1498Szrj       gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
3018*38fd1498Szrj       get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
3019*38fd1498Szrj 
3020*38fd1498Szrj       if (no_real_insns_p (head, tail))
3021*38fd1498Szrj 	continue;
3022*38fd1498Szrj 
3023*38fd1498Szrj       rgn_n_insns += set_priorities (head, tail);
3024*38fd1498Szrj     }
3025*38fd1498Szrj   current_sched_info->sched_max_insns_priority++;
3026*38fd1498Szrj }
3027*38fd1498Szrj 
3028*38fd1498Szrj /* (Re-)initialize the arrays of DFA states at the end of each basic block.
3029*38fd1498Szrj 
3030*38fd1498Szrj    SAVED_LAST_BASIC_BLOCK is the previous length of the arrays.  It must be
3031*38fd1498Szrj    zero for the first call to this function, to allocate the arrays for the
3032*38fd1498Szrj    first time.
3033*38fd1498Szrj 
3034*38fd1498Szrj    This function is called once during initialization of the scheduler, and
3035*38fd1498Szrj    called again to resize the arrays if new basic blocks have been created,
3036*38fd1498Szrj    for example for speculation recovery code.  */
3037*38fd1498Szrj 
3038*38fd1498Szrj static void
3039*38fd1498Szrj realloc_bb_state_array (int saved_last_basic_block)
3040*38fd1498Szrj {
3041*38fd1498Szrj   char *old_bb_state_array = bb_state_array;
3042*38fd1498Szrj   size_t lbb = (size_t) last_basic_block_for_fn (cfun);
3043*38fd1498Szrj   size_t slbb = (size_t) saved_last_basic_block;
3044*38fd1498Szrj 
3045*38fd1498Szrj   /* Nothing to do if nothing changed since the last time this was called.  */
3046*38fd1498Szrj   if (saved_last_basic_block == last_basic_block_for_fn (cfun))
3047*38fd1498Szrj     return;
3048*38fd1498Szrj 
3049*38fd1498Szrj   /* The selective scheduler doesn't use the state arrays.  */
3050*38fd1498Szrj   if (sel_sched_p ())
3051*38fd1498Szrj     {
3052*38fd1498Szrj       gcc_assert (bb_state_array == NULL && bb_state == NULL);
3053*38fd1498Szrj       return;
3054*38fd1498Szrj     }
3055*38fd1498Szrj 
3056*38fd1498Szrj   gcc_checking_assert (saved_last_basic_block == 0
3057*38fd1498Szrj 		       || (bb_state_array != NULL && bb_state != NULL));
3058*38fd1498Szrj 
3059*38fd1498Szrj   bb_state_array = XRESIZEVEC (char, bb_state_array, lbb * dfa_state_size);
3060*38fd1498Szrj   bb_state = XRESIZEVEC (state_t, bb_state, lbb);
3061*38fd1498Szrj 
3062*38fd1498Szrj   /* If BB_STATE_ARRAY has moved, fix up all the state pointers.
3063*38fd1498Szrj      Otherwise only fix up the newly allocated ones.  For the state
3064*38fd1498Szrj      array itself, only initialize the new entries.  */
3065*38fd1498Szrj   bool bb_state_array_moved = (bb_state_array != old_bb_state_array);
3066*38fd1498Szrj   for (size_t i = bb_state_array_moved ? 0 : slbb; i < lbb; i++)
3067*38fd1498Szrj     bb_state[i] = (state_t) (bb_state_array + i * dfa_state_size);
3068*38fd1498Szrj   for (size_t i = slbb; i < lbb; i++)
3069*38fd1498Szrj     state_reset (bb_state[i]);
3070*38fd1498Szrj }
3071*38fd1498Szrj 
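/* Illustrative sketch (not part of this pass): the loop above is the
   standard fixup for derived pointers into a reallocated buffer -- once
   the backing array may have moved, every pointer computed from its old
   base is stale and must be recomputed.  A standalone model with
   hypothetical names:  */
#if 0
#include <stdlib.h>

enum { SLOT_SIZE = 16 };

static char *buf;	/* backing storage, SLOT_SIZE bytes per slot  */
static char **slot;	/* slot[i] points into BUF  */

static void
resize_slots (size_t old_n, size_t new_n)
{
  char *old_buf = buf;

  buf = realloc (buf, new_n * SLOT_SIZE);
  slot = realloc (slot, new_n * sizeof *slot);

  /* If BUF moved, re-derive every slot pointer; otherwise only the
     newly allocated ones.  */
  for (size_t i = (buf != old_buf) ? 0 : old_n; i < new_n; i++)
    slot[i] = buf + i * SLOT_SIZE;
}
#endif
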
3072*38fd1498Szrj /* Free the arrays of DFA states at the end of each basic block.  */
3073*38fd1498Szrj 
3074*38fd1498Szrj static void
3075*38fd1498Szrj free_bb_state_array (void)
3076*38fd1498Szrj {
3077*38fd1498Szrj   free (bb_state_array);
3078*38fd1498Szrj   free (bb_state);
3079*38fd1498Szrj   bb_state_array = NULL;
3080*38fd1498Szrj   bb_state = NULL;
3081*38fd1498Szrj }
3082*38fd1498Szrj 
3083*38fd1498Szrj /* Schedule a region.  A region is either an inner loop, a loop-free
3084*38fd1498Szrj    subroutine, or a single basic block.  Each bb in the region is
3085*38fd1498Szrj    scheduled after its flow predecessors.  */
3086*38fd1498Szrj 
3087*38fd1498Szrj static void
3088*38fd1498Szrj schedule_region (int rgn)
3089*38fd1498Szrj {
3090*38fd1498Szrj   int bb;
3091*38fd1498Szrj   int sched_rgn_n_insns = 0;
3092*38fd1498Szrj 
3093*38fd1498Szrj   rgn_n_insns = 0;
3094*38fd1498Szrj 
3095*38fd1498Szrj   /* Do not support register-pressure-sensitive scheduling for new regions,
3096*38fd1498Szrj      as we don't update the liveness info for them.  */
3097*38fd1498Szrj   if (sched_pressure != SCHED_PRESSURE_NONE
3098*38fd1498Szrj       && rgn >= nr_regions_initial)
3099*38fd1498Szrj     {
3100*38fd1498Szrj       free_global_sched_pressure_data ();
3101*38fd1498Szrj       sched_pressure = SCHED_PRESSURE_NONE;
3102*38fd1498Szrj     }
3103*38fd1498Szrj 
3104*38fd1498Szrj   rgn_setup_region (rgn);
3105*38fd1498Szrj 
3106*38fd1498Szrj   /* Don't schedule a region marked with
3107*38fd1498Szrj      NOTE_DISABLE_SCHED_OF_BLOCK.  */
3108*38fd1498Szrj   if (sched_is_disabled_for_current_region_p ())
3109*38fd1498Szrj     return;
3110*38fd1498Szrj 
3111*38fd1498Szrj   sched_rgn_compute_dependencies (rgn);
3112*38fd1498Szrj 
3113*38fd1498Szrj   sched_rgn_local_init (rgn);
3114*38fd1498Szrj 
3115*38fd1498Szrj   /* Set priorities.  */
3116*38fd1498Szrj   compute_priorities ();
3117*38fd1498Szrj 
3118*38fd1498Szrj   sched_extend_ready_list (rgn_n_insns);
3119*38fd1498Szrj 
3120*38fd1498Szrj   if (sched_pressure == SCHED_PRESSURE_WEIGHTED)
3121*38fd1498Szrj     {
3122*38fd1498Szrj       sched_init_region_reg_pressure_info ();
3123*38fd1498Szrj       for (bb = 0; bb < current_nr_blocks; bb++)
3124*38fd1498Szrj 	{
3125*38fd1498Szrj 	  basic_block first_bb, last_bb;
3126*38fd1498Szrj 	  rtx_insn *head, *tail;
3127*38fd1498Szrj 
3128*38fd1498Szrj 	  first_bb = EBB_FIRST_BB (bb);
3129*38fd1498Szrj 	  last_bb = EBB_LAST_BB (bb);
3130*38fd1498Szrj 
3131*38fd1498Szrj 	  get_ebb_head_tail (first_bb, last_bb, &head, &tail);
3132*38fd1498Szrj 
3133*38fd1498Szrj 	  if (no_real_insns_p (head, tail))
3134*38fd1498Szrj 	    {
3135*38fd1498Szrj 	      gcc_assert (first_bb == last_bb);
3136*38fd1498Szrj 	      continue;
3137*38fd1498Szrj 	    }
3138*38fd1498Szrj 	  sched_setup_bb_reg_pressure_info (first_bb, PREV_INSN (head));
3139*38fd1498Szrj 	}
3140*38fd1498Szrj     }
3141*38fd1498Szrj 
3142*38fd1498Szrj   /* Now we can schedule all blocks.  */
3143*38fd1498Szrj   for (bb = 0; bb < current_nr_blocks; bb++)
3144*38fd1498Szrj     {
3145*38fd1498Szrj       basic_block first_bb, last_bb, curr_bb;
3146*38fd1498Szrj       rtx_insn *head, *tail;
3147*38fd1498Szrj 
3148*38fd1498Szrj       first_bb = EBB_FIRST_BB (bb);
3149*38fd1498Szrj       last_bb = EBB_LAST_BB (bb);
3150*38fd1498Szrj 
3151*38fd1498Szrj       get_ebb_head_tail (first_bb, last_bb, &head, &tail);
3152*38fd1498Szrj 
3153*38fd1498Szrj       if (no_real_insns_p (head, tail))
3154*38fd1498Szrj 	{
3155*38fd1498Szrj 	  gcc_assert (first_bb == last_bb);
3156*38fd1498Szrj 	  continue;
3157*38fd1498Szrj 	}
3158*38fd1498Szrj 
3159*38fd1498Szrj       current_sched_info->prev_head = PREV_INSN (head);
3160*38fd1498Szrj       current_sched_info->next_tail = NEXT_INSN (tail);
3161*38fd1498Szrj 
3162*38fd1498Szrj       remove_notes (head, tail);
3163*38fd1498Szrj 
3164*38fd1498Szrj       unlink_bb_notes (first_bb, last_bb);
3165*38fd1498Szrj 
3166*38fd1498Szrj       target_bb = bb;
3167*38fd1498Szrj 
3168*38fd1498Szrj       gcc_assert (flag_schedule_interblock || current_nr_blocks == 1);
3169*38fd1498Szrj       current_sched_info->queue_must_finish_empty = current_nr_blocks == 1;
3170*38fd1498Szrj 
3171*38fd1498Szrj       curr_bb = first_bb;
3172*38fd1498Szrj       if (dbg_cnt (sched_block))
3173*38fd1498Szrj         {
3174*38fd1498Szrj 	  edge f;
3175*38fd1498Szrj 	  int saved_last_basic_block = last_basic_block_for_fn (cfun);
3176*38fd1498Szrj 
3177*38fd1498Szrj 	  schedule_block (&curr_bb, bb_state[first_bb->index]);
3178*38fd1498Szrj 	  gcc_assert (EBB_FIRST_BB (bb) == first_bb);
3179*38fd1498Szrj 	  sched_rgn_n_insns += sched_n_insns;
3180*38fd1498Szrj 	  realloc_bb_state_array (saved_last_basic_block);
3181*38fd1498Szrj 	  f = find_fallthru_edge (last_bb->succs);
3182*38fd1498Szrj 	  if (f
3183*38fd1498Szrj 	      && (!f->probability.initialized_p ()
3184*38fd1498Szrj 		  || f->probability.to_reg_br_prob_base () * 100 / REG_BR_PROB_BASE >=
3185*38fd1498Szrj 	             PARAM_VALUE (PARAM_SCHED_STATE_EDGE_PROB_CUTOFF)))
3186*38fd1498Szrj 	    {
3187*38fd1498Szrj 	      memcpy (bb_state[f->dest->index], curr_state,
3188*38fd1498Szrj 		      dfa_state_size);
3189*38fd1498Szrj 	      if (sched_verbose >= 5)
3190*38fd1498Szrj 		fprintf (sched_dump, "saving state for edge %d->%d\n",
3191*38fd1498Szrj 			 f->src->index, f->dest->index);
3192*38fd1498Szrj 	    }
3193*38fd1498Szrj         }
3194*38fd1498Szrj       else
3195*38fd1498Szrj         {
3196*38fd1498Szrj           sched_rgn_n_insns += rgn_n_insns;
3197*38fd1498Szrj         }
3198*38fd1498Szrj 
3199*38fd1498Szrj       /* Clean up.  */
3200*38fd1498Szrj       if (current_nr_blocks > 1)
3201*38fd1498Szrj 	free_trg_info ();
3202*38fd1498Szrj     }
3203*38fd1498Szrj 
3204*38fd1498Szrj   /* Sanity check: verify that all region insns were scheduled.  */
3205*38fd1498Szrj   gcc_assert (sched_rgn_n_insns == rgn_n_insns);
3206*38fd1498Szrj 
3207*38fd1498Szrj   sched_finish_ready_list ();
3208*38fd1498Szrj 
3209*38fd1498Szrj   /* Done with this region.  */
3210*38fd1498Szrj   sched_rgn_local_finish ();
3211*38fd1498Szrj 
3212*38fd1498Szrj   /* Free dependencies.  */
3213*38fd1498Szrj   for (bb = 0; bb < current_nr_blocks; ++bb)
3214*38fd1498Szrj     free_block_dependencies (bb);
3215*38fd1498Szrj 
3216*38fd1498Szrj   gcc_assert (haifa_recovery_bb_ever_added_p
3217*38fd1498Szrj 	      || deps_pools_are_empty_p ());
3218*38fd1498Szrj }
3219*38fd1498Szrj 
3220*38fd1498Szrj /* Initialize data structures for region scheduling.  */
3221*38fd1498Szrj 
3222*38fd1498Szrj void
3223*38fd1498Szrj sched_rgn_init (bool single_blocks_p)
3224*38fd1498Szrj {
3225*38fd1498Szrj   min_spec_prob = ((PARAM_VALUE (PARAM_MIN_SPEC_PROB) * REG_BR_PROB_BASE)
3226*38fd1498Szrj 		    / 100);
3227*38fd1498Szrj 
3228*38fd1498Szrj   nr_inter = 0;
3229*38fd1498Szrj   nr_spec = 0;
3230*38fd1498Szrj 
3231*38fd1498Szrj   extend_regions ();
3232*38fd1498Szrj 
3233*38fd1498Szrj   CONTAINING_RGN (ENTRY_BLOCK) = -1;
3234*38fd1498Szrj   CONTAINING_RGN (EXIT_BLOCK) = -1;
3235*38fd1498Szrj 
3236*38fd1498Szrj   realloc_bb_state_array (0);
3237*38fd1498Szrj 
3238*38fd1498Szrj   /* Compute regions for scheduling.  */
3239*38fd1498Szrj   if (single_blocks_p
3240*38fd1498Szrj       || n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS + 1
3241*38fd1498Szrj       || !flag_schedule_interblock
3242*38fd1498Szrj       || is_cfg_nonregular ())
3243*38fd1498Szrj     {
3244*38fd1498Szrj       find_single_block_region (sel_sched_p ());
3245*38fd1498Szrj     }
3246*38fd1498Szrj   else
3247*38fd1498Szrj     {
3248*38fd1498Szrj       /* Compute the dominators and post dominators.  */
3249*38fd1498Szrj       if (!sel_sched_p ())
3250*38fd1498Szrj 	calculate_dominance_info (CDI_DOMINATORS);
3251*38fd1498Szrj 
3252*38fd1498Szrj       /* Find regions.  */
3253*38fd1498Szrj       find_rgns ();
3254*38fd1498Szrj 
3255*38fd1498Szrj       if (sched_verbose >= 3)
3256*38fd1498Szrj 	debug_regions ();
3257*38fd1498Szrj 
3258*38fd1498Szrj       /* For now.  This will move as more and more of haifa is converted
3259*38fd1498Szrj 	 to using the cfg code.  */
3260*38fd1498Szrj       if (!sel_sched_p ())
3261*38fd1498Szrj 	free_dominance_info (CDI_DOMINATORS);
3262*38fd1498Szrj     }
3263*38fd1498Szrj 
3264*38fd1498Szrj   gcc_assert (nr_regions > 0 && nr_regions <= n_basic_blocks_for_fn (cfun));
3265*38fd1498Szrj 
3266*38fd1498Szrj   RGN_BLOCKS (nr_regions) = (RGN_BLOCKS (nr_regions - 1)
3267*38fd1498Szrj 			     + RGN_NR_BLOCKS (nr_regions - 1));
3268*38fd1498Szrj   nr_regions_initial = nr_regions;
3269*38fd1498Szrj }
3270*38fd1498Szrj 
3271*38fd1498Szrj /* Free data structures for region scheduling.  */
3272*38fd1498Szrj void
3273*38fd1498Szrj sched_rgn_finish (void)
3274*38fd1498Szrj {
3275*38fd1498Szrj   free_bb_state_array ();
3276*38fd1498Szrj 
3277*38fd1498Szrj   /* Reposition the prologue and epilogue notes in case we moved the
3278*38fd1498Szrj      prologue/epilogue insns.  */
3279*38fd1498Szrj   if (reload_completed)
3280*38fd1498Szrj     reposition_prologue_and_epilogue_notes ();
3281*38fd1498Szrj 
3282*38fd1498Szrj   if (sched_verbose)
3283*38fd1498Szrj     {
3284*38fd1498Szrj       if (reload_completed == 0
3285*38fd1498Szrj 	  && flag_schedule_interblock)
3286*38fd1498Szrj 	{
3287*38fd1498Szrj 	  fprintf (sched_dump,
3288*38fd1498Szrj 		   "\n;; Procedure interblock/speculative motions == %d/%d \n",
3289*38fd1498Szrj 		   nr_inter, nr_spec);
3290*38fd1498Szrj 	}
3291*38fd1498Szrj       else
3292*38fd1498Szrj 	gcc_assert (nr_inter <= 0);
3293*38fd1498Szrj       fprintf (sched_dump, "\n\n");
3294*38fd1498Szrj     }
3295*38fd1498Szrj 
3296*38fd1498Szrj   nr_regions = 0;
3297*38fd1498Szrj 
3298*38fd1498Szrj   free (rgn_table);
3299*38fd1498Szrj   rgn_table = NULL;
3300*38fd1498Szrj 
3301*38fd1498Szrj   free (rgn_bb_table);
3302*38fd1498Szrj   rgn_bb_table = NULL;
3303*38fd1498Szrj 
3304*38fd1498Szrj   free (block_to_bb);
3305*38fd1498Szrj   block_to_bb = NULL;
3306*38fd1498Szrj 
3307*38fd1498Szrj   free (containing_rgn);
3308*38fd1498Szrj   containing_rgn = NULL;
3309*38fd1498Szrj 
3310*38fd1498Szrj   free (ebb_head);
3311*38fd1498Szrj   ebb_head = NULL;
3312*38fd1498Szrj }
3313*38fd1498Szrj 
3314*38fd1498Szrj /* Set up global variables like CURRENT_BLOCKS and CURRENT_NR_BLOCKS to
3315*38fd1498Szrj    point to the region RGN.  */
3316*38fd1498Szrj void
3317*38fd1498Szrj rgn_setup_region (int rgn)
3318*38fd1498Szrj {
3319*38fd1498Szrj   int bb;
3320*38fd1498Szrj 
3321*38fd1498Szrj   /* Set variables for the current region.  */
3322*38fd1498Szrj   current_nr_blocks = RGN_NR_BLOCKS (rgn);
3323*38fd1498Szrj   current_blocks = RGN_BLOCKS (rgn);
3324*38fd1498Szrj 
3325*38fd1498Szrj   /* EBB_HEAD is a region-scope structure.  But we realloc it for
3326*38fd1498Szrj      each region to save time/memory/something else.
3327*38fd1498Szrj      See comments in add_block1, for what reasons we allocate +1 element.  */
3328*38fd1498Szrj   ebb_head = XRESIZEVEC (int, ebb_head, current_nr_blocks + 1);
3329*38fd1498Szrj   for (bb = 0; bb <= current_nr_blocks; bb++)
3330*38fd1498Szrj     ebb_head[bb] = current_blocks + bb;
3331*38fd1498Szrj }
3332*38fd1498Szrj 
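/* Illustrative note (not part of this pass): thanks to that sentinel,
   the rgn_bb_table positions of ebb I are always the half-open range
   [ebb_head[I], ebb_head[I + 1]), even for the region's last ebb, so
   callers never need a special case at the end.  A hypothetical
   accessor spelling out the invariant:  */
#if 0
static int
ebb_last_position (int i)
{
  return ebb_head[i + 1] - 1;	/* valid for every ebb, incl. the last  */
}
#endif
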
3333*38fd1498Szrj /* Compute instruction dependencies in region RGN.  */
3334*38fd1498Szrj void
3335*38fd1498Szrj sched_rgn_compute_dependencies (int rgn)
3336*38fd1498Szrj {
3337*38fd1498Szrj   if (!RGN_DONT_CALC_DEPS (rgn))
3338*38fd1498Szrj     {
3339*38fd1498Szrj       int bb;
3340*38fd1498Szrj 
3341*38fd1498Szrj       if (sel_sched_p ())
3342*38fd1498Szrj 	sched_emulate_haifa_p = 1;
3343*38fd1498Szrj 
3344*38fd1498Szrj       init_deps_global ();
3345*38fd1498Szrj 
3346*38fd1498Szrj       /* Initializations for region data dependence analysis.  */
3347*38fd1498Szrj       bb_deps = XNEWVEC (struct deps_desc, current_nr_blocks);
3348*38fd1498Szrj       for (bb = 0; bb < current_nr_blocks; bb++)
3349*38fd1498Szrj 	init_deps (bb_deps + bb, false);
3350*38fd1498Szrj 
3351*38fd1498Szrj       /* Initialize bitmap used in add_branch_dependences.  */
3352*38fd1498Szrj       insn_referenced = sbitmap_alloc (sched_max_luid);
3353*38fd1498Szrj       bitmap_clear (insn_referenced);
3354*38fd1498Szrj 
3355*38fd1498Szrj       /* Compute backward dependencies.  */
3356*38fd1498Szrj       for (bb = 0; bb < current_nr_blocks; bb++)
3357*38fd1498Szrj 	compute_block_dependences (bb);
3358*38fd1498Szrj 
3359*38fd1498Szrj       sbitmap_free (insn_referenced);
3360*38fd1498Szrj       free_pending_lists ();
3361*38fd1498Szrj       finish_deps_global ();
3362*38fd1498Szrj       free (bb_deps);
3363*38fd1498Szrj 
3364*38fd1498Szrj       /* We don't want to recalculate this twice.  */
3365*38fd1498Szrj       RGN_DONT_CALC_DEPS (rgn) = 1;
3366*38fd1498Szrj 
3367*38fd1498Szrj       if (sel_sched_p ())
3368*38fd1498Szrj 	sched_emulate_haifa_p = 0;
3369*38fd1498Szrj     }
3370*38fd1498Szrj   else
3371*38fd1498Szrj     /* (This is a recovery block.  It is always a single block region.)
3372*38fd1498Szrj        OR (We use selective scheduling.)  */
3373*38fd1498Szrj     gcc_assert (current_nr_blocks == 1 || sel_sched_p ());
3374*38fd1498Szrj }
3375*38fd1498Szrj 
3376*38fd1498Szrj /* Initialize data structures (probabilities, dominators, split edges)
3377*38fd1498Szrj    local to region RGN.  */
3378*38fd1498Szrj void
3379*38fd1498Szrj sched_rgn_local_init (int rgn)
3380*38fd1498Szrj {
3381*38fd1498Szrj   int bb;
3382*38fd1498Szrj 
3383*38fd1498Szrj   /* Compute interblock info: probabilities, split-edges, dominators, etc.  */
3384*38fd1498Szrj   if (current_nr_blocks > 1)
3385*38fd1498Szrj     {
3386*38fd1498Szrj       basic_block block;
3387*38fd1498Szrj       edge e;
3388*38fd1498Szrj       edge_iterator ei;
3389*38fd1498Szrj 
3390*38fd1498Szrj       prob = XNEWVEC (int, current_nr_blocks);
3391*38fd1498Szrj 
3392*38fd1498Szrj       dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
3393*38fd1498Szrj       bitmap_vector_clear (dom, current_nr_blocks);
3394*38fd1498Szrj 
3395*38fd1498Szrj       /* Use ->aux to implement EDGE_TO_BIT mapping.  */
3396*38fd1498Szrj       rgn_nr_edges = 0;
3397*38fd1498Szrj       FOR_EACH_BB_FN (block, cfun)
3398*38fd1498Szrj 	{
3399*38fd1498Szrj 	  if (CONTAINING_RGN (block->index) != rgn)
3400*38fd1498Szrj 	    continue;
3401*38fd1498Szrj 	  FOR_EACH_EDGE (e, ei, block->succs)
3402*38fd1498Szrj 	    SET_EDGE_TO_BIT (e, rgn_nr_edges++);
3403*38fd1498Szrj 	}
3404*38fd1498Szrj 
3405*38fd1498Szrj       rgn_edges = XNEWVEC (edge, rgn_nr_edges);
3406*38fd1498Szrj       rgn_nr_edges = 0;
3407*38fd1498Szrj       FOR_EACH_BB_FN (block, cfun)
3408*38fd1498Szrj 	{
3409*38fd1498Szrj 	  if (CONTAINING_RGN (block->index) != rgn)
3410*38fd1498Szrj 	    continue;
3411*38fd1498Szrj 	  FOR_EACH_EDGE (e, ei, block->succs)
3412*38fd1498Szrj 	    rgn_edges[rgn_nr_edges++] = e;
3413*38fd1498Szrj 	}
3414*38fd1498Szrj 
3415*38fd1498Szrj       /* Split edges.  */
3416*38fd1498Szrj       pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
3417*38fd1498Szrj       bitmap_vector_clear (pot_split, current_nr_blocks);
3418*38fd1498Szrj       ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
3419*38fd1498Szrj       bitmap_vector_clear (ancestor_edges, current_nr_blocks);
3420*38fd1498Szrj 
3421*38fd1498Szrj       /* Compute probabilities, dominators, split_edges.  */
3422*38fd1498Szrj       for (bb = 0; bb < current_nr_blocks; bb++)
3423*38fd1498Szrj 	compute_dom_prob_ps (bb);
3424*38fd1498Szrj 
3425*38fd1498Szrj       /* Clean up ->aux used for the EDGE_TO_BIT mapping.  */
3426*38fd1498Szrj       /* We don't need them anymore.  But we want to avoid duplication of
3427*38fd1498Szrj 	 aux fields in the newly created edges.  */
3428*38fd1498Szrj       FOR_EACH_BB_FN (block, cfun)
3429*38fd1498Szrj 	{
3430*38fd1498Szrj 	  if (CONTAINING_RGN (block->index) != rgn)
3431*38fd1498Szrj 	    continue;
3432*38fd1498Szrj 	  FOR_EACH_EDGE (e, ei, block->succs)
3433*38fd1498Szrj 	    e->aux = NULL;
3434*38fd1498Szrj         }
3435*38fd1498Szrj     }
3436*38fd1498Szrj }
3437*38fd1498Szrj 
3438*38fd1498Szrj /* Free data computed for the finished region.  */
3439*38fd1498Szrj void
3440*38fd1498Szrj sched_rgn_local_free (void)
3441*38fd1498Szrj {
3442*38fd1498Szrj   free (prob);
3443*38fd1498Szrj   sbitmap_vector_free (dom);
3444*38fd1498Szrj   sbitmap_vector_free (pot_split);
3445*38fd1498Szrj   sbitmap_vector_free (ancestor_edges);
3446*38fd1498Szrj   free (rgn_edges);
3447*38fd1498Szrj }
3448*38fd1498Szrj 
3449*38fd1498Szrj /* Free data computed for the finished region.  */
3450*38fd1498Szrj void
3451*38fd1498Szrj sched_rgn_local_finish (void)
3452*38fd1498Szrj {
3453*38fd1498Szrj   if (current_nr_blocks > 1 && !sel_sched_p ())
3454*38fd1498Szrj     {
3455*38fd1498Szrj       sched_rgn_local_free ();
3456*38fd1498Szrj     }
3457*38fd1498Szrj }
3458*38fd1498Szrj 
3459*38fd1498Szrj /* Set up scheduler infos.  */
3460*38fd1498Szrj void
3461*38fd1498Szrj rgn_setup_common_sched_info (void)
3462*38fd1498Szrj {
3463*38fd1498Szrj   memcpy (&rgn_common_sched_info, &haifa_common_sched_info,
3464*38fd1498Szrj 	  sizeof (rgn_common_sched_info));
3465*38fd1498Szrj 
3466*38fd1498Szrj   rgn_common_sched_info.fix_recovery_cfg = rgn_fix_recovery_cfg;
3467*38fd1498Szrj   rgn_common_sched_info.add_block = rgn_add_block;
3468*38fd1498Szrj   rgn_common_sched_info.estimate_number_of_insns
3469*38fd1498Szrj     = rgn_estimate_number_of_insns;
3470*38fd1498Szrj   rgn_common_sched_info.sched_pass_id = SCHED_RGN_PASS;
3471*38fd1498Szrj 
3472*38fd1498Szrj   common_sched_info = &rgn_common_sched_info;
3473*38fd1498Szrj }
3474*38fd1498Szrj 
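/* Illustrative sketch (not part of this pass): rgn_setup_common_sched_info
   above uses the common C "copy a const template struct, then override
   selected hooks" idiom as a poor man's inheritance.  A standalone model
   with hypothetical names:  */
#if 0
#include <string.h>

struct ops { void (*init) (void); void (*fini) (void); };

static void base_init (void) { }
static void base_fini (void) { }
static const struct ops base_ops = { base_init, base_fini };

static struct ops my_ops;

static void my_fini (void) { }

static void
setup_ops (void)
{
  memcpy (&my_ops, &base_ops, sizeof (my_ops));
  my_ops.fini = my_fini;	/* override a single hook  */
}
#endif
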
3475*38fd1498Szrj /* Set up all *_sched_info structures (for the Haifa frontend
3476*38fd1498Szrj    and for the dependence analysis) in the interblock scheduler.  */
3477*38fd1498Szrj void
3478*38fd1498Szrj rgn_setup_sched_infos (void)
3479*38fd1498Szrj {
3480*38fd1498Szrj   if (!sel_sched_p ())
3481*38fd1498Szrj     memcpy (&rgn_sched_deps_info, &rgn_const_sched_deps_info,
3482*38fd1498Szrj 	    sizeof (rgn_sched_deps_info));
3483*38fd1498Szrj   else
3484*38fd1498Szrj     memcpy (&rgn_sched_deps_info, &rgn_const_sel_sched_deps_info,
3485*38fd1498Szrj 	    sizeof (rgn_sched_deps_info));
3486*38fd1498Szrj 
3487*38fd1498Szrj   sched_deps_info = &rgn_sched_deps_info;
3488*38fd1498Szrj 
3489*38fd1498Szrj   memcpy (&rgn_sched_info, &rgn_const_sched_info, sizeof (rgn_sched_info));
3490*38fd1498Szrj   current_sched_info = &rgn_sched_info;
3491*38fd1498Szrj }
3492*38fd1498Szrj 
3493*38fd1498Szrj /* The one entry point in this file.  */
3494*38fd1498Szrj void
3495*38fd1498Szrj schedule_insns (void)
3496*38fd1498Szrj {
3497*38fd1498Szrj   int rgn;
3498*38fd1498Szrj 
3499*38fd1498Szrj   /* Taking care of this degenerate case makes the rest of
3500*38fd1498Szrj      this code simpler.  */
3501*38fd1498Szrj   if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3502*38fd1498Szrj     return;
3503*38fd1498Szrj 
3504*38fd1498Szrj   rgn_setup_common_sched_info ();
3505*38fd1498Szrj   rgn_setup_sched_infos ();
3506*38fd1498Szrj 
3507*38fd1498Szrj   haifa_sched_init ();
3508*38fd1498Szrj   sched_rgn_init (reload_completed);
3509*38fd1498Szrj 
3510*38fd1498Szrj   bitmap_initialize (&not_in_df, 0);
3511*38fd1498Szrj   bitmap_clear (&not_in_df);
3512*38fd1498Szrj 
3513*38fd1498Szrj   /* Schedule every region in the subroutine.  */
3514*38fd1498Szrj   for (rgn = 0; rgn < nr_regions; rgn++)
3515*38fd1498Szrj     if (dbg_cnt (sched_region))
3516*38fd1498Szrj       schedule_region (rgn);
3517*38fd1498Szrj 
3518*38fd1498Szrj   /* Clean up.  */
3519*38fd1498Szrj   sched_rgn_finish ();
3520*38fd1498Szrj   bitmap_clear (&not_in_df);
3521*38fd1498Szrj 
3522*38fd1498Szrj   haifa_sched_finish ();
3523*38fd1498Szrj }
3524*38fd1498Szrj 
3525*38fd1498Szrj /* INSN has been added to/removed from current region.  */
3526*38fd1498Szrj static void
3527*38fd1498Szrj rgn_add_remove_insn (rtx_insn *insn, int remove_p)
3528*38fd1498Szrj {
3529*38fd1498Szrj   if (!remove_p)
3530*38fd1498Szrj     rgn_n_insns++;
3531*38fd1498Szrj   else
3532*38fd1498Szrj     rgn_n_insns--;
3533*38fd1498Szrj 
3534*38fd1498Szrj   if (INSN_BB (insn) == target_bb)
3535*38fd1498Szrj     {
3536*38fd1498Szrj       if (!remove_p)
3537*38fd1498Szrj 	target_n_insns++;
3538*38fd1498Szrj       else
3539*38fd1498Szrj 	target_n_insns--;
3540*38fd1498Szrj     }
3541*38fd1498Szrj }
3542*38fd1498Szrj 
3543*38fd1498Szrj /* Extend internal data structures.  */
3544*38fd1498Szrj void
3545*38fd1498Szrj extend_regions (void)
3546*38fd1498Szrj {
3547*38fd1498Szrj   rgn_table = XRESIZEVEC (region, rgn_table, n_basic_blocks_for_fn (cfun));
3548*38fd1498Szrj   rgn_bb_table = XRESIZEVEC (int, rgn_bb_table,
3549*38fd1498Szrj 			     n_basic_blocks_for_fn (cfun));
3550*38fd1498Szrj   block_to_bb = XRESIZEVEC (int, block_to_bb,
3551*38fd1498Szrj 			    last_basic_block_for_fn (cfun));
3552*38fd1498Szrj   containing_rgn = XRESIZEVEC (int, containing_rgn,
3553*38fd1498Szrj 			       last_basic_block_for_fn (cfun));
3554*38fd1498Szrj }
3555*38fd1498Szrj 
3556*38fd1498Szrj void
3557*38fd1498Szrj rgn_make_new_region_out_of_new_block (basic_block bb)
3558*38fd1498Szrj {
3559*38fd1498Szrj   int i;
3560*38fd1498Szrj 
3561*38fd1498Szrj   i = RGN_BLOCKS (nr_regions);
3562*38fd1498Szrj   /* I - first free position in rgn_bb_table.  */
3563*38fd1498Szrj 
3564*38fd1498Szrj   rgn_bb_table[i] = bb->index;
3565*38fd1498Szrj   RGN_NR_BLOCKS (nr_regions) = 1;
3566*38fd1498Szrj   RGN_HAS_REAL_EBB (nr_regions) = 0;
3567*38fd1498Szrj   RGN_DONT_CALC_DEPS (nr_regions) = 0;
3568*38fd1498Szrj   CONTAINING_RGN (bb->index) = nr_regions;
3569*38fd1498Szrj   BLOCK_TO_BB (bb->index) = 0;
3570*38fd1498Szrj 
3571*38fd1498Szrj   nr_regions++;
3572*38fd1498Szrj 
3573*38fd1498Szrj   RGN_BLOCKS (nr_regions) = i + 1;
3574*38fd1498Szrj }
3575*38fd1498Szrj 
3576*38fd1498Szrj /* BB was added to ebb after AFTER.  */
3577*38fd1498Szrj static void
3578*38fd1498Szrj rgn_add_block (basic_block bb, basic_block after)
3579*38fd1498Szrj {
3580*38fd1498Szrj   extend_regions ();
3581*38fd1498Szrj   bitmap_set_bit (&not_in_df, bb->index);
3582*38fd1498Szrj 
3583*38fd1498Szrj   if (after == 0 || after == EXIT_BLOCK_PTR_FOR_FN (cfun))
3584*38fd1498Szrj     {
3585*38fd1498Szrj       rgn_make_new_region_out_of_new_block (bb);
3586*38fd1498Szrj       RGN_DONT_CALC_DEPS (nr_regions - 1) = (after
3587*38fd1498Szrj 					     == EXIT_BLOCK_PTR_FOR_FN (cfun));
3588*38fd1498Szrj     }
3589*38fd1498Szrj   else
3590*38fd1498Szrj     {
3591*38fd1498Szrj       int i, pos;
3592*38fd1498Szrj 
3593*38fd1498Szrj       /* We need to fix rgn_table, block_to_bb, containing_rgn
3594*38fd1498Szrj 	 and ebb_head.  */
3595*38fd1498Szrj 
3596*38fd1498Szrj       BLOCK_TO_BB (bb->index) = BLOCK_TO_BB (after->index);
3597*38fd1498Szrj 
3598*38fd1498Szrj       /* We extend ebb_head to one more position to
3599*38fd1498Szrj 	 easily find the last position of the last ebb in
3600*38fd1498Szrj 	 the current region.  Thus, ebb_head[BLOCK_TO_BB (after) + 1]
3601*38fd1498Szrj 	 is _always_ valid for access.  */
3602*38fd1498Szrj 
3603*38fd1498Szrj       i = BLOCK_TO_BB (after->index) + 1;
3604*38fd1498Szrj       pos = ebb_head[i] - 1;
3605*38fd1498Szrj       /* Now POS is the index of the last block in the region.  */
3606*38fd1498Szrj 
3607*38fd1498Szrj       /* Find index of basic block AFTER.  */
3608*38fd1498Szrj       for (; rgn_bb_table[pos] != after->index; pos--)
3609*38fd1498Szrj 	;
3610*38fd1498Szrj 
3611*38fd1498Szrj       pos++;
3612*38fd1498Szrj       gcc_assert (pos > ebb_head[i - 1]);
3613*38fd1498Szrj 
3614*38fd1498Szrj       /* i - ebb right after "AFTER".  */
3615*38fd1498Szrj       /* ebb_head[i] - VALID.  */
3616*38fd1498Szrj 
3617*38fd1498Szrj       /* Source position: POS
3618*38fd1498Szrj 	 Destination position: POS + 1
3619*38fd1498Szrj 	 Last position:
3620*38fd1498Szrj 	   RGN_BLOCKS (nr_regions) - 1
3621*38fd1498Szrj 	 Number of elements to copy: (last_position) - (source_position) + 1
3622*38fd1498Szrj        */
3623*38fd1498Szrj 
3624*38fd1498Szrj       memmove (rgn_bb_table + pos + 1,
3625*38fd1498Szrj 	       rgn_bb_table + pos,
3626*38fd1498Szrj 	       ((RGN_BLOCKS (nr_regions) - 1) - (pos) + 1)
3627*38fd1498Szrj 	       * sizeof (*rgn_bb_table));
3628*38fd1498Szrj 
3629*38fd1498Szrj       rgn_bb_table[pos] = bb->index;
3630*38fd1498Szrj 
3631*38fd1498Szrj       for (; i <= current_nr_blocks; i++)
3632*38fd1498Szrj 	ebb_head [i]++;
3633*38fd1498Szrj 
3634*38fd1498Szrj       i = CONTAINING_RGN (after->index);
3635*38fd1498Szrj       CONTAINING_RGN (bb->index) = i;
3636*38fd1498Szrj 
3637*38fd1498Szrj       RGN_HAS_REAL_EBB (i) = 1;
3638*38fd1498Szrj 
3639*38fd1498Szrj       for (++i; i <= nr_regions; i++)
3640*38fd1498Szrj 	RGN_BLOCKS (i)++;
3641*38fd1498Szrj     }
3642*38fd1498Szrj }
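
/* Editorial sketch (hypothetical indices): assume the region being
   extended starts at rgn_bb_table position 0 and holds two ebbs,

     ebb_head     = { 0, 2, 4 }    (ebb 0 at slots 0..1, ebb 1 at 2..3)
     rgn_bb_table = { 3, 5, 6, 8, ... }

   and rgn_add_block is called with AFTER->index == 5 (so
   BLOCK_TO_BB (5) == 0) and BB->index == 9.  The backward search finds
   AFTER at slot 1, so POS becomes 2; the memmove shifts slots
   2 .. RGN_BLOCKS (nr_regions) - 1 up by one, slot 2 is set to 9,
   ebb_head becomes { 0, 3, 5 }, and RGN_BLOCKS of every following
   region grows by one.  */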
3643*38fd1498Szrj 
3644*38fd1498Szrj /* Fix internal data after interblock movement of jump instruction.
3645*38fd1498Szrj    For parameter meaning please refer to
3646*38fd1498Szrj    sched-int.h: struct sched_info: fix_recovery_cfg.  */
3647*38fd1498Szrj static void
3648*38fd1498Szrj rgn_fix_recovery_cfg (int bbi, int check_bbi, int check_bb_nexti)
3649*38fd1498Szrj {
3650*38fd1498Szrj   int old_pos, new_pos, i;
3651*38fd1498Szrj 
3652*38fd1498Szrj   BLOCK_TO_BB (check_bb_nexti) = BLOCK_TO_BB (bbi);
3653*38fd1498Szrj 
3654*38fd1498Szrj   for (old_pos = ebb_head[BLOCK_TO_BB (check_bbi) + 1] - 1;
3655*38fd1498Szrj        rgn_bb_table[old_pos] != check_bb_nexti;
3656*38fd1498Szrj        old_pos--)
3657*38fd1498Szrj     ;
3658*38fd1498Szrj   gcc_assert (old_pos > ebb_head[BLOCK_TO_BB (check_bbi)]);
3659*38fd1498Szrj 
3660*38fd1498Szrj   for (new_pos = ebb_head[BLOCK_TO_BB (bbi) + 1] - 1;
3661*38fd1498Szrj        rgn_bb_table[new_pos] != bbi;
3662*38fd1498Szrj        new_pos--)
3663*38fd1498Szrj     ;
3664*38fd1498Szrj   new_pos++;
3665*38fd1498Szrj   gcc_assert (new_pos > ebb_head[BLOCK_TO_BB (bbi)]);
3666*38fd1498Szrj 
3667*38fd1498Szrj   gcc_assert (new_pos < old_pos);
3668*38fd1498Szrj 
3669*38fd1498Szrj   memmove (rgn_bb_table + new_pos + 1,
3670*38fd1498Szrj 	   rgn_bb_table + new_pos,
3671*38fd1498Szrj 	   (old_pos - new_pos) * sizeof (*rgn_bb_table));
3672*38fd1498Szrj 
3673*38fd1498Szrj   rgn_bb_table[new_pos] = check_bb_nexti;
3674*38fd1498Szrj 
3675*38fd1498Szrj   for (i = BLOCK_TO_BB (bbi) + 1; i <= BLOCK_TO_BB (check_bbi); i++)
3676*38fd1498Szrj     ebb_head[i]++;
3677*38fd1498Szrj }
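
/* Editorial sketch (hypothetical indices): with

     ebb_head     = { 0, 2, 5 }
     rgn_bb_table = { 4, 7, 2, 6, 9 }

   and a call rgn_fix_recovery_cfg (4, 2, 9) where BLOCK_TO_BB (4) == 0
   and BLOCK_TO_BB (2) == 1: the searches yield OLD_POS = 4 and
   NEW_POS = 1, the memmove shifts slots 1..3 up by one, slot 1 is set
   to 9, and ebb_head[1] is bumped, giving

     ebb_head     = { 0, 3, 5 }
     rgn_bb_table = { 4, 9, 7, 2, 6 }.  */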
3678*38fd1498Szrj 
3679*38fd1498Szrj /* Return next block in ebb chain.  For parameter meaning please refer to
3680*38fd1498Szrj    sched-int.h: struct sched_info: advance_target_bb.  */
3681*38fd1498Szrj static basic_block
3682*38fd1498Szrj advance_target_bb (basic_block bb, rtx_insn *insn)
3683*38fd1498Szrj {
3684*38fd1498Szrj   if (insn)
3685*38fd1498Szrj     return 0;
3686*38fd1498Szrj 
3687*38fd1498Szrj   gcc_assert (BLOCK_TO_BB (bb->index) == target_bb
3688*38fd1498Szrj 	      && BLOCK_TO_BB (bb->next_bb->index) == target_bb);
3689*38fd1498Szrj   return bb->next_bb;
3690*38fd1498Szrj }
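
/* Editorial note: per the sched_info contract documented in sched-int.h,
   this hook is queried in two modes.  The rgn variant returns 0 for a
   non-NULL INSN, i.e. the target block never changes in response to a
   scheduled insn here; with INSN == NULL it steps to bb->next_bb,
   asserting that both blocks belong to the same target ebb.  */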
3691*38fd1498Szrj 
3692*38fd1498Szrj #endif
3693*38fd1498Szrj 
3694*38fd1498Szrj /* Run instruction scheduler.  */
3695*38fd1498Szrj static unsigned int
3696*38fd1498Szrj rest_of_handle_live_range_shrinkage (void)
3697*38fd1498Szrj {
3698*38fd1498Szrj #ifdef INSN_SCHEDULING
3699*38fd1498Szrj   int saved;
3700*38fd1498Szrj 
3701*38fd1498Szrj   initialize_live_range_shrinkage ();
3702*38fd1498Szrj   saved = flag_schedule_interblock;
3703*38fd1498Szrj   flag_schedule_interblock = false;
3704*38fd1498Szrj   schedule_insns ();
3705*38fd1498Szrj   flag_schedule_interblock = saved;
3706*38fd1498Szrj   finish_live_range_shrinkage ();
3707*38fd1498Szrj #endif
3708*38fd1498Szrj   return 0;
3709*38fd1498Szrj }
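
/* Editorial note: the pass above reuses the ordinary scheduler with
   interblock motion temporarily disabled, saving and restoring
   flag_schedule_interblock around the call.  It is requested with the
   -flive-range-shrinkage option tested by the pass gate below, e.g.

     gcc -O2 -flive-range-shrinkage file.c  */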
3710*38fd1498Szrj 
3711*38fd1498Szrj /* Run instruction scheduler.  */
3712*38fd1498Szrj static unsigned int
3713*38fd1498Szrj rest_of_handle_sched (void)
3714*38fd1498Szrj {
3715*38fd1498Szrj #ifdef INSN_SCHEDULING
3716*38fd1498Szrj   if (flag_selective_scheduling
3717*38fd1498Szrj       && ! maybe_skip_selective_scheduling ())
3718*38fd1498Szrj     run_selective_scheduling ();
3719*38fd1498Szrj   else
3720*38fd1498Szrj     schedule_insns ();
3721*38fd1498Szrj #endif
3722*38fd1498Szrj   return 0;
3723*38fd1498Szrj }
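
/* Editorial note: before register allocation, -fselective-scheduling
   (unless skipped for the current function) routes to the selective
   scheduler; otherwise the region scheduler in this file runs, e.g.

     gcc -O2 -fschedule-insns file.c
     gcc -O2 -fschedule-insns -fselective-scheduling file.c  */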
3724*38fd1498Szrj 
3725*38fd1498Szrj /* Run second scheduling pass after reload.  */
3726*38fd1498Szrj static unsigned int
3727*38fd1498Szrj rest_of_handle_sched2 (void)
3728*38fd1498Szrj {
3729*38fd1498Szrj #ifdef INSN_SCHEDULING
3730*38fd1498Szrj   if (flag_selective_scheduling2
3731*38fd1498Szrj       && ! maybe_skip_selective_scheduling ())
3732*38fd1498Szrj     run_selective_scheduling ();
3733*38fd1498Szrj   else
3734*38fd1498Szrj     {
3735*38fd1498Szrj       /* Do control and data sched analysis again,
3736*38fd1498Szrj 	 and write some more of the results to dump file.  */
3737*38fd1498Szrj       if (flag_sched2_use_superblocks)
3738*38fd1498Szrj 	schedule_ebbs ();
3739*38fd1498Szrj       else
3740*38fd1498Szrj 	schedule_insns ();
3741*38fd1498Szrj     }
3742*38fd1498Szrj #endif
3743*38fd1498Szrj   return 0;
3744*38fd1498Szrj }
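
/* Editorial note: after register allocation the dispatch is analogous:
   -fselective-scheduling2 selects the selective scheduler,
   -fsched2-use-superblocks routes to schedule_ebbs, and the default is
   single-block scheduling via schedule_insns, e.g.

     gcc -O2 -fsched2-use-superblocks file.c  */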
3745*38fd1498Szrj 
3746*38fd1498Szrj static unsigned int
3747*38fd1498Szrj rest_of_handle_sched_fusion (void)
3748*38fd1498Szrj {
3749*38fd1498Szrj #ifdef INSN_SCHEDULING
3750*38fd1498Szrj   sched_fusion = true;
3751*38fd1498Szrj   schedule_insns ();
3752*38fd1498Szrj   sched_fusion = false;
3753*38fd1498Szrj #endif
3754*38fd1498Szrj   return 0;
3755*38fd1498Szrj }
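
/* Editorial note: SCHED_FUSION is a global flag tested inside the
   scheduler proper; fusion priorities come from the target hook
   targetm.sched.fusion_priority (see the gate of pass_sched_fusion
   below), while the actual combining of fused pairs is left to the
   peephole2 pass.  */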
3756*38fd1498Szrj 
3757*38fd1498Szrj namespace {
3758*38fd1498Szrj 
3759*38fd1498Szrj const pass_data pass_data_live_range_shrinkage =
3760*38fd1498Szrj {
3761*38fd1498Szrj   RTL_PASS, /* type */
3762*38fd1498Szrj   "lr_shrinkage", /* name */
3763*38fd1498Szrj   OPTGROUP_NONE, /* optinfo_flags */
3764*38fd1498Szrj   TV_LIVE_RANGE_SHRINKAGE, /* tv_id */
3765*38fd1498Szrj   0, /* properties_required */
3766*38fd1498Szrj   0, /* properties_provided */
3767*38fd1498Szrj   0, /* properties_destroyed */
3768*38fd1498Szrj   0, /* todo_flags_start */
3769*38fd1498Szrj   TODO_df_finish, /* todo_flags_finish */
3770*38fd1498Szrj };
3771*38fd1498Szrj 
3772*38fd1498Szrj class pass_live_range_shrinkage : public rtl_opt_pass
3773*38fd1498Szrj {
3774*38fd1498Szrj public:
3775*38fd1498Szrj   pass_live_range_shrinkage(gcc::context *ctxt)
3776*38fd1498Szrj     : rtl_opt_pass(pass_data_live_range_shrinkage, ctxt)
3777*38fd1498Szrj   {}
3778*38fd1498Szrj 
3779*38fd1498Szrj   /* opt_pass methods: */
3780*38fd1498Szrj   virtual bool gate (function *)
3781*38fd1498Szrj     {
3782*38fd1498Szrj #ifdef INSN_SCHEDULING
3783*38fd1498Szrj       return flag_live_range_shrinkage;
3784*38fd1498Szrj #else
3785*38fd1498Szrj       return 0;
3786*38fd1498Szrj #endif
3787*38fd1498Szrj     }
3788*38fd1498Szrj 
3789*38fd1498Szrj   virtual unsigned int execute (function *)
3790*38fd1498Szrj     {
3791*38fd1498Szrj       return rest_of_handle_live_range_shrinkage ();
3792*38fd1498Szrj     }
3793*38fd1498Szrj 
3794*38fd1498Szrj }; // class pass_live_range_shrinkage
3795*38fd1498Szrj 
3796*38fd1498Szrj } // anon namespace
3797*38fd1498Szrj 
3798*38fd1498Szrj rtl_opt_pass *
3799*38fd1498Szrj make_pass_live_range_shrinkage (gcc::context *ctxt)
3800*38fd1498Szrj {
3801*38fd1498Szrj   return new pass_live_range_shrinkage (ctxt);
3802*38fd1498Szrj }
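
/* Editorial note: make_pass_* factories such as the one above are how
   the pass manager obtains pass instances; in GCC the pass is listed
   in passes.def roughly as

     NEXT_PASS (pass_live_range_shrinkage);

   which the pass manager expands into a call to this factory.  */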
3803*38fd1498Szrj 
3804*38fd1498Szrj namespace {
3805*38fd1498Szrj 
3806*38fd1498Szrj const pass_data pass_data_sched =
3807*38fd1498Szrj {
3808*38fd1498Szrj   RTL_PASS, /* type */
3809*38fd1498Szrj   "sched1", /* name */
3810*38fd1498Szrj   OPTGROUP_NONE, /* optinfo_flags */
3811*38fd1498Szrj   TV_SCHED, /* tv_id */
3812*38fd1498Szrj   0, /* properties_required */
3813*38fd1498Szrj   0, /* properties_provided */
3814*38fd1498Szrj   0, /* properties_destroyed */
3815*38fd1498Szrj   0, /* todo_flags_start */
3816*38fd1498Szrj   TODO_df_finish, /* todo_flags_finish */
3817*38fd1498Szrj };
3818*38fd1498Szrj 
3819*38fd1498Szrj class pass_sched : public rtl_opt_pass
3820*38fd1498Szrj {
3821*38fd1498Szrj public:
3822*38fd1498Szrj   pass_sched (gcc::context *ctxt)
3823*38fd1498Szrj     : rtl_opt_pass (pass_data_sched, ctxt)
3824*38fd1498Szrj   {}
3825*38fd1498Szrj 
3826*38fd1498Szrj   /* opt_pass methods: */
3827*38fd1498Szrj   virtual bool gate (function *);
3828*38fd1498Szrj   virtual unsigned int execute (function *) { return rest_of_handle_sched (); }
3829*38fd1498Szrj 
3830*38fd1498Szrj }; // class pass_sched
3831*38fd1498Szrj 
3832*38fd1498Szrj bool
3833*38fd1498Szrj pass_sched::gate (function *)
3834*38fd1498Szrj {
3835*38fd1498Szrj #ifdef INSN_SCHEDULING
3836*38fd1498Szrj   return optimize > 0 && flag_schedule_insns && dbg_cnt (sched_func);
3837*38fd1498Szrj #else
3838*38fd1498Szrj   return 0;
3839*38fd1498Szrj #endif
3840*38fd1498Szrj }
3841*38fd1498Szrj 
3842*38fd1498Szrj } // anon namespace
3843*38fd1498Szrj 
3844*38fd1498Szrj rtl_opt_pass *
3845*38fd1498Szrj make_pass_sched (gcc::context *ctxt)
3846*38fd1498Szrj {
3847*38fd1498Szrj   return new pass_sched (ctxt);
3848*38fd1498Szrj }
3849*38fd1498Szrj 
3850*38fd1498Szrj namespace {
3851*38fd1498Szrj 
3852*38fd1498Szrj const pass_data pass_data_sched2 =
3853*38fd1498Szrj {
3854*38fd1498Szrj   RTL_PASS, /* type */
3855*38fd1498Szrj   "sched2", /* name */
3856*38fd1498Szrj   OPTGROUP_NONE, /* optinfo_flags */
3857*38fd1498Szrj   TV_SCHED2, /* tv_id */
3858*38fd1498Szrj   0, /* properties_required */
3859*38fd1498Szrj   0, /* properties_provided */
3860*38fd1498Szrj   0, /* properties_destroyed */
3861*38fd1498Szrj   0, /* todo_flags_start */
3862*38fd1498Szrj   TODO_df_finish, /* todo_flags_finish */
3863*38fd1498Szrj };
3864*38fd1498Szrj 
3865*38fd1498Szrj class pass_sched2 : public rtl_opt_pass
3866*38fd1498Szrj {
3867*38fd1498Szrj public:
3868*38fd1498Szrj   pass_sched2 (gcc::context *ctxt)
3869*38fd1498Szrj     : rtl_opt_pass (pass_data_sched2, ctxt)
3870*38fd1498Szrj   {}
3871*38fd1498Szrj 
3872*38fd1498Szrj   /* opt_pass methods: */
3873*38fd1498Szrj   virtual bool gate (function *);
3874*38fd1498Szrj   virtual unsigned int execute (function *)
3875*38fd1498Szrj     {
3876*38fd1498Szrj       return rest_of_handle_sched2 ();
3877*38fd1498Szrj     }
3878*38fd1498Szrj 
3879*38fd1498Szrj }; // class pass_sched2
3880*38fd1498Szrj 
3881*38fd1498Szrj bool
3882*38fd1498Szrj pass_sched2::gate (function *)
3883*38fd1498Szrj {
3884*38fd1498Szrj #ifdef INSN_SCHEDULING
3885*38fd1498Szrj   return optimize > 0 && flag_schedule_insns_after_reload
3886*38fd1498Szrj     && !targetm.delay_sched2 && dbg_cnt (sched2_func);
3887*38fd1498Szrj #else
3888*38fd1498Szrj   return 0;
3889*38fd1498Szrj #endif
3890*38fd1498Szrj }
3891*38fd1498Szrj 
3892*38fd1498Szrj } // anon namespace
3893*38fd1498Szrj 
3894*38fd1498Szrj rtl_opt_pass *
3895*38fd1498Szrj make_pass_sched2 (gcc::context *ctxt)
3896*38fd1498Szrj {
3897*38fd1498Szrj   return new pass_sched2 (ctxt);
3898*38fd1498Szrj }
3899*38fd1498Szrj 
3900*38fd1498Szrj namespace {
3901*38fd1498Szrj 
3902*38fd1498Szrj const pass_data pass_data_sched_fusion =
3903*38fd1498Szrj {
3904*38fd1498Szrj   RTL_PASS, /* type */
3905*38fd1498Szrj   "sched_fusion", /* name */
3906*38fd1498Szrj   OPTGROUP_NONE, /* optinfo_flags */
3907*38fd1498Szrj   TV_SCHED_FUSION, /* tv_id */
3908*38fd1498Szrj   0, /* properties_required */
3909*38fd1498Szrj   0, /* properties_provided */
3910*38fd1498Szrj   0, /* properties_destroyed */
3911*38fd1498Szrj   0, /* todo_flags_start */
3912*38fd1498Szrj   TODO_df_finish, /* todo_flags_finish */
3913*38fd1498Szrj };
3914*38fd1498Szrj 
3915*38fd1498Szrj class pass_sched_fusion : public rtl_opt_pass
3916*38fd1498Szrj {
3917*38fd1498Szrj public:
3918*38fd1498Szrj   pass_sched_fusion (gcc::context *ctxt)
3919*38fd1498Szrj     : rtl_opt_pass (pass_data_sched_fusion, ctxt)
3920*38fd1498Szrj   {}
3921*38fd1498Szrj 
3922*38fd1498Szrj   /* opt_pass methods: */
3923*38fd1498Szrj   virtual bool gate (function *);
3924*38fd1498Szrj   virtual unsigned int execute (function *)
3925*38fd1498Szrj     {
3926*38fd1498Szrj       return rest_of_handle_sched_fusion ();
3927*38fd1498Szrj     }
3928*38fd1498Szrj 
3929*38fd1498Szrj }; // class pass_sched_fusion
3930*38fd1498Szrj 
3931*38fd1498Szrj bool
3932*38fd1498Szrj pass_sched_fusion::gate (function *)
3933*38fd1498Szrj {
3934*38fd1498Szrj #ifdef INSN_SCHEDULING
3935*38fd1498Szrj   /* Scheduling fusion relies on peephole2 to do real fusion work,
3936*38fd1498Szrj      so only enable it if peephole2 is in effect.  */
3937*38fd1498Szrj   return (optimize > 0 && flag_peephole2
3938*38fd1498Szrj     && flag_schedule_fusion && targetm.sched.fusion_priority != NULL);
3939*38fd1498Szrj #else
3940*38fd1498Szrj   return 0;
3941*38fd1498Szrj #endif
3942*38fd1498Szrj }
3943*38fd1498Szrj 
3944*38fd1498Szrj } // anon namespace
3945*38fd1498Szrj 
3946*38fd1498Szrj rtl_opt_pass *
3947*38fd1498Szrj make_pass_sched_fusion (gcc::context *ctxt)
3948*38fd1498Szrj {
3949*38fd1498Szrj   return new pass_sched_fusion (ctxt);
3950*38fd1498Szrj }
3951