/* Instruction scheduling pass.
   Copyright (C) 1992-2018 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com).  Enhanced by,
   and currently maintained by, Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not, see
<http://www.gnu.org/licenses/>.  */

/* This pass implements list scheduling within basic blocks.  It is
   run twice: (1) after flow analysis, but before register allocation,
   and (2) after register allocation.

   The first run performs interblock scheduling, moving insns between
   different blocks in the same "region"; the second run performs only
   basic-block scheduling.

   The interblock motions performed are useful motions and speculative
   motions, including speculative loads.  Motions requiring code
   duplication are not supported.  Identifying the type of a motion
   and checking the validity of speculative motions require the
   construction and analysis of the function's control flow graph.

   The main entry point for this pass is schedule_insns (), called for
   each function.  The work of the scheduler is organized in three
   levels: (1) function level: insns are subject to splitting, the
   control flow graph is constructed, and regions are computed (after
   reload, each region consists of a single block), (2) region level:
   the control flow graph attributes required for interblock scheduling
   are computed (dominators, reachability, etc.), and data dependences
   and priorities are computed, and (3) block level: insns in the block
   are actually scheduled.  */
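
/* As a reading aid, a rough sketch of the overall control flow
   (function names are the ones used in this file; many details are
   elided, so treat this as approximate):

     schedule_insns ()
       sched_rgn_init ()          compute regions via find_rgns ()
       for each region RGN
         schedule_region (RGN)    deps, priorities, block scheduling
       sched_rgn_finish ()        release region data  */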

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "profile.h"
#include "insn-attr.h"
#include "except.h"
#include "params.h"
#include "cfganal.h"
#include "sched-int.h"
#include "sel-sched.h"
#include "tree-pass.h"
#include "dbgcnt.h"
#include "pretty-print.h"
#include "print-rtl.h"

#ifdef INSN_SCHEDULING

/* Some accessor macros for h_i_d members only used within this file.  */
#define FED_BY_SPEC_LOAD(INSN) (HID (INSN)->fed_by_spec_load)
#define IS_LOAD_INSN(INSN) (HID (INSN)->is_load_insn)

/* nr_inter/nr_spec count the interblock/speculative motions for the
   function.  */
static int nr_inter, nr_spec;

static int is_cfg_nonregular (void);

/* Number of regions in the procedure.  */
int nr_regions = 0;

/* Same as above before adding any new regions.  */
static int nr_regions_initial = 0;

/* Table of region descriptions.  */
region *rgn_table = NULL;

/* Array of lists of regions' blocks.  */
int *rgn_bb_table = NULL;

/* Topological order of blocks in the region (if b2 is reachable from
   b1, block_to_bb[b2] > block_to_bb[b1]).  Note: A basic block is
   always referred to by either block or b, while its topological
   order name (in the region) is referred to by bb.  */
int *block_to_bb = NULL;
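
/* For example (illustrative): if the blocks of some region appear in
   rgn_bb_table in the order {7, 3, 9}, then block_to_bb[7] == 0,
   block_to_bb[3] == 1 and block_to_bb[9] == 2.  */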

/* The number of the region containing a block.  */
int *containing_rgn = NULL;

/* ebb_head[i] is the index in rgn_bb_table of the head basic block of
   the i'th ebb.  Currently we can get an ebb only through splitting of
   the block being scheduled, therefore we don't need an ebb_head array
   for every region; it is sufficient to hold it for the current one
   only.  */
int *ebb_head = NULL;

/* The minimum probability of reaching a source block so that it will be
   considered for speculative scheduling.  */
static int min_spec_prob;

static void find_single_block_region (bool);
static void find_rgns (void);
static bool too_large (int, int *, int *);

/* Blocks of the current region being scheduled.  */
int current_nr_blocks;
int current_blocks;

/* A speculative motion requires checking live information on the path
   from 'source' to 'target'.  The split blocks are those to be checked.
   After a speculative motion, live information should be modified in
   the 'update' blocks.

   Lists of split and update blocks for each candidate of the current
   target are in array bblst_table.  */
static basic_block *bblst_table;
static int bblst_size, bblst_last;

/* Arrays that hold the DFA state at the end of a basic block, to re-use
   as the initial state at the start of successor blocks.  The BB_STATE
   array holds the actual DFA state, and BB_STATE_ARRAY[I] is a pointer
   into BB_STATE for basic block I.  FIXME: This should be a vec.  */
static char *bb_state_array = NULL;
static state_t *bb_state = NULL;

/* Target info declarations.

   The block currently being scheduled is referred to as the "target"
   block, while other blocks in the region from which insns can be moved
   to the target are called "source" blocks.  The candidate structure
   holds info about such sources: are they valid?  Speculative?  Etc.  */
struct bblst
{
  basic_block *first_member;
  int nr_members;
};

struct candidate
{
  char is_valid;
  char is_speculative;
  int src_prob;
  bblst split_bbs;
  bblst update_bbs;
};

static candidate *candidate_table;
#define IS_VALID(src) (candidate_table[src].is_valid)
#define IS_SPECULATIVE(src) (candidate_table[src].is_speculative)
#define IS_SPECULATIVE_INSN(INSN) \
  (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
#define SRC_PROB(src) (candidate_table[src].src_prob)

/* The bb being currently scheduled.  */
int target_bb;

/* List of edges.  */
struct edgelst
{
  edge *first_member;
  int nr_members;
};

static edge *edgelst_table;
static int edgelst_last;

static void extract_edgelst (sbitmap, edgelst *);

/* Target info functions.  */
static void split_edges (int, int, edgelst *);
static void compute_trg_info (int);
void debug_candidate (int);
void debug_candidates (int);

/* Dominators array: dom[i] contains the sbitmap of dominators of
   bb i in the region.  */
static sbitmap *dom;

/* bb 0 is the only region entry.  */
#define IS_RGN_ENTRY(bb) (!bb)

/* Is bb_src dominated by bb_trg?  */
#define IS_DOMINATED(bb_src, bb_trg) \
  (bitmap_bit_p (dom[bb_src], bb_trg))

/* Probability: Prob[i] is an int in [0, REG_BR_PROB_BASE] which is
   the probability of bb i relative to the region entry.  */
static int *prob;

/* Bit-set of edges, where bit i stands for edge i.  */
typedef sbitmap edgeset;

/* Number of edges in the region.  */
static int rgn_nr_edges;

/* Array of size rgn_nr_edges.  */
static edge *rgn_edges;

/* Mapping from each edge in the graph to its number in the region.  */
#define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux)
#define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr))
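
/* These are typically initialized when a region is set up for
   scheduling, along the lines of (a sketch):

     for (i = 0; i < rgn_nr_edges; i++)
       SET_EDGE_TO_BIT (rgn_edges[i], i);

   after which bit I of an edgeset stands for rgn_edges[I], and
   EDGE_TO_BIT is the inverse mapping.  */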

/* The split edges of a source bb are different for each target bb.  In
   order to compute this efficiently, the 'potential-split edges' are
   computed for each bb prior to scheduling a region.  These are
   actually the split edges of each bb relative to the region entry.

   pot_split[bb] is the set of potential split edges of bb.  */
static edgeset *pot_split;

/* For every bb, a set of its ancestor edges.  */
static edgeset *ancestor_edges;

#define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))

/* Speculative scheduling functions.  */
static int check_live_1 (int, rtx);
static void update_live_1 (int, rtx);
static int is_pfree (rtx, int, int);
static int find_conditional_protection (rtx_insn *, int);
static int is_conditionally_protected (rtx, int, int);
static int is_prisky (rtx, int, int);
static int is_exception_free (rtx_insn *, int, int);

static bool sets_likely_spilled (rtx);
static void sets_likely_spilled_1 (rtx, const_rtx, void *);
static void add_branch_dependences (rtx_insn *, rtx_insn *);
static void compute_block_dependences (int);

static void schedule_region (int);
static void concat_insn_mem_list (rtx_insn_list *, rtx_expr_list *,
                                  rtx_insn_list **, rtx_expr_list **);
static void propagate_deps (int, struct deps_desc *);
static void free_pending_lists (void);

/* Functions for construction of the control flow graph.  */

/* Return 1 if the control flow graph should not be constructed,
   0 otherwise.

   We decide not to build the control flow graph if there is possibly
   more than one entry to the function, if computed branches exist, if
   we have nonlocal gotos, or if we have an unreachable loop.  */

static int
is_cfg_nonregular (void)
{
  basic_block b;
  rtx_insn *insn;

  /* If we have a label that could be the target of a nonlocal goto, then
     the cfg is not well structured.  */
  if (nonlocal_goto_handler_labels)
    return 1;

  /* If we have any forced labels, then the cfg is not well structured.  */
  if (forced_labels)
    return 1;

  /* If we have exception handlers, then we consider the cfg not well
     structured.  ?!?  We should be able to handle this now that we
     compute an accurate cfg for EH.  */
  if (current_function_has_exception_handlers ())
    return 1;

  /* If we have insns which refer to labels as non-jumped-to operands,
     then we consider the cfg not well structured.  */
  FOR_EACH_BB_FN (b, cfun)
    FOR_BB_INSNS (b, insn)
      {
        rtx note, set, dest;
        rtx_insn *next;

        /* If this function has a computed jump, then we consider the cfg
           not well structured.  */
        if (JUMP_P (insn) && computed_jump_p (insn))
          return 1;

        if (!INSN_P (insn))
          continue;

        note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
        if (note == NULL_RTX)
          continue;

        /* For that label not to be seen as a referred-to label, this
           must be a single-set which is feeding a jump *only*.  This
           could be a conditional jump with the label split off for
           machine-specific reasons or a casesi/tablejump.  */
        next = next_nonnote_insn (insn);
        if (next == NULL_RTX
            || !JUMP_P (next)
            || (JUMP_LABEL (next) != XEXP (note, 0)
                && find_reg_note (next, REG_LABEL_TARGET,
                                  XEXP (note, 0)) == NULL_RTX)
            || BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (next))
          return 1;

        set = single_set (insn);
        if (set == NULL_RTX)
          return 1;

        dest = SET_DEST (set);
        if (!REG_P (dest) || !dead_or_set_p (next, dest))
          return 1;
      }

  /* Unreachable loops with more than one basic block are detected
     during the DFS traversal in find_rgns.

     Unreachable loops with a single block are detected here.  This
     test is redundant with the one in find_rgns, but it's much
     cheaper to go ahead and catch the trivial case here.  */
  FOR_EACH_BB_FN (b, cfun)
    {
      if (EDGE_COUNT (b->preds) == 0
          || (single_pred_p (b)
              && single_pred (b) == b))
        return 1;
    }

  /* All the tests passed.  Consider the cfg well structured.  */
  return 0;
}

/* Extract the list of edges from a bitmap containing EDGE_TO_BIT bits.  */

static void
extract_edgelst (sbitmap set, edgelst *el)
{
  unsigned int i = 0;
  sbitmap_iterator sbi;

  /* edgelst table space is reused in each call to extract_edgelst.  */
  edgelst_last = 0;

  el->first_member = &edgelst_table[edgelst_last];
  el->nr_members = 0;

  /* Iterate over each set bit in the bitmap.  */
  EXECUTE_IF_SET_IN_BITMAP (set, 0, i, sbi)
    {
      edgelst_table[edgelst_last++] = rgn_edges[i];
      el->nr_members++;
    }
}
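
/* A typical use of extract_edgelst (a sketch): split_edges computes
   the set difference pot_split[bb_src] - pot_split[bb_trg] into a
   scratch edgeset and then extracts it into the candidate's list of
   split edges:

     bitmap_and_compl (src, pot_split[bb_src], pot_split[bb_trg]);
     extract_edgelst (src, el);  */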

/* Functions for the construction of regions.  */

/* Print the regions, for debugging purposes.  Callable from debugger.  */

DEBUG_FUNCTION void
debug_regions (void)
{
  int rgn, bb;

  fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
  for (rgn = 0; rgn < nr_regions; rgn++)
    {
      fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
               rgn_table[rgn].rgn_nr_blocks);
      fprintf (sched_dump, ";;\tbb/block: ");

      /* We don't have ebb_head initialized yet, so we can't use
         BB_TO_BLOCK ().  */
      current_blocks = RGN_BLOCKS (rgn);

      for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
        fprintf (sched_dump, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);

      fprintf (sched_dump, "\n\n");
    }
}

/* Print the region's basic blocks.  */

DEBUG_FUNCTION void
debug_region (int rgn)
{
  int bb;

  fprintf (stderr, "\n;; ------------ REGION %d ----------\n\n", rgn);
  fprintf (stderr, ";;\trgn %d nr_blocks %d:\n", rgn,
           rgn_table[rgn].rgn_nr_blocks);
  fprintf (stderr, ";;\tbb/block: ");

  /* We don't have ebb_head initialized yet, so we can't use
     BB_TO_BLOCK ().  */
  current_blocks = RGN_BLOCKS (rgn);

  for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
    fprintf (stderr, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);

  fprintf (stderr, "\n\n");

  for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
    {
      dump_bb (stderr,
               BASIC_BLOCK_FOR_FN (cfun, rgn_bb_table[current_blocks + bb]),
               0, TDF_SLIM | TDF_BLOCKS);
      fprintf (stderr, "\n");
    }

  fprintf (stderr, "\n");
}

/* True when the bb with index BB_INDEX is contained in region RGN.  */
static bool
bb_in_region_p (int bb_index, int rgn)
{
  int i;

  for (i = 0; i < rgn_table[rgn].rgn_nr_blocks; i++)
    if (rgn_bb_table[current_blocks + i] == bb_index)
      return true;

  return false;
}

/* Dump region RGN to file F using dot syntax.  */
void
dump_region_dot (FILE *f, int rgn)
{
  int i;

  fprintf (f, "digraph Region_%d {\n", rgn);

  /* We don't have ebb_head initialized yet, so we can't use
     BB_TO_BLOCK ().  */
  current_blocks = RGN_BLOCKS (rgn);

  for (i = 0; i < rgn_table[rgn].rgn_nr_blocks; i++)
    {
      edge e;
      edge_iterator ei;
      int src_bb_num = rgn_bb_table[current_blocks + i];
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, src_bb_num);

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (bb_in_region_p (e->dest->index, rgn))
          fprintf (f, "\t%d -> %d\n", src_bb_num, e->dest->index);
    }
  fprintf (f, "}\n");
}

/* The same, but first open a file specified by FNAME.  */
void
dump_region_dot_file (const char *fname, int rgn)
{
  FILE *f = fopen (fname, "wt");
  dump_region_dot (f, rgn);
  fclose (f);
}

/* Build a single block region for each basic block in the function.
   This allows for using the same code for interblock and basic block
   scheduling.  */

static void
find_single_block_region (bool ebbs_p)
{
  basic_block bb, ebb_start;
  int i = 0;

  nr_regions = 0;

  if (ebbs_p)
    {
      int probability_cutoff;
      if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
        probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK);
      else
        probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY);
      probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;

      FOR_EACH_BB_FN (ebb_start, cfun)
        {
          RGN_NR_BLOCKS (nr_regions) = 0;
          RGN_BLOCKS (nr_regions) = i;
          RGN_DONT_CALC_DEPS (nr_regions) = 0;
          RGN_HAS_REAL_EBB (nr_regions) = 0;

          for (bb = ebb_start; ; bb = bb->next_bb)
            {
              edge e;

              rgn_bb_table[i] = bb->index;
              RGN_NR_BLOCKS (nr_regions)++;
              CONTAINING_RGN (bb->index) = nr_regions;
              BLOCK_TO_BB (bb->index) = i - RGN_BLOCKS (nr_regions);
              i++;

              if (bb->next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
                  || LABEL_P (BB_HEAD (bb->next_bb)))
                break;

              e = find_fallthru_edge (bb->succs);
              if (! e)
                break;
              if (e->probability.initialized_p ()
                  && e->probability.to_reg_br_prob_base () <= probability_cutoff)
                break;
            }

          ebb_start = bb;
          nr_regions++;
        }
    }
  else
    FOR_EACH_BB_FN (bb, cfun)
      {
        rgn_bb_table[nr_regions] = bb->index;
        RGN_NR_BLOCKS (nr_regions) = 1;
        RGN_BLOCKS (nr_regions) = nr_regions;
        RGN_DONT_CALC_DEPS (nr_regions) = 0;
        RGN_HAS_REAL_EBB (nr_regions) = 0;

        CONTAINING_RGN (bb->index) = nr_regions;
        BLOCK_TO_BB (bb->index) = 0;
        nr_regions++;
      }
}

/* Estimate the number of insns in BB.  */
static int
rgn_estimate_number_of_insns (basic_block bb)
{
  int count;

  count = INSN_LUID (BB_END (bb)) - INSN_LUID (BB_HEAD (bb));

  if (MAY_HAVE_DEBUG_INSNS)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
        if (DEBUG_INSN_P (insn))
          count--;
    }

  return count;
}

/* Update the number of blocks and the estimate of the number of insns
   in the region.  Return true if the region is "too large" for
   interblock scheduling (compile time considerations).  */

static bool
too_large (int block, int *num_bbs, int *num_insns)
{
  (*num_bbs)++;
  (*num_insns) += (common_sched_info->estimate_number_of_insns
                   (BASIC_BLOCK_FOR_FN (cfun, block)));

  return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
          || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
}
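
/* As an illustration: with the default parameter values (at the time
   of writing, max-sched-region-blocks is 10 and max-sched-region-insns
   is 100 in params.def), a region stops growing as soon as it would
   span more than 10 blocks or more than an estimated 100 insns.  */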

/* UPDATE_LOOP_RELATIONS (blk, hdr): Check if the loop headed by
   max_hdr[blk] is still an inner loop.  Put in max_hdr[blk] the header
   of the innermost loop containing blk.  */
#define UPDATE_LOOP_RELATIONS(blk, hdr) \
  { \
    if (max_hdr[blk] == -1) \
      max_hdr[blk] = hdr; \
    else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
      bitmap_clear_bit (inner, hdr); \
    else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
      { \
        bitmap_clear_bit (inner, max_hdr[blk]); \
        max_hdr[blk] = hdr; \
      } \
  }
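
/* A worked example (illustrative): suppose BLK sits inside two nested
   natural loops with headers H1 (outer) and H2 (inner), so that the
   DFS visits H1 first and dfs_nr[H1] < dfs_nr[H2].  Whichever order
   the two back edges are processed in, UPDATE_LOOP_RELATIONS leaves
   max_hdr[BLK] == H2 and clears H1 from INNER, recording that H1 is
   no longer an innermost loop.  */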

/* Find regions for interblock scheduling.

   A region for scheduling can be:

     * A loop-free procedure, or

     * A reducible inner loop, or

     * A basic block not contained in any other region.

   ?!? In theory we could build other regions based on extended basic
   blocks or reverse extended basic blocks.  Is it worth the trouble?

   Loop blocks that form a region are put into the region's block list
   in topological order.

   This procedure stores its results into the following global (ick)
   variables:

     * nr_regions
     * rgn_table
     * rgn_bb_table
     * block_to_bb
     * containing_rgn

   We use dominator relationships to avoid making regions out of
   non-reducible loops.

   This procedure needs to be converted to work on pred/succ lists
   instead of edge tables.  That would simplify it somewhat.  */

static void
haifa_find_rgns (void)
{
  int *max_hdr, *dfs_nr, *degree;
  char no_loops = 1;
  int node, child, loop_head, i, head, tail;
  int count = 0, sp, idx = 0;
  edge_iterator current_edge;
  edge_iterator *stack;
  int num_bbs, num_insns, unreachable;
  int too_large_failure;
  basic_block bb;

  /* Perform a DFS traversal of the cfg.  Identify loop headers, inner
     loops and a mapping from a block to its loop header (if the block
     is contained in a loop, else -1).

     Store results in HEADER, INNER, and MAX_HDR respectively; these
     will be used as inputs to the second traversal.

     STACK, SP and DFS_NR are only used during the first traversal.  */

  /* Allocate and initialize variables for the first traversal.  */
  max_hdr = XNEWVEC (int, last_basic_block_for_fn (cfun));
  dfs_nr = XCNEWVEC (int, last_basic_block_for_fn (cfun));
  stack = XNEWVEC (edge_iterator, n_edges_for_fn (cfun));

  /* Note if a block is a natural inner loop header.  */
  auto_sbitmap inner (last_basic_block_for_fn (cfun));
  bitmap_ones (inner);

  /* Note if a block is a natural loop header.  */
  auto_sbitmap header (last_basic_block_for_fn (cfun));
  bitmap_clear (header);

  /* Note if a block is in the block queue.  */
  auto_sbitmap in_queue (last_basic_block_for_fn (cfun));
  bitmap_clear (in_queue);

  /* Note if a block is on the DFS stack.  */
  auto_sbitmap in_stack (last_basic_block_for_fn (cfun));
  bitmap_clear (in_stack);

  for (i = 0; i < last_basic_block_for_fn (cfun); i++)
    max_hdr[i] = -1;

#define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux)
#define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E)))

  /* DFS traversal to find inner loops in the cfg.  */

  current_edge = ei_start (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))->succs);
  sp = -1;

  while (1)
    {
      if (EDGE_PASSED (current_edge))
        {
          /* We have reached a leaf node or a node that was already
             processed.  Pop edges off the stack until we find
             an edge that has not yet been processed.  */
          while (sp >= 0 && EDGE_PASSED (current_edge))
            {
              /* Pop entry off the stack.  */
              current_edge = stack[sp--];
              node = ei_edge (current_edge)->src->index;
              gcc_assert (node != ENTRY_BLOCK);
              child = ei_edge (current_edge)->dest->index;
              gcc_assert (child != EXIT_BLOCK);
              bitmap_clear_bit (in_stack, child);
              if (max_hdr[child] >= 0 && bitmap_bit_p (in_stack, max_hdr[child]))
                UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
              ei_next (&current_edge);
            }

          /* See if we have finished the DFS tree traversal.  */
          if (sp < 0 && EDGE_PASSED (current_edge))
            break;

          /* Nope, continue the traversal with the popped node.  */
          continue;
        }

      /* Process a node.  */
      node = ei_edge (current_edge)->src->index;
      gcc_assert (node != ENTRY_BLOCK);
      bitmap_set_bit (in_stack, node);
      dfs_nr[node] = ++count;

      /* We don't traverse to the exit block.  */
      child = ei_edge (current_edge)->dest->index;
      if (child == EXIT_BLOCK)
        {
          SET_EDGE_PASSED (current_edge);
          ei_next (&current_edge);
          continue;
        }

      /* If the successor is in the stack, then we've found a loop.
         Mark the loop; if it is not a natural loop, it will be
         rejected during the second traversal.  */
      if (bitmap_bit_p (in_stack, child))
        {
          no_loops = 0;
          bitmap_set_bit (header, child);
          UPDATE_LOOP_RELATIONS (node, child);
          SET_EDGE_PASSED (current_edge);
          ei_next (&current_edge);
          continue;
        }

      /* If the child was already visited, then there is no need to visit
         it again.  Just update the loop relationships and restart
         with a new edge.  */
      if (dfs_nr[child])
        {
          if (max_hdr[child] >= 0 && bitmap_bit_p (in_stack, max_hdr[child]))
            UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
          SET_EDGE_PASSED (current_edge);
          ei_next (&current_edge);
          continue;
        }

      /* Push an entry on the stack and continue DFS traversal.  */
      stack[++sp] = current_edge;
      SET_EDGE_PASSED (current_edge);
      current_edge = ei_start (ei_edge (current_edge)->dest->succs);
    }

  /* Reset the ->aux field used by EDGE_PASSED.  */
  FOR_ALL_BB_FN (bb, cfun)
    {
      edge_iterator ei;
      edge e;
      FOR_EACH_EDGE (e, ei, bb->succs)
        e->aux = NULL;
    }

  /* Another check for unreachable blocks.  The earlier test in
     is_cfg_nonregular only finds unreachable blocks that do not
     form a loop.

     The DFS traversal will mark every block that is reachable from
     the entry node by placing a nonzero value in dfs_nr.  Thus if
     dfs_nr is zero for any block, then it must be unreachable.  */
  unreachable = 0;
  FOR_EACH_BB_FN (bb, cfun)
    if (dfs_nr[bb->index] == 0)
      {
        unreachable = 1;
        break;
      }

  /* Gross.  To avoid wasting memory, the second pass uses the dfs_nr array
     to hold degree counts.  */
  degree = dfs_nr;

  FOR_EACH_BB_FN (bb, cfun)
    degree[bb->index] = EDGE_COUNT (bb->preds);

  /* Do not perform region scheduling if there are any unreachable
     blocks.  */
  if (!unreachable)
    {
      int *queue, *degree1 = NULL;
      /* We use EXTENDED_RGN_HEADER as an addition to HEADER and put
         there basic blocks which are forced to be region heads.
         This is done to try to assemble a few smaller regions
         from a too_large region.  */
      sbitmap extended_rgn_header = NULL;
      bool extend_regions_p;

      if (no_loops)
        bitmap_set_bit (header, 0);

      /* Second traversal: find reducible inner loops and topologically
         sort the blocks of each region.  */

      queue = XNEWVEC (int, n_basic_blocks_for_fn (cfun));

      extend_regions_p = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS) > 0;
      if (extend_regions_p)
        {
          degree1 = XNEWVEC (int, last_basic_block_for_fn (cfun));
          extended_rgn_header =
            sbitmap_alloc (last_basic_block_for_fn (cfun));
          bitmap_clear (extended_rgn_header);
        }

      /* Find blocks which are inner loop headers.  We still have
         non-reducible loops to consider at this point.  */
      FOR_EACH_BB_FN (bb, cfun)
        {
          if (bitmap_bit_p (header, bb->index) && bitmap_bit_p (inner, bb->index))
            {
              edge e;
              edge_iterator ei;
              basic_block jbb;

              /* Now check that the loop is reducible.  We do this separate
                 from finding inner loops so that we do not find a reducible
                 loop which contains an inner non-reducible loop.

                 A simple way to find reducible/natural loops is to verify
                 that each block in the loop is dominated by the loop
                 header.

                 If there exists a block that is not dominated by the loop
                 header, then the block is reachable from outside the loop
                 and thus the loop is not a natural loop.  */
              FOR_EACH_BB_FN (jbb, cfun)
                {
                  /* First identify blocks in the loop, except for the loop
                     entry block.  */
                  if (bb->index == max_hdr[jbb->index] && bb != jbb)
                    {
                      /* Now verify that the block is dominated by the loop
                         header.  */
                      if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
                        break;
                    }
                }

              /* If we exited the loop early, then BB is the header of
                 a non-reducible loop and we should quit processing it
                 now.  */
              if (jbb != EXIT_BLOCK_PTR_FOR_FN (cfun))
                continue;

              /* BB is a header of an inner loop, or block 0 in a subroutine
                 with no loops at all.  */
              head = tail = -1;
              too_large_failure = 0;
              loop_head = max_hdr[bb->index];

              if (extend_regions_p)
                /* We save the degree in case we meet a too_large region
                   and cancel it.  We need a correct degree later when
                   calling extend_rgns.  */
                memcpy (degree1, degree,
                        last_basic_block_for_fn (cfun) * sizeof (int));

              /* Decrease the degree of all BB's successors for topological
                 ordering.  */
              FOR_EACH_EDGE (e, ei, bb->succs)
                if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
                  --degree[e->dest->index];

              /* Estimate # insns, and count # blocks in the region.  */
              num_bbs = 1;
              num_insns = common_sched_info->estimate_number_of_insns (bb);

              /* Find all loop latches (blocks with back edges to the loop
                 header), or all the leaf blocks if the cfg has no loops.

                 Place those blocks into the queue.  */
              if (no_loops)
                {
                  FOR_EACH_BB_FN (jbb, cfun)
                    /* Leaf nodes have only a single successor, which must
                       be EXIT_BLOCK.  */
                    if (single_succ_p (jbb)
                        && single_succ (jbb) == EXIT_BLOCK_PTR_FOR_FN (cfun))
                      {
                        queue[++tail] = jbb->index;
                        bitmap_set_bit (in_queue, jbb->index);

                        if (too_large (jbb->index, &num_bbs, &num_insns))
                          {
                            too_large_failure = 1;
                            break;
                          }
                      }
                }
              else
                {
                  edge e;

                  FOR_EACH_EDGE (e, ei, bb->preds)
                    {
                      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
                        continue;

                      node = e->src->index;

                      if (max_hdr[node] == loop_head && node != bb->index)
                        {
                          /* This is a loop latch.  */
                          queue[++tail] = node;
                          bitmap_set_bit (in_queue, node);

                          if (too_large (node, &num_bbs, &num_insns))
                            {
                              too_large_failure = 1;
                              break;
                            }
                        }
                    }
                }

              /* Now add all the blocks in the loop to the queue.

                 We know the loop is a natural loop; however the algorithm
                 above will not always mark certain blocks as being in the
                 loop.  Consider:
                    node    children
                     a        b,c
                     b        c
                     c        a,d
                     d        b

                 The algorithm in the DFS traversal may not mark B & D as part
                 of the loop (i.e. they will not have max_hdr set to A).

                 We know they cannot be loop latches (else they would have
                 had max_hdr set since they'd have a backedge to a dominator
                 block).  So we don't need them on the initial queue.

                 We know they are part of the loop because they are dominated
                 by the loop header and can be reached by a backwards walk of
                 the edges starting with nodes on the initial queue.

                 It is safe and desirable to include those nodes in the
                 loop/scheduling region.  To do so we would need to decrease
                 the degree of a node if it is the target of a backedge
                 within the loop itself as the node is placed in the queue.

                 We do not do this because I'm not sure that the actual
                 scheduling code will properly handle this case.  ?!?  */

              while (head < tail && !too_large_failure)
                {
                  edge e;
                  child = queue[++head];

                  FOR_EACH_EDGE (e, ei,
                                 BASIC_BLOCK_FOR_FN (cfun, child)->preds)
                    {
                      node = e->src->index;

                      /* See the discussion above about nodes not marked as
                         in this loop during the initial DFS traversal.  */
                      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
                          || max_hdr[node] != loop_head)
                        {
                          tail = -1;
                          break;
                        }
                      else if (!bitmap_bit_p (in_queue, node) && node != bb->index)
                        {
                          queue[++tail] = node;
                          bitmap_set_bit (in_queue, node);

                          if (too_large (node, &num_bbs, &num_insns))
                            {
                              too_large_failure = 1;
                              break;
                            }
                        }
                    }
                }

              if (tail >= 0 && !too_large_failure)
                {
                  /* Place the loop header into the list of region blocks.  */
                  degree[bb->index] = -1;
                  rgn_bb_table[idx] = bb->index;
                  RGN_NR_BLOCKS (nr_regions) = num_bbs;
                  RGN_BLOCKS (nr_regions) = idx++;
                  RGN_DONT_CALC_DEPS (nr_regions) = 0;
                  RGN_HAS_REAL_EBB (nr_regions) = 0;
                  CONTAINING_RGN (bb->index) = nr_regions;
                  BLOCK_TO_BB (bb->index) = count = 0;

                  /* Remove blocks from queue[] when their in degree
                     becomes zero.  Repeat until no blocks are left on the
                     list.  This produces a topological list of blocks in
                     the region.  */
                  while (tail >= 0)
                    {
                      if (head < 0)
                        head = tail;
                      child = queue[head];
                      if (degree[child] == 0)
                        {
                          edge e;

                          degree[child] = -1;
                          rgn_bb_table[idx++] = child;
                          BLOCK_TO_BB (child) = ++count;
                          CONTAINING_RGN (child) = nr_regions;
                          queue[head] = queue[tail--];

                          FOR_EACH_EDGE (e, ei,
                                         BASIC_BLOCK_FOR_FN (cfun,
                                                             child)->succs)
                            if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
                              --degree[e->dest->index];
                        }
                      else
                        --head;
                    }
                  ++nr_regions;
                }
              else if (extend_regions_p)
                {
                  /* Restore DEGREE.  */
                  int *t = degree;

                  degree = degree1;
                  degree1 = t;

                  /* And force successors of BB to be region heads.
                     This may provide several smaller regions instead
                     of one too_large region.  */
                  FOR_EACH_EDGE (e, ei, bb->succs)
                    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
                      bitmap_set_bit (extended_rgn_header, e->dest->index);
                }
            }
        }
      free (queue);

      if (extend_regions_p)
        {
          free (degree1);

          bitmap_ior (header, header, extended_rgn_header);
          sbitmap_free (extended_rgn_header);

          extend_rgns (degree, &idx, header, max_hdr);
        }
    }

  /* Any block that did not end up in a region is placed into a region
     by itself.  */
  FOR_EACH_BB_FN (bb, cfun)
    if (degree[bb->index] >= 0)
      {
        rgn_bb_table[idx] = bb->index;
        RGN_NR_BLOCKS (nr_regions) = 1;
        RGN_BLOCKS (nr_regions) = idx++;
        RGN_DONT_CALC_DEPS (nr_regions) = 0;
        RGN_HAS_REAL_EBB (nr_regions) = 0;
        CONTAINING_RGN (bb->index) = nr_regions++;
        BLOCK_TO_BB (bb->index) = 0;
      }

  free (max_hdr);
  free (degree);
  free (stack);
}

/* Wrapper function.
   If FLAG_SEL_SCHED_PIPELINING is set, then use a custom function to
   form regions.  Otherwise just call haifa_find_rgns.  */
static void
find_rgns (void)
{
  if (sel_sched_p () && flag_sel_sched_pipelining)
    sel_find_rgns ();
  else
    haifa_find_rgns ();
}

static int gather_region_statistics (int **);
static void print_region_statistics (int *, int, int *, int);

/* Calculate the histogram that shows the number of regions having the
   given number of basic blocks, and store it in the RSP array.  Return
   the size of this array.  */
static int
gather_region_statistics (int **rsp)
{
  int i, *a = 0, a_sz = 0;

  /* a[i] is the number of regions that have (i + 1) basic blocks.  */
  for (i = 0; i < nr_regions; i++)
    {
      int nr_blocks = RGN_NR_BLOCKS (i);

      gcc_assert (nr_blocks >= 1);

      if (nr_blocks > a_sz)
        {
          a = XRESIZEVEC (int, a, nr_blocks);
          do
            a[a_sz++] = 0;
          while (a_sz != nr_blocks);
        }

      a[nr_blocks - 1]++;
    }

  *rsp = a;
  return a_sz;
}

/* Print region statistics.  S1 and S2 denote the data before and after
   calling extend_rgns, respectively.  */
static void
print_region_statistics (int *s1, int s1_sz, int *s2, int s2_sz)
{
  int i;

  /* We iterate until s2_sz because extend_rgns does not decrease
     the maximal region size.  */
  for (i = 1; i < s2_sz; i++)
    {
      int n1, n2;

      n2 = s2[i];

      if (n2 == 0)
        continue;

      if (i >= s1_sz)
        n1 = 0;
      else
        n1 = s1[i];

      fprintf (sched_dump, ";; Region extension statistics: size %d: "
               "was %d + %d more\n", i + 1, n1, n2 - n1);
    }
}

/* Extend regions.
   DEGREE - Array of incoming edge counts, considering only the edges
     that don't have their sources in formed regions yet.
   IDXP - Pointer to the next available index in rgn_bb_table.
   HEADER - Set of all region heads.
   LOOP_HDR - Mapping from a block to its containing loop (two blocks
     can reside within one region if they have the same loop header).  */
void
extend_rgns (int *degree, int *idxp, sbitmap header, int *loop_hdr)
{
  int *order, i, rescan = 0, idx = *idxp, iter = 0, max_iter, *max_hdr;
  int nblocks = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  max_iter = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS);

  max_hdr = XNEWVEC (int, last_basic_block_for_fn (cfun));

  order = XNEWVEC (int, last_basic_block_for_fn (cfun));
  post_order_compute (order, false, false);

  for (i = nblocks - 1; i >= 0; i--)
    {
      int bbn = order[i];
      if (degree[bbn] >= 0)
        {
          max_hdr[bbn] = bbn;
          rescan = 1;
        }
      else
        /* This block was already processed in find_rgns.  */
        max_hdr[bbn] = -1;
    }

  /* The idea is to topologically walk through the CFG in top-down
     order.  During the traversal, if all the predecessors of a node
     are marked to be in the same region (they all have the same
     max_hdr), then the current node is also marked to be a part of
     that region.  Otherwise the node starts its own region.
     The CFG should be traversed until no further changes are made.  On
     each iteration the set of region heads is extended (the set of
     those blocks that have max_hdr[bbi] == bbi).  This set is upper
     bounded by the set of all basic blocks, thus the algorithm is
     guaranteed to terminate.  */
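
  /* For instance (illustrative): in a diamond A -> {B, C} -> D where A
     is a region head, none of B, C, D is, and all four blocks lie
     outside any loop, the first top-down pass sets max_hdr[B] and
     max_hdr[C] to A (each has the single predecessor A); D then sees
     both predecessors agreeing on A and joins the same region, so the
     whole diamond is assembled into one region below.  */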
1187*38fd1498Szrj
1188*38fd1498Szrj while (rescan && iter < max_iter)
1189*38fd1498Szrj {
1190*38fd1498Szrj rescan = 0;
1191*38fd1498Szrj
1192*38fd1498Szrj for (i = nblocks - 1; i >= 0; i--)
1193*38fd1498Szrj {
1194*38fd1498Szrj edge e;
1195*38fd1498Szrj edge_iterator ei;
1196*38fd1498Szrj int bbn = order[i];
1197*38fd1498Szrj
1198*38fd1498Szrj if (max_hdr[bbn] != -1 && !bitmap_bit_p (header, bbn))
1199*38fd1498Szrj {
1200*38fd1498Szrj int hdr = -1;
1201*38fd1498Szrj
1202*38fd1498Szrj FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, bbn)->preds)
1203*38fd1498Szrj {
1204*38fd1498Szrj int predn = e->src->index;
1205*38fd1498Szrj
1206*38fd1498Szrj if (predn != ENTRY_BLOCK
1207*38fd1498Szrj /* If pred wasn't processed in find_rgns. */
1208*38fd1498Szrj && max_hdr[predn] != -1
1209*38fd1498Szrj /* And pred and bb reside in the same loop.
1210*38fd1498Szrj (Or out of any loop). */
1211*38fd1498Szrj && loop_hdr[bbn] == loop_hdr[predn])
1212*38fd1498Szrj {
1213*38fd1498Szrj if (hdr == -1)
1214*38fd1498Szrj /* Then bb extends the containing region of pred. */
1215*38fd1498Szrj hdr = max_hdr[predn];
1216*38fd1498Szrj else if (hdr != max_hdr[predn])
1217*38fd1498Szrj /* Too bad, there are at least two predecessors
1218*38fd1498Szrj that reside in different regions. Thus, BB should
1219*38fd1498Szrj begin its own region. */
1220*38fd1498Szrj {
1221*38fd1498Szrj hdr = bbn;
1222*38fd1498Szrj break;
1223*38fd1498Szrj }
1224*38fd1498Szrj }
1225*38fd1498Szrj else
1226*38fd1498Szrj /* BB starts its own region. */
1227*38fd1498Szrj {
1228*38fd1498Szrj hdr = bbn;
1229*38fd1498Szrj break;
1230*38fd1498Szrj }
1231*38fd1498Szrj }
1232*38fd1498Szrj
1233*38fd1498Szrj if (hdr == bbn)
1234*38fd1498Szrj {
1235*38fd1498Szrj /* If BB start its own region,
1236*38fd1498Szrj update set of headers with BB. */
1237*38fd1498Szrj bitmap_set_bit (header, bbn);
1238*38fd1498Szrj rescan = 1;
1239*38fd1498Szrj }
1240*38fd1498Szrj else
1241*38fd1498Szrj gcc_assert (hdr != -1);
1242*38fd1498Szrj
1243*38fd1498Szrj max_hdr[bbn] = hdr;
1244*38fd1498Szrj }
1245*38fd1498Szrj }
1246*38fd1498Szrj
1247*38fd1498Szrj iter++;
1248*38fd1498Szrj }
1249*38fd1498Szrj
1250*38fd1498Szrj /* Statistics were gathered on the SPEC2000 package of tests with
1251*38fd1498Szrj mainline weekly snapshot gcc-4.1-20051015 on ia64.
1252*38fd1498Szrj
1253*38fd1498Szrj Statistics for SPECint:
1254*38fd1498Szrj 1 iteration : 1751 cases (38.7%)
1255*38fd1498Szrj 2 iterations: 2770 cases (61.3%)
1256*38fd1498Szrj Blocks wrapped in regions by find_rgns without extension: 18295 blocks
1257*38fd1498Szrj Blocks wrapped in regions by 2 iterations in extend_rgns: 23821 blocks
1258*38fd1498Szrj (We don't count single block regions here).
1259*38fd1498Szrj
1260*38fd1498Szrj Statistics for SPECfp:
1261*38fd1498Szrj 1 iteration : 621 cases (35.9%)
1262*38fd1498Szrj 2 iterations: 1110 cases (64.1%)
1263*38fd1498Szrj Blocks wrapped in regions by find_rgns without extension: 6476 blocks
1264*38fd1498Szrj Blocks wrapped in regions by 2 iterations in extend_rgns: 11155 blocks
1265*38fd1498Szrj (We don't count single block regions here).
1266*38fd1498Szrj
1267*38fd1498Szrj By default we do at most 2 iterations.
1268*38fd1498Szrj This can be overridden with max-sched-extend-regions-iters parameter:
1269*38fd1498Szrj 0 - disable region extension,
1270*38fd1498Szrj N > 0 - do at most N iterations. */
1271*38fd1498Szrj
1272*38fd1498Szrj if (sched_verbose && iter != 0)
1273*38fd1498Szrj fprintf (sched_dump, ";; Region extension iterations: %d%s\n", iter,
1274*38fd1498Szrj rescan ? "... failed" : "");
1275*38fd1498Szrj
1276*38fd1498Szrj if (!rescan && iter != 0)
1277*38fd1498Szrj {
1278*38fd1498Szrj int *s1 = NULL, s1_sz = 0;
1279*38fd1498Szrj
1280*38fd1498Szrj /* Save the old statistics for later printout. */
1281*38fd1498Szrj if (sched_verbose >= 6)
1282*38fd1498Szrj s1_sz = gather_region_statistics (&s1);
1283*38fd1498Szrj
1284*38fd1498Szrj /* We have succeeded. Now assemble the regions. */
1285*38fd1498Szrj for (i = nblocks - 1; i >= 0; i--)
1286*38fd1498Szrj {
1287*38fd1498Szrj int bbn = order[i];
1288*38fd1498Szrj
1289*38fd1498Szrj if (max_hdr[bbn] == bbn)
1290*38fd1498Szrj /* BBN is a region head. */
1291*38fd1498Szrj {
1292*38fd1498Szrj edge e;
1293*38fd1498Szrj edge_iterator ei;
1294*38fd1498Szrj int num_bbs = 0, j, num_insns = 0, large;
1295*38fd1498Szrj
1296*38fd1498Szrj large = too_large (bbn, &num_bbs, &num_insns);
1297*38fd1498Szrj
1298*38fd1498Szrj degree[bbn] = -1;
1299*38fd1498Szrj rgn_bb_table[idx] = bbn;
1300*38fd1498Szrj RGN_BLOCKS (nr_regions) = idx++;
1301*38fd1498Szrj RGN_DONT_CALC_DEPS (nr_regions) = 0;
1302*38fd1498Szrj RGN_HAS_REAL_EBB (nr_regions) = 0;
1303*38fd1498Szrj CONTAINING_RGN (bbn) = nr_regions;
1304*38fd1498Szrj BLOCK_TO_BB (bbn) = 0;
1305*38fd1498Szrj
1306*38fd1498Szrj FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, bbn)->succs)
1307*38fd1498Szrj if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1308*38fd1498Szrj degree[e->dest->index]--;
1309*38fd1498Szrj
1310*38fd1498Szrj if (!large)
1311*38fd1498Szrj /* Check whether the region becomes too_large as the rest of its blocks are counted in. */
1312*38fd1498Szrj for (j = i - 1; j >= 0; j--)
1313*38fd1498Szrj {
1314*38fd1498Szrj int succn = order[j];
1315*38fd1498Szrj if (max_hdr[succn] == bbn)
1316*38fd1498Szrj {
1317*38fd1498Szrj if ((large = too_large (succn, &num_bbs, &num_insns)))
1318*38fd1498Szrj break;
1319*38fd1498Szrj }
1320*38fd1498Szrj }
1321*38fd1498Szrj
1322*38fd1498Szrj if (large)
1323*38fd1498Szrj /* If the region is too_large, then wrap every block of
1324*38fd1498Szrj the region into a single block region.
1325*38fd1498Szrj Here we wrap the region head only. Other blocks are
1326*38fd1498Szrj processed in the loop below. */
1327*38fd1498Szrj {
1328*38fd1498Szrj RGN_NR_BLOCKS (nr_regions) = 1;
1329*38fd1498Szrj nr_regions++;
1330*38fd1498Szrj }
1331*38fd1498Szrj
1332*38fd1498Szrj num_bbs = 1;
1333*38fd1498Szrj
1334*38fd1498Szrj for (j = i - 1; j >= 0; j--)
1335*38fd1498Szrj {
1336*38fd1498Szrj int succn = order[j];
1337*38fd1498Szrj
1338*38fd1498Szrj if (max_hdr[succn] == bbn)
1339*38fd1498Szrj /* This loop iterates over all basic blocks that
1340*38fd1498Szrj are supposed to be in the region with head BBN,
1341*38fd1498Szrj and wraps them into that region (or into
1342*38fd1498Szrj single block regions). */
1343*38fd1498Szrj {
1344*38fd1498Szrj gcc_assert (degree[succn] == 0);
1345*38fd1498Szrj
1346*38fd1498Szrj degree[succn] = -1;
1347*38fd1498Szrj rgn_bb_table[idx] = succn;
1348*38fd1498Szrj BLOCK_TO_BB (succn) = large ? 0 : num_bbs++;
1349*38fd1498Szrj CONTAINING_RGN (succn) = nr_regions;
1350*38fd1498Szrj
1351*38fd1498Szrj if (large)
1352*38fd1498Szrj /* Wrap SUCCN into a single block region. */
1353*38fd1498Szrj {
1354*38fd1498Szrj RGN_BLOCKS (nr_regions) = idx;
1355*38fd1498Szrj RGN_NR_BLOCKS (nr_regions) = 1;
1356*38fd1498Szrj RGN_DONT_CALC_DEPS (nr_regions) = 0;
1357*38fd1498Szrj RGN_HAS_REAL_EBB (nr_regions) = 0;
1358*38fd1498Szrj nr_regions++;
1359*38fd1498Szrj }
1360*38fd1498Szrj
1361*38fd1498Szrj idx++;
1362*38fd1498Szrj
1363*38fd1498Szrj FOR_EACH_EDGE (e, ei,
1364*38fd1498Szrj BASIC_BLOCK_FOR_FN (cfun, succn)->succs)
1365*38fd1498Szrj if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1366*38fd1498Szrj degree[e->dest->index]--;
1367*38fd1498Szrj }
1368*38fd1498Szrj }
1369*38fd1498Szrj
1370*38fd1498Szrj if (!large)
1371*38fd1498Szrj {
1372*38fd1498Szrj RGN_NR_BLOCKS (nr_regions) = num_bbs;
1373*38fd1498Szrj nr_regions++;
1374*38fd1498Szrj }
1375*38fd1498Szrj }
1376*38fd1498Szrj }
1377*38fd1498Szrj
1378*38fd1498Szrj if (sched_verbose >= 6)
1379*38fd1498Szrj {
1380*38fd1498Szrj int *s2, s2_sz;
1381*38fd1498Szrj
1382*38fd1498Szrj /* Get the new statistics and print the comparison with the
1383*38fd1498Szrj one before calling this function. */
1384*38fd1498Szrj s2_sz = gather_region_statistics (&s2);
1385*38fd1498Szrj print_region_statistics (s1, s1_sz, s2, s2_sz);
1386*38fd1498Szrj free (s1);
1387*38fd1498Szrj free (s2);
1388*38fd1498Szrj }
1389*38fd1498Szrj }
1390*38fd1498Szrj
1391*38fd1498Szrj free (order);
1392*38fd1498Szrj free (max_hdr);
1393*38fd1498Szrj
1394*38fd1498Szrj *idxp = idx;
1395*38fd1498Szrj }
1396*38fd1498Szrj
1397*38fd1498Szrj /* Functions for regions scheduling information. */
1398*38fd1498Szrj
1399*38fd1498Szrj /* Compute dominators, probability, and potential-split-edges of bb.
1400*38fd1498Szrj Assume that these values were already computed for bb's predecessors. */
1401*38fd1498Szrj
1402*38fd1498Szrj static void
1403*38fd1498Szrj compute_dom_prob_ps (int bb)
1404*38fd1498Szrj {
1405*38fd1498Szrj edge_iterator in_ei;
1406*38fd1498Szrj edge in_edge;
1407*38fd1498Szrj
1408*38fd1498Szrj /* We shouldn't have any real ebbs yet. */
1409*38fd1498Szrj gcc_assert (ebb_head [bb] == bb + current_blocks);
1410*38fd1498Szrj
1411*38fd1498Szrj if (IS_RGN_ENTRY (bb))
1412*38fd1498Szrj {
1413*38fd1498Szrj bitmap_set_bit (dom[bb], 0);
1414*38fd1498Szrj prob[bb] = REG_BR_PROB_BASE;
1415*38fd1498Szrj return;
1416*38fd1498Szrj }
1417*38fd1498Szrj
1418*38fd1498Szrj prob[bb] = 0;
1419*38fd1498Szrj
1420*38fd1498Szrj /* Initialize dom[bb] to '111..1'. */
1421*38fd1498Szrj bitmap_ones (dom[bb]);
1422*38fd1498Szrj
1423*38fd1498Szrj FOR_EACH_EDGE (in_edge, in_ei,
1424*38fd1498Szrj BASIC_BLOCK_FOR_FN (cfun, BB_TO_BLOCK (bb))->preds)
1425*38fd1498Szrj {
1426*38fd1498Szrj int pred_bb;
1427*38fd1498Szrj edge out_edge;
1428*38fd1498Szrj edge_iterator out_ei;
1429*38fd1498Szrj
1430*38fd1498Szrj if (in_edge->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1431*38fd1498Szrj continue;
1432*38fd1498Szrj
1433*38fd1498Szrj pred_bb = BLOCK_TO_BB (in_edge->src->index);
1434*38fd1498Szrj bitmap_and (dom[bb], dom[bb], dom[pred_bb]);
1435*38fd1498Szrj bitmap_ior (ancestor_edges[bb],
1436*38fd1498Szrj ancestor_edges[bb], ancestor_edges[pred_bb]);
1437*38fd1498Szrj
1438*38fd1498Szrj bitmap_set_bit (ancestor_edges[bb], EDGE_TO_BIT (in_edge));
1439*38fd1498Szrj
1440*38fd1498Szrj bitmap_ior (pot_split[bb], pot_split[bb], pot_split[pred_bb]);
1441*38fd1498Szrj
1442*38fd1498Szrj FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs)
1443*38fd1498Szrj bitmap_set_bit (pot_split[bb], EDGE_TO_BIT (out_edge));
1444*38fd1498Szrj
1445*38fd1498Szrj prob[bb] += combine_probabilities
1446*38fd1498Szrj (prob[pred_bb],
1447*38fd1498Szrj in_edge->probability.initialized_p ()
1448*38fd1498Szrj ? in_edge->probability.to_reg_br_prob_base ()
1449*38fd1498Szrj : 0);
1450*38fd1498Szrj // The rounding divide in combine_probabilities can result in an extra
1451*38fd1498Szrj // probability increment propagating along 50-50 edges. Eventually when
1452*38fd1498Szrj // the edges re-merge, the accumulated probability can go slightly above
1453*38fd1498Szrj // REG_BR_PROB_BASE.
1454*38fd1498Szrj if (prob[bb] > REG_BR_PROB_BASE)
1455*38fd1498Szrj prob[bb] = REG_BR_PROB_BASE;
1456*38fd1498Szrj }
1457*38fd1498Szrj
1458*38fd1498Szrj bitmap_set_bit (dom[bb], bb);
1459*38fd1498Szrj bitmap_and_compl (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
1460*38fd1498Szrj
1461*38fd1498Szrj if (sched_verbose >= 2)
1462*38fd1498Szrj fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1463*38fd1498Szrj (100 * prob[bb]) / REG_BR_PROB_BASE);
1464*38fd1498Szrj }
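
/* A worked example of the computation above (illustrative only, using a
   hypothetical diamond region): let entry A branch 50/50 to B and C,
   which both fall through to D.  Then prob[A] = REG_BR_PROB_BASE,
   prob[B] = prob[C] = REG_BR_PROB_BASE / 2, and the loop accumulates
   prob[D] = prob[B] + prob[C] = REG_BR_PROB_BASE, with the clamp
   absorbing any off-by-one from the rounding divide in
   combine_probabilities.  dom[D] is the intersection of dom[B] and
   dom[C] plus D itself, i.e. {A, D}.  */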
1465*38fd1498Szrj
1466*38fd1498Szrj /* Functions for target info. */
1467*38fd1498Szrj
1468*38fd1498Szrj /* Compute in BL the list of split-edges of bb_src relative to bb_trg.
1469*38fd1498Szrj Note that bb_trg dominates bb_src. */
1470*38fd1498Szrj
1471*38fd1498Szrj static void
1472*38fd1498Szrj split_edges (int bb_src, int bb_trg, edgelst *bl)
1473*38fd1498Szrj {
1474*38fd1498Szrj auto_sbitmap src (SBITMAP_SIZE (pot_split[bb_src]));
1475*38fd1498Szrj bitmap_copy (src, pot_split[bb_src]);
1476*38fd1498Szrj
1477*38fd1498Szrj bitmap_and_compl (src, src, pot_split[bb_trg]);
1478*38fd1498Szrj extract_edgelst (src, bl);
1479*38fd1498Szrj }
1480*38fd1498Szrj
1481*38fd1498Szrj /* Find the valid candidate-source-blocks for the target block TRG, compute
1482*38fd1498Szrj their probability, and check if they are speculative or not.
1483*38fd1498Szrj For speculative sources, compute their update-blocks and split-blocks. */
1484*38fd1498Szrj
1485*38fd1498Szrj static void
1486*38fd1498Szrj compute_trg_info (int trg)
1487*38fd1498Szrj {
1488*38fd1498Szrj candidate *sp;
1489*38fd1498Szrj edgelst el = { NULL, 0 };
1490*38fd1498Szrj int i, j, k, update_idx;
1491*38fd1498Szrj basic_block block;
1492*38fd1498Szrj edge_iterator ei;
1493*38fd1498Szrj edge e;
1494*38fd1498Szrj
1495*38fd1498Szrj candidate_table = XNEWVEC (candidate, current_nr_blocks);
1496*38fd1498Szrj
1497*38fd1498Szrj bblst_last = 0;
1498*38fd1498Szrj /* bblst_table holds split blocks and update blocks for each block after
1499*38fd1498Szrj the current one in the region. split blocks and update blocks are
1500*38fd1498Szrj the TO blocks of region edges, so there can be at most rgn_nr_edges
1501*38fd1498Szrj of them. */
1502*38fd1498Szrj bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1503*38fd1498Szrj bblst_table = XNEWVEC (basic_block, bblst_size);
1504*38fd1498Szrj
1505*38fd1498Szrj edgelst_last = 0;
1506*38fd1498Szrj edgelst_table = XNEWVEC (edge, rgn_nr_edges);
1507*38fd1498Szrj
1508*38fd1498Szrj /* Define some of the fields for the target bb as well. */
1509*38fd1498Szrj sp = candidate_table + trg;
1510*38fd1498Szrj sp->is_valid = 1;
1511*38fd1498Szrj sp->is_speculative = 0;
1512*38fd1498Szrj sp->src_prob = REG_BR_PROB_BASE;
1513*38fd1498Szrj
1514*38fd1498Szrj auto_sbitmap visited (last_basic_block_for_fn (cfun));
1515*38fd1498Szrj
1516*38fd1498Szrj for (i = trg + 1; i < current_nr_blocks; i++)
1517*38fd1498Szrj {
1518*38fd1498Szrj sp = candidate_table + i;
1519*38fd1498Szrj
1520*38fd1498Szrj sp->is_valid = IS_DOMINATED (i, trg);
1521*38fd1498Szrj if (sp->is_valid)
1522*38fd1498Szrj {
1523*38fd1498Szrj int tf = prob[trg], cf = prob[i];
1524*38fd1498Szrj
1525*38fd1498Szrj /* In CFGs with low-probability edges, TF can be zero. */
1526*38fd1498Szrj sp->src_prob = (tf ? GCOV_COMPUTE_SCALE (cf, tf) : 0);
1527*38fd1498Szrj sp->is_valid = (sp->src_prob >= min_spec_prob);
1528*38fd1498Szrj }
1529*38fd1498Szrj
1530*38fd1498Szrj if (sp->is_valid)
1531*38fd1498Szrj {
1532*38fd1498Szrj split_edges (i, trg, &el);
1533*38fd1498Szrj sp->is_speculative = (el.nr_members) ? 1 : 0;
1534*38fd1498Szrj if (sp->is_speculative && !flag_schedule_speculative)
1535*38fd1498Szrj sp->is_valid = 0;
1536*38fd1498Szrj }
1537*38fd1498Szrj
1538*38fd1498Szrj if (sp->is_valid)
1539*38fd1498Szrj {
1540*38fd1498Szrj /* Compute split blocks and store them in bblst_table.
1541*38fd1498Szrj The TO block of every split edge is a split block. */
1542*38fd1498Szrj sp->split_bbs.first_member = &bblst_table[bblst_last];
1543*38fd1498Szrj sp->split_bbs.nr_members = el.nr_members;
1544*38fd1498Szrj for (j = 0; j < el.nr_members; bblst_last++, j++)
1545*38fd1498Szrj bblst_table[bblst_last] = el.first_member[j]->dest;
1546*38fd1498Szrj sp->update_bbs.first_member = &bblst_table[bblst_last];
1547*38fd1498Szrj
1548*38fd1498Szrj /* Compute update blocks and store them in bblst_table.
1549*38fd1498Szrj For every split edge, look at the FROM block, and check
1550*38fd1498Szrj all out edges. For each out edge that is not a split edge,
1551*38fd1498Szrj add the TO block to the update block list. This list can end
1552*38fd1498Szrj up with a lot of duplicates. We need to weed them out to avoid
1553*38fd1498Szrj overrunning the end of the bblst_table. */
1554*38fd1498Szrj
1555*38fd1498Szrj update_idx = 0;
1556*38fd1498Szrj bitmap_clear (visited);
1557*38fd1498Szrj for (j = 0; j < el.nr_members; j++)
1558*38fd1498Szrj {
1559*38fd1498Szrj block = el.first_member[j]->src;
1560*38fd1498Szrj FOR_EACH_EDGE (e, ei, block->succs)
1561*38fd1498Szrj {
1562*38fd1498Szrj if (!bitmap_bit_p (visited, e->dest->index))
1563*38fd1498Szrj {
1564*38fd1498Szrj for (k = 0; k < el.nr_members; k++)
1565*38fd1498Szrj if (e == el.first_member[k])
1566*38fd1498Szrj break;
1567*38fd1498Szrj
1568*38fd1498Szrj if (k >= el.nr_members)
1569*38fd1498Szrj {
1570*38fd1498Szrj bblst_table[bblst_last++] = e->dest;
1571*38fd1498Szrj bitmap_set_bit (visited, e->dest->index);
1572*38fd1498Szrj update_idx++;
1573*38fd1498Szrj }
1574*38fd1498Szrj }
1575*38fd1498Szrj }
1576*38fd1498Szrj }
1577*38fd1498Szrj sp->update_bbs.nr_members = update_idx;
1578*38fd1498Szrj
1579*38fd1498Szrj /* Make sure we didn't overrun the end of bblst_table. */
1580*38fd1498Szrj gcc_assert (bblst_last <= bblst_size);
1581*38fd1498Szrj }
1582*38fd1498Szrj else
1583*38fd1498Szrj {
1584*38fd1498Szrj sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1585*38fd1498Szrj
1586*38fd1498Szrj sp->is_speculative = 0;
1587*38fd1498Szrj sp->src_prob = 0;
1588*38fd1498Szrj }
1589*38fd1498Szrj }
1590*38fd1498Szrj }
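
/* An illustrative example of split vs. update blocks (hypothetical
   region, not from the sources): let the region be A -> B, A -> C with
   TRG = A and candidate source B.  Moving an insn up from B into A is
   speculative because execution may continue to C instead.
   split_edges () yields the edge A -> C, so C is the split block:
   check_live () must verify that the moved insn's destination is dead
   at the start of C.  The other successor B becomes the update block:
   update_live () marks the destination live at the start of B once the
   motion is done.  */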
1591*38fd1498Szrj
1592*38fd1498Szrj /* Free the computed target info. */
1593*38fd1498Szrj static void
1594*38fd1498Szrj free_trg_info (void)
1595*38fd1498Szrj {
1596*38fd1498Szrj free (candidate_table);
1597*38fd1498Szrj free (bblst_table);
1598*38fd1498Szrj free (edgelst_table);
1599*38fd1498Szrj }
1600*38fd1498Szrj
1601*38fd1498Szrj /* Print candidates info, for debugging purposes. Callable from debugger. */
1602*38fd1498Szrj
1603*38fd1498Szrj DEBUG_FUNCTION void
1604*38fd1498Szrj debug_candidate (int i)
1605*38fd1498Szrj {
1606*38fd1498Szrj if (!candidate_table[i].is_valid)
1607*38fd1498Szrj return;
1608*38fd1498Szrj
1609*38fd1498Szrj if (candidate_table[i].is_speculative)
1610*38fd1498Szrj {
1611*38fd1498Szrj int j;
1612*38fd1498Szrj fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1613*38fd1498Szrj
1614*38fd1498Szrj fprintf (sched_dump, "split path: ");
1615*38fd1498Szrj for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1616*38fd1498Szrj {
1617*38fd1498Szrj int b = candidate_table[i].split_bbs.first_member[j]->index;
1618*38fd1498Szrj
1619*38fd1498Szrj fprintf (sched_dump, " %d ", b);
1620*38fd1498Szrj }
1621*38fd1498Szrj fprintf (sched_dump, "\n");
1622*38fd1498Szrj
1623*38fd1498Szrj fprintf (sched_dump, "update path: ");
1624*38fd1498Szrj for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1625*38fd1498Szrj {
1626*38fd1498Szrj int b = candidate_table[i].update_bbs.first_member[j]->index;
1627*38fd1498Szrj
1628*38fd1498Szrj fprintf (sched_dump, " %d ", b);
1629*38fd1498Szrj }
1630*38fd1498Szrj fprintf (sched_dump, "\n");
1631*38fd1498Szrj }
1632*38fd1498Szrj else
1633*38fd1498Szrj {
1634*38fd1498Szrj fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1635*38fd1498Szrj }
1636*38fd1498Szrj }
1637*38fd1498Szrj
1638*38fd1498Szrj /* Print candidates info, for debugging purposes. Callable from debugger. */
1639*38fd1498Szrj
1640*38fd1498Szrj DEBUG_FUNCTION void
1641*38fd1498Szrj debug_candidates (int trg)
1642*38fd1498Szrj {
1643*38fd1498Szrj int i;
1644*38fd1498Szrj
1645*38fd1498Szrj fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1646*38fd1498Szrj BB_TO_BLOCK (trg), trg);
1647*38fd1498Szrj for (i = trg + 1; i < current_nr_blocks; i++)
1648*38fd1498Szrj debug_candidate (i);
1649*38fd1498Szrj }
1650*38fd1498Szrj
1651*38fd1498Szrj /* Functions for speculative scheduling. */
1652*38fd1498Szrj
1653*38fd1498Szrj static bitmap_head not_in_df;
1654*38fd1498Szrj
1655*38fd1498Szrj /* Return 0 if x is a set of a register that is live at the beginning of one
1656*38fd1498Szrj of the split-blocks of src, otherwise return 1. */
1657*38fd1498Szrj
1658*38fd1498Szrj static int
1659*38fd1498Szrj check_live_1 (int src, rtx x)
1660*38fd1498Szrj {
1661*38fd1498Szrj int i;
1662*38fd1498Szrj int regno;
1663*38fd1498Szrj rtx reg = SET_DEST (x);
1664*38fd1498Szrj
1665*38fd1498Szrj if (reg == 0)
1666*38fd1498Szrj return 1;
1667*38fd1498Szrj
1668*38fd1498Szrj while (GET_CODE (reg) == SUBREG
1669*38fd1498Szrj || GET_CODE (reg) == ZERO_EXTRACT
1670*38fd1498Szrj || GET_CODE (reg) == STRICT_LOW_PART)
1671*38fd1498Szrj reg = XEXP (reg, 0);
1672*38fd1498Szrj
1673*38fd1498Szrj if (GET_CODE (reg) == PARALLEL)
1674*38fd1498Szrj {
1675*38fd1498Szrj int i;
1676*38fd1498Szrj
1677*38fd1498Szrj for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1678*38fd1498Szrj if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1679*38fd1498Szrj if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1680*38fd1498Szrj return 1;
1681*38fd1498Szrj
1682*38fd1498Szrj return 0;
1683*38fd1498Szrj }
1684*38fd1498Szrj
1685*38fd1498Szrj if (!REG_P (reg))
1686*38fd1498Szrj return 1;
1687*38fd1498Szrj
1688*38fd1498Szrj regno = REGNO (reg);
1689*38fd1498Szrj
1690*38fd1498Szrj if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1691*38fd1498Szrj {
1692*38fd1498Szrj /* Global registers are assumed live. */
1693*38fd1498Szrj return 0;
1694*38fd1498Szrj }
1695*38fd1498Szrj else
1696*38fd1498Szrj {
1697*38fd1498Szrj if (regno < FIRST_PSEUDO_REGISTER)
1698*38fd1498Szrj {
1699*38fd1498Szrj /* Check for hard registers. */
1700*38fd1498Szrj int j = REG_NREGS (reg);
1701*38fd1498Szrj while (--j >= 0)
1702*38fd1498Szrj {
1703*38fd1498Szrj for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1704*38fd1498Szrj {
1705*38fd1498Szrj basic_block b = candidate_table[src].split_bbs.first_member[i];
1706*38fd1498Szrj int t = bitmap_bit_p (&not_in_df, b->index);
1707*38fd1498Szrj
1708*38fd1498Szrj /* We can have split blocks that were recently generated.
1709*38fd1498Szrj Such blocks are always outside the current region. */
1710*38fd1498Szrj gcc_assert (!t || (CONTAINING_RGN (b->index)
1711*38fd1498Szrj != CONTAINING_RGN (BB_TO_BLOCK (src))));
1712*38fd1498Szrj
1713*38fd1498Szrj if (t || REGNO_REG_SET_P (df_get_live_in (b), regno + j))
1714*38fd1498Szrj return 0;
1715*38fd1498Szrj }
1716*38fd1498Szrj }
1717*38fd1498Szrj }
1718*38fd1498Szrj else
1719*38fd1498Szrj {
1720*38fd1498Szrj /* Check for pseudo registers. */
1721*38fd1498Szrj for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1722*38fd1498Szrj {
1723*38fd1498Szrj basic_block b = candidate_table[src].split_bbs.first_member[i];
1724*38fd1498Szrj int t = bitmap_bit_p (&not_in_df, b->index);
1725*38fd1498Szrj
1726*38fd1498Szrj gcc_assert (!t || (CONTAINING_RGN (b->index)
1727*38fd1498Szrj != CONTAINING_RGN (BB_TO_BLOCK (src))));
1728*38fd1498Szrj
1729*38fd1498Szrj if (t || REGNO_REG_SET_P (df_get_live_in (b), regno))
1730*38fd1498Szrj return 0;
1731*38fd1498Szrj }
1732*38fd1498Szrj }
1733*38fd1498Szrj }
1734*38fd1498Szrj
1735*38fd1498Szrj return 1;
1736*38fd1498Szrj }
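
/* In other words, a result of 1 means the motion is safe with respect
   to X: X does not set a register that is live on entry to any split
   block, so the speculated insn cannot clobber a value some other path
   still needs.  For example (hypothetically), if X sets pseudo r100
   and r100 is in df_get_live_in () of any split block, 0 is returned
   and the motion is rejected.  */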
1737*38fd1498Szrj
1738*38fd1498Szrj /* If x is a set of a register R, mark that R is live at the beginning
1739*38fd1498Szrj of every update-block of src. */
1740*38fd1498Szrj
1741*38fd1498Szrj static void
1742*38fd1498Szrj update_live_1 (int src, rtx x)
1743*38fd1498Szrj {
1744*38fd1498Szrj int i;
1745*38fd1498Szrj int regno;
1746*38fd1498Szrj rtx reg = SET_DEST (x);
1747*38fd1498Szrj
1748*38fd1498Szrj if (reg == 0)
1749*38fd1498Szrj return;
1750*38fd1498Szrj
1751*38fd1498Szrj while (GET_CODE (reg) == SUBREG
1752*38fd1498Szrj || GET_CODE (reg) == ZERO_EXTRACT
1753*38fd1498Szrj || GET_CODE (reg) == STRICT_LOW_PART)
1754*38fd1498Szrj reg = XEXP (reg, 0);
1755*38fd1498Szrj
1756*38fd1498Szrj if (GET_CODE (reg) == PARALLEL)
1757*38fd1498Szrj {
1758*38fd1498Szrj int i;
1759*38fd1498Szrj
1760*38fd1498Szrj for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1761*38fd1498Szrj if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1762*38fd1498Szrj update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1763*38fd1498Szrj
1764*38fd1498Szrj return;
1765*38fd1498Szrj }
1766*38fd1498Szrj
1767*38fd1498Szrj if (!REG_P (reg))
1768*38fd1498Szrj return;
1769*38fd1498Szrj
1770*38fd1498Szrj /* Global registers are always live, so the code below does not apply
1771*38fd1498Szrj to them. */
1772*38fd1498Szrj
1773*38fd1498Szrj regno = REGNO (reg);
1774*38fd1498Szrj
1775*38fd1498Szrj if (! HARD_REGISTER_NUM_P (regno)
1776*38fd1498Szrj || !global_regs[regno])
1777*38fd1498Szrj {
1778*38fd1498Szrj for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1779*38fd1498Szrj {
1780*38fd1498Szrj basic_block b = candidate_table[src].update_bbs.first_member[i];
1781*38fd1498Szrj bitmap_set_range (df_get_live_in (b), regno, REG_NREGS (reg));
1782*38fd1498Szrj }
1783*38fd1498Szrj }
1784*38fd1498Szrj }
1785*38fd1498Szrj
1786*38fd1498Szrj /* Return 1 if insn can be speculatively moved from block src to trg,
1787*38fd1498Szrj otherwise return 0. Called before the first insertion of insn into the
1788*38fd1498Szrj ready list, or before scheduling. */
1789*38fd1498Szrj
1790*38fd1498Szrj static int
1791*38fd1498Szrj check_live (rtx_insn *insn, int src)
1792*38fd1498Szrj {
1793*38fd1498Szrj /* Find the registers set by the instruction. */
1794*38fd1498Szrj if (GET_CODE (PATTERN (insn)) == SET
1795*38fd1498Szrj || GET_CODE (PATTERN (insn)) == CLOBBER)
1796*38fd1498Szrj return check_live_1 (src, PATTERN (insn));
1797*38fd1498Szrj else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1798*38fd1498Szrj {
1799*38fd1498Szrj int j;
1800*38fd1498Szrj for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1801*38fd1498Szrj if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1802*38fd1498Szrj || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1803*38fd1498Szrj && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1804*38fd1498Szrj return 0;
1805*38fd1498Szrj
1806*38fd1498Szrj return 1;
1807*38fd1498Szrj }
1808*38fd1498Szrj
1809*38fd1498Szrj return 1;
1810*38fd1498Szrj }
1811*38fd1498Szrj
1812*38fd1498Szrj /* Update the live registers info after insn was moved speculatively from
1813*38fd1498Szrj block src to trg. */
1814*38fd1498Szrj
1815*38fd1498Szrj static void
1816*38fd1498Szrj update_live (rtx_insn *insn, int src)
1817*38fd1498Szrj {
1818*38fd1498Szrj /* Find the registers set by the instruction. */
1819*38fd1498Szrj if (GET_CODE (PATTERN (insn)) == SET
1820*38fd1498Szrj || GET_CODE (PATTERN (insn)) == CLOBBER)
1821*38fd1498Szrj update_live_1 (src, PATTERN (insn));
1822*38fd1498Szrj else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1823*38fd1498Szrj {
1824*38fd1498Szrj int j;
1825*38fd1498Szrj for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1826*38fd1498Szrj if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1827*38fd1498Szrj || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1828*38fd1498Szrj update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1829*38fd1498Szrj }
1830*38fd1498Szrj }
1831*38fd1498Szrj
1832*38fd1498Szrj /* Nonzero if block bb_to is equal to, or reachable from, block bb_from. */
1833*38fd1498Szrj #define IS_REACHABLE(bb_from, bb_to) \
1834*38fd1498Szrj (bb_from == bb_to \
1835*38fd1498Szrj || IS_RGN_ENTRY (bb_from) \
1836*38fd1498Szrj || (bitmap_bit_p (ancestor_edges[bb_to], \
1837*38fd1498Szrj EDGE_TO_BIT (single_pred_edge (BASIC_BLOCK_FOR_FN (cfun, \
1838*38fd1498Szrj BB_TO_BLOCK (bb_from)))))))
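
/* A reading note on the third clause: ancestor_edges[bb_to] records
   every edge lying on some path from the region entry to bb_to, and
   single_pred_edge () requires bb_from to have exactly one incoming
   edge.  If that unique edge appears among bb_to's ancestor edges,
   some entry-to-bb_to path enters bb_from on the way, hence bb_to is
   reachable from bb_from.  */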
1839*38fd1498Szrj
1840*38fd1498Szrj /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1841*38fd1498Szrj
1842*38fd1498Szrj static void
1843*38fd1498Szrj set_spec_fed (rtx load_insn)
1844*38fd1498Szrj {
1845*38fd1498Szrj sd_iterator_def sd_it;
1846*38fd1498Szrj dep_t dep;
1847*38fd1498Szrj
1848*38fd1498Szrj FOR_EACH_DEP (load_insn, SD_LIST_FORW, sd_it, dep)
1849*38fd1498Szrj if (DEP_TYPE (dep) == REG_DEP_TRUE)
1850*38fd1498Szrj FED_BY_SPEC_LOAD (DEP_CON (dep)) = 1;
1851*38fd1498Szrj }
1852*38fd1498Szrj
1853*38fd1498Szrj /* On the path from the insn to load_insn_bb, find a conditional
1854*38fd1498Szrj branch depending on insn that guards the speculative load. */
1855*38fd1498Szrj
1856*38fd1498Szrj static int
1857*38fd1498Szrj find_conditional_protection (rtx_insn *insn, int load_insn_bb)
1858*38fd1498Szrj {
1859*38fd1498Szrj sd_iterator_def sd_it;
1860*38fd1498Szrj dep_t dep;
1861*38fd1498Szrj
1862*38fd1498Szrj /* Iterate through DEF-USE forward dependences. */
1863*38fd1498Szrj FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
1864*38fd1498Szrj {
1865*38fd1498Szrj rtx_insn *next = DEP_CON (dep);
1866*38fd1498Szrj
1867*38fd1498Szrj if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1868*38fd1498Szrj CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1869*38fd1498Szrj && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1870*38fd1498Szrj && load_insn_bb != INSN_BB (next)
1871*38fd1498Szrj && DEP_TYPE (dep) == REG_DEP_TRUE
1872*38fd1498Szrj && (JUMP_P (next)
1873*38fd1498Szrj || find_conditional_protection (next, load_insn_bb)))
1874*38fd1498Szrj return 1;
1875*38fd1498Szrj }
1876*38fd1498Szrj return 0;
1877*38fd1498Szrj } /* find_conditional_protection */
1878*38fd1498Szrj
1879*38fd1498Szrj /* Returns 1 if the same insn1 that participates in the computation
1880*38fd1498Szrj of load_insn's address is feeding a conditional branch that
1881*38fd1498Szrj guards load_insn. This is true if we find two DEF-USE
1882*38fd1498Szrj chains:
1883*38fd1498Szrj insn1 -> ... -> conditional-branch
1884*38fd1498Szrj insn1 -> ... -> load_insn,
1885*38fd1498Szrj and if a flow path exists:
1886*38fd1498Szrj insn1 -> ... -> conditional-branch -> ... -> load_insn,
1887*38fd1498Szrj and if insn1 is on the path
1888*38fd1498Szrj region-entry -> ... -> bb_trg -> ... load_insn.
1889*38fd1498Szrj
1890*38fd1498Szrj Locate insn1 by climbing on INSN_BACK_DEPS from load_insn.
1891*38fd1498Szrj Locate the branch by following INSN_FORW_DEPS from insn1. */
1892*38fd1498Szrj
1893*38fd1498Szrj static int
1894*38fd1498Szrj is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1895*38fd1498Szrj {
1896*38fd1498Szrj sd_iterator_def sd_it;
1897*38fd1498Szrj dep_t dep;
1898*38fd1498Szrj
1899*38fd1498Szrj FOR_EACH_DEP (load_insn, SD_LIST_BACK, sd_it, dep)
1900*38fd1498Szrj {
1901*38fd1498Szrj rtx_insn *insn1 = DEP_PRO (dep);
1902*38fd1498Szrj
1903*38fd1498Szrj /* Must be a DEF-USE dependence upon non-branch. */
1904*38fd1498Szrj if (DEP_TYPE (dep) != REG_DEP_TRUE
1905*38fd1498Szrj || JUMP_P (insn1))
1906*38fd1498Szrj continue;
1907*38fd1498Szrj
1908*38fd1498Szrj /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1909*38fd1498Szrj if (INSN_BB (insn1) == bb_src
1910*38fd1498Szrj || (CONTAINING_RGN (BLOCK_NUM (insn1))
1911*38fd1498Szrj != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1912*38fd1498Szrj || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1913*38fd1498Szrj && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1914*38fd1498Szrj continue;
1915*38fd1498Szrj
1916*38fd1498Szrj /* Now search for the conditional-branch. */
1917*38fd1498Szrj if (find_conditional_protection (insn1, bb_src))
1918*38fd1498Szrj return 1;
1919*38fd1498Szrj
1920*38fd1498Szrj /* Recursive step: search another insn1, "above" current insn1. */
1921*38fd1498Szrj return is_conditionally_protected (insn1, bb_src, bb_trg);
1922*38fd1498Szrj }
1923*38fd1498Szrj
1924*38fd1498Szrj /* The chain does not exist. */
1925*38fd1498Szrj return 0;
1926*38fd1498Szrj } /* is_conditionally_protected */
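
/* A concrete shape of the two chains above (hypothetical RTL, purely
   illustrative):

       insn1:   r1 = r2 + 4           ; computes the address
       branch:  if (r1 == 0) jump L   ; conditional branch fed by insn1
       load:    r4 = [r1]             ; load whose address is r1

   The compare on r1 guards the load against faulting; hoisting the
   load above that branch would strip the guard, which is why such
   conditionally protected loads are treated as prisky and rejected by
   is_exception_free ().  */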
1927*38fd1498Szrj
1928*38fd1498Szrj /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1929*38fd1498Szrj load_insn can move speculatively from bb_src to bb_trg. All the
1930*38fd1498Szrj following must hold:
1931*38fd1498Szrj
1932*38fd1498Szrj (1) both loads have 1 base register (PFREE_CANDIDATEs).
1933*38fd1498Szrj (2) load_insn and insn2 have a def-use dependence upon
1934*38fd1498Szrj the same insn 'insn1'.
1935*38fd1498Szrj (3) either insn2 is in bb_trg, or:
1936*38fd1498Szrj - there's only one split-block, and
1937*38fd1498Szrj - insn2 is on the escape path.
1938*38fd1498Szrj
1939*38fd1498Szrj From all these we can conclude that the two loads access memory
1940*38fd1498Szrj addresses that differ at most by a constant, and hence if moving
1941*38fd1498Szrj load_insn would cause an exception, it would have been caused by
1942*38fd1498Szrj insn2 anyhow. */
1943*38fd1498Szrj
1944*38fd1498Szrj static int
1945*38fd1498Szrj is_pfree (rtx load_insn, int bb_src, int bb_trg)
1946*38fd1498Szrj {
1947*38fd1498Szrj sd_iterator_def back_sd_it;
1948*38fd1498Szrj dep_t back_dep;
1949*38fd1498Szrj candidate *candp = candidate_table + bb_src;
1950*38fd1498Szrj
1951*38fd1498Szrj if (candp->split_bbs.nr_members != 1)
1952*38fd1498Szrj /* Must have exactly one escape block. */
1953*38fd1498Szrj return 0;
1954*38fd1498Szrj
1955*38fd1498Szrj FOR_EACH_DEP (load_insn, SD_LIST_BACK, back_sd_it, back_dep)
1956*38fd1498Szrj {
1957*38fd1498Szrj rtx_insn *insn1 = DEP_PRO (back_dep);
1958*38fd1498Szrj
1959*38fd1498Szrj if (DEP_TYPE (back_dep) == REG_DEP_TRUE)
1960*38fd1498Szrj /* Found a DEF-USE dependence (insn1, load_insn). */
1961*38fd1498Szrj {
1962*38fd1498Szrj sd_iterator_def fore_sd_it;
1963*38fd1498Szrj dep_t fore_dep;
1964*38fd1498Szrj
1965*38fd1498Szrj FOR_EACH_DEP (insn1, SD_LIST_FORW, fore_sd_it, fore_dep)
1966*38fd1498Szrj {
1967*38fd1498Szrj rtx_insn *insn2 = DEP_CON (fore_dep);
1968*38fd1498Szrj
1969*38fd1498Szrj if (DEP_TYPE (fore_dep) == REG_DEP_TRUE)
1970*38fd1498Szrj {
1971*38fd1498Szrj /* Found a DEF-USE dependence (insn1, insn2). */
1972*38fd1498Szrj if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1973*38fd1498Szrj /* insn2 is not guaranteed to be a 1-base-register load. */
1974*38fd1498Szrj continue;
1975*38fd1498Szrj
1976*38fd1498Szrj if (INSN_BB (insn2) == bb_trg)
1977*38fd1498Szrj /* insn2 is the similar load, in the target block. */
1978*38fd1498Szrj return 1;
1979*38fd1498Szrj
1980*38fd1498Szrj if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2))
1981*38fd1498Szrj /* insn2 is a similar load, in a split-block. */
1982*38fd1498Szrj return 1;
1983*38fd1498Szrj }
1984*38fd1498Szrj }
1985*38fd1498Szrj }
1986*38fd1498Szrj }
1987*38fd1498Szrj
1988*38fd1498Szrj /* Couldn't find a similar load. */
1989*38fd1498Szrj return 0;
1990*38fd1498Szrj } /* is_pfree */
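
/* An illustrative instance (hypothetical insns, not from the sources):
   if insn1 is "r1 = r2 + r3", load_insn is "r5 = [r1 + 8]" in bb_src,
   and insn2 is "r6 = [r1]" in bb_trg, then both loads are
   PFREE_CANDIDATEs off the same base r1 and their addresses differ by
   a constant.  If hoisting load_insn to bb_trg could fault, insn2
   would already have faulted there, so the motion is deemed
   exception-free.  */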
1991*38fd1498Szrj
1992*38fd1498Szrj /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1993*38fd1498Szrj a load moved speculatively, or if load_insn is protected by
1994*38fd1498Szrj a compare on load_insn's address). */
1995*38fd1498Szrj
1996*38fd1498Szrj static int
1997*38fd1498Szrj is_prisky (rtx load_insn, int bb_src, int bb_trg)
1998*38fd1498Szrj {
1999*38fd1498Szrj if (FED_BY_SPEC_LOAD (load_insn))
2000*38fd1498Szrj return 1;
2001*38fd1498Szrj
2002*38fd1498Szrj if (sd_lists_empty_p (load_insn, SD_LIST_BACK))
2003*38fd1498Szrj /* Dependence may 'hide' out of the region. */
2004*38fd1498Szrj return 1;
2005*38fd1498Szrj
2006*38fd1498Szrj if (is_conditionally_protected (load_insn, bb_src, bb_trg))
2007*38fd1498Szrj return 1;
2008*38fd1498Szrj
2009*38fd1498Szrj return 0;
2010*38fd1498Szrj }
2011*38fd1498Szrj
2012*38fd1498Szrj /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
2013*38fd1498Szrj Return 1 if insn is exception-free (and the motion is valid)
2014*38fd1498Szrj and 0 otherwise. */
2015*38fd1498Szrj
2016*38fd1498Szrj static int
2017*38fd1498Szrj is_exception_free (rtx_insn *insn, int bb_src, int bb_trg)
2018*38fd1498Szrj {
2019*38fd1498Szrj int insn_class = haifa_classify_insn (insn);
2020*38fd1498Szrj
2021*38fd1498Szrj /* Handle non-load insns. */
2022*38fd1498Szrj switch (insn_class)
2023*38fd1498Szrj {
2024*38fd1498Szrj case TRAP_FREE:
2025*38fd1498Szrj return 1;
2026*38fd1498Szrj case TRAP_RISKY:
2027*38fd1498Szrj return 0;
2028*38fd1498Szrj default:;
2029*38fd1498Szrj }
2030*38fd1498Szrj
2031*38fd1498Szrj /* Handle loads. */
2032*38fd1498Szrj if (!flag_schedule_speculative_load)
2033*38fd1498Szrj return 0;
2034*38fd1498Szrj IS_LOAD_INSN (insn) = 1;
2035*38fd1498Szrj switch (insn_class)
2036*38fd1498Szrj {
2037*38fd1498Szrj case IFREE:
2038*38fd1498Szrj return 1;
2039*38fd1498Szrj case IRISKY:
2040*38fd1498Szrj return 0;
2041*38fd1498Szrj case PFREE_CANDIDATE:
2042*38fd1498Szrj if (is_pfree (insn, bb_src, bb_trg))
2043*38fd1498Szrj return 1;
2044*38fd1498Szrj /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
2045*38fd1498Szrj /* FALLTHRU */
2046*38fd1498Szrj case PRISKY_CANDIDATE:
2047*38fd1498Szrj if (!flag_schedule_speculative_load_dangerous
2048*38fd1498Szrj || is_prisky (insn, bb_src, bb_trg))
2049*38fd1498Szrj return 0;
2050*38fd1498Szrj break;
2051*38fd1498Szrj default:;
2052*38fd1498Szrj }
2053*38fd1498Szrj
2054*38fd1498Szrj return flag_schedule_speculative_load_dangerous;
2055*38fd1498Szrj }
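
/* A summary of the decision table above: TRAP_FREE insns are always
   movable and TRAP_RISKY insns never are.  Loads require
   flag_schedule_speculative_load at a minimum; IFREE loads then move
   freely, IRISKY ones never do, a PFREE_CANDIDATE moves when
   is_pfree () finds a similar load and otherwise falls through to the
   PRISKY test, and a PRISKY_CANDIDATE moves only when
   flag_schedule_speculative_load_dangerous is set and is_prisky ()
   does not flag it.  */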
2056*38fd1498Szrj
2057*38fd1498Szrj /* The number of insns from the current block scheduled so far. */
2058*38fd1498Szrj static int sched_target_n_insns;
2059*38fd1498Szrj /* The number of insns from the current block to be scheduled in total. */
2060*38fd1498Szrj static int target_n_insns;
2061*38fd1498Szrj /* The number of insns from the entire region scheduled so far. */
2062*38fd1498Szrj static int sched_n_insns;
2063*38fd1498Szrj
2064*38fd1498Szrj /* Implementations of the sched_info functions for region scheduling. */
2065*38fd1498Szrj static void init_ready_list (void);
2066*38fd1498Szrj static int can_schedule_ready_p (rtx_insn *);
2067*38fd1498Szrj static void begin_schedule_ready (rtx_insn *);
2068*38fd1498Szrj static ds_t new_ready (rtx_insn *, ds_t);
2069*38fd1498Szrj static int schedule_more_p (void);
2070*38fd1498Szrj static const char *rgn_print_insn (const rtx_insn *, int);
2071*38fd1498Szrj static int rgn_rank (rtx_insn *, rtx_insn *);
2072*38fd1498Szrj static void compute_jump_reg_dependencies (rtx, regset);
2073*38fd1498Szrj
2074*38fd1498Szrj /* Functions for speculative scheduling. */
2075*38fd1498Szrj static void rgn_add_remove_insn (rtx_insn *, int);
2076*38fd1498Szrj static void rgn_add_block (basic_block, basic_block);
2077*38fd1498Szrj static void rgn_fix_recovery_cfg (int, int, int);
2078*38fd1498Szrj static basic_block advance_target_bb (basic_block, rtx_insn *);
2079*38fd1498Szrj
2080*38fd1498Szrj /* Return nonzero if there are more insns that should be scheduled. */
2081*38fd1498Szrj
2082*38fd1498Szrj static int
2083*38fd1498Szrj schedule_more_p (void)
2084*38fd1498Szrj {
2085*38fd1498Szrj return sched_target_n_insns < target_n_insns;
2086*38fd1498Szrj }
2087*38fd1498Szrj
2088*38fd1498Szrj /* Add all insns that are initially ready to the ready list. Called
2089*38fd1498Szrj once before scheduling a set of insns. */
2090*38fd1498Szrj
2091*38fd1498Szrj static void
2092*38fd1498Szrj init_ready_list (void)
2093*38fd1498Szrj {
2094*38fd1498Szrj rtx_insn *prev_head = current_sched_info->prev_head;
2095*38fd1498Szrj rtx_insn *next_tail = current_sched_info->next_tail;
2096*38fd1498Szrj int bb_src;
2097*38fd1498Szrj rtx_insn *insn;
2098*38fd1498Szrj
2099*38fd1498Szrj target_n_insns = 0;
2100*38fd1498Szrj sched_target_n_insns = 0;
2101*38fd1498Szrj sched_n_insns = 0;
2102*38fd1498Szrj
2103*38fd1498Szrj /* Print debugging information. */
2104*38fd1498Szrj if (sched_verbose >= 5)
2105*38fd1498Szrj debug_rgn_dependencies (target_bb);
2106*38fd1498Szrj
2107*38fd1498Szrj /* Prepare current target block info. */
2108*38fd1498Szrj if (current_nr_blocks > 1)
2109*38fd1498Szrj compute_trg_info (target_bb);
2110*38fd1498Szrj
2111*38fd1498Szrj /* Initialize ready list with all 'ready' insns in target block.
2112*38fd1498Szrj Count number of insns in the target block being scheduled. */
2113*38fd1498Szrj for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
2114*38fd1498Szrj {
2115*38fd1498Szrj gcc_assert (TODO_SPEC (insn) == HARD_DEP || TODO_SPEC (insn) == DEP_POSTPONED);
2116*38fd1498Szrj TODO_SPEC (insn) = HARD_DEP;
2117*38fd1498Szrj try_ready (insn);
2118*38fd1498Szrj target_n_insns++;
2119*38fd1498Szrj
2120*38fd1498Szrj gcc_assert (!(TODO_SPEC (insn) & BEGIN_CONTROL));
2121*38fd1498Szrj }
2122*38fd1498Szrj
2123*38fd1498Szrj /* Add to the ready list all 'ready' insns in valid source blocks.
2124*38fd1498Szrj For speculative insns, check liveness, exception-freedom, and
2125*38fd1498Szrj issue delay. */
2126*38fd1498Szrj for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
2127*38fd1498Szrj if (IS_VALID (bb_src))
2128*38fd1498Szrj {
2129*38fd1498Szrj rtx_insn *src_head;
2130*38fd1498Szrj rtx_insn *src_next_tail;
2131*38fd1498Szrj rtx_insn *tail, *head;
2132*38fd1498Szrj
2133*38fd1498Szrj get_ebb_head_tail (EBB_FIRST_BB (bb_src), EBB_LAST_BB (bb_src),
2134*38fd1498Szrj &head, &tail);
2135*38fd1498Szrj src_next_tail = NEXT_INSN (tail);
2136*38fd1498Szrj src_head = head;
2137*38fd1498Szrj
2138*38fd1498Szrj for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
2139*38fd1498Szrj if (INSN_P (insn))
2140*38fd1498Szrj {
2141*38fd1498Szrj gcc_assert (TODO_SPEC (insn) == HARD_DEP || TODO_SPEC (insn) == DEP_POSTPONED);
2142*38fd1498Szrj TODO_SPEC (insn) = HARD_DEP;
2143*38fd1498Szrj try_ready (insn);
2144*38fd1498Szrj }
2145*38fd1498Szrj }
2146*38fd1498Szrj }
2147*38fd1498Szrj
2148*38fd1498Szrj /* Called after taking INSN from the ready list. Returns nonzero if this
2149*38fd1498Szrj insn can be scheduled, zero if we should silently discard it. */
2150*38fd1498Szrj
2151*38fd1498Szrj static int
2152*38fd1498Szrj can_schedule_ready_p (rtx_insn *insn)
2153*38fd1498Szrj {
2154*38fd1498Szrj /* An interblock motion? */
2155*38fd1498Szrj if (INSN_BB (insn) != target_bb && IS_SPECULATIVE_INSN (insn))
2156*38fd1498Szrj {
2157*38fd1498Szrj /* Cannot schedule this insn unless all operands are live. */
2158*38fd1498Szrj if (!check_live (insn, INSN_BB (insn)))
2159*38fd1498Szrj return 0;
2160*38fd1498Szrj
2161*38fd1498Szrj /* Should not move expensive instructions speculatively. */
2162*38fd1498Szrj if (GET_CODE (PATTERN (insn)) != CLOBBER
2163*38fd1498Szrj && !targetm.sched.can_speculate_insn (insn))
2164*38fd1498Szrj return 0;
2165*38fd1498Szrj }
2166*38fd1498Szrj
2167*38fd1498Szrj return 1;
2168*38fd1498Szrj }
2169*38fd1498Szrj
2170*38fd1498Szrj /* Update counters and other information. Split from can_schedule_ready_p ()
2171*38fd1498Szrj because when we schedule an insn speculatively, the insn passed to
2172*38fd1498Szrj can_schedule_ready_p () differs from the one passed to
2173*38fd1498Szrj begin_schedule_ready (). */
2174*38fd1498Szrj static void
2175*38fd1498Szrj begin_schedule_ready (rtx_insn *insn)
2176*38fd1498Szrj {
2177*38fd1498Szrj /* An interblock motion? */
2178*38fd1498Szrj if (INSN_BB (insn) != target_bb)
2179*38fd1498Szrj {
2180*38fd1498Szrj if (IS_SPECULATIVE_INSN (insn))
2181*38fd1498Szrj {
2182*38fd1498Szrj gcc_assert (check_live (insn, INSN_BB (insn)));
2183*38fd1498Szrj
2184*38fd1498Szrj update_live (insn, INSN_BB (insn));
2185*38fd1498Szrj
2186*38fd1498Szrj /* For speculative load, mark insns fed by it. */
2187*38fd1498Szrj if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
2188*38fd1498Szrj set_spec_fed (insn);
2189*38fd1498Szrj
2190*38fd1498Szrj nr_spec++;
2191*38fd1498Szrj }
2192*38fd1498Szrj nr_inter++;
2193*38fd1498Szrj }
2194*38fd1498Szrj else
2195*38fd1498Szrj {
2196*38fd1498Szrj /* An in-block motion. */
2197*38fd1498Szrj sched_target_n_insns++;
2198*38fd1498Szrj }
2199*38fd1498Szrj sched_n_insns++;
2200*38fd1498Szrj }
2201*38fd1498Szrj
2202*38fd1498Szrj /* Called after INSN has all its hard dependencies resolved and the speculation
2203*38fd1498Szrj of type TS is enough to overcome them all.
2204*38fd1498Szrj Return nonzero if it should be moved to the ready list or the queue, or zero
2205*38fd1498Szrj if we should silently discard it. */
2206*38fd1498Szrj static ds_t
2207*38fd1498Szrj new_ready (rtx_insn *next, ds_t ts)
2208*38fd1498Szrj {
2209*38fd1498Szrj if (INSN_BB (next) != target_bb)
2210*38fd1498Szrj {
2211*38fd1498Szrj int not_ex_free = 0;
2212*38fd1498Szrj
2213*38fd1498Szrj /* For speculative insns, before inserting into the ready list or queue,
2214*38fd1498Szrj check liveness, exception-freedom, and issue delay. */
2215*38fd1498Szrj if (!IS_VALID (INSN_BB (next))
2216*38fd1498Szrj || CANT_MOVE (next)
2217*38fd1498Szrj || (IS_SPECULATIVE_INSN (next)
2218*38fd1498Szrj && ((recog_memoized (next) >= 0
2219*38fd1498Szrj && min_insn_conflict_delay (curr_state, next, next)
2220*38fd1498Szrj > PARAM_VALUE (PARAM_MAX_SCHED_INSN_CONFLICT_DELAY))
2221*38fd1498Szrj || IS_SPECULATION_CHECK_P (next)
2222*38fd1498Szrj || !check_live (next, INSN_BB (next))
2223*38fd1498Szrj || (not_ex_free = !is_exception_free (next, INSN_BB (next),
2224*38fd1498Szrj target_bb)))))
2225*38fd1498Szrj {
2226*38fd1498Szrj if (not_ex_free
2227*38fd1498Szrj /* We are here because is_exception_free () == false.
2228*38fd1498Szrj But we may be able to handle that with control speculation. */
2229*38fd1498Szrj && sched_deps_info->generate_spec_deps
2230*38fd1498Szrj && spec_info->mask & BEGIN_CONTROL)
2231*38fd1498Szrj {
2232*38fd1498Szrj ds_t new_ds;
2233*38fd1498Szrj
2234*38fd1498Szrj /* Add control speculation to NEXT's dependency type. */
2235*38fd1498Szrj new_ds = set_dep_weak (ts, BEGIN_CONTROL, MAX_DEP_WEAK);
2236*38fd1498Szrj
2237*38fd1498Szrj /* Check if NEXT can be speculated with new dependency type. */
2238*38fd1498Szrj if (sched_insn_is_legitimate_for_speculation_p (next, new_ds))
2239*38fd1498Szrj /* Here we have a new control-speculative instruction. */
2240*38fd1498Szrj ts = new_ds;
2241*38fd1498Szrj else
2242*38fd1498Szrj /* NEXT isn't ready yet. */
2243*38fd1498Szrj ts = DEP_POSTPONED;
2244*38fd1498Szrj }
2245*38fd1498Szrj else
2246*38fd1498Szrj /* NEXT isn't ready yet. */
2247*38fd1498Szrj ts = DEP_POSTPONED;
2248*38fd1498Szrj }
2249*38fd1498Szrj }
2250*38fd1498Szrj
2251*38fd1498Szrj return ts;
2252*38fd1498Szrj }
2253*38fd1498Szrj
2254*38fd1498Szrj /* Return a string that contains the insn uid and optionally anything else
2255*38fd1498Szrj necessary to identify this insn in an output. It's valid to use a
2256*38fd1498Szrj static buffer for this. The ALIGNED parameter should cause the string
2257*38fd1498Szrj to be formatted so that multiple output lines will line up nicely. */
2258*38fd1498Szrj
2259*38fd1498Szrj static const char *
2260*38fd1498Szrj rgn_print_insn (const rtx_insn *insn, int aligned)
2261*38fd1498Szrj {
2262*38fd1498Szrj static char tmp[80];
2263*38fd1498Szrj
2264*38fd1498Szrj if (aligned)
2265*38fd1498Szrj sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
2266*38fd1498Szrj else
2267*38fd1498Szrj {
2268*38fd1498Szrj if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
2269*38fd1498Szrj sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
2270*38fd1498Szrj else
2271*38fd1498Szrj sprintf (tmp, "%d", INSN_UID (insn));
2272*38fd1498Szrj }
2273*38fd1498Szrj return tmp;
2274*38fd1498Szrj }
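
/* Sample outputs, derived from the formats above: with ALIGNED set,
   insn 42 in block 3 prints as "b  3: i  42"; without it, the same
   insn prints as "42/b3" during interblock scheduling and simply as
   "42" when it lies in the target block or the region has a single
   block.  */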
2275*38fd1498Szrj
2276*38fd1498Szrj /* Compare priority of two insns. Return a positive number if the second
2277*38fd1498Szrj insn is to be preferred for scheduling, and a negative one if the first
2278*38fd1498Szrj is to be preferred. Zero if they are equally good. */
2279*38fd1498Szrj
2280*38fd1498Szrj static int
2281*38fd1498Szrj rgn_rank (rtx_insn *insn1, rtx_insn *insn2)
2282*38fd1498Szrj {
2283*38fd1498Szrj /* Some comparisons make sense in interblock scheduling only. */
2284*38fd1498Szrj if (INSN_BB (insn1) != INSN_BB (insn2))
2285*38fd1498Szrj {
2286*38fd1498Szrj int spec_val, prob_val;
2287*38fd1498Szrj
2288*38fd1498Szrj /* Prefer an inblock motion over an interblock motion. */
2289*38fd1498Szrj if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2290*38fd1498Szrj return 1;
2291*38fd1498Szrj if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2292*38fd1498Szrj return -1;
2293*38fd1498Szrj
2294*38fd1498Szrj /* Prefer a useful motion over a speculative one. */
2295*38fd1498Szrj spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2296*38fd1498Szrj if (spec_val)
2297*38fd1498Szrj return spec_val;
2298*38fd1498Szrj
2299*38fd1498Szrj /* Prefer a more probable (speculative) insn. */
2300*38fd1498Szrj prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
2301*38fd1498Szrj if (prob_val)
2302*38fd1498Szrj return prob_val;
2303*38fd1498Szrj }
2304*38fd1498Szrj return 0;
2305*38fd1498Szrj }
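
/* For instance, under the rules above: when insn2 sits in the target
   block and insn1 would be an interblock motion, rgn_rank () returns 1
   and the scheduler prefers insn2; between two interblock candidates
   the non-speculative one wins, and between two speculative ones the
   one with the higher INSN_PROBABILITY.  */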
2306*38fd1498Szrj
2307*38fd1498Szrj /* NEXT is an instruction that depends on INSN (a backward dependence);
2308*38fd1498Szrj return nonzero if we should include this dependence in priority
2309*38fd1498Szrj calculations. */
2310*38fd1498Szrj
2311*38fd1498Szrj int
2312*38fd1498Szrj contributes_to_priority (rtx_insn *next, rtx_insn *insn)
2313*38fd1498Szrj {
2314*38fd1498Szrj /* NEXT and INSN reside in one ebb. */
2315*38fd1498Szrj return BLOCK_TO_BB (BLOCK_NUM (next)) == BLOCK_TO_BB (BLOCK_NUM (insn));
2316*38fd1498Szrj }
2317*38fd1498Szrj
2318*38fd1498Szrj /* INSN is a JUMP_INSN. Store the set of registers that must be
2319*38fd1498Szrj considered as used by this jump in USED. */
2320*38fd1498Szrj
2321*38fd1498Szrj static void
2322*38fd1498Szrj compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
2323*38fd1498Szrj regset used ATTRIBUTE_UNUSED)
2324*38fd1498Szrj {
2325*38fd1498Szrj /* Nothing to do here, since we postprocess jumps in
2326*38fd1498Szrj add_branch_dependences. */
2327*38fd1498Szrj }
2328*38fd1498Szrj
2329*38fd1498Szrj /* This variable holds common_sched_info hooks and data relevant to
2330*38fd1498Szrj the interblock scheduler. */
2331*38fd1498Szrj static struct common_sched_info_def rgn_common_sched_info;
2332*38fd1498Szrj
2333*38fd1498Szrj
2334*38fd1498Szrj /* This holds data for the dependence analysis relevant to
2335*38fd1498Szrj the interblock scheduler. */
2336*38fd1498Szrj static struct sched_deps_info_def rgn_sched_deps_info;
2337*38fd1498Szrj
2338*38fd1498Szrj /* This holds constant data used for initializing the above structure
2339*38fd1498Szrj for the Haifa scheduler. */
2340*38fd1498Szrj static const struct sched_deps_info_def rgn_const_sched_deps_info =
2341*38fd1498Szrj {
2342*38fd1498Szrj compute_jump_reg_dependencies,
2343*38fd1498Szrj NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2344*38fd1498Szrj 0, 0, 0
2345*38fd1498Szrj };
2346*38fd1498Szrj
2347*38fd1498Szrj /* Same as above, but for the selective scheduler. */
2348*38fd1498Szrj static const struct sched_deps_info_def rgn_const_sel_sched_deps_info =
2349*38fd1498Szrj {
2350*38fd1498Szrj compute_jump_reg_dependencies,
2351*38fd1498Szrj NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2352*38fd1498Szrj 0, 0, 0
2353*38fd1498Szrj };
2354*38fd1498Szrj
2355*38fd1498Szrj /* Return true if scheduling INSN will trigger finish of scheduling
2356*38fd1498Szrj current block. */
2357*38fd1498Szrj static bool
2358*38fd1498Szrj rgn_insn_finishes_block_p (rtx_insn *insn)
2359*38fd1498Szrj {
2360*38fd1498Szrj if (INSN_BB (insn) == target_bb
2361*38fd1498Szrj && sched_target_n_insns + 1 == target_n_insns)
2362*38fd1498Szrj /* INSN is the last not-yet-scheduled instruction in the current block. */
2363*38fd1498Szrj return true;
2364*38fd1498Szrj
2365*38fd1498Szrj return false;
2366*38fd1498Szrj }
2367*38fd1498Szrj
2368*38fd1498Szrj /* Used in schedule_insns to initialize current_sched_info for scheduling
2369*38fd1498Szrj regions (or single basic blocks). */
2370*38fd1498Szrj
2371*38fd1498Szrj static const struct haifa_sched_info rgn_const_sched_info =
2372*38fd1498Szrj {
2373*38fd1498Szrj init_ready_list,
2374*38fd1498Szrj can_schedule_ready_p,
2375*38fd1498Szrj schedule_more_p,
2376*38fd1498Szrj new_ready,
2377*38fd1498Szrj rgn_rank,
2378*38fd1498Szrj rgn_print_insn,
2379*38fd1498Szrj contributes_to_priority,
2380*38fd1498Szrj rgn_insn_finishes_block_p,
2381*38fd1498Szrj
2382*38fd1498Szrj NULL, NULL,
2383*38fd1498Szrj NULL, NULL,
2384*38fd1498Szrj 0, 0,
2385*38fd1498Szrj
2386*38fd1498Szrj rgn_add_remove_insn,
2387*38fd1498Szrj begin_schedule_ready,
2388*38fd1498Szrj NULL,
2389*38fd1498Szrj advance_target_bb,
2390*38fd1498Szrj NULL, NULL,
2391*38fd1498Szrj SCHED_RGN
2392*38fd1498Szrj };
2393*38fd1498Szrj
2394*38fd1498Szrj /* This variable holds the data and hooks needed by the Haifa scheduler
2395*38fd1498Szrj backend for the interblock scheduler frontend. */
2396*38fd1498Szrj static struct haifa_sched_info rgn_sched_info;
2397*38fd1498Szrj
2398*38fd1498Szrj /* Return the maximum priority that an insn was assigned. */
2399*38fd1498Szrj
2400*38fd1498Szrj int
2401*38fd1498Szrj get_rgn_sched_max_insns_priority (void)
2402*38fd1498Szrj {
2403*38fd1498Szrj return rgn_sched_info.sched_max_insns_priority;
2404*38fd1498Szrj }
2405*38fd1498Szrj
2406*38fd1498Szrj /* Determine if PAT sets a TARGET_CLASS_LIKELY_SPILLED_P register. */
2407*38fd1498Szrj
2408*38fd1498Szrj static bool
2409*38fd1498Szrj sets_likely_spilled (rtx pat)
2410*38fd1498Szrj {
2411*38fd1498Szrj bool ret = false;
2412*38fd1498Szrj note_stores (pat, sets_likely_spilled_1, &ret);
2413*38fd1498Szrj return ret;
2414*38fd1498Szrj }
2415*38fd1498Szrj
2416*38fd1498Szrj static void
2417*38fd1498Szrj sets_likely_spilled_1 (rtx x, const_rtx pat, void *data)
2418*38fd1498Szrj {
2419*38fd1498Szrj bool *ret = (bool *) data;
2420*38fd1498Szrj
2421*38fd1498Szrj if (GET_CODE (pat) == SET
2422*38fd1498Szrj && REG_P (x)
2423*38fd1498Szrj && HARD_REGISTER_P (x)
2424*38fd1498Szrj && targetm.class_likely_spilled_p (REGNO_REG_CLASS (REGNO (x))))
2425*38fd1498Szrj *ret = true;
2426*38fd1498Szrj }
2427*38fd1498Szrj
2428*38fd1498Szrj /* A bitmap to note insns that participate in any dependency. Used in
2429*38fd1498Szrj add_branch_dependences. */
2430*38fd1498Szrj static sbitmap insn_referenced;
2431*38fd1498Szrj
2432*38fd1498Szrj /* Add dependences so that branches are scheduled to run last in their
2433*38fd1498Szrj block. */
2434*38fd1498Szrj static void
2435*38fd1498Szrj add_branch_dependences (rtx_insn *head, rtx_insn *tail)
2436*38fd1498Szrj {
2437*38fd1498Szrj rtx_insn *insn, *last;
2438*38fd1498Szrj
2439*38fd1498Szrj /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
2440*38fd1498Szrj that can throw exceptions, force them to remain in order at the end of
2441*38fd1498Szrj the block by adding dependencies and giving the last a high priority.
2442*38fd1498Szrj There may be notes present, and prev_head may also be a note.
2443*38fd1498Szrj
2444*38fd1498Szrj Branches must obviously remain at the end. Calls should remain at the
2445*38fd1498Szrj end since moving them results in worse register allocation. Uses remain
2446*38fd1498Szrj at the end to ensure proper register allocation.
2447*38fd1498Szrj
2448*38fd1498Szrj cc0 setters remain at the end because they can't be moved away from
2449*38fd1498Szrj their cc0 user.
2450*38fd1498Szrj
2451*38fd1498Szrj Predecessors of SCHED_GROUP_P instructions at the end remain at the end.
2452*38fd1498Szrj
2453*38fd1498Szrj COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).
2454*38fd1498Szrj
2455*38fd1498Szrj Insns setting TARGET_CLASS_LIKELY_SPILLED_P registers (usually return
2456*38fd1498Szrj values) are not moved before reload because we can wind up with register
2457*38fd1498Szrj allocation failures. */
2458*38fd1498Szrj
2459*38fd1498Szrj while (tail != head && DEBUG_INSN_P (tail))
2460*38fd1498Szrj tail = PREV_INSN (tail);
2461*38fd1498Szrj
2462*38fd1498Szrj insn = tail;
2463*38fd1498Szrj last = 0;
2464*38fd1498Szrj while (CALL_P (insn)
2465*38fd1498Szrj || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
2466*38fd1498Szrj || (NONJUMP_INSN_P (insn)
2467*38fd1498Szrj && (GET_CODE (PATTERN (insn)) == USE
2468*38fd1498Szrj || GET_CODE (PATTERN (insn)) == CLOBBER
2469*38fd1498Szrj || can_throw_internal (insn)
2470*38fd1498Szrj || (HAVE_cc0 && sets_cc0_p (PATTERN (insn)))
2471*38fd1498Szrj || (!reload_completed
2472*38fd1498Szrj && sets_likely_spilled (PATTERN (insn)))))
2473*38fd1498Szrj || NOTE_P (insn)
2474*38fd1498Szrj || (last != 0 && SCHED_GROUP_P (last)))
2475*38fd1498Szrj {
2476*38fd1498Szrj if (!NOTE_P (insn))
2477*38fd1498Szrj {
2478*38fd1498Szrj if (last != 0
2479*38fd1498Szrj && sd_find_dep_between (insn, last, false) == NULL)
2480*38fd1498Szrj {
2481*38fd1498Szrj if (! sched_insns_conditions_mutex_p (last, insn))
2482*38fd1498Szrj add_dependence (last, insn, REG_DEP_ANTI);
2483*38fd1498Szrj bitmap_set_bit (insn_referenced, INSN_LUID (insn));
2484*38fd1498Szrj }
2485*38fd1498Szrj
2486*38fd1498Szrj CANT_MOVE (insn) = 1;
2487*38fd1498Szrj
2488*38fd1498Szrj last = insn;
2489*38fd1498Szrj }
2490*38fd1498Szrj
2491*38fd1498Szrj /* Don't overrun the bounds of the basic block. */
2492*38fd1498Szrj if (insn == head)
2493*38fd1498Szrj break;
2494*38fd1498Szrj
2495*38fd1498Szrj do
2496*38fd1498Szrj insn = PREV_INSN (insn);
2497*38fd1498Szrj while (insn != head && DEBUG_INSN_P (insn));
2498*38fd1498Szrj }
2499*38fd1498Szrj
2500*38fd1498Szrj /* Selective scheduling handles control dependencies by itself, and
2501*38fd1498Szrj CANT_MOVE flags ensure that other insns will be kept in place. */
2502*38fd1498Szrj if (sel_sched_p ())
2503*38fd1498Szrj return;
2504*38fd1498Szrj
2505*38fd1498Szrj /* Make sure these insns are scheduled last in their block. */
2506*38fd1498Szrj insn = last;
2507*38fd1498Szrj if (insn != 0)
2508*38fd1498Szrj while (insn != head)
2509*38fd1498Szrj {
2510*38fd1498Szrj insn = prev_nonnote_insn (insn);
2511*38fd1498Szrj
2512*38fd1498Szrj if (bitmap_bit_p (insn_referenced, INSN_LUID (insn))
2513*38fd1498Szrj || DEBUG_INSN_P (insn))
2514*38fd1498Szrj continue;
2515*38fd1498Szrj
2516*38fd1498Szrj if (! sched_insns_conditions_mutex_p (last, insn))
2517*38fd1498Szrj add_dependence (last, insn, REG_DEP_ANTI);
2518*38fd1498Szrj }
2519*38fd1498Szrj
2520*38fd1498Szrj if (!targetm.have_conditional_execution ())
2521*38fd1498Szrj return;
2522*38fd1498Szrj
2523*38fd1498Szrj /* Finally, if the block ends in a jump, and we are doing intra-block
2524*38fd1498Szrj scheduling, make sure that the branch depends on any COND_EXEC insns
2525*38fd1498Szrj inside the block to avoid moving the COND_EXECs past the branch insn.
2526*38fd1498Szrj
2527*38fd1498Szrj We only have to do this after reload, because (1) before reload there
2528*38fd1498Szrj are no COND_EXEC insns, and (2) the region scheduler is an intra-block
2529*38fd1498Szrj scheduler after reload.
2530*38fd1498Szrj
2531*38fd1498Szrj FIXME: We could in some cases move COND_EXEC insns past the branch if
2532*38fd1498Szrj this scheduler would be a little smarter. Consider this code:
2533*38fd1498Szrj
2534*38fd1498Szrj T = [addr]
2535*38fd1498Szrj C ? addr += 4
2536*38fd1498Szrj !C ? X += 12
2537*38fd1498Szrj C ? T += 1
2538*38fd1498Szrj C ? jump foo
2539*38fd1498Szrj
2540*38fd1498Szrj On a target with a one cycle stall on a memory access the optimal
2541*38fd1498Szrj sequence would be:
2542*38fd1498Szrj
2543*38fd1498Szrj T = [addr]
2544*38fd1498Szrj C ? addr += 4
2545*38fd1498Szrj C ? T += 1
2546*38fd1498Szrj C ? jump foo
2547*38fd1498Szrj !C ? X += 12
2548*38fd1498Szrj
2549*38fd1498Szrj We don't want to put the 'X += 12' before the branch because it just
2550*38fd1498Szrj wastes a cycle of execution time when the branch is taken.
2551*38fd1498Szrj
2552*38fd1498Szrj Note that in the example "!C" will always be true. That is another
2553*38fd1498Szrj possible improvement for handling COND_EXECs in this scheduler: it
2554*38fd1498Szrj could remove always-true predicates. */
2555*38fd1498Szrj
2556*38fd1498Szrj if (!reload_completed || ! (JUMP_P (tail) || JUMP_TABLE_DATA_P (tail)))
2557*38fd1498Szrj return;
2558*38fd1498Szrj
2559*38fd1498Szrj insn = tail;
2560*38fd1498Szrj while (insn != head)
2561*38fd1498Szrj {
2562*38fd1498Szrj insn = PREV_INSN (insn);
2563*38fd1498Szrj
2564*38fd1498Szrj /* Note that we want to add this dependency even when
2565*38fd1498Szrj sched_insns_conditions_mutex_p returns true. The whole point
2566*38fd1498Szrj is that we _want_ this dependency, even if these insns really
2567*38fd1498Szrj are independent. */
2568*38fd1498Szrj if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
2569*38fd1498Szrj add_dependence (tail, insn, REG_DEP_ANTI);
2570*38fd1498Szrj }
2571*38fd1498Szrj }
2572*38fd1498Szrj
2573*38fd1498Szrj /* Data structures for the computation of data dependences in a region. We
2574*38fd1498Szrj keep one `deps' structure for every basic block. Before analyzing the
2575*38fd1498Szrj data dependences for a bb, its variables are initialized as a function of
2576*38fd1498Szrj the variables of its predecessors. When the analysis for a bb completes,
2577*38fd1498Szrj we save the contents to the corresponding bb_deps[bb] variable. */
2578*38fd1498Szrj
2579*38fd1498Szrj static struct deps_desc *bb_deps;
2580*38fd1498Szrj
2581*38fd1498Szrj static void
2582*38fd1498Szrj concat_insn_mem_list (rtx_insn_list *copy_insns,
2583*38fd1498Szrj rtx_expr_list *copy_mems,
2584*38fd1498Szrj rtx_insn_list **old_insns_p,
2585*38fd1498Szrj rtx_expr_list **old_mems_p)
2586*38fd1498Szrj {
2587*38fd1498Szrj rtx_insn_list *new_insns = *old_insns_p;
2588*38fd1498Szrj rtx_expr_list *new_mems = *old_mems_p;
2589*38fd1498Szrj
2590*38fd1498Szrj while (copy_insns)
2591*38fd1498Szrj {
2592*38fd1498Szrj new_insns = alloc_INSN_LIST (copy_insns->insn (), new_insns);
2593*38fd1498Szrj new_mems = alloc_EXPR_LIST (VOIDmode, copy_mems->element (), new_mems);
2594*38fd1498Szrj copy_insns = copy_insns->next ();
2595*38fd1498Szrj copy_mems = copy_mems->next ();
2596*38fd1498Szrj }
2597*38fd1498Szrj
2598*38fd1498Szrj *old_insns_p = new_insns;
2599*38fd1498Szrj *old_mems_p = new_mems;
2600*38fd1498Szrj }
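
/* Note that COPY_INSNS and COPY_MEMS are parallel lists (one pending MEM per
   pending insn), so testing COPY_INSNS alone in the loop above suffices.
   Schematically, prepending the pair lists ((i1 . m1) (i2 . m2)) onto an old
   pair list ((i3 . m3)) leaves *OLD_INSNS_P holding (i2 i1 i3) and
   *OLD_MEMS_P holding (m2 m1 m3): fresh copies of the new entries, in
   reverse, consed onto the shared old tail.  */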
2601*38fd1498Szrj
2602*38fd1498Szrj /* Join PRED_DEPS to the SUCC_DEPS. */
2603*38fd1498Szrj void
2604*38fd1498Szrj deps_join (struct deps_desc *succ_deps, struct deps_desc *pred_deps)
2605*38fd1498Szrj {
2606*38fd1498Szrj unsigned reg;
2607*38fd1498Szrj reg_set_iterator rsi;
2608*38fd1498Szrj
2609*38fd1498Szrj /* The reg_last lists are inherited by successor. */
2610*38fd1498Szrj EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi)
2611*38fd1498Szrj {
2612*38fd1498Szrj struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2613*38fd1498Szrj struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2614*38fd1498Szrj
2615*38fd1498Szrj succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2616*38fd1498Szrj succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2617*38fd1498Szrj succ_rl->implicit_sets
2618*38fd1498Szrj = concat_INSN_LIST (pred_rl->implicit_sets, succ_rl->implicit_sets);
2619*38fd1498Szrj succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2620*38fd1498Szrj succ_rl->clobbers);
2621*38fd1498Szrj succ_rl->uses_length += pred_rl->uses_length;
2622*38fd1498Szrj succ_rl->clobbers_length += pred_rl->clobbers_length;
2623*38fd1498Szrj }
2624*38fd1498Szrj IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2625*38fd1498Szrj
2626*38fd1498Szrj /* Mem read/write lists are inherited by successor. */
2627*38fd1498Szrj concat_insn_mem_list (pred_deps->pending_read_insns,
2628*38fd1498Szrj pred_deps->pending_read_mems,
2629*38fd1498Szrj &succ_deps->pending_read_insns,
2630*38fd1498Szrj &succ_deps->pending_read_mems);
2631*38fd1498Szrj concat_insn_mem_list (pred_deps->pending_write_insns,
2632*38fd1498Szrj pred_deps->pending_write_mems,
2633*38fd1498Szrj &succ_deps->pending_write_insns,
2634*38fd1498Szrj &succ_deps->pending_write_mems);
2635*38fd1498Szrj
2636*38fd1498Szrj succ_deps->pending_jump_insns
2637*38fd1498Szrj = concat_INSN_LIST (pred_deps->pending_jump_insns,
2638*38fd1498Szrj succ_deps->pending_jump_insns);
2639*38fd1498Szrj succ_deps->last_pending_memory_flush
2640*38fd1498Szrj = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2641*38fd1498Szrj succ_deps->last_pending_memory_flush);
2642*38fd1498Szrj
2643*38fd1498Szrj succ_deps->pending_read_list_length += pred_deps->pending_read_list_length;
2644*38fd1498Szrj succ_deps->pending_write_list_length += pred_deps->pending_write_list_length;
2645*38fd1498Szrj succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2646*38fd1498Szrj
2647*38fd1498Szrj /* last_function_call is inherited by successor. */
2648*38fd1498Szrj succ_deps->last_function_call
2649*38fd1498Szrj = concat_INSN_LIST (pred_deps->last_function_call,
2650*38fd1498Szrj succ_deps->last_function_call);
2651*38fd1498Szrj
2652*38fd1498Szrj /* last_function_call_may_noreturn is inherited by successor. */
2653*38fd1498Szrj succ_deps->last_function_call_may_noreturn
2654*38fd1498Szrj = concat_INSN_LIST (pred_deps->last_function_call_may_noreturn,
2655*38fd1498Szrj succ_deps->last_function_call_may_noreturn);
2656*38fd1498Szrj
2657*38fd1498Szrj /* sched_before_next_call is inherited by successor. */
2658*38fd1498Szrj succ_deps->sched_before_next_call
2659*38fd1498Szrj = concat_INSN_LIST (pred_deps->sched_before_next_call,
2660*38fd1498Szrj succ_deps->sched_before_next_call);
2661*38fd1498Szrj }
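
/* Schematically: if the predecessor ends with reg_last[R].uses = (i2 i1) and
   the successor has already accumulated (i4 i3), the join leaves the
   successor with copies of the predecessor entries consed in front of the
   shared tail (i4 i3), so every predecessor access is visible when the
   successor block is analyzed.  The length counters updated above keep the
   pending-list-size heuristics in sched-deps.c (which flush overly long
   lists) working across block boundaries.  */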
2662*38fd1498Szrj
2663*38fd1498Szrj /* After computing the dependencies for block BB, propagate the dependencies
2664*38fd1498Szrj found in TMP_DEPS to the successors of the block. */
2665*38fd1498Szrj static void
2666*38fd1498Szrj propagate_deps (int bb, struct deps_desc *pred_deps)
2667*38fd1498Szrj {
2668*38fd1498Szrj basic_block block = BASIC_BLOCK_FOR_FN (cfun, BB_TO_BLOCK (bb));
2669*38fd1498Szrj edge_iterator ei;
2670*38fd1498Szrj edge e;
2671*38fd1498Szrj
2672*38fd1498Szrj /* bb's structures are inherited by its successors. */
2673*38fd1498Szrj FOR_EACH_EDGE (e, ei, block->succs)
2674*38fd1498Szrj {
2675*38fd1498Szrj /* Only bbs "below" bb, in the same region, are interesting. */
2676*38fd1498Szrj if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2677*38fd1498Szrj || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index)
2678*38fd1498Szrj || BLOCK_TO_BB (e->dest->index) <= bb)
2679*38fd1498Szrj continue;
2680*38fd1498Szrj
2681*38fd1498Szrj deps_join (bb_deps + BLOCK_TO_BB (e->dest->index), pred_deps);
2682*38fd1498Szrj }
2683*38fd1498Szrj
2684*38fd1498Szrj /* These lists should point to the right place, for correct
2685*38fd1498Szrj freeing later. */
2686*38fd1498Szrj bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2687*38fd1498Szrj bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2688*38fd1498Szrj bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2689*38fd1498Szrj bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2690*38fd1498Szrj bb_deps[bb].pending_jump_insns = pred_deps->pending_jump_insns;
2691*38fd1498Szrj
2692*38fd1498Szrj /* Can't allow these to be freed twice. */
2693*38fd1498Szrj pred_deps->pending_read_insns = 0;
2694*38fd1498Szrj pred_deps->pending_read_mems = 0;
2695*38fd1498Szrj pred_deps->pending_write_insns = 0;
2696*38fd1498Szrj pred_deps->pending_write_mems = 0;
2697*38fd1498Szrj pred_deps->pending_jump_insns = 0;
2698*38fd1498Szrj }
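
/* The assignments above hand the pending lists over from the temporary
   PRED_DEPS into bb_deps[bb]; zeroing the PRED_DEPS fields afterwards keeps
   the free_deps call in compute_block_dependences from freeing list nodes
   that free_pending_lists must release later.  */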
2699*38fd1498Szrj
2700*38fd1498Szrj /* Compute dependences inside bb. In a multiple blocks region:
2701*38fd1498Szrj (1) a bb is analyzed after its predecessors, and (2) the lists in
2702*38fd1498Szrj effect at the end of bb (after analyzing for bb) are inherited by
2703*38fd1498Szrj bb's successors.
2704*38fd1498Szrj
2705*38fd1498Szrj Specifically for reg-reg data dependences, the block insns are
2706*38fd1498Szrj scanned by sched_analyze () top-to-bottom. Three lists are
2707*38fd1498Szrj maintained by sched_analyze (): reg_last[].sets for register DEFs,
2708*38fd1498Szrj reg_last[].implicit_sets for implicit hard register DEFs, and
2709*38fd1498Szrj reg_last[].uses for register USEs.
2710*38fd1498Szrj
2711*38fd1498Szrj When analysis is completed for bb, we update for its successors:
2712*38fd1498Szrj ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2713*38fd1498Szrj ; - IMPLICIT_DEFS[succ] = Union (IMPLICIT_DEFS [succ], IMPLICIT_DEFS [bb])
2714*38fd1498Szrj ; - USES[succ] = Union (USES [succ], DEFS [bb])
2715*38fd1498Szrj
2716*38fd1498Szrj The mechanism for computing mem-mem data dependence is very
2717*38fd1498Szrj similar, and the result is interblock dependences in the region. */
2718*38fd1498Szrj
2719*38fd1498Szrj static void
2720*38fd1498Szrj compute_block_dependences (int bb)
2721*38fd1498Szrj {
2722*38fd1498Szrj rtx_insn *head, *tail;
2723*38fd1498Szrj struct deps_desc tmp_deps;
2724*38fd1498Szrj
2725*38fd1498Szrj tmp_deps = bb_deps[bb];
2726*38fd1498Szrj
2727*38fd1498Szrj /* Do the analysis for this block. */
2728*38fd1498Szrj gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2729*38fd1498Szrj get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2730*38fd1498Szrj
2731*38fd1498Szrj sched_analyze (&tmp_deps, head, tail);
2732*38fd1498Szrj
2733*38fd1498Szrj add_branch_dependences (head, tail);
2734*38fd1498Szrj
2735*38fd1498Szrj if (current_nr_blocks > 1)
2736*38fd1498Szrj propagate_deps (bb, &tmp_deps);
2737*38fd1498Szrj
2738*38fd1498Szrj /* Free up the INSN_LISTs. */
2739*38fd1498Szrj free_deps (&tmp_deps);
2740*38fd1498Szrj
2741*38fd1498Szrj if (targetm.sched.dependencies_evaluation_hook)
2742*38fd1498Szrj targetm.sched.dependencies_evaluation_hook (head, tail);
2743*38fd1498Szrj }
2744*38fd1498Szrj
2745*38fd1498Szrj /* Free dependencies of instructions inside BB. */
2746*38fd1498Szrj static void
2747*38fd1498Szrj free_block_dependencies (int bb)
2748*38fd1498Szrj {
2749*38fd1498Szrj rtx_insn *head;
2750*38fd1498Szrj rtx_insn *tail;
2751*38fd1498Szrj
2752*38fd1498Szrj get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2753*38fd1498Szrj
2754*38fd1498Szrj if (no_real_insns_p (head, tail))
2755*38fd1498Szrj return;
2756*38fd1498Szrj
2757*38fd1498Szrj sched_free_deps (head, tail, true);
2758*38fd1498Szrj }
2759*38fd1498Szrj
2760*38fd1498Szrj /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2761*38fd1498Szrj them to the unused_*_list variables, so that they can be reused. */
2762*38fd1498Szrj
2763*38fd1498Szrj static void
2764*38fd1498Szrj free_pending_lists (void)
2765*38fd1498Szrj {
2766*38fd1498Szrj int bb;
2767*38fd1498Szrj
2768*38fd1498Szrj for (bb = 0; bb < current_nr_blocks; bb++)
2769*38fd1498Szrj {
2770*38fd1498Szrj free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2771*38fd1498Szrj free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2772*38fd1498Szrj free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2773*38fd1498Szrj free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2774*38fd1498Szrj free_INSN_LIST_list (&bb_deps[bb].pending_jump_insns);
2775*38fd1498Szrj }
2776*38fd1498Szrj }
2777*38fd1498Szrj
2778*38fd1498Szrj /* Print dependences for debugging starting from FROM_BB.
2779*38fd1498Szrj Callable from debugger. */
2782*38fd1498Szrj DEBUG_FUNCTION void
2783*38fd1498Szrj debug_rgn_dependencies (int from_bb)
2784*38fd1498Szrj {
2785*38fd1498Szrj int bb;
2786*38fd1498Szrj
2787*38fd1498Szrj fprintf (sched_dump,
2788*38fd1498Szrj ";; --------------- forward dependences: ------------ \n");
2789*38fd1498Szrj
2790*38fd1498Szrj for (bb = from_bb; bb < current_nr_blocks; bb++)
2791*38fd1498Szrj {
2792*38fd1498Szrj rtx_insn *head, *tail;
2793*38fd1498Szrj
2794*38fd1498Szrj get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2795*38fd1498Szrj fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2796*38fd1498Szrj BB_TO_BLOCK (bb), bb);
2797*38fd1498Szrj
2798*38fd1498Szrj debug_dependencies (head, tail);
2799*38fd1498Szrj }
2800*38fd1498Szrj }
2801*38fd1498Szrj
2802*38fd1498Szrj /* Print dependencies information for instructions between HEAD and TAIL.
2803*38fd1498Szrj ??? This function would probably fit best in haifa-sched.c. */
2804*38fd1498Szrj void debug_dependencies (rtx_insn *head, rtx_insn *tail)
2805*38fd1498Szrj {
2806*38fd1498Szrj rtx_insn *insn;
2807*38fd1498Szrj rtx_insn *next_tail = NEXT_INSN (tail);
2808*38fd1498Szrj
2809*38fd1498Szrj fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2810*38fd1498Szrj "insn", "code", "bb", "dep", "prio", "cost",
2811*38fd1498Szrj "reservation");
2812*38fd1498Szrj fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2813*38fd1498Szrj "----", "----", "--", "---", "----", "----",
2814*38fd1498Szrj "-----------");
2815*38fd1498Szrj
2816*38fd1498Szrj for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2817*38fd1498Szrj {
2818*38fd1498Szrj if (! INSN_P (insn))
2819*38fd1498Szrj {
2820*38fd1498Szrj int n;
2821*38fd1498Szrj fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2822*38fd1498Szrj if (NOTE_P (insn))
2823*38fd1498Szrj {
2824*38fd1498Szrj n = NOTE_KIND (insn);
2825*38fd1498Szrj fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2826*38fd1498Szrj }
2827*38fd1498Szrj else
2828*38fd1498Szrj fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2829*38fd1498Szrj continue;
2830*38fd1498Szrj }
2831*38fd1498Szrj
2832*38fd1498Szrj fprintf (sched_dump,
2833*38fd1498Szrj ";; %s%5d%6d%6d%6d%6d%6d ",
2834*38fd1498Szrj (SCHED_GROUP_P (insn) ? "+" : " "),
2835*38fd1498Szrj INSN_UID (insn),
2836*38fd1498Szrj INSN_CODE (insn),
2837*38fd1498Szrj BLOCK_NUM (insn),
2838*38fd1498Szrj sched_emulate_haifa_p ? -1 : sd_lists_size (insn, SD_LIST_BACK),
2839*38fd1498Szrj (sel_sched_p () ? (sched_emulate_haifa_p ? -1
2840*38fd1498Szrj : INSN_PRIORITY (insn))
2841*38fd1498Szrj : INSN_PRIORITY (insn)),
2842*38fd1498Szrj (sel_sched_p () ? (sched_emulate_haifa_p ? -1
2843*38fd1498Szrj : insn_sched_cost (insn))
2844*38fd1498Szrj : insn_sched_cost (insn)));
2845*38fd1498Szrj
2846*38fd1498Szrj if (recog_memoized (insn) < 0)
2847*38fd1498Szrj fprintf (sched_dump, "nothing");
2848*38fd1498Szrj else
2849*38fd1498Szrj print_reservation (sched_dump, insn);
2850*38fd1498Szrj
2851*38fd1498Szrj fprintf (sched_dump, "\t: ");
2852*38fd1498Szrj {
2853*38fd1498Szrj sd_iterator_def sd_it;
2854*38fd1498Szrj dep_t dep;
2855*38fd1498Szrj
2856*38fd1498Szrj FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
2857*38fd1498Szrj fprintf (sched_dump, "%d%s%s ", INSN_UID (DEP_CON (dep)),
2858*38fd1498Szrj DEP_NONREG (dep) ? "n" : "",
2859*38fd1498Szrj DEP_MULTIPLE (dep) ? "m" : "");
2860*38fd1498Szrj }
2861*38fd1498Szrj fprintf (sched_dump, "\n");
2862*38fd1498Szrj }
2863*38fd1498Szrj
2864*38fd1498Szrj fprintf (sched_dump, "\n");
2865*38fd1498Szrj }
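
/* A typical body line produced above reads, schematically:

     ;;      42   123     3     2     4     1  nothing	: 45 47m

   i.e. insn uid 42 with insn code 123 in block 3, having 2 backward
   dependencies, priority 4 and cost 1, no DFA reservation, and forward
   dependencies on uid 45 and on uid 47 (the latter a multiple dep, hence
   the "m" suffix).  */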
2866*38fd1498Szrj
2867*38fd1498Szrj /* Dump dependency graph for the current region to a file using dot syntax. */
2868*38fd1498Szrj
2869*38fd1498Szrj void
2870*38fd1498Szrj dump_rgn_dependencies_dot (FILE *file)
2871*38fd1498Szrj {
2872*38fd1498Szrj rtx_insn *head, *tail, *con, *pro;
2873*38fd1498Szrj sd_iterator_def sd_it;
2874*38fd1498Szrj dep_t dep;
2875*38fd1498Szrj int bb;
2876*38fd1498Szrj pretty_printer pp;
2877*38fd1498Szrj
2878*38fd1498Szrj pp.buffer->stream = file;
2879*38fd1498Szrj pp_printf (&pp, "digraph SchedDG {\n");
2880*38fd1498Szrj
2881*38fd1498Szrj for (bb = 0; bb < current_nr_blocks; ++bb)
2882*38fd1498Szrj {
2883*38fd1498Szrj /* Begin subgraph (basic block). */
2884*38fd1498Szrj pp_printf (&pp, "subgraph cluster_block_%d {\n", bb);
2885*38fd1498Szrj pp_printf (&pp, "\t" "color=blue;" "\n");
2886*38fd1498Szrj pp_printf (&pp, "\t" "style=bold;" "\n");
2887*38fd1498Szrj pp_printf (&pp, "\t" "label=\"BB #%d\";\n", BB_TO_BLOCK (bb));
2888*38fd1498Szrj
2889*38fd1498Szrj /* Setup head and tail (no support for EBBs). */
2890*38fd1498Szrj gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2891*38fd1498Szrj get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2892*38fd1498Szrj tail = NEXT_INSN (tail);
2893*38fd1498Szrj
2894*38fd1498Szrj /* Dump all insns. */
2895*38fd1498Szrj for (con = head; con != tail; con = NEXT_INSN (con))
2896*38fd1498Szrj {
2897*38fd1498Szrj if (!INSN_P (con))
2898*38fd1498Szrj continue;
2899*38fd1498Szrj
2900*38fd1498Szrj /* Pretty print the insn. */
2901*38fd1498Szrj pp_printf (&pp, "\t%d [label=\"{", INSN_UID (con));
2902*38fd1498Szrj pp_write_text_to_stream (&pp);
2903*38fd1498Szrj print_insn (&pp, con, /*verbose=*/false);
2904*38fd1498Szrj pp_write_text_as_dot_label_to_stream (&pp, /*for_record=*/true);
2905*38fd1498Szrj pp_write_text_to_stream (&pp);
2906*38fd1498Szrj
2907*38fd1498Szrj /* Dump instruction attributes. */
2908*38fd1498Szrj pp_printf (&pp, "|{ uid:%d | luid:%d | prio:%d }}\",shape=record]\n",
2909*38fd1498Szrj INSN_UID (con), INSN_LUID (con), INSN_PRIORITY (con));
2910*38fd1498Szrj
2911*38fd1498Szrj /* Dump all deps. */
2912*38fd1498Szrj FOR_EACH_DEP (con, SD_LIST_BACK, sd_it, dep)
2913*38fd1498Szrj {
2914*38fd1498Szrj int weight = 0;
2915*38fd1498Szrj const char *color;
2916*38fd1498Szrj pro = DEP_PRO (dep);
2917*38fd1498Szrj
2918*38fd1498Szrj switch (DEP_TYPE (dep))
2919*38fd1498Szrj {
2920*38fd1498Szrj case REG_DEP_TRUE:
2921*38fd1498Szrj color = "black";
2922*38fd1498Szrj weight = 1;
2923*38fd1498Szrj break;
2924*38fd1498Szrj case REG_DEP_OUTPUT:
2925*38fd1498Szrj case REG_DEP_ANTI:
2926*38fd1498Szrj color = "orange";
2927*38fd1498Szrj break;
2928*38fd1498Szrj case REG_DEP_CONTROL:
2929*38fd1498Szrj color = "blue";
2930*38fd1498Szrj break;
2931*38fd1498Szrj default:
2932*38fd1498Szrj gcc_unreachable ();
2933*38fd1498Szrj }
2934*38fd1498Szrj
2935*38fd1498Szrj pp_printf (&pp, "\t%d -> %d [color=%s",
2936*38fd1498Szrj INSN_UID (pro), INSN_UID (con), color);
2937*38fd1498Szrj if (int cost = dep_cost (dep))
2938*38fd1498Szrj pp_printf (&pp, ",label=%d", cost);
2939*38fd1498Szrj pp_printf (&pp, ",weight=%d", weight);
2940*38fd1498Szrj pp_printf (&pp, "];\n");
2941*38fd1498Szrj }
2942*38fd1498Szrj }
2943*38fd1498Szrj pp_printf (&pp, "}\n");
2944*38fd1498Szrj }
2945*38fd1498Szrj
2946*38fd1498Szrj pp_printf (&pp, "}\n");
2947*38fd1498Szrj pp_flush (&pp);
2948*38fd1498Szrj }
2949*38fd1498Szrj
2950*38fd1498Szrj /* Dump dependency graph for the current region to a file using dot syntax. */
2951*38fd1498Szrj
2952*38fd1498Szrj DEBUG_FUNCTION void
2953*38fd1498Szrj dump_rgn_dependencies_dot (const char *fname)
2954*38fd1498Szrj {
2955*38fd1498Szrj FILE *fp;
2956*38fd1498Szrj
2957*38fd1498Szrj fp = fopen (fname, "w");
2958*38fd1498Szrj if (!fp)
2959*38fd1498Szrj {
2960*38fd1498Szrj perror ("fopen");
2961*38fd1498Szrj return;
2962*38fd1498Szrj }
2963*38fd1498Szrj
2964*38fd1498Szrj dump_rgn_dependencies_dot (fp);
2965*38fd1498Szrj fclose (fp);
2966*38fd1498Szrj }
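
/* Typical debugger usage (the file name below is only an example):

     (gdb) call dump_rgn_dependencies_dot ("/tmp/rgn.dot")

   The resulting graph can then be rendered offline, e.g. with
   "dot -Tpdf /tmp/rgn.dot -o rgn.pdf".  */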
2967*38fd1498Szrj
2968*38fd1498Szrj
2969*38fd1498Szrj /* Returns true if all the basic blocks of the current region have the
2970*38fd1498Szrj BB_DISABLE_SCHEDULE flag set, which means the region must not be scheduled. */
2971*38fd1498Szrj bool
2972*38fd1498Szrj sched_is_disabled_for_current_region_p (void)
2973*38fd1498Szrj {
2974*38fd1498Szrj int bb;
2975*38fd1498Szrj
2976*38fd1498Szrj for (bb = 0; bb < current_nr_blocks; bb++)
2977*38fd1498Szrj if (!(BASIC_BLOCK_FOR_FN (cfun,
2978*38fd1498Szrj BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
2979*38fd1498Szrj return false;
2980*38fd1498Szrj
2981*38fd1498Szrj return true;
2982*38fd1498Szrj }
2983*38fd1498Szrj
2984*38fd1498Szrj /* Free all region dependencies saved in INSN_BACK_DEPS and
2985*38fd1498Szrj INSN_RESOLVED_BACK_DEPS. The Haifa scheduler does this on the fly
2986*38fd1498Szrj when scheduling, so this function is supposed to be called from
2987*38fd1498Szrj the selective scheduling only. */
2988*38fd1498Szrj void
2989*38fd1498Szrj free_rgn_deps (void)
2990*38fd1498Szrj {
2991*38fd1498Szrj int bb;
2992*38fd1498Szrj
2993*38fd1498Szrj for (bb = 0; bb < current_nr_blocks; bb++)
2994*38fd1498Szrj {
2995*38fd1498Szrj rtx_insn *head, *tail;
2996*38fd1498Szrj
2997*38fd1498Szrj gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2998*38fd1498Szrj get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2999*38fd1498Szrj
3000*38fd1498Szrj sched_free_deps (head, tail, false);
3001*38fd1498Szrj }
3002*38fd1498Szrj }
3003*38fd1498Szrj
3004*38fd1498Szrj static int rgn_n_insns;
3005*38fd1498Szrj
3006*38fd1498Szrj /* Compute insn priority for a current region. */
3007*38fd1498Szrj void
3008*38fd1498Szrj compute_priorities (void)
3009*38fd1498Szrj {
3010*38fd1498Szrj int bb;
3011*38fd1498Szrj
3012*38fd1498Szrj current_sched_info->sched_max_insns_priority = 0;
3013*38fd1498Szrj for (bb = 0; bb < current_nr_blocks; bb++)
3014*38fd1498Szrj {
3015*38fd1498Szrj rtx_insn *head, *tail;
3016*38fd1498Szrj
3017*38fd1498Szrj gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
3018*38fd1498Szrj get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
3019*38fd1498Szrj
3020*38fd1498Szrj if (no_real_insns_p (head, tail))
3021*38fd1498Szrj continue;
3022*38fd1498Szrj
3023*38fd1498Szrj rgn_n_insns += set_priorities (head, tail);
3024*38fd1498Szrj }
3025*38fd1498Szrj current_sched_info->sched_max_insns_priority++;
3026*38fd1498Szrj }
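
/* On return, rgn_n_insns has accumulated the number of insns visited by
   set_priorities in each block; schedule_region zeroes it beforehand and
   later asserts that exactly this many insns were scheduled.  */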
3027*38fd1498Szrj
3028*38fd1498Szrj /* (Re-)initialize the arrays of DFA states at the end of each basic block.
3029*38fd1498Szrj
3030*38fd1498Szrj SAVED_LAST_BASIC_BLOCK is the previous length of the arrays. It must be
3031*38fd1498Szrj zero for the first call to this function, to allocate the arrays for the
3032*38fd1498Szrj first time.
3033*38fd1498Szrj
3034*38fd1498Szrj This function is called once during initialization of the scheduler, and
3035*38fd1498Szrj called again to resize the arrays if new basic blocks have been created,
3036*38fd1498Szrj for example for speculation recovery code. */
3037*38fd1498Szrj
3038*38fd1498Szrj static void
3039*38fd1498Szrj realloc_bb_state_array (int saved_last_basic_block)
3040*38fd1498Szrj {
3041*38fd1498Szrj char *old_bb_state_array = bb_state_array;
3042*38fd1498Szrj size_t lbb = (size_t) last_basic_block_for_fn (cfun);
3043*38fd1498Szrj size_t slbb = (size_t) saved_last_basic_block;
3044*38fd1498Szrj
3045*38fd1498Szrj /* Nothing to do if nothing changed since the last time this was called. */
3046*38fd1498Szrj if (saved_last_basic_block == last_basic_block_for_fn (cfun))
3047*38fd1498Szrj return;
3048*38fd1498Szrj
3049*38fd1498Szrj /* The selective scheduler doesn't use the state arrays. */
3050*38fd1498Szrj if (sel_sched_p ())
3051*38fd1498Szrj {
3052*38fd1498Szrj gcc_assert (bb_state_array == NULL && bb_state == NULL);
3053*38fd1498Szrj return;
3054*38fd1498Szrj }
3055*38fd1498Szrj
3056*38fd1498Szrj gcc_checking_assert (saved_last_basic_block == 0
3057*38fd1498Szrj || (bb_state_array != NULL && bb_state != NULL));
3058*38fd1498Szrj
3059*38fd1498Szrj bb_state_array = XRESIZEVEC (char, bb_state_array, lbb * dfa_state_size);
3060*38fd1498Szrj bb_state = XRESIZEVEC (state_t, bb_state, lbb);
3061*38fd1498Szrj
3062*38fd1498Szrj /* If BB_STATE_ARRAY has moved, fix up all the state pointers in the
3063*38fd1498Szrj array.  Otherwise only fix up the newly allocated ones.  For the state
3064*38fd1498Szrj array itself, only initialize the new entries. */
3065*38fd1498Szrj bool bb_state_array_moved = (bb_state_array != old_bb_state_array);
3066*38fd1498Szrj for (size_t i = bb_state_array_moved ? 0 : slbb; i < lbb; i++)
3067*38fd1498Szrj bb_state[i] = (state_t) (bb_state_array + i * dfa_state_size);
3068*38fd1498Szrj for (size_t i = slbb; i < lbb; i++)
3069*38fd1498Szrj state_reset (bb_state[i]);
3070*38fd1498Szrj }
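
/* For example, growing from 4 to 6 basic blocks: if XRESIZEVEC kept the
   buffer in place, only bb_state[4] and bb_state[5] are pointed into the
   array and reset; if the buffer moved, all six pointers are recomputed,
   but still only the two new states are reset.  */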
3071*38fd1498Szrj
3072*38fd1498Szrj /* Free the arrays of DFA states at the end of each basic block. */
3073*38fd1498Szrj
3074*38fd1498Szrj static void
3075*38fd1498Szrj free_bb_state_array (void)
3076*38fd1498Szrj {
3077*38fd1498Szrj free (bb_state_array);
3078*38fd1498Szrj free (bb_state);
3079*38fd1498Szrj bb_state_array = NULL;
3080*38fd1498Szrj bb_state = NULL;
3081*38fd1498Szrj }
3082*38fd1498Szrj
3083*38fd1498Szrj /* Schedule a region. A region is either an inner loop, a loop-free
3084*38fd1498Szrj subroutine, or a single basic block. Each bb in the region is
3085*38fd1498Szrj scheduled after its flow predecessors. */
3086*38fd1498Szrj
3087*38fd1498Szrj static void
3088*38fd1498Szrj schedule_region (int rgn)
3089*38fd1498Szrj {
3090*38fd1498Szrj int bb;
3091*38fd1498Szrj int sched_rgn_n_insns = 0;
3092*38fd1498Szrj
3093*38fd1498Szrj rgn_n_insns = 0;
3094*38fd1498Szrj
3095*38fd1498Szrj /* Do not support register pressure sensitive scheduling for the new regions
3096*38fd1498Szrj as we don't update the liveness info for them. */
3097*38fd1498Szrj if (sched_pressure != SCHED_PRESSURE_NONE
3098*38fd1498Szrj && rgn >= nr_regions_initial)
3099*38fd1498Szrj {
3100*38fd1498Szrj free_global_sched_pressure_data ();
3101*38fd1498Szrj sched_pressure = SCHED_PRESSURE_NONE;
3102*38fd1498Szrj }
3103*38fd1498Szrj
3104*38fd1498Szrj rgn_setup_region (rgn);
3105*38fd1498Szrj
3106*38fd1498Szrj /* Don't schedule a region that is marked by
3107*38fd1498Szrj NOTE_DISABLE_SCHED_OF_BLOCK. */
3108*38fd1498Szrj if (sched_is_disabled_for_current_region_p ())
3109*38fd1498Szrj return;
3110*38fd1498Szrj
3111*38fd1498Szrj sched_rgn_compute_dependencies (rgn);
3112*38fd1498Szrj
3113*38fd1498Szrj sched_rgn_local_init (rgn);
3114*38fd1498Szrj
3115*38fd1498Szrj /* Set priorities. */
3116*38fd1498Szrj compute_priorities ();
3117*38fd1498Szrj
3118*38fd1498Szrj sched_extend_ready_list (rgn_n_insns);
3119*38fd1498Szrj
3120*38fd1498Szrj if (sched_pressure == SCHED_PRESSURE_WEIGHTED)
3121*38fd1498Szrj {
3122*38fd1498Szrj sched_init_region_reg_pressure_info ();
3123*38fd1498Szrj for (bb = 0; bb < current_nr_blocks; bb++)
3124*38fd1498Szrj {
3125*38fd1498Szrj basic_block first_bb, last_bb;
3126*38fd1498Szrj rtx_insn *head, *tail;
3127*38fd1498Szrj
3128*38fd1498Szrj first_bb = EBB_FIRST_BB (bb);
3129*38fd1498Szrj last_bb = EBB_LAST_BB (bb);
3130*38fd1498Szrj
3131*38fd1498Szrj get_ebb_head_tail (first_bb, last_bb, &head, &tail);
3132*38fd1498Szrj
3133*38fd1498Szrj if (no_real_insns_p (head, tail))
3134*38fd1498Szrj {
3135*38fd1498Szrj gcc_assert (first_bb == last_bb);
3136*38fd1498Szrj continue;
3137*38fd1498Szrj }
3138*38fd1498Szrj sched_setup_bb_reg_pressure_info (first_bb, PREV_INSN (head));
3139*38fd1498Szrj }
3140*38fd1498Szrj }
3141*38fd1498Szrj
3142*38fd1498Szrj /* Now we can schedule all blocks. */
3143*38fd1498Szrj for (bb = 0; bb < current_nr_blocks; bb++)
3144*38fd1498Szrj {
3145*38fd1498Szrj basic_block first_bb, last_bb, curr_bb;
3146*38fd1498Szrj rtx_insn *head, *tail;
3147*38fd1498Szrj
3148*38fd1498Szrj first_bb = EBB_FIRST_BB (bb);
3149*38fd1498Szrj last_bb = EBB_LAST_BB (bb);
3150*38fd1498Szrj
3151*38fd1498Szrj get_ebb_head_tail (first_bb, last_bb, &head, &tail);
3152*38fd1498Szrj
3153*38fd1498Szrj if (no_real_insns_p (head, tail))
3154*38fd1498Szrj {
3155*38fd1498Szrj gcc_assert (first_bb == last_bb);
3156*38fd1498Szrj continue;
3157*38fd1498Szrj }
3158*38fd1498Szrj
3159*38fd1498Szrj current_sched_info->prev_head = PREV_INSN (head);
3160*38fd1498Szrj current_sched_info->next_tail = NEXT_INSN (tail);
3161*38fd1498Szrj
3162*38fd1498Szrj remove_notes (head, tail);
3163*38fd1498Szrj
3164*38fd1498Szrj unlink_bb_notes (first_bb, last_bb);
3165*38fd1498Szrj
3166*38fd1498Szrj target_bb = bb;
3167*38fd1498Szrj
3168*38fd1498Szrj gcc_assert (flag_schedule_interblock || current_nr_blocks == 1);
3169*38fd1498Szrj current_sched_info->queue_must_finish_empty = current_nr_blocks == 1;
3170*38fd1498Szrj
3171*38fd1498Szrj curr_bb = first_bb;
3172*38fd1498Szrj if (dbg_cnt (sched_block))
3173*38fd1498Szrj {
3174*38fd1498Szrj edge f;
3175*38fd1498Szrj int saved_last_basic_block = last_basic_block_for_fn (cfun);
3176*38fd1498Szrj
3177*38fd1498Szrj schedule_block (&curr_bb, bb_state[first_bb->index]);
3178*38fd1498Szrj gcc_assert (EBB_FIRST_BB (bb) == first_bb);
3179*38fd1498Szrj sched_rgn_n_insns += sched_n_insns;
3180*38fd1498Szrj realloc_bb_state_array (saved_last_basic_block);
3181*38fd1498Szrj f = find_fallthru_edge (last_bb->succs);
3182*38fd1498Szrj if (f
3183*38fd1498Szrj && (!f->probability.initialized_p ()
3184*38fd1498Szrj || f->probability.to_reg_br_prob_base () * 100 / REG_BR_PROB_BASE >=
3185*38fd1498Szrj PARAM_VALUE (PARAM_SCHED_STATE_EDGE_PROB_CUTOFF)))
3186*38fd1498Szrj {
3187*38fd1498Szrj memcpy (bb_state[f->dest->index], curr_state,
3188*38fd1498Szrj dfa_state_size);
3189*38fd1498Szrj if (sched_verbose >= 5)
3190*38fd1498Szrj fprintf (sched_dump, "saving state for edge %d->%d\n",
3191*38fd1498Szrj f->src->index, f->dest->index);
3192*38fd1498Szrj }
3193*38fd1498Szrj }
3194*38fd1498Szrj else
3195*38fd1498Szrj {
3196*38fd1498Szrj sched_rgn_n_insns += rgn_n_insns;
3197*38fd1498Szrj }
3198*38fd1498Szrj
3199*38fd1498Szrj /* Clean up. */
3200*38fd1498Szrj if (current_nr_blocks > 1)
3201*38fd1498Szrj free_trg_info ();
3202*38fd1498Szrj }
3203*38fd1498Szrj
3204*38fd1498Szrj /* Sanity check: verify that all region insns were scheduled. */
3205*38fd1498Szrj gcc_assert (sched_rgn_n_insns == rgn_n_insns);
3206*38fd1498Szrj
3207*38fd1498Szrj sched_finish_ready_list ();
3208*38fd1498Szrj
3209*38fd1498Szrj /* Done with this region. */
3210*38fd1498Szrj sched_rgn_local_finish ();
3211*38fd1498Szrj
3212*38fd1498Szrj /* Free dependencies. */
3213*38fd1498Szrj for (bb = 0; bb < current_nr_blocks; ++bb)
3214*38fd1498Szrj free_block_dependencies (bb);
3215*38fd1498Szrj
3216*38fd1498Szrj gcc_assert (haifa_recovery_bb_ever_added_p
3217*38fd1498Szrj || deps_pools_are_empty_p ());
3218*38fd1498Szrj }
3219*38fd1498Szrj
3220*38fd1498Szrj /* Initialize data structures for region scheduling. */
3221*38fd1498Szrj
3222*38fd1498Szrj void
3223*38fd1498Szrj sched_rgn_init (bool single_blocks_p)
3224*38fd1498Szrj {
3225*38fd1498Szrj min_spec_prob = ((PARAM_VALUE (PARAM_MIN_SPEC_PROB) * REG_BR_PROB_BASE)
3226*38fd1498Szrj / 100);
3227*38fd1498Szrj
3228*38fd1498Szrj nr_inter = 0;
3229*38fd1498Szrj nr_spec = 0;
3230*38fd1498Szrj
3231*38fd1498Szrj extend_regions ();
3232*38fd1498Szrj
3233*38fd1498Szrj CONTAINING_RGN (ENTRY_BLOCK) = -1;
3234*38fd1498Szrj CONTAINING_RGN (EXIT_BLOCK) = -1;
3235*38fd1498Szrj
3236*38fd1498Szrj realloc_bb_state_array (0);
3237*38fd1498Szrj
3238*38fd1498Szrj /* Compute regions for scheduling. */
3239*38fd1498Szrj if (single_blocks_p
3240*38fd1498Szrj || n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS + 1
3241*38fd1498Szrj || !flag_schedule_interblock
3242*38fd1498Szrj || is_cfg_nonregular ())
3243*38fd1498Szrj {
3244*38fd1498Szrj find_single_block_region (sel_sched_p ());
3245*38fd1498Szrj }
3246*38fd1498Szrj else
3247*38fd1498Szrj {
3248*38fd1498Szrj /* Compute the dominators and post dominators. */
3249*38fd1498Szrj if (!sel_sched_p ())
3250*38fd1498Szrj calculate_dominance_info (CDI_DOMINATORS);
3251*38fd1498Szrj
3252*38fd1498Szrj /* Find regions. */
3253*38fd1498Szrj find_rgns ();
3254*38fd1498Szrj
3255*38fd1498Szrj if (sched_verbose >= 3)
3256*38fd1498Szrj debug_regions ();
3257*38fd1498Szrj
3258*38fd1498Szrj /* For now. This will move as more and more of haifa is converted
3259*38fd1498Szrj to using the cfg code. */
3260*38fd1498Szrj if (!sel_sched_p ())
3261*38fd1498Szrj free_dominance_info (CDI_DOMINATORS);
3262*38fd1498Szrj }
3263*38fd1498Szrj
3264*38fd1498Szrj gcc_assert (nr_regions > 0 && nr_regions <= n_basic_blocks_for_fn (cfun));
3265*38fd1498Szrj
3266*38fd1498Szrj RGN_BLOCKS (nr_regions) = (RGN_BLOCKS (nr_regions - 1)
3267*38fd1498Szrj + RGN_NR_BLOCKS (nr_regions - 1));
3268*38fd1498Szrj nr_regions_initial = nr_regions;
3269*38fd1498Szrj }
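
/* The extra RGN_BLOCKS (nr_regions) entry written above is a sentinel:
   it marks the first free slot of rgn_bb_table, which is exactly where
   rgn_make_new_region_out_of_new_block appends a newly created block.  */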
3270*38fd1498Szrj
3271*38fd1498Szrj /* Free data structures for region scheduling. */
3272*38fd1498Szrj void
3273*38fd1498Szrj sched_rgn_finish (void)
3274*38fd1498Szrj {
3275*38fd1498Szrj free_bb_state_array ();
3276*38fd1498Szrj
3277*38fd1498Szrj /* Reposition the prologue and epilogue notes in case we moved the
3278*38fd1498Szrj prologue/epilogue insns. */
3279*38fd1498Szrj if (reload_completed)
3280*38fd1498Szrj reposition_prologue_and_epilogue_notes ();
3281*38fd1498Szrj
3282*38fd1498Szrj if (sched_verbose)
3283*38fd1498Szrj {
3284*38fd1498Szrj if (reload_completed == 0
3285*38fd1498Szrj && flag_schedule_interblock)
3286*38fd1498Szrj {
3287*38fd1498Szrj fprintf (sched_dump,
3288*38fd1498Szrj "\n;; Procedure interblock/speculative motions == %d/%d \n",
3289*38fd1498Szrj nr_inter, nr_spec);
3290*38fd1498Szrj }
3291*38fd1498Szrj else
3292*38fd1498Szrj gcc_assert (nr_inter <= 0);
3293*38fd1498Szrj fprintf (sched_dump, "\n\n");
3294*38fd1498Szrj }
3295*38fd1498Szrj
3296*38fd1498Szrj nr_regions = 0;
3297*38fd1498Szrj
3298*38fd1498Szrj free (rgn_table);
3299*38fd1498Szrj rgn_table = NULL;
3300*38fd1498Szrj
3301*38fd1498Szrj free (rgn_bb_table);
3302*38fd1498Szrj rgn_bb_table = NULL;
3303*38fd1498Szrj
3304*38fd1498Szrj free (block_to_bb);
3305*38fd1498Szrj block_to_bb = NULL;
3306*38fd1498Szrj
3307*38fd1498Szrj free (containing_rgn);
3308*38fd1498Szrj containing_rgn = NULL;
3309*38fd1498Szrj
3310*38fd1498Szrj free (ebb_head);
3311*38fd1498Szrj ebb_head = NULL;
3312*38fd1498Szrj }
3313*38fd1498Szrj
3314*38fd1498Szrj /* Setup global variables like CURRENT_BLOCKS and CURRENT_NR_BLOCKS to
3315*38fd1498Szrj point to the region RGN. */
3316*38fd1498Szrj void
3317*38fd1498Szrj rgn_setup_region (int rgn)
3318*38fd1498Szrj {
3319*38fd1498Szrj int bb;
3320*38fd1498Szrj
3321*38fd1498Szrj /* Set variables for the current region. */
3322*38fd1498Szrj current_nr_blocks = RGN_NR_BLOCKS (rgn);
3323*38fd1498Szrj current_blocks = RGN_BLOCKS (rgn);
3324*38fd1498Szrj
3325*38fd1498Szrj /* EBB_HEAD is a region-scope structure. But we realloc it for
3326*38fd1498Szrj each region to save time/memory/something else.
3327*38fd1498Szrj See comments in add_block1, for what reasons we allocate +1 element. */
3328*38fd1498Szrj ebb_head = XRESIZEVEC (int, ebb_head, current_nr_blocks + 1);
3329*38fd1498Szrj for (bb = 0; bb <= current_nr_blocks; bb++)
3330*38fd1498Szrj ebb_head[bb] = current_blocks + bb;
3331*38fd1498Szrj }
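
/* For example, a region whose blocks occupy rgn_bb_table[7..9] has
   current_blocks == 7 and ebb_head == {7, 8, 9, 10}; ebb_head[bb + 1] - 1
   then indexes the last block of ebb BB, which is what rgn_add_block and
   rgn_fix_recovery_cfg rely on when searching rgn_bb_table backwards.  */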
3332*38fd1498Szrj
3333*38fd1498Szrj /* Compute instruction dependencies in region RGN. */
3334*38fd1498Szrj void
3335*38fd1498Szrj sched_rgn_compute_dependencies (int rgn)
3336*38fd1498Szrj {
3337*38fd1498Szrj if (!RGN_DONT_CALC_DEPS (rgn))
3338*38fd1498Szrj {
3339*38fd1498Szrj int bb;
3340*38fd1498Szrj
3341*38fd1498Szrj if (sel_sched_p ())
3342*38fd1498Szrj sched_emulate_haifa_p = 1;
3343*38fd1498Szrj
3344*38fd1498Szrj init_deps_global ();
3345*38fd1498Szrj
3346*38fd1498Szrj /* Initializations for region data dependence analysis. */
3347*38fd1498Szrj bb_deps = XNEWVEC (struct deps_desc, current_nr_blocks);
3348*38fd1498Szrj for (bb = 0; bb < current_nr_blocks; bb++)
3349*38fd1498Szrj init_deps (bb_deps + bb, false);
3350*38fd1498Szrj
3351*38fd1498Szrj /* Initialize bitmap used in add_branch_dependences. */
3352*38fd1498Szrj insn_referenced = sbitmap_alloc (sched_max_luid);
3353*38fd1498Szrj bitmap_clear (insn_referenced);
3354*38fd1498Szrj
3355*38fd1498Szrj /* Compute backward dependencies. */
3356*38fd1498Szrj for (bb = 0; bb < current_nr_blocks; bb++)
3357*38fd1498Szrj compute_block_dependences (bb);
3358*38fd1498Szrj
3359*38fd1498Szrj sbitmap_free (insn_referenced);
3360*38fd1498Szrj free_pending_lists ();
3361*38fd1498Szrj finish_deps_global ();
3362*38fd1498Szrj free (bb_deps);
3363*38fd1498Szrj
3364*38fd1498Szrj /* We don't want to recalculate this twice. */
3365*38fd1498Szrj RGN_DONT_CALC_DEPS (rgn) = 1;
3366*38fd1498Szrj
3367*38fd1498Szrj if (sel_sched_p ())
3368*38fd1498Szrj sched_emulate_haifa_p = 0;
3369*38fd1498Szrj }
3370*38fd1498Szrj else
3371*38fd1498Szrj /* (This is a recovery block. It is always a single block region.)
3372*38fd1498Szrj OR (We use selective scheduling.) */
3373*38fd1498Szrj gcc_assert (current_nr_blocks == 1 || sel_sched_p ());
3374*38fd1498Szrj }
3375*38fd1498Szrj
3376*38fd1498Szrj /* Initialize the region-local data structures needed to schedule
3377*38fd1498Szrj region RGN (probabilities, dominators, split edges, etc.).  */
3378*38fd1498Szrj void
3379*38fd1498Szrj sched_rgn_local_init (int rgn)
3380*38fd1498Szrj {
3381*38fd1498Szrj int bb;
3382*38fd1498Szrj
3383*38fd1498Szrj /* Compute interblock info: probabilities, split-edges, dominators, etc. */
3384*38fd1498Szrj if (current_nr_blocks > 1)
3385*38fd1498Szrj {
3386*38fd1498Szrj basic_block block;
3387*38fd1498Szrj edge e;
3388*38fd1498Szrj edge_iterator ei;
3389*38fd1498Szrj
3390*38fd1498Szrj prob = XNEWVEC (int, current_nr_blocks);
3391*38fd1498Szrj
3392*38fd1498Szrj dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
3393*38fd1498Szrj bitmap_vector_clear (dom, current_nr_blocks);
3394*38fd1498Szrj
3395*38fd1498Szrj /* Use ->aux to implement EDGE_TO_BIT mapping. */
3396*38fd1498Szrj rgn_nr_edges = 0;
3397*38fd1498Szrj FOR_EACH_BB_FN (block, cfun)
3398*38fd1498Szrj {
3399*38fd1498Szrj if (CONTAINING_RGN (block->index) != rgn)
3400*38fd1498Szrj continue;
3401*38fd1498Szrj FOR_EACH_EDGE (e, ei, block->succs)
3402*38fd1498Szrj SET_EDGE_TO_BIT (e, rgn_nr_edges++);
3403*38fd1498Szrj }
3404*38fd1498Szrj
3405*38fd1498Szrj rgn_edges = XNEWVEC (edge, rgn_nr_edges);
3406*38fd1498Szrj rgn_nr_edges = 0;
3407*38fd1498Szrj FOR_EACH_BB_FN (block, cfun)
3408*38fd1498Szrj {
3409*38fd1498Szrj if (CONTAINING_RGN (block->index) != rgn)
3410*38fd1498Szrj continue;
3411*38fd1498Szrj FOR_EACH_EDGE (e, ei, block->succs)
3412*38fd1498Szrj rgn_edges[rgn_nr_edges++] = e;
3413*38fd1498Szrj }
3414*38fd1498Szrj
3415*38fd1498Szrj /* Split edges. */
3416*38fd1498Szrj pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
3417*38fd1498Szrj bitmap_vector_clear (pot_split, current_nr_blocks);
3418*38fd1498Szrj ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
3419*38fd1498Szrj bitmap_vector_clear (ancestor_edges, current_nr_blocks);
3420*38fd1498Szrj
3421*38fd1498Szrj /* Compute probabilities, dominators, split_edges. */
3422*38fd1498Szrj for (bb = 0; bb < current_nr_blocks; bb++)
3423*38fd1498Szrj compute_dom_prob_ps (bb);
3424*38fd1498Szrj
3425*38fd1498Szrj /* Cleanup ->aux used for EDGE_TO_BIT mapping. */
3426*38fd1498Szrj /* We don't need them anymore. But we want to avoid duplication of
3427*38fd1498Szrj aux fields in the newly created edges. */
3428*38fd1498Szrj FOR_EACH_BB_FN (block, cfun)
3429*38fd1498Szrj {
3430*38fd1498Szrj if (CONTAINING_RGN (block->index) != rgn)
3431*38fd1498Szrj continue;
3432*38fd1498Szrj FOR_EACH_EDGE (e, ei, block->succs)
3433*38fd1498Szrj e->aux = NULL;
3434*38fd1498Szrj }
3435*38fd1498Szrj }
3436*38fd1498Szrj }
3437*38fd1498Szrj
3438*38fd1498Szrj /* Free data computed for the finished region. */
3439*38fd1498Szrj void
3440*38fd1498Szrj sched_rgn_local_free (void)
3441*38fd1498Szrj {
3442*38fd1498Szrj free (prob);
3443*38fd1498Szrj sbitmap_vector_free (dom);
3444*38fd1498Szrj sbitmap_vector_free (pot_split);
3445*38fd1498Szrj sbitmap_vector_free (ancestor_edges);
3446*38fd1498Szrj free (rgn_edges);
3447*38fd1498Szrj }
3448*38fd1498Szrj
3449*38fd1498Szrj /* Free data computed for the finished region. */
3450*38fd1498Szrj void
3451*38fd1498Szrj sched_rgn_local_finish (void)
3452*38fd1498Szrj {
3453*38fd1498Szrj if (current_nr_blocks > 1 && !sel_sched_p ())
3454*38fd1498Szrj {
3455*38fd1498Szrj sched_rgn_local_free ();
3456*38fd1498Szrj }
3457*38fd1498Szrj }
3458*38fd1498Szrj
3459*38fd1498Szrj /* Setup scheduler infos. */
3460*38fd1498Szrj void
3461*38fd1498Szrj rgn_setup_common_sched_info (void)
3462*38fd1498Szrj {
3463*38fd1498Szrj memcpy (&rgn_common_sched_info, &haifa_common_sched_info,
3464*38fd1498Szrj sizeof (rgn_common_sched_info));
3465*38fd1498Szrj
3466*38fd1498Szrj rgn_common_sched_info.fix_recovery_cfg = rgn_fix_recovery_cfg;
3467*38fd1498Szrj rgn_common_sched_info.add_block = rgn_add_block;
3468*38fd1498Szrj rgn_common_sched_info.estimate_number_of_insns
3469*38fd1498Szrj = rgn_estimate_number_of_insns;
3470*38fd1498Szrj rgn_common_sched_info.sched_pass_id = SCHED_RGN_PASS;
3471*38fd1498Szrj
3472*38fd1498Szrj common_sched_info = &rgn_common_sched_info;
3473*38fd1498Szrj }
3474*38fd1498Szrj
3475*38fd1498Szrj /* Setup all *_sched_info structures (for the Haifa frontend
3476*38fd1498Szrj and for the dependence analysis) in the interblock scheduler. */
3477*38fd1498Szrj void
3478*38fd1498Szrj rgn_setup_sched_infos (void)
3479*38fd1498Szrj {
3480*38fd1498Szrj if (!sel_sched_p ())
3481*38fd1498Szrj memcpy (&rgn_sched_deps_info, &rgn_const_sched_deps_info,
3482*38fd1498Szrj sizeof (rgn_sched_deps_info));
3483*38fd1498Szrj else
3484*38fd1498Szrj memcpy (&rgn_sched_deps_info, &rgn_const_sel_sched_deps_info,
3485*38fd1498Szrj sizeof (rgn_sched_deps_info));
3486*38fd1498Szrj
3487*38fd1498Szrj sched_deps_info = &rgn_sched_deps_info;
3488*38fd1498Szrj
3489*38fd1498Szrj memcpy (&rgn_sched_info, &rgn_const_sched_info, sizeof (rgn_sched_info));
3490*38fd1498Szrj current_sched_info = &rgn_sched_info;
3491*38fd1498Szrj }
3492*38fd1498Szrj
3493*38fd1498Szrj /* The one entry point in this file. */
3494*38fd1498Szrj void
3495*38fd1498Szrj schedule_insns (void)
3496*38fd1498Szrj {
3497*38fd1498Szrj int rgn;
3498*38fd1498Szrj
3499*38fd1498Szrj /* Taking care of this degenerate case makes the rest of
3500*38fd1498Szrj this code simpler. */
3501*38fd1498Szrj if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3502*38fd1498Szrj return;
3503*38fd1498Szrj
3504*38fd1498Szrj rgn_setup_common_sched_info ();
3505*38fd1498Szrj rgn_setup_sched_infos ();
3506*38fd1498Szrj
3507*38fd1498Szrj haifa_sched_init ();
3508*38fd1498Szrj sched_rgn_init (reload_completed);
3509*38fd1498Szrj
3510*38fd1498Szrj bitmap_initialize (&not_in_df, 0);
3511*38fd1498Szrj bitmap_clear (&not_in_df);
3512*38fd1498Szrj
3513*38fd1498Szrj /* Schedule every region in the subroutine. */
3514*38fd1498Szrj for (rgn = 0; rgn < nr_regions; rgn++)
3515*38fd1498Szrj if (dbg_cnt (sched_region))
3516*38fd1498Szrj schedule_region (rgn);
3517*38fd1498Szrj
3518*38fd1498Szrj /* Clean up. */
3519*38fd1498Szrj sched_rgn_finish ();
3520*38fd1498Szrj bitmap_clear (&not_in_df);
3521*38fd1498Szrj
3522*38fd1498Szrj haifa_sched_finish ();
3523*38fd1498Szrj }
3524*38fd1498Szrj
3525*38fd1498Szrj /* INSN has been added to/removed from current region. */
3526*38fd1498Szrj static void
3527*38fd1498Szrj rgn_add_remove_insn (rtx_insn *insn, int remove_p)
3528*38fd1498Szrj {
3529*38fd1498Szrj if (!remove_p)
3530*38fd1498Szrj rgn_n_insns++;
3531*38fd1498Szrj else
3532*38fd1498Szrj rgn_n_insns--;
3533*38fd1498Szrj
3534*38fd1498Szrj if (INSN_BB (insn) == target_bb)
3535*38fd1498Szrj {
3536*38fd1498Szrj if (!remove_p)
3537*38fd1498Szrj target_n_insns++;
3538*38fd1498Szrj else
3539*38fd1498Szrj target_n_insns--;
3540*38fd1498Szrj }
3541*38fd1498Szrj }
3542*38fd1498Szrj
3543*38fd1498Szrj /* Extend internal data structures. */
3544*38fd1498Szrj void
3545*38fd1498Szrj extend_regions (void)
3546*38fd1498Szrj {
3547*38fd1498Szrj rgn_table = XRESIZEVEC (region, rgn_table, n_basic_blocks_for_fn (cfun));
3548*38fd1498Szrj rgn_bb_table = XRESIZEVEC (int, rgn_bb_table,
3549*38fd1498Szrj n_basic_blocks_for_fn (cfun));
3550*38fd1498Szrj block_to_bb = XRESIZEVEC (int, block_to_bb,
3551*38fd1498Szrj last_basic_block_for_fn (cfun));
3552*38fd1498Szrj containing_rgn = XRESIZEVEC (int, containing_rgn,
3553*38fd1498Szrj last_basic_block_for_fn (cfun));
3554*38fd1498Szrj }
3555*38fd1498Szrj
3556*38fd1498Szrj void
3557*38fd1498Szrj rgn_make_new_region_out_of_new_block (basic_block bb)
3558*38fd1498Szrj {
3559*38fd1498Szrj int i;
3560*38fd1498Szrj
3561*38fd1498Szrj i = RGN_BLOCKS (nr_regions);
3562*38fd1498Szrj /* I - first free position in rgn_bb_table. */
3563*38fd1498Szrj
3564*38fd1498Szrj rgn_bb_table[i] = bb->index;
3565*38fd1498Szrj RGN_NR_BLOCKS (nr_regions) = 1;
3566*38fd1498Szrj RGN_HAS_REAL_EBB (nr_regions) = 0;
3567*38fd1498Szrj RGN_DONT_CALC_DEPS (nr_regions) = 0;
3568*38fd1498Szrj CONTAINING_RGN (bb->index) = nr_regions;
3569*38fd1498Szrj BLOCK_TO_BB (bb->index) = 0;
3570*38fd1498Szrj
3571*38fd1498Szrj nr_regions++;
3572*38fd1498Szrj
3573*38fd1498Szrj RGN_BLOCKS (nr_regions) = i + 1;
3574*38fd1498Szrj }
3575*38fd1498Szrj
3576*38fd1498Szrj /* BB was added to ebb after AFTER. */
3577*38fd1498Szrj static void
3578*38fd1498Szrj rgn_add_block (basic_block bb, basic_block after)
3579*38fd1498Szrj {
3580*38fd1498Szrj extend_regions ();
3581*38fd1498Szrj bitmap_set_bit (&not_in_df, bb->index);
3582*38fd1498Szrj
3583*38fd1498Szrj if (after == 0 || after == EXIT_BLOCK_PTR_FOR_FN (cfun))
3584*38fd1498Szrj {
3585*38fd1498Szrj rgn_make_new_region_out_of_new_block (bb);
3586*38fd1498Szrj RGN_DONT_CALC_DEPS (nr_regions - 1) = (after
3587*38fd1498Szrj == EXIT_BLOCK_PTR_FOR_FN (cfun));
3588*38fd1498Szrj }
3589*38fd1498Szrj else
3590*38fd1498Szrj {
3591*38fd1498Szrj int i, pos;
3592*38fd1498Szrj
3593*38fd1498Szrj /* We need to fix rgn_table, block_to_bb, containing_rgn
3594*38fd1498Szrj and ebb_head. */
3595*38fd1498Szrj
3596*38fd1498Szrj BLOCK_TO_BB (bb->index) = BLOCK_TO_BB (after->index);
3597*38fd1498Szrj
3598*38fd1498Szrj /* We extend ebb_head to one more position to
3599*38fd1498Szrj easily find the last position of the last ebb in
3600*38fd1498Szrj the current region. Thus, ebb_head[BLOCK_TO_BB (after) + 1]
3601*38fd1498Szrj is _always_ valid for access. */
3602*38fd1498Szrj
3603*38fd1498Szrj i = BLOCK_TO_BB (after->index) + 1;
3604*38fd1498Szrj pos = ebb_head[i] - 1;
3605*38fd1498Szrj /* Now POS is the index of the last block in the region. */
3606*38fd1498Szrj
3607*38fd1498Szrj /* Find index of basic block AFTER. */
3608*38fd1498Szrj for (; rgn_bb_table[pos] != after->index; pos--)
3609*38fd1498Szrj ;
3610*38fd1498Szrj
3611*38fd1498Szrj pos++;
3612*38fd1498Szrj gcc_assert (pos > ebb_head[i - 1]);
3613*38fd1498Szrj
3614*38fd1498Szrj /* i - ebb right after "AFTER". */
3615*38fd1498Szrj /* ebb_head[i] - VALID. */
3616*38fd1498Szrj
3617*38fd1498Szrj /* Source position: ebb_head[i]
3618*38fd1498Szrj Destination position: ebb_head[i] + 1
3619*38fd1498Szrj Last position:
3620*38fd1498Szrj RGN_BLOCKS (nr_regions) - 1
3621*38fd1498Szrj Number of elements to copy: (last_position) - (source_position) + 1
3622*38fd1498Szrj */
3623*38fd1498Szrj
3624*38fd1498Szrj memmove (rgn_bb_table + pos + 1,
3625*38fd1498Szrj rgn_bb_table + pos,
3626*38fd1498Szrj ((RGN_BLOCKS (nr_regions) - 1) - (pos) + 1)
3627*38fd1498Szrj * sizeof (*rgn_bb_table));
3628*38fd1498Szrj
3629*38fd1498Szrj rgn_bb_table[pos] = bb->index;
3630*38fd1498Szrj
3631*38fd1498Szrj for (; i <= current_nr_blocks; i++)
3632*38fd1498Szrj ebb_head [i]++;
3633*38fd1498Szrj
3634*38fd1498Szrj i = CONTAINING_RGN (after->index);
3635*38fd1498Szrj CONTAINING_RGN (bb->index) = i;
3636*38fd1498Szrj
3637*38fd1498Szrj RGN_HAS_REAL_EBB (i) = 1;
3638*38fd1498Szrj
3639*38fd1498Szrj for (++i; i <= nr_regions; i++)
3640*38fd1498Szrj RGN_BLOCKS (i)++;
3641*38fd1498Szrj }
3642*38fd1498Szrj }
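
/* To illustrate the else-branch above: if rgn_bb_table held (... A B C ...)
   and AFTER == A, POS ends up one slot past A, the memmove shifts B, C and
   every later entry of the table one slot to the right, and BB's index is
   stored immediately after A; the ebb_head and RGN_BLOCKS offsets beyond
   that point are then bumped by one to match.  */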
3643*38fd1498Szrj
3644*38fd1498Szrj /* Fix internal data after interblock movement of jump instruction.
3645*38fd1498Szrj For parameter meaning please refer to
3646*38fd1498Szrj sched-int.h: struct sched_info: fix_recovery_cfg. */
3647*38fd1498Szrj static void
3648*38fd1498Szrj rgn_fix_recovery_cfg (int bbi, int check_bbi, int check_bb_nexti)
3649*38fd1498Szrj {
3650*38fd1498Szrj int old_pos, new_pos, i;
3651*38fd1498Szrj
3652*38fd1498Szrj BLOCK_TO_BB (check_bb_nexti) = BLOCK_TO_BB (bbi);
3653*38fd1498Szrj
3654*38fd1498Szrj for (old_pos = ebb_head[BLOCK_TO_BB (check_bbi) + 1] - 1;
3655*38fd1498Szrj rgn_bb_table[old_pos] != check_bb_nexti;
3656*38fd1498Szrj old_pos--)
3657*38fd1498Szrj ;
3658*38fd1498Szrj gcc_assert (old_pos > ebb_head[BLOCK_TO_BB (check_bbi)]);
3659*38fd1498Szrj
3660*38fd1498Szrj for (new_pos = ebb_head[BLOCK_TO_BB (bbi) + 1] - 1;
3661*38fd1498Szrj rgn_bb_table[new_pos] != bbi;
3662*38fd1498Szrj new_pos--)
3663*38fd1498Szrj ;
3664*38fd1498Szrj new_pos++;
3665*38fd1498Szrj gcc_assert (new_pos > ebb_head[BLOCK_TO_BB (bbi)]);
3666*38fd1498Szrj
3667*38fd1498Szrj gcc_assert (new_pos < old_pos);
3668*38fd1498Szrj
3669*38fd1498Szrj memmove (rgn_bb_table + new_pos + 1,
3670*38fd1498Szrj rgn_bb_table + new_pos,
3671*38fd1498Szrj (old_pos - new_pos) * sizeof (*rgn_bb_table));
3672*38fd1498Szrj
3673*38fd1498Szrj rgn_bb_table[new_pos] = check_bb_nexti;
3674*38fd1498Szrj
3675*38fd1498Szrj for (i = BLOCK_TO_BB (bbi) + 1; i <= BLOCK_TO_BB (check_bbi); i++)
3676*38fd1498Szrj ebb_head[i]++;
3677*38fd1498Szrj }
3678*38fd1498Szrj
3679*38fd1498Szrj /* Return next block in ebb chain. For parameter meaning please refer to
3680*38fd1498Szrj sched-int.h: struct sched_info: advance_target_bb. */
3681*38fd1498Szrj static basic_block
3682*38fd1498Szrj advance_target_bb (basic_block bb, rtx_insn *insn)
3683*38fd1498Szrj {
3684*38fd1498Szrj if (insn)
3685*38fd1498Szrj return 0;
3686*38fd1498Szrj
3687*38fd1498Szrj gcc_assert (BLOCK_TO_BB (bb->index) == target_bb
3688*38fd1498Szrj && BLOCK_TO_BB (bb->next_bb->index) == target_bb);
3689*38fd1498Szrj return bb->next_bb;
3690*38fd1498Szrj }
3691*38fd1498Szrj
3692*38fd1498Szrj #endif
3693*38fd1498Szrj
3694*38fd1498Szrj /* Run instruction scheduler. */
3695*38fd1498Szrj static unsigned int
3696*38fd1498Szrj rest_of_handle_live_range_shrinkage (void)
3697*38fd1498Szrj {
3698*38fd1498Szrj #ifdef INSN_SCHEDULING
3699*38fd1498Szrj int saved;
3700*38fd1498Szrj
3701*38fd1498Szrj initialize_live_range_shrinkage ();
3702*38fd1498Szrj saved = flag_schedule_interblock;
3703*38fd1498Szrj flag_schedule_interblock = false;
3704*38fd1498Szrj schedule_insns ();
3705*38fd1498Szrj flag_schedule_interblock = saved;
3706*38fd1498Szrj finish_live_range_shrinkage ();
3707*38fd1498Szrj #endif
3708*38fd1498Szrj return 0;
3709*38fd1498Szrj }
3710*38fd1498Szrj
3711*38fd1498Szrj /* Run instruction scheduler. */
3712*38fd1498Szrj static unsigned int
3713*38fd1498Szrj rest_of_handle_sched (void)
3714*38fd1498Szrj {
3715*38fd1498Szrj #ifdef INSN_SCHEDULING
3716*38fd1498Szrj if (flag_selective_scheduling
3717*38fd1498Szrj && ! maybe_skip_selective_scheduling ())
3718*38fd1498Szrj run_selective_scheduling ();
3719*38fd1498Szrj else
3720*38fd1498Szrj schedule_insns ();
3721*38fd1498Szrj #endif
3722*38fd1498Szrj return 0;
3723*38fd1498Szrj }
3724*38fd1498Szrj
3725*38fd1498Szrj /* Run second scheduling pass after reload. */
3726*38fd1498Szrj static unsigned int
3727*38fd1498Szrj rest_of_handle_sched2 (void)
3728*38fd1498Szrj {
3729*38fd1498Szrj #ifdef INSN_SCHEDULING
3730*38fd1498Szrj if (flag_selective_scheduling2
3731*38fd1498Szrj && ! maybe_skip_selective_scheduling ())
3732*38fd1498Szrj run_selective_scheduling ();
3733*38fd1498Szrj else
3734*38fd1498Szrj {
3735*38fd1498Szrj /* Do control and data sched analysis again,
3736*38fd1498Szrj and write some more of the results to dump file. */
3737*38fd1498Szrj if (flag_sched2_use_superblocks)
3738*38fd1498Szrj schedule_ebbs ();
3739*38fd1498Szrj else
3740*38fd1498Szrj schedule_insns ();
3741*38fd1498Szrj }
3742*38fd1498Szrj #endif
3743*38fd1498Szrj return 0;
3744*38fd1498Szrj }
3745*38fd1498Szrj
3746*38fd1498Szrj static unsigned int
3747*38fd1498Szrj rest_of_handle_sched_fusion (void)
3748*38fd1498Szrj {
3749*38fd1498Szrj #ifdef INSN_SCHEDULING
3750*38fd1498Szrj sched_fusion = true;
3751*38fd1498Szrj schedule_insns ();
3752*38fd1498Szrj sched_fusion = false;
3753*38fd1498Szrj #endif
3754*38fd1498Szrj return 0;
3755*38fd1498Szrj }
3756*38fd1498Szrj
3757*38fd1498Szrj namespace {
3758*38fd1498Szrj
3759*38fd1498Szrj const pass_data pass_data_live_range_shrinkage =
3760*38fd1498Szrj {
3761*38fd1498Szrj RTL_PASS, /* type */
3762*38fd1498Szrj "lr_shrinkage", /* name */
3763*38fd1498Szrj OPTGROUP_NONE, /* optinfo_flags */
3764*38fd1498Szrj TV_LIVE_RANGE_SHRINKAGE, /* tv_id */
3765*38fd1498Szrj 0, /* properties_required */
3766*38fd1498Szrj 0, /* properties_provided */
3767*38fd1498Szrj 0, /* properties_destroyed */
3768*38fd1498Szrj 0, /* todo_flags_start */
3769*38fd1498Szrj TODO_df_finish, /* todo_flags_finish */
3770*38fd1498Szrj };
3771*38fd1498Szrj
3772*38fd1498Szrj class pass_live_range_shrinkage : public rtl_opt_pass
3773*38fd1498Szrj {
3774*38fd1498Szrj public:
3775*38fd1498Szrj pass_live_range_shrinkage (gcc::context *ctxt)
3776*38fd1498Szrj : rtl_opt_pass(pass_data_live_range_shrinkage, ctxt)
3777*38fd1498Szrj {}
3778*38fd1498Szrj
3779*38fd1498Szrj /* opt_pass methods: */
3780*38fd1498Szrj virtual bool gate (function *)
3781*38fd1498Szrj {
3782*38fd1498Szrj #ifdef INSN_SCHEDULING
3783*38fd1498Szrj return flag_live_range_shrinkage;
3784*38fd1498Szrj #else
3785*38fd1498Szrj return 0;
3786*38fd1498Szrj #endif
3787*38fd1498Szrj }
3788*38fd1498Szrj
3789*38fd1498Szrj virtual unsigned int execute (function *)
3790*38fd1498Szrj {
3791*38fd1498Szrj return rest_of_handle_live_range_shrinkage ();
3792*38fd1498Szrj }
3793*38fd1498Szrj
3794*38fd1498Szrj }; // class pass_live_range_shrinkage
3795*38fd1498Szrj
3796*38fd1498Szrj } // anon namespace
3797*38fd1498Szrj
3798*38fd1498Szrj rtl_opt_pass *
3799*38fd1498Szrj make_pass_live_range_shrinkage (gcc::context *ctxt)
3800*38fd1498Szrj {
3801*38fd1498Szrj return new pass_live_range_shrinkage (ctxt);
3802*38fd1498Szrj }
3803*38fd1498Szrj
3804*38fd1498Szrj namespace {
3805*38fd1498Szrj
3806*38fd1498Szrj const pass_data pass_data_sched =
3807*38fd1498Szrj {
3808*38fd1498Szrj RTL_PASS, /* type */
3809*38fd1498Szrj "sched1", /* name */
3810*38fd1498Szrj OPTGROUP_NONE, /* optinfo_flags */
3811*38fd1498Szrj TV_SCHED, /* tv_id */
3812*38fd1498Szrj 0, /* properties_required */
3813*38fd1498Szrj 0, /* properties_provided */
3814*38fd1498Szrj 0, /* properties_destroyed */
3815*38fd1498Szrj 0, /* todo_flags_start */
3816*38fd1498Szrj TODO_df_finish, /* todo_flags_finish */
3817*38fd1498Szrj };
3818*38fd1498Szrj
3819*38fd1498Szrj class pass_sched : public rtl_opt_pass
3820*38fd1498Szrj {
3821*38fd1498Szrj public:
3822*38fd1498Szrj pass_sched (gcc::context *ctxt)
3823*38fd1498Szrj : rtl_opt_pass (pass_data_sched, ctxt)
3824*38fd1498Szrj {}
3825*38fd1498Szrj
3826*38fd1498Szrj /* opt_pass methods: */
3827*38fd1498Szrj virtual bool gate (function *);
3828*38fd1498Szrj virtual unsigned int execute (function *) { return rest_of_handle_sched (); }
3829*38fd1498Szrj
3830*38fd1498Szrj }; // class pass_sched
3831*38fd1498Szrj
3832*38fd1498Szrj bool
3833*38fd1498Szrj pass_sched::gate (function *)
3834*38fd1498Szrj {
3835*38fd1498Szrj #ifdef INSN_SCHEDULING
3836*38fd1498Szrj return optimize > 0 && flag_schedule_insns && dbg_cnt (sched_func);
3837*38fd1498Szrj #else
3838*38fd1498Szrj return 0;
3839*38fd1498Szrj #endif
3840*38fd1498Szrj }
3841*38fd1498Szrj
3842*38fd1498Szrj } // anon namespace
3843*38fd1498Szrj
3844*38fd1498Szrj rtl_opt_pass *
3845*38fd1498Szrj make_pass_sched (gcc::context *ctxt)
3846*38fd1498Szrj {
3847*38fd1498Szrj return new pass_sched (ctxt);
3848*38fd1498Szrj }
3849*38fd1498Szrj
3850*38fd1498Szrj namespace {
3851*38fd1498Szrj
3852*38fd1498Szrj const pass_data pass_data_sched2 =
3853*38fd1498Szrj {
3854*38fd1498Szrj RTL_PASS, /* type */
3855*38fd1498Szrj "sched2", /* name */
3856*38fd1498Szrj OPTGROUP_NONE, /* optinfo_flags */
3857*38fd1498Szrj TV_SCHED2, /* tv_id */
3858*38fd1498Szrj 0, /* properties_required */
3859*38fd1498Szrj 0, /* properties_provided */
3860*38fd1498Szrj 0, /* properties_destroyed */
3861*38fd1498Szrj 0, /* todo_flags_start */
3862*38fd1498Szrj TODO_df_finish, /* todo_flags_finish */
3863*38fd1498Szrj };
3864*38fd1498Szrj
3865*38fd1498Szrj class pass_sched2 : public rtl_opt_pass
3866*38fd1498Szrj {
3867*38fd1498Szrj public:
3868*38fd1498Szrj pass_sched2 (gcc::context *ctxt)
3869*38fd1498Szrj : rtl_opt_pass (pass_data_sched2, ctxt)
3870*38fd1498Szrj {}
3871*38fd1498Szrj
3872*38fd1498Szrj /* opt_pass methods: */
3873*38fd1498Szrj virtual bool gate (function *);
3874*38fd1498Szrj virtual unsigned int execute (function *)
3875*38fd1498Szrj {
3876*38fd1498Szrj return rest_of_handle_sched2 ();
3877*38fd1498Szrj }
3878*38fd1498Szrj
3879*38fd1498Szrj }; // class pass_sched2
3880*38fd1498Szrj
3881*38fd1498Szrj bool
3882*38fd1498Szrj pass_sched2::gate (function *)
3883*38fd1498Szrj {
3884*38fd1498Szrj #ifdef INSN_SCHEDULING
3885*38fd1498Szrj return optimize > 0 && flag_schedule_insns_after_reload
3886*38fd1498Szrj && !targetm.delay_sched2 && dbg_cnt (sched2_func);
3887*38fd1498Szrj #else
3888*38fd1498Szrj return 0;
3889*38fd1498Szrj #endif
3890*38fd1498Szrj }
3891*38fd1498Szrj
3892*38fd1498Szrj } // anon namespace
3893*38fd1498Szrj
3894*38fd1498Szrj rtl_opt_pass *
3895*38fd1498Szrj make_pass_sched2 (gcc::context *ctxt)
3896*38fd1498Szrj {
3897*38fd1498Szrj return new pass_sched2 (ctxt);
3898*38fd1498Szrj }
3899*38fd1498Szrj
3900*38fd1498Szrj namespace {
3901*38fd1498Szrj
3902*38fd1498Szrj const pass_data pass_data_sched_fusion =
3903*38fd1498Szrj {
3904*38fd1498Szrj RTL_PASS, /* type */
3905*38fd1498Szrj "sched_fusion", /* name */
3906*38fd1498Szrj OPTGROUP_NONE, /* optinfo_flags */
3907*38fd1498Szrj TV_SCHED_FUSION, /* tv_id */
3908*38fd1498Szrj 0, /* properties_required */
3909*38fd1498Szrj 0, /* properties_provided */
3910*38fd1498Szrj 0, /* properties_destroyed */
3911*38fd1498Szrj 0, /* todo_flags_start */
3912*38fd1498Szrj TODO_df_finish, /* todo_flags_finish */
3913*38fd1498Szrj };
3914*38fd1498Szrj
3915*38fd1498Szrj class pass_sched_fusion : public rtl_opt_pass
3916*38fd1498Szrj {
3917*38fd1498Szrj public:
3918*38fd1498Szrj pass_sched_fusion (gcc::context *ctxt)
3919*38fd1498Szrj : rtl_opt_pass (pass_data_sched_fusion, ctxt)
3920*38fd1498Szrj {}
3921*38fd1498Szrj
3922*38fd1498Szrj /* opt_pass methods: */
3923*38fd1498Szrj virtual bool gate (function *);
3924*38fd1498Szrj virtual unsigned int execute (function *)
3925*38fd1498Szrj {
3926*38fd1498Szrj return rest_of_handle_sched_fusion ();
3927*38fd1498Szrj }
3928*38fd1498Szrj
3929*38fd1498Szrj }; // class pass_sched_fusion
3930*38fd1498Szrj
3931*38fd1498Szrj bool
3932*38fd1498Szrj pass_sched_fusion::gate (function *)
3933*38fd1498Szrj {
3934*38fd1498Szrj #ifdef INSN_SCHEDULING
3935*38fd1498Szrj /* Scheduling fusion relies on peephole2 to do real fusion work,
3936*38fd1498Szrj so only enable it if peephole2 is in effect. */
3937*38fd1498Szrj return (optimize > 0 && flag_peephole2
3938*38fd1498Szrj && flag_schedule_fusion && targetm.sched.fusion_priority != NULL);
3939*38fd1498Szrj #else
3940*38fd1498Szrj return 0;
3941*38fd1498Szrj #endif
3942*38fd1498Szrj }
3943*38fd1498Szrj
3944*38fd1498Szrj } // anon namespace
3945*38fd1498Szrj
3946*38fd1498Szrj rtl_opt_pass *
3947*38fd1498Szrj make_pass_sched_fusion (gcc::context *ctxt)
3948*38fd1498Szrj {
3949*38fd1498Szrj return new pass_sched_fusion (ctxt);
3950*38fd1498Szrj }
3951