xref: /openbsd-src/gnu/usr.bin/gcc/gcc/sched-rgn.c (revision 4e43c760ad4cd5f644ec700462679d05749498d8)
1 /* Instruction scheduling pass.
2    Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3    1999, 2000, 2001, 2002 Free Software Foundation, Inc.
4    Contributed by Michael Tiemann (tiemann@cygnus.com).  Enhanced by,
5    and currently maintained by, Jim Wilson (wilson@cygnus.com)
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 2, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING.  If not, write to the Free
21 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 02111-1307, USA.  */
23 
24 /* This pass implements list scheduling within basic blocks.  It is
25    run twice: (1) after flow analysis, but before register allocation,
26    and (2) after register allocation.
27 
28    The first run performs interblock scheduling, moving insns between
29    different blocks in the same "region", and the second runs only
30    basic block scheduling.
31 
32    Interblock motions performed are useful motions and speculative
33    motions, including speculative loads.  Motions requiring code
34    duplication are not supported.  The identification of motion type
35    and the check for validity of speculative motions requires
36    construction and analysis of the function's control flow graph.
37 
38    The main entry point for this pass is schedule_insns(), called for
39    each function.  The work of the scheduler is organized in three
40    levels: (1) function level: insns are subject to splitting,
41    control-flow-graph is constructed, regions are computed (after
42    reload, each region is of one block), (2) region level: control
43    flow graph attributes required for interblock scheduling are
44    computed (dominators, reachability, etc.), data dependences and
45    priorities are computed, and (3) block level: insns in the block
46    are actually scheduled.  */
47 
48 #include "config.h"
49 #include "system.h"
50 #include "toplev.h"
51 #include "rtl.h"
52 #include "tm_p.h"
53 #include "hard-reg-set.h"
54 #include "basic-block.h"
55 #include "regs.h"
56 #include "function.h"
57 #include "flags.h"
58 #include "insn-config.h"
59 #include "insn-attr.h"
60 #include "except.h"
61 #include "toplev.h"
62 #include "recog.h"
63 #include "cfglayout.h"
64 #include "sched-int.h"
65 #include "target.h"
66 
67 /* Define this when we want to count REG_DEAD notes before and after scheduling
68    for sanity checking.  We can't do that when conditional execution is used,
69    as REG_DEAD notes exist only for unconditional deaths.  */
70 
71 #if !defined (HAVE_conditional_execution) && defined (ENABLE_CHECKING)
72 #define CHECK_DEAD_NOTES 1
73 #else
74 #define CHECK_DEAD_NOTES 0
75 #endif
76 
77 
78 #ifdef INSN_SCHEDULING
79 /* Some accessor macros for h_i_d members only used within this file.  */
80 #define INSN_REF_COUNT(INSN)	(h_i_d[INSN_UID (INSN)].ref_count)
81 #define FED_BY_SPEC_LOAD(insn)	(h_i_d[INSN_UID (insn)].fed_by_spec_load)
82 #define IS_LOAD_INSN(insn)	(h_i_d[INSN_UID (insn)].is_load_insn)
83 
84 #define MAX_RGN_BLOCKS 10
85 #define MAX_RGN_INSNS 100
86 
87 /* nr_inter/spec counts interblock/speculative motion for the function.  */
88 static int nr_inter, nr_spec;
89 
90 /* Control flow graph edges are kept in circular lists.  */
91 typedef struct
92 {
93   int from_block;
94   int to_block;
95   int next_in;
96   int next_out;
97 }
98 haifa_edge;
99 static haifa_edge *edge_table;
100 
101 #define NEXT_IN(edge) (edge_table[edge].next_in)
102 #define NEXT_OUT(edge) (edge_table[edge].next_out)
103 #define FROM_BLOCK(edge) (edge_table[edge].from_block)
104 #define TO_BLOCK(edge) (edge_table[edge].to_block)
105 
106 /* Number of edges in the control flow graph.  (In fact, larger than
107    that by 1, since edge 0 is unused.)  */
108 static int nr_edges;
109 
110 /* Circular list of incoming/outgoing edges of a block.  */
111 static int *in_edges;
112 static int *out_edges;
113 
114 #define IN_EDGES(block) (in_edges[block])
115 #define OUT_EDGES(block) (out_edges[block])
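/* Illustrative sketch, not part of the pass (walk_out_edges is a name
   made up for this example): the circular edge lists above are walked
   with the do/while idiom below, the same idiom used by new_edge and
   compute_dom_prob_ps later in this file.  Guarded by #if 0 since it
   is only an example.  */
#if 0
static void
walk_out_edges (block)
     int block;
{
  int fst_edge, nxt_edge;

  fst_edge = nxt_edge = OUT_EDGES (block);
  if (fst_edge == 0)
    return;			/* BLOCK has no outgoing edges.  */
  do
    {
      /* Every edge on this list leaves BLOCK.  */
      if (FROM_BLOCK (nxt_edge) != block)
	abort ();
      /* TO_BLOCK (nxt_edge) is one of BLOCK's successors.  */
      nxt_edge = NEXT_OUT (nxt_edge);
    }
  while (fst_edge != nxt_edge);
}
#endif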
116 
117 static int is_cfg_nonregular PARAMS ((void));
118 static int build_control_flow PARAMS ((struct edge_list *));
119 static void new_edge PARAMS ((int, int));
120 
121 /* A region is the main entity for interblock scheduling: insns
122    are allowed to move between blocks in the same region, along
123    control flow graph edges, in the 'up' direction.  */
124 typedef struct
125 {
126   int rgn_nr_blocks;		/* Number of blocks in region.  */
127   int rgn_blocks;		/* Blocks in the region (actually an index into rgn_bb_table).  */
128 }
129 region;
130 
131 /* Number of regions in the procedure.  */
132 static int nr_regions;
133 
134 /* Table of region descriptions.  */
135 static region *rgn_table;
136 
137 /* Array of lists of regions' blocks.  */
138 static int *rgn_bb_table;
139 
140 /* Topological order of blocks in the region (if b2 is reachable from
141    b1, block_to_bb[b2] > block_to_bb[b1]).  Note: A basic block is
142    always referred to by either block or b, while its topological
143    order name (in the region) is referred to by bb.  */
144 static int *block_to_bb;
145 
146 /* The number of the region containing a block.  */
147 static int *containing_rgn;
148 
149 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
150 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
151 #define BLOCK_TO_BB(block) (block_to_bb[block])
152 #define CONTAINING_RGN(block) (containing_rgn[block])
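/* Illustrative sketch, not part of the pass (walk_region_blocks is a
   name made up for this example): the blocks of region RGN occupy a
   contiguous slice of rgn_bb_table starting at RGN_BLOCKS (rgn), so
   they can be enumerated as below; compare debug_regions further down.
   Guarded by #if 0 since it is only an example.  */
#if 0
static void
walk_region_blocks (rgn)
     int rgn;
{
  int bb;

  for (bb = 0; bb < RGN_NR_BLOCKS (rgn); bb++)
    {
      int block = rgn_bb_table[RGN_BLOCKS (rgn) + bb];

      /* Each block records its region and its topological position.  */
      if (CONTAINING_RGN (block) != rgn || BLOCK_TO_BB (block) != bb)
	abort ();
    }
}
#endif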
153 
154 void debug_regions PARAMS ((void));
155 static void find_single_block_region PARAMS ((void));
156 static void find_rgns PARAMS ((struct edge_list *, dominance_info));
157 static int too_large PARAMS ((int, int *, int *));
158 
159 extern void debug_live PARAMS ((int, int));
160 
161 /* Blocks of the current region being scheduled.  */
162 static int current_nr_blocks;
163 static int current_blocks;
164 
165 /* The mapping from bb to block.  */
166 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
167 
168 typedef struct
169 {
170   int *first_member;		/* Pointer to the list start in bitlst_table.  */
171   int nr_members;		/* The number of members of the bit list.  */
172 }
173 bitlst;
174 
175 static int bitlst_table_last;
176 static int bitlst_table_size;
177 static int *bitlst_table;
178 
179 static void extract_bitlst PARAMS ((sbitmap, bitlst *));
180 
181 /* Target info declarations.
182 
183    The block currently being scheduled is referred to as the "target" block,
184    while other blocks in the region from which insns can be moved to the
185    target are called "source" blocks.  The candidate structure holds info
186    about such sources: are they valid?  Speculative?  Etc.  */
187 typedef bitlst bblst;
188 typedef struct
189 {
190   char is_valid;
191   char is_speculative;
192   int src_prob;
193   bblst split_bbs;
194   bblst update_bbs;
195 }
196 candidate;
197 
198 static candidate *candidate_table;
199 
200 /* A speculative motion requires checking live information on the path
201    from 'source' to 'target'.  The split blocks are those to be checked.
202    After a speculative motion, live information should be modified in
203    the 'update' blocks.
204 
205    Lists of split and update blocks for each candidate of the current
206    target are in array bblst_table.  */
207 static int *bblst_table, bblst_size, bblst_last;
208 
209 #define IS_VALID(src) ( candidate_table[src].is_valid )
210 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
211 #define SRC_PROB(src) ( candidate_table[src].src_prob )
212 
213 /* The bb being currently scheduled.  */
214 static int target_bb;
215 
216 /* List of edges.  */
217 typedef bitlst edgelst;
218 
219 /* Target info functions.  */
220 static void split_edges PARAMS ((int, int, edgelst *));
221 static void compute_trg_info PARAMS ((int));
222 void debug_candidate PARAMS ((int));
223 void debug_candidates PARAMS ((int));
224 
225 /* Dominators array: dom[i] contains the sbitmap of dominators of
226    bb i in the region.  */
227 static sbitmap *dom;
228 
229 /* bb 0 is the only region entry.  */
230 #define IS_RGN_ENTRY(bb) (!bb)
231 
232 /* Is bb_src dominated by bb_trg.  */
233 #define IS_DOMINATED(bb_src, bb_trg)                                 \
234 ( TEST_BIT (dom[bb_src], bb_trg) )
235 
236 /* Probability: Prob[i] is a float in [0, 1] which is the probability
237    of bb i relative to the region entry.  */
238 static float *prob;
239 
240 /* The probability of bb_src, relative to bb_trg.  Note that while
241    'prob[bb]' is a float in [0, 1], this macro returns an integer
242    in [0, 100].  */
243 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
244 						      prob[bb_trg])))
245 
246 /* Bit-set of edges, where bit i stands for edge i.  */
247 typedef sbitmap edgeset;
248 
249 /* Number of edges in the region.  */
250 static int rgn_nr_edges;
251 
252 /* Array of size rgn_nr_edges.  */
253 static int *rgn_edges;
254 
255 
256 /* Mapping from each edge in the graph to its number in the rgn.  */
257 static int *edge_to_bit;
258 #define EDGE_TO_BIT(edge) (edge_to_bit[edge])
259 
260 /* The split edges of a source bb are different for each target
261    bb.  In order to compute this efficiently, the 'potential-split edges'
262    are computed for each bb prior to scheduling a region.  This is actually
263    the split edges of each bb relative to the region entry.
264 
265    pot_split[bb] is the set of potential split edges of bb.  */
266 static edgeset *pot_split;
267 
268 /* For every bb, a set of its ancestor edges.  */
269 static edgeset *ancestor_edges;
270 
271 static void compute_dom_prob_ps PARAMS ((int));
272 
273 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
274 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
275 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
276 
277 /* Parameters affecting the decision of rank_for_schedule().
278    ??? Nope.  But MIN_PROBABILITY is used in compute_trg_info.  */
279 #define MIN_PROBABILITY 40
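/* For example, with prob[bb_src] = 0.3 and prob[bb_trg] = 0.6,
   GET_SRC_PROB (bb_src, bb_trg) is 50, which clears the MIN_PROBABILITY
   threshold of 40; a source whose relative probability came out at,
   say, 20 would be rejected as a candidate in compute_trg_info.  */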
280 
281 /* Speculative scheduling functions.  */
282 static int check_live_1 PARAMS ((int, rtx));
283 static void update_live_1 PARAMS ((int, rtx));
284 static int check_live PARAMS ((rtx, int));
285 static void update_live PARAMS ((rtx, int));
286 static void set_spec_fed PARAMS ((rtx));
287 static int is_pfree PARAMS ((rtx, int, int));
288 static int find_conditional_protection PARAMS ((rtx, int));
289 static int is_conditionally_protected PARAMS ((rtx, int, int));
290 static int may_trap_exp PARAMS ((rtx, int));
291 static int haifa_classify_insn PARAMS ((rtx));
292 static int is_prisky PARAMS ((rtx, int, int));
293 static int is_exception_free PARAMS ((rtx, int, int));
294 
295 static bool sets_likely_spilled PARAMS ((rtx));
296 static void sets_likely_spilled_1 PARAMS ((rtx, rtx, void *));
297 static void add_branch_dependences PARAMS ((rtx, rtx));
298 static void compute_block_backward_dependences PARAMS ((int));
299 void debug_dependencies PARAMS ((void));
300 
301 static void init_regions PARAMS ((void));
302 static void schedule_region PARAMS ((int));
303 static rtx concat_INSN_LIST PARAMS ((rtx, rtx));
304 static void concat_insn_mem_list PARAMS ((rtx, rtx, rtx *, rtx *));
305 static void propagate_deps PARAMS ((int, struct deps *));
306 static void free_pending_lists PARAMS ((void));
307 
308 /* Functions for construction of the control flow graph.  */
309 
310 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
311 
312    We decide not to build the control flow graph if there is possibly more
313    than one entry to the function, if computed branches exist, or if we
314    have nonlocal gotos.  */
315 
316 static int
317 is_cfg_nonregular ()
318 {
319   basic_block b;
320   rtx insn;
321   RTX_CODE code;
322 
323   /* If we have a label that could be the target of a nonlocal goto, then
324      the cfg is not well structured.  */
325   if (nonlocal_goto_handler_labels)
326     return 1;
327 
328   /* If we have any forced labels, then the cfg is not well structured.  */
329   if (forced_labels)
330     return 1;
331 
332   /* If this function has a computed jump, then we consider the cfg
333      not well structured.  */
334   if (current_function_has_computed_jump)
335     return 1;
336 
337   /* If we have exception handlers, then we consider the cfg not well
338      structured.  ?!?  We should be able to handle this now that flow.c
339      computes an accurate cfg for EH.  */
340   if (current_function_has_exception_handlers ())
341     return 1;
342 
343   /* If we have non-jumping insns which refer to labels, then we consider
344      the cfg not well structured.  */
345   /* Check for labels referred to other than by jumps.  */
346   FOR_EACH_BB (b)
347     for (insn = b->head;; insn = NEXT_INSN (insn))
348       {
349 	code = GET_CODE (insn);
350 	if (GET_RTX_CLASS (code) == 'i' && code != JUMP_INSN)
351 	  {
352 	    rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
353 
354 	    if (note
355 		&& ! (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
356 		      && find_reg_note (NEXT_INSN (insn), REG_LABEL,
357 					XEXP (note, 0))))
358 	      return 1;
359 	  }
360 
361 	if (insn == b->end)
362 	  break;
363       }
364 
365   /* All the tests passed.  Consider the cfg well structured.  */
366   return 0;
367 }
368 
369 /* Build the control flow graph and set nr_edges.
370 
371    Instead of trying to build a cfg ourselves, we rely on flow to
372    do it for us.  Stamp out useless code (and bug) duplication.
373 
374    Return nonzero if an irregularity in the cfg is found which would
375    prevent cross block scheduling.  */
376 
377 static int
378 build_control_flow (edge_list)
379      struct edge_list *edge_list;
380 {
381   int i, unreachable, num_edges;
382   basic_block b;
383 
384   /* This already accounts for entry/exit edges.  */
385   num_edges = NUM_EDGES (edge_list);
386 
387   /* Unreachable loops with more than one basic block are detected
388      during the DFS traversal in find_rgns.
389 
390      Unreachable loops with a single block are detected here.  This
391      test is redundant with the one in find_rgns, but it's much
392     cheaper to go ahead and catch the trivial case here.  */
393   unreachable = 0;
394   FOR_EACH_BB (b)
395     {
396       if (b->pred == NULL
397 	  || (b->pred->src == b
398 	      && b->pred->pred_next == NULL))
399 	unreachable = 1;
400     }
401 
402   /* ??? We can kill these soon.  */
403   in_edges = (int *) xcalloc (last_basic_block, sizeof (int));
404   out_edges = (int *) xcalloc (last_basic_block, sizeof (int));
405   edge_table = (haifa_edge *) xcalloc (num_edges, sizeof (haifa_edge));
406 
407   nr_edges = 0;
408   for (i = 0; i < num_edges; i++)
409     {
410       edge e = INDEX_EDGE (edge_list, i);
411 
412       if (e->dest != EXIT_BLOCK_PTR
413 	  && e->src != ENTRY_BLOCK_PTR)
414 	new_edge (e->src->index, e->dest->index);
415     }
416 
417   /* Increment by 1, since edge 0 is unused.  */
418   nr_edges++;
419 
420   return unreachable;
421 }
422 
423 /* Record an edge in the control flow graph from SOURCE to TARGET.
424 
425    In theory, this is redundant with the s_succs computed above, but
426    we have not converted all of haifa to use information from the
427    integer lists.  */
428 
429 static void
430 new_edge (source, target)
431      int source, target;
432 {
433   int e, next_edge;
434   int curr_edge, fst_edge;
435 
436   /* Check for duplicates.  */
437   fst_edge = curr_edge = OUT_EDGES (source);
438   while (curr_edge)
439     {
440       if (FROM_BLOCK (curr_edge) == source
441 	  && TO_BLOCK (curr_edge) == target)
442 	{
443 	  return;
444 	}
445 
446       curr_edge = NEXT_OUT (curr_edge);
447 
448       if (fst_edge == curr_edge)
449 	break;
450     }
451 
452   e = ++nr_edges;
453 
454   FROM_BLOCK (e) = source;
455   TO_BLOCK (e) = target;
456 
457   if (OUT_EDGES (source))
458     {
459       next_edge = NEXT_OUT (OUT_EDGES (source));
460       NEXT_OUT (OUT_EDGES (source)) = e;
461       NEXT_OUT (e) = next_edge;
462     }
463   else
464     {
465       OUT_EDGES (source) = e;
466       NEXT_OUT (e) = e;
467     }
468 
469   if (IN_EDGES (target))
470     {
471       next_edge = NEXT_IN (IN_EDGES (target));
472       NEXT_IN (IN_EDGES (target)) = e;
473       NEXT_IN (e) = next_edge;
474     }
475   else
476     {
477       IN_EDGES (target) = e;
478       NEXT_IN (e) = e;
479     }
480 }
481 
482 /* Translate a bit-set SET to a list BL of the bit-set members.  */
483 
484 static void
485 extract_bitlst (set, bl)
486      sbitmap set;
487      bitlst *bl;
488 {
489   int i;
490 
491   /* bitlst table space is reused in each call to extract_bitlst.  */
492   bitlst_table_last = 0;
493 
494   bl->first_member = &bitlst_table[bitlst_table_last];
495   bl->nr_members = 0;
496 
497   /* Iterate over each set bit in the bitmap.  */
498   EXECUTE_IF_SET_IN_SBITMAP (set, 0, i,
499   {
500     bitlst_table[bitlst_table_last++] = i;
501     (bl->nr_members)++;
502   });
503 
504 }
505 
506 /* Functions for the construction of regions.  */
507 
508 /* Print the regions, for debugging purposes.  Callable from debugger.  */
509 
510 void
511 debug_regions ()
512 {
513   int rgn, bb;
514 
515   fprintf (sched_dump, "\n;;   ------------ REGIONS ----------\n\n");
516   for (rgn = 0; rgn < nr_regions; rgn++)
517     {
518       fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
519 	       rgn_table[rgn].rgn_nr_blocks);
520       fprintf (sched_dump, ";;\tbb/block: ");
521 
522       for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
523 	{
524 	  current_blocks = RGN_BLOCKS (rgn);
525 
526 	  if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
527 	    abort ();
528 
529 	  fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
530 	}
531 
532       fprintf (sched_dump, "\n\n");
533     }
534 }
535 
536 /* Build a single block region for each basic block in the function.
537    This allows for using the same code for interblock and basic block
538    scheduling.  */
539 
540 static void
541 find_single_block_region ()
542 {
543   basic_block bb;
544 
545   nr_regions = 0;
546 
547   FOR_EACH_BB (bb)
548     {
549       rgn_bb_table[nr_regions] = bb->index;
550       RGN_NR_BLOCKS (nr_regions) = 1;
551       RGN_BLOCKS (nr_regions) = nr_regions;
552       CONTAINING_RGN (bb->index) = nr_regions;
553       BLOCK_TO_BB (bb->index) = 0;
554       nr_regions++;
555     }
556 }
557 
558 /* Update number of blocks and the estimate for number of insns
559    in the region.  Return 1 if the region is "too large" for interblock
560    scheduling (compile time considerations), otherwise return 0.  */
561 
562 static int
563 too_large (block, num_bbs, num_insns)
564      int block, *num_bbs, *num_insns;
565 {
566   (*num_bbs)++;
567   (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
568 		   INSN_LUID (BLOCK_HEAD (block)));
569   if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
570     return 1;
571   else
572     return 0;
573 }
574 
575 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
576    is still an inner loop.  Put in max_hdr[blk] the header of the innermost
577    loop containing blk.  */
578 #define UPDATE_LOOP_RELATIONS(blk, hdr)		\
579 {						\
580   if (max_hdr[blk] == -1)			\
581     max_hdr[blk] = hdr;				\
582   else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])	\
583     RESET_BIT (inner, hdr);			\
584   else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])	\
585     {						\
586       RESET_BIT (inner,max_hdr[blk]);		\
587       max_hdr[blk] = hdr;			\
588     }						\
589 }
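/* A worked example of the macro above: suppose BLK already has
   max_hdr[blk] = H and the DFS proposes another header HDR.  If
   dfs_nr[H] > dfs_nr[HDR], HDR was visited earlier and so encloses the
   loop headed by H; HDR therefore cannot be an inner loop header and
   its bit in INNER is cleared.  If dfs_nr[H] < dfs_nr[HDR], HDR is the
   more deeply nested header, so H loses its INNER bit and HDR becomes
   BLK's header.  */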
590 
591 /* Find regions for interblock scheduling.
592 
593    A region for scheduling can be:
594 
595      * A loop-free procedure, or
596 
597      * A reducible inner loop, or
598 
599      * A basic block not contained in any other region.
600 
601    ?!? In theory we could build other regions based on extended basic
602    blocks or reverse extended basic blocks.  Is it worth the trouble?
603 
604    Loop blocks that form a region are put into the region's block list
605    in topological order.
606 
607    This procedure stores its results into the following global (ick) variables
608 
609      * rgn_nr
610      * rgn_table
611      * rgn_bb_table
612      * block_to_bb
613      * containing region
614 
615    We use dominator relationships to avoid making regions out of non-reducible
616    loops.
617 
618    This procedure needs to be converted to work on pred/succ lists instead
619    of edge tables.  That would simplify it somewhat.  */
620 
621 static void
622 find_rgns (edge_list, dom)
623      struct edge_list *edge_list;
624      dominance_info dom;
625 {
626   int *max_hdr, *dfs_nr, *stack, *degree;
627   char no_loops = 1;
628   int node, child, loop_head, i, head, tail;
629   int count = 0, sp, idx = 0;
630   int current_edge = out_edges[ENTRY_BLOCK_PTR->succ->dest->index];
631   int num_bbs, num_insns, unreachable;
632   int too_large_failure;
633   basic_block bb;
634 
635   /* Note if an edge has been passed.  */
636   sbitmap passed;
637 
638   /* Note if a block is a natural loop header.  */
639   sbitmap header;
640 
641   /* Note if a block is a natural inner loop header.  */
642   sbitmap inner;
643 
644   /* Note if a block is in the block queue.  */
645   sbitmap in_queue;
646 
647   /* Note if a block is on the stack.  */
648   sbitmap in_stack;
649 
650   int num_edges = NUM_EDGES (edge_list);
651 
652   /* Perform a DFS traversal of the cfg.  Identify loop headers, inner loops
653      and a mapping from block to its loop header (if the block is contained
654      in a loop, else -1).
655 
656      Store results in HEADER, INNER, and MAX_HDR respectively, these will
657      be used as inputs to the second traversal.
658 
659      STACK, SP and DFS_NR are only used during the first traversal.  */
660 
661   /* Allocate and initialize variables for the first traversal.  */
662   max_hdr = (int *) xmalloc (last_basic_block * sizeof (int));
663   dfs_nr = (int *) xcalloc (last_basic_block, sizeof (int));
664   stack = (int *) xmalloc (nr_edges * sizeof (int));
665 
666   inner = sbitmap_alloc (last_basic_block);
667   sbitmap_ones (inner);
668 
669   header = sbitmap_alloc (last_basic_block);
670   sbitmap_zero (header);
671 
672   passed = sbitmap_alloc (nr_edges);
673   sbitmap_zero (passed);
674 
675   in_queue = sbitmap_alloc (last_basic_block);
676   sbitmap_zero (in_queue);
677 
678   in_stack = sbitmap_alloc (last_basic_block);
679   sbitmap_zero (in_stack);
680 
681   for (i = 0; i < last_basic_block; i++)
682     max_hdr[i] = -1;
683 
684   /* DFS traversal to find inner loops in the cfg.  */
685 
686   sp = -1;
687   while (1)
688     {
689       if (current_edge == 0 || TEST_BIT (passed, current_edge))
690 	{
691 	  /* We have reached a leaf node or a node that was already
692 	     processed.  Pop edges off the stack until we find
693 	     an edge that has not yet been processed.  */
694 	  while (sp >= 0
695 		 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
696 	    {
697 	      /* Pop entry off the stack.  */
698 	      current_edge = stack[sp--];
699 	      node = FROM_BLOCK (current_edge);
700 	      child = TO_BLOCK (current_edge);
701 	      RESET_BIT (in_stack, child);
702 	      if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
703 		UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
704 	      current_edge = NEXT_OUT (current_edge);
705 	    }
706 
707 	  /* See if we have finished the DFS tree traversal.  */
708 	  if (sp < 0 && TEST_BIT (passed, current_edge))
709 	    break;
710 
711 	  /* Nope, continue the traversal with the popped node.  */
712 	  continue;
713 	}
714 
715       /* Process a node.  */
716       node = FROM_BLOCK (current_edge);
717       child = TO_BLOCK (current_edge);
718       SET_BIT (in_stack, node);
719       dfs_nr[node] = ++count;
720 
721       /* If the successor is in the stack, then we've found a loop.
722 	 Mark the loop, if it is not a natural loop, then it will
723 	 be rejected during the second traversal.  */
724       if (TEST_BIT (in_stack, child))
725 	{
726 	  no_loops = 0;
727 	  SET_BIT (header, child);
728 	  UPDATE_LOOP_RELATIONS (node, child);
729 	  SET_BIT (passed, current_edge);
730 	  current_edge = NEXT_OUT (current_edge);
731 	  continue;
732 	}
733 
734       /* If the child was already visited, then there is no need to visit
735 	 it again.  Just update the loop relationships and restart
736 	 with a new edge.  */
737       if (dfs_nr[child])
738 	{
739 	  if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
740 	    UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
741 	  SET_BIT (passed, current_edge);
742 	  current_edge = NEXT_OUT (current_edge);
743 	  continue;
744 	}
745 
746       /* Push an entry on the stack and continue DFS traversal.  */
747       stack[++sp] = current_edge;
748       SET_BIT (passed, current_edge);
749       current_edge = OUT_EDGES (child);
750 
751       /* This is temporary until haifa is converted to use rth's new
752 	 cfg routines which have true entry/exit blocks and the
753 	 appropriate edges from/to those blocks.
754 
755 	 Generally we update dfs_nr for a node when we process its
756 	 out edge.  However, if the node has no out edge then we will
757 	 not set dfs_nr for that node.  This can confuse the scheduler
758 	 into thinking that we have unreachable blocks, which in turn
759 	 disables cross block scheduling.
760 
761 	 So, if we have a node with no out edges, go ahead and mark it
762 	 as reachable now.  */
763       if (current_edge == 0)
764 	dfs_nr[child] = ++count;
765     }
766 
767   /* Another check for unreachable blocks.  The earlier test in
768      is_cfg_nonregular only finds unreachable blocks that do not
769      form a loop.
770 
771      The DFS traversal will mark every block that is reachable from
772      the entry node by placing a nonzero value in dfs_nr.  Thus if
773      dfs_nr is zero for any block, then it must be unreachable.  */
774   unreachable = 0;
775   FOR_EACH_BB (bb)
776     if (dfs_nr[bb->index] == 0)
777       {
778 	unreachable = 1;
779 	break;
780       }
781 
782   /* Gross.  To avoid wasting memory, the second pass uses the dfs_nr array
783      to hold degree counts.  */
784   degree = dfs_nr;
785 
786   FOR_EACH_BB (bb)
787     degree[bb->index] = 0;
788   for (i = 0; i < num_edges; i++)
789     {
790       edge e = INDEX_EDGE (edge_list, i);
791 
792       if (e->dest != EXIT_BLOCK_PTR)
793 	degree[e->dest->index]++;
794     }
795 
796   /* Do not perform region scheduling if there are any unreachable
797      blocks.  */
798   if (!unreachable)
799     {
800       int *queue;
801 
802       if (no_loops)
803 	SET_BIT (header, 0);
804 
805       /* Second traversal: find reducible inner loops and topologically sort
806 	 blocks of each region.  */
807 
808       queue = (int *) xmalloc (n_basic_blocks * sizeof (int));
809 
810       /* Find blocks which are inner loop headers.  We still have non-reducible
811 	 loops to consider at this point.  */
812       FOR_EACH_BB (bb)
813 	{
814 	  if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
815 	    {
816 	      edge e;
817 	      basic_block jbb;
818 
819 	      /* Now check that the loop is reducible.  We do this separately
820 		 from finding inner loops so that we do not find a reducible
821 		 loop which contains an inner non-reducible loop.
822 
823 		 A simple way to find reducible/natural loops is to verify
824 		 that each block in the loop is dominated by the loop
825 		 header.
826 
827 		 If there exists a block that is not dominated by the loop
828 		 header, then the block is reachable from outside the loop
829 		 and thus the loop is not a natural loop.  */
830 	      FOR_EACH_BB (jbb)
831 		{
832 		  /* First identify blocks in the loop, except for the loop
833 		     entry block.  */
834 		  if (bb->index == max_hdr[jbb->index] && bb != jbb)
835 		    {
836 		      /* Now verify that the block is dominated by the loop
837 			 header.  */
838 		      if (!dominated_by_p (dom, jbb, bb))
839 			break;
840 		    }
841 		}
842 
843 	      /* If we exited the loop early, then BB is the header of
844 		 a non-reducible loop and we should quit processing it
845 		 now.  */
846 	      if (jbb != EXIT_BLOCK_PTR)
847 		continue;
848 
849 	      /* BB is a header of an inner loop, or block 0 in a subroutine
850 		 with no loops at all.  */
851 	      head = tail = -1;
852 	      too_large_failure = 0;
853 	      loop_head = max_hdr[bb->index];
854 
855 	      /* Decrease the degree of all BB's successors for topological
856 		 ordering.  */
857 	      for (e = bb->succ; e; e = e->succ_next)
858 		if (e->dest != EXIT_BLOCK_PTR)
859 		  --degree[e->dest->index];
860 
861 	      /* Estimate # insns, and count # blocks in the region.  */
862 	      num_bbs = 1;
863 	      num_insns = (INSN_LUID (bb->end)
864 			   - INSN_LUID (bb->head));
865 
866 	      /* Find all loop latches (blocks with back edges to the loop
867 		 header), or all the leaf blocks if the cfg has no loops.
868 
869 		 Place those blocks into the queue.  */
870 	      if (no_loops)
871 		{
872 		  FOR_EACH_BB (jbb)
873 		    /* Leaf nodes have only a single successor which must
874 		       be EXIT_BLOCK.  */
875 		    if (jbb->succ
876 			&& jbb->succ->dest == EXIT_BLOCK_PTR
877 			&& jbb->succ->succ_next == NULL)
878 		      {
879 			queue[++tail] = jbb->index;
880 			SET_BIT (in_queue, jbb->index);
881 
882 			if (too_large (jbb->index, &num_bbs, &num_insns))
883 			  {
884 			    too_large_failure = 1;
885 			    break;
886 			  }
887 		      }
888 		}
889 	      else
890 		{
891 		  edge e;
892 
893 		  for (e = bb->pred; e; e = e->pred_next)
894 		    {
895 		      if (e->src == ENTRY_BLOCK_PTR)
896 			continue;
897 
898 		      node = e->src->index;
899 
900 		      if (max_hdr[node] == loop_head && node != bb->index)
901 			{
902 			  /* This is a loop latch.  */
903 			  queue[++tail] = node;
904 			  SET_BIT (in_queue, node);
905 
906 			  if (too_large (node, &num_bbs, &num_insns))
907 			    {
908 			      too_large_failure = 1;
909 			      break;
910 			    }
911 			}
912 		    }
913 		}
914 
915 	      /* Now add all the blocks in the loop to the queue.
916 
917 	     We know the loop is a natural loop; however the algorithm
918 	     above will not always mark certain blocks as being in the
919 	     loop.  Consider:
920 		node   children
921 		 a	  b,c
922 		 b	  c
923 		 c	  a,d
924 		 d	  b
925 
926 	     The algorithm in the DFS traversal may not mark B & D as part
927 	     of the loop (i.e. they will not have max_hdr set to A).
928 
929 	     We know they can not be loop latches (else they would have
930 	     had max_hdr set since they'd have a backedge to a dominator
931 	     block).  So we don't need them on the initial queue.
932 
933 	     We know they are part of the loop because they are dominated
934 	     by the loop header and can be reached by a backwards walk of
935 	     the edges starting with nodes on the initial queue.
936 
937 	     It is safe and desirable to include those nodes in the
938 	     loop/scheduling region.  To do so we would need to decrease
939 	     the degree of a node if it is the target of a backedge
940 	     within the loop itself as the node is placed in the queue.
941 
942 	     We do not do this because I'm not sure that the actual
943 	     scheduling code will properly handle this case. ?!? */
944 
945 	      while (head < tail && !too_large_failure)
946 		{
947 		  edge e;
948 		  child = queue[++head];
949 
950 		  for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
951 		    {
952 		      node = e->src->index;
953 
954 		      /* See discussion above about nodes not marked as in
955 			 this loop during the initial DFS traversal.  */
956 		      if (e->src == ENTRY_BLOCK_PTR
957 			  || max_hdr[node] != loop_head)
958 			{
959 			  tail = -1;
960 			  break;
961 			}
962 		      else if (!TEST_BIT (in_queue, node) && node != bb->index)
963 			{
964 			  queue[++tail] = node;
965 			  SET_BIT (in_queue, node);
966 
967 			  if (too_large (node, &num_bbs, &num_insns))
968 			    {
969 			      too_large_failure = 1;
970 			      break;
971 			    }
972 			}
973 		    }
974 		}
975 
976 	      if (tail >= 0 && !too_large_failure)
977 		{
978 		  /* Place the loop header into list of region blocks.  */
979 		  degree[bb->index] = -1;
980 		  rgn_bb_table[idx] = bb->index;
981 		  RGN_NR_BLOCKS (nr_regions) = num_bbs;
982 		  RGN_BLOCKS (nr_regions) = idx++;
983 		  CONTAINING_RGN (bb->index) = nr_regions;
984 		  BLOCK_TO_BB (bb->index) = count = 0;
985 
986 		  /* Remove blocks from queue[] when their in degree
987 		     becomes zero.  Repeat until no blocks are left on the
988 		     list.  This produces a topological list of blocks in
989 		     the region.  */
990 		  while (tail >= 0)
991 		    {
992 		      if (head < 0)
993 			head = tail;
994 		      child = queue[head];
995 		      if (degree[child] == 0)
996 			{
997 			  edge e;
998 
999 			  degree[child] = -1;
1000 			  rgn_bb_table[idx++] = child;
1001 			  BLOCK_TO_BB (child) = ++count;
1002 			  CONTAINING_RGN (child) = nr_regions;
1003 			  queue[head] = queue[tail--];
1004 
1005 			  for (e = BASIC_BLOCK (child)->succ;
1006 			       e;
1007 			       e = e->succ_next)
1008 			    if (e->dest != EXIT_BLOCK_PTR)
1009 			      --degree[e->dest->index];
1010 			}
1011 		      else
1012 			--head;
1013 		    }
1014 		  ++nr_regions;
1015 		}
1016 	    }
1017 	}
1018       free (queue);
1019     }
1020 
1021   /* Any block that did not end up in a region is placed into a region
1022      by itself.  */
1023   FOR_EACH_BB (bb)
1024     if (degree[bb->index] >= 0)
1025       {
1026 	rgn_bb_table[idx] = bb->index;
1027 	RGN_NR_BLOCKS (nr_regions) = 1;
1028 	RGN_BLOCKS (nr_regions) = idx++;
1029 	CONTAINING_RGN (bb->index) = nr_regions++;
1030 	BLOCK_TO_BB (bb->index) = 0;
1031       }
1032 
1033   free (max_hdr);
1034   free (dfs_nr);
1035   free (stack);
1036   sbitmap_free (passed);
1037   sbitmap_free (header);
1038   sbitmap_free (inner);
1039   sbitmap_free (in_queue);
1040   sbitmap_free (in_stack);
1041 }
1042 
1043 /* Functions for region scheduling information.  */
1044 
1045 /* Compute dominators, probability, and potential-split-edges of bb.
1046    Assume that these values were already computed for bb's predecessors.  */
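/* A small worked example of the update below: a predecessor with
   prob 0.8 and three outgoing edges, one of which leaves the region,
   contributes 0.9 * 0.8 / 2 = 0.36 to each of its two in-region
   successors; with no region-exit edges it would contribute
   0.8 / 3 to each of its three successors.  */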
1047 
1048 static void
1049 compute_dom_prob_ps (bb)
1050      int bb;
1051 {
1052   int nxt_in_edge, fst_in_edge, pred;
1053   int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;
1054 
1055   prob[bb] = 0.0;
1056   if (IS_RGN_ENTRY (bb))
1057     {
1058       SET_BIT (dom[bb], 0);
1059       prob[bb] = 1.0;
1060       return;
1061     }
1062 
1063   fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));
1064 
1065   /* Initialize dom[bb] to '111..1'.  */
1066   sbitmap_ones (dom[bb]);
1067 
1068   do
1069     {
1070       pred = FROM_BLOCK (nxt_in_edge);
1071       sbitmap_a_and_b (dom[bb], dom[bb], dom[BLOCK_TO_BB (pred)]);
1072       sbitmap_a_or_b (ancestor_edges[bb], ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)]);
1073 
1074       SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge));
1075 
1076       nr_out_edges = 1;
1077       nr_rgn_out_edges = 0;
1078       fst_out_edge = OUT_EDGES (pred);
1079       nxt_out_edge = NEXT_OUT (fst_out_edge);
1080 
1081       sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[BLOCK_TO_BB (pred)]);
1082 
1083       SET_BIT (pot_split[bb], EDGE_TO_BIT (fst_out_edge));
1084 
1085       /* The successor doesn't belong in the region?  */
1086       if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
1087 	  CONTAINING_RGN (BB_TO_BLOCK (bb)))
1088 	++nr_rgn_out_edges;
1089 
1090       while (fst_out_edge != nxt_out_edge)
1091 	{
1092 	  ++nr_out_edges;
1093 	  /* The successor doesn't belong in the region?  */
1094 	  if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
1095 	      CONTAINING_RGN (BB_TO_BLOCK (bb)))
1096 	    ++nr_rgn_out_edges;
1097 	  SET_BIT (pot_split[bb], EDGE_TO_BIT (nxt_out_edge));
1098 	  nxt_out_edge = NEXT_OUT (nxt_out_edge);
1099 
1100 	}
1101 
1102       /* Now nr_rgn_out_edges is the number of region-exit edges from
1103          pred, and nr_out_edges will be the number of pred out edges
1104          not leaving the region.  */
1105       nr_out_edges -= nr_rgn_out_edges;
1106       if (nr_rgn_out_edges > 0)
1107 	prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1108       else
1109 	prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1110       nxt_in_edge = NEXT_IN (nxt_in_edge);
1111     }
1112   while (fst_in_edge != nxt_in_edge);
1113 
1114   SET_BIT (dom[bb], bb);
1115   sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
1116 
1117   if (sched_verbose >= 2)
1118     fprintf (sched_dump, ";;  bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1119 	     (int) (100.0 * prob[bb]));
1120 }
1121 
1122 /* Functions for target info.  */
1123 
1124 /* Compute in BL the list of split-edges of bb_src relative to bb_trg.
1125    Note that bb_trg dominates bb_src.  */
1126 
1127 static void
1128 split_edges (bb_src, bb_trg, bl)
1129      int bb_src;
1130      int bb_trg;
1131      edgelst *bl;
1132 {
1133   sbitmap src = (edgeset) sbitmap_alloc (pot_split[bb_src]->n_bits);
1134   sbitmap_copy (src, pot_split[bb_src]);
1135 
1136   sbitmap_difference (src, src, pot_split[bb_trg]);
1137   extract_bitlst (src, bl);
1138   sbitmap_free (src);
1139 }
1140 
1141 /* Find the valid candidate-source-blocks for the target block TRG, compute
1142    their probability, and check if they are speculative or not.
1143    For speculative sources, compute their update-blocks and split-blocks.  */
1144 
1145 static void
1146 compute_trg_info (trg)
1147      int trg;
1148 {
1149   candidate *sp;
1150   edgelst el;
1151   int check_block, update_idx;
1152   int i, j, k, fst_edge, nxt_edge;
1153 
1154   /* Define some of the fields for the target bb as well.  */
1155   sp = candidate_table + trg;
1156   sp->is_valid = 1;
1157   sp->is_speculative = 0;
1158   sp->src_prob = 100;
1159 
1160   for (i = trg + 1; i < current_nr_blocks; i++)
1161     {
1162       sp = candidate_table + i;
1163 
1164       sp->is_valid = IS_DOMINATED (i, trg);
1165       if (sp->is_valid)
1166 	{
1167 	  sp->src_prob = GET_SRC_PROB (i, trg);
1168 	  sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1169 	}
1170 
1171       if (sp->is_valid)
1172 	{
1173 	  split_edges (i, trg, &el);
1174 	  sp->is_speculative = (el.nr_members) ? 1 : 0;
1175 	  if (sp->is_speculative && !flag_schedule_speculative)
1176 	    sp->is_valid = 0;
1177 	}
1178 
1179       if (sp->is_valid)
1180 	{
1181 	  char *update_blocks;
1182 
1183 	  /* Compute split blocks and store them in bblst_table.
1184 	     The TO block of every split edge is a split block.  */
1185 	  sp->split_bbs.first_member = &bblst_table[bblst_last];
1186 	  sp->split_bbs.nr_members = el.nr_members;
1187 	  for (j = 0; j < el.nr_members; bblst_last++, j++)
1188 	    bblst_table[bblst_last] =
1189 	      TO_BLOCK (rgn_edges[el.first_member[j]]);
1190 	  sp->update_bbs.first_member = &bblst_table[bblst_last];
1191 
1192 	  /* Compute update blocks and store them in bblst_table.
1193 	     For every split edge, look at the FROM block, and check
1194 	     all out edges.  For each out edge that is not a split edge,
1195 	     add the TO block to the update block list.  This list can end
1196 	     up with a lot of duplicates.  We need to weed them out to avoid
1197 	     overrunning the end of the bblst_table.  */
1198 	  update_blocks = (char *) alloca (last_basic_block);
1199 	  memset (update_blocks, 0, last_basic_block);
1200 
1201 	  update_idx = 0;
1202 	  for (j = 0; j < el.nr_members; j++)
1203 	    {
1204 	      check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
1205 	      fst_edge = nxt_edge = OUT_EDGES (check_block);
1206 	      do
1207 		{
1208 		  if (! update_blocks[TO_BLOCK (nxt_edge)])
1209 		    {
1210 		      for (k = 0; k < el.nr_members; k++)
1211 			if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
1212 			  break;
1213 
1214 		      if (k >= el.nr_members)
1215 			{
1216 			  bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
1217 			  update_blocks[TO_BLOCK (nxt_edge)] = 1;
1218 			  update_idx++;
1219 			}
1220 		    }
1221 
1222 		  nxt_edge = NEXT_OUT (nxt_edge);
1223 		}
1224 	      while (fst_edge != nxt_edge);
1225 	    }
1226 	  sp->update_bbs.nr_members = update_idx;
1227 
1228 	  /* Make sure we didn't overrun the end of bblst_table.  */
1229 	  if (bblst_last > bblst_size)
1230 	    abort ();
1231 	}
1232       else
1233 	{
1234 	  sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1235 
1236 	  sp->is_speculative = 0;
1237 	  sp->src_prob = 0;
1238 	}
1239     }
1240 }
1241 
1242 /* Print candidates info, for debugging purposes.  Callable from debugger.  */
1243 
1244 void
1245 debug_candidate (i)
1246      int i;
1247 {
1248   if (!candidate_table[i].is_valid)
1249     return;
1250 
1251   if (candidate_table[i].is_speculative)
1252     {
1253       int j;
1254       fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1255 
1256       fprintf (sched_dump, "split path: ");
1257       for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1258 	{
1259 	  int b = candidate_table[i].split_bbs.first_member[j];
1260 
1261 	  fprintf (sched_dump, " %d ", b);
1262 	}
1263       fprintf (sched_dump, "\n");
1264 
1265       fprintf (sched_dump, "update path: ");
1266       for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1267 	{
1268 	  int b = candidate_table[i].update_bbs.first_member[j];
1269 
1270 	  fprintf (sched_dump, " %d ", b);
1271 	}
1272       fprintf (sched_dump, "\n");
1273     }
1274   else
1275     {
1276       fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1277     }
1278 }
1279 
1280 /* Print candidates info, for debugging purposes.  Callable from debugger.  */
1281 
1282 void
1283 debug_candidates (trg)
1284      int trg;
1285 {
1286   int i;
1287 
1288   fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1289 	   BB_TO_BLOCK (trg), trg);
1290   for (i = trg + 1; i < current_nr_blocks; i++)
1291     debug_candidate (i);
1292 }
1293 
1294 /* Functions for speculative scheduling.  */
1295 
1296 /* Return 0 if x is a set of a register live at the beginning of one
1297    of the split-blocks of src, otherwise return 1.  */
1298 
1299 static int
1300 check_live_1 (src, x)
1301      int src;
1302      rtx x;
1303 {
1304   int i;
1305   int regno;
1306   rtx reg = SET_DEST (x);
1307 
1308   if (reg == 0)
1309     return 1;
1310 
1311   while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1312 	 || GET_CODE (reg) == SIGN_EXTRACT
1313 	 || GET_CODE (reg) == STRICT_LOW_PART)
1314     reg = XEXP (reg, 0);
1315 
1316   if (GET_CODE (reg) == PARALLEL)
1317     {
1318       int i;
1319 
1320       for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1321 	if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1322 	  if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1323 	    return 1;
1324 
1325       return 0;
1326     }
1327 
1328   if (GET_CODE (reg) != REG)
1329     return 1;
1330 
1331   regno = REGNO (reg);
1332 
1333   if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1334     {
1335       /* Global registers are assumed live.  */
1336       return 0;
1337     }
1338   else
1339     {
1340       if (regno < FIRST_PSEUDO_REGISTER)
1341 	{
1342 	  /* Check for hard registers.  */
1343 	  int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1344 	  while (--j >= 0)
1345 	    {
1346 	      for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1347 		{
1348 		  int b = candidate_table[src].split_bbs.first_member[i];
1349 
1350 		  if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
1351 				       regno + j))
1352 		    {
1353 		      return 0;
1354 		    }
1355 		}
1356 	    }
1357 	}
1358       else
1359 	{
1360 	  /* Check for pseudo registers.  */
1361 	  for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1362 	    {
1363 	      int b = candidate_table[src].split_bbs.first_member[i];
1364 
1365 	      if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
1366 		{
1367 		  return 0;
1368 		}
1369 	    }
1370 	}
1371     }
1372 
1373   return 1;
1374 }
1375 
1376 /* If x is a set of a register R, mark that R is live at the beginning
1377    of every update-block of src.  */
1378 
1379 static void
1380 update_live_1 (src, x)
1381      int src;
1382      rtx x;
1383 {
1384   int i;
1385   int regno;
1386   rtx reg = SET_DEST (x);
1387 
1388   if (reg == 0)
1389     return;
1390 
1391   while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1392 	 || GET_CODE (reg) == SIGN_EXTRACT
1393 	 || GET_CODE (reg) == STRICT_LOW_PART)
1394     reg = XEXP (reg, 0);
1395 
1396   if (GET_CODE (reg) == PARALLEL)
1397     {
1398       int i;
1399 
1400       for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1401 	if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1402 	  update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1403 
1404       return;
1405     }
1406 
1407   if (GET_CODE (reg) != REG)
1408     return;
1409 
1410   /* Global registers are always live, so the code below does not apply
1411      to them.  */
1412 
1413   regno = REGNO (reg);
1414 
1415   if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1416     {
1417       if (regno < FIRST_PSEUDO_REGISTER)
1418 	{
1419 	  int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1420 	  while (--j >= 0)
1421 	    {
1422 	      for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1423 		{
1424 		  int b = candidate_table[src].update_bbs.first_member[i];
1425 
1426 		  SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
1427 				     regno + j);
1428 		}
1429 	    }
1430 	}
1431       else
1432 	{
1433 	  for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1434 	    {
1435 	      int b = candidate_table[src].update_bbs.first_member[i];
1436 
1437 	      SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
1438 	    }
1439 	}
1440     }
1441 }
1442 
1443 /* Return 1 if insn can be speculatively moved from block src to trg,
1444    otherwise return 0.  Called before first insertion of insn to
1445    ready-list or before the scheduling.  */
1446 
1447 static int
1448 check_live (insn, src)
1449      rtx insn;
1450      int src;
1451 {
1452   /* Find the registers set by instruction.  */
1453   if (GET_CODE (PATTERN (insn)) == SET
1454       || GET_CODE (PATTERN (insn)) == CLOBBER)
1455     return check_live_1 (src, PATTERN (insn));
1456   else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1457     {
1458       int j;
1459       for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1460 	if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1461 	     || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1462 	    && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1463 	  return 0;
1464 
1465       return 1;
1466     }
1467 
1468   return 1;
1469 }
1470 
1471 /* Update the live registers info after insn was moved speculatively from
1472    block src to trg.  */
1473 
1474 static void
1475 update_live (insn, src)
1476      rtx insn;
1477      int src;
1478 {
1479   /* Find the registers set by instruction.  */
1480   if (GET_CODE (PATTERN (insn)) == SET
1481       || GET_CODE (PATTERN (insn)) == CLOBBER)
1482     update_live_1 (src, PATTERN (insn));
1483   else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1484     {
1485       int j;
1486       for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1487 	if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1488 	    || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1489 	  update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1490     }
1491 }
1492 
1493 /* Exception Free Loads:
1494 
1495    We define five classes of speculative loads: IFREE, IRISKY,
1496    PFREE, PRISKY, and MFREE.
1497 
1498    IFREE loads are loads that are proved to be exception-free, just
1499    by examining the load insn.  Examples for such loads are loads
1500    from TOC and loads of global data.
1501 
1502    IRISKY loads are loads that are proved to be exception-risky,
1503    just by examining the load insn.  Examples for such loads are
1504    volatile loads and loads from shared memory.
1505 
1506    PFREE loads are loads for which we can prove, by examining other
1507    insns, that they are exception-free.  Currently, this class consists
1508    of loads for which we are able to find a "similar load", either in
1509    the target block, or, if only one split-block exists, in that split
1510    block.  Load2 is similar to load1 if both have the same single base
1511    register.  We identify only part of the similar loads, by finding
1512    an insn upon which both load1 and load2 have a DEF-USE dependence.
1513 
1514    PRISKY loads are loads for which we can prove, by examining other
1515    insns, that they are exception-risky.  Currently we have two proofs for
1516    such loads.  The first proof detects loads that are probably guarded by a
1517    test on the memory address.  This proof is based on the
1518    backward and forward data dependence information for the region.
1519    Let load-insn be the examined load.
1520    Load-insn is PRISKY iff ALL the following hold:
1521 
1522    - insn1 is not in the same block as load-insn
1523    - there is a DEF-USE dependence chain (insn1, ..., load-insn)
1524    - test-insn is either a compare or a branch, not in the same block
1525      as load-insn
1526    - load-insn is reachable from test-insn
1527    - there is a DEF-USE dependence chain (insn1, ..., test-insn)
1528 
1529    This proof might fail when the compare and the load are fed
1530    by an insn not in the region.  To solve this, we will add to this
1531    group all loads that have no input DEF-USE dependence.
1532 
1533    The second proof detects loads that are directly or indirectly
1534    fed by a speculative load.  This proof is affected by the
1535    scheduling process.  We will use the flag  fed_by_spec_load.
1536    Initially, all insns have this flag reset.  After a speculative
1537    motion of an insn, if insn is either a load, or marked as
1538    fed_by_spec_load, we will also mark as fed_by_spec_load every
1539    insn1 for which a DEF-USE dependence (insn, insn1) exists.  A
1540    load which is fed_by_spec_load is also PRISKY.
1541 
1542    MFREE (maybe-free) loads are all the remaining loads. They may be
1543    exception-free, but we cannot prove it.
1544 
1545    Now, all loads in IFREE and PFREE classes are considered
1546    exception-free, while all loads in IRISKY and PRISKY classes are
1547    considered exception-risky.  As for loads in the MFREE class,
1548    these are considered either exception-free or exception-risky,
1549    depending on whether we are pessimistic or optimistic.  We have
1550    to take the pessimistic approach to assure the safety of
1551    speculative scheduling, but we can take the optimistic approach
1552    by invoking the -fsched-spec-load-dangerous option.  */
1553 
1554 enum INSN_TRAP_CLASS
1555 {
1556   TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
1557   PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
1558 };
1559 
1560 #define WORST_CLASS(class1, class2) \
1561 ((class1 > class2) ? class1 : class2)
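/* The classes above are ordered by increasing risk, so for example
   WORST_CLASS (IFREE, PRISKY_CANDIDATE) is PRISKY_CANDIDATE and
   WORST_CLASS (TRAP_FREE, TRAP_RISKY) is TRAP_RISKY.  */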
1562 
1563 /* Non-zero if block bb_to is equal to, or reachable from, block bb_from.  */
1564 #define IS_REACHABLE(bb_from, bb_to)					\
1565   (bb_from == bb_to							\
1566    || IS_RGN_ENTRY (bb_from)						\
1567    || (TEST_BIT (ancestor_edges[bb_to],					\
1568 		 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))))))
1569 
1570 /* Non-zero iff the address is computed from at most one register.  */
1571 #define CONST_BASED_ADDRESS_P(x)			\
1572   (GET_CODE (x) == REG					\
1573    || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS	\
1574 	|| (GET_CODE (x) == LO_SUM))			\
1575        && (CONSTANT_P (XEXP (x, 0))			\
1576 	   || CONSTANT_P (XEXP (x, 1)))))
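/* For instance, the addresses (reg) and (plus (reg) (const_int 4))
   satisfy this predicate, while (plus (reg) (reg)) does not, since
   neither operand of the PLUS is constant.  */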
1577 
1578 /* Turns on the fed_by_spec_load flag for insns fed by load_insn.  */
1579 
1580 static void
1581 set_spec_fed (load_insn)
1582      rtx load_insn;
1583 {
1584   rtx link;
1585 
1586   for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
1587     if (GET_MODE (link) == VOIDmode)
1588       FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
1589 }				/* set_spec_fed */
1590 
1591 /* On the path from the insn to load_insn_bb, find a conditional
1592    branch depending on insn that guards the speculative load.  */
1593 
1594 static int
1595 find_conditional_protection (insn, load_insn_bb)
1596      rtx insn;
1597      int load_insn_bb;
1598 {
1599   rtx link;
1600 
1601   /* Iterate through DEF-USE forward dependences.  */
1602   for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
1603     {
1604       rtx next = XEXP (link, 0);
1605       if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1606 	   CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1607 	  && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1608 	  && load_insn_bb != INSN_BB (next)
1609 	  && GET_MODE (link) == VOIDmode
1610 	  && (GET_CODE (next) == JUMP_INSN
1611 	      || find_conditional_protection (next, load_insn_bb)))
1612 	return 1;
1613     }
1614   return 0;
1615 }				/* find_conditional_protection */
1616 
1617 /* Returns 1 if an insn1 that participates in the computation
1618    of load_insn's address is also feeding a conditional branch that
1619    guards load_insn.  This is true if we find the two DEF-USE
1620    chains:
1621    insn1 -> ... -> conditional-branch
1622    insn1 -> ... -> load_insn,
1623    and if a flow path exists:
1624    insn1 -> ... -> conditional-branch -> ... -> load_insn,
1625    and if insn1 is on the path
1626    region-entry -> ... -> bb_trg -> ... load_insn.
1627 
1628    Locate insn1 by climbing on LOG_LINKS from load_insn.
1629    Locate the branch by following INSN_DEPEND from insn1.  */
1630 
1631 static int
1632 is_conditionally_protected (load_insn, bb_src, bb_trg)
1633      rtx load_insn;
1634      int bb_src, bb_trg;
1635 {
1636   rtx link;
1637 
1638   for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
1639     {
1640       rtx insn1 = XEXP (link, 0);
1641 
1642       /* Must be a DEF-USE dependence upon non-branch.  */
1643       if (GET_MODE (link) != VOIDmode
1644 	  || GET_CODE (insn1) == JUMP_INSN)
1645 	continue;
1646 
1647       /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn.  */
1648       if (INSN_BB (insn1) == bb_src
1649 	  || (CONTAINING_RGN (BLOCK_NUM (insn1))
1650 	      != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1651 	  || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1652 	      && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1653 	continue;
1654 
1655       /* Now search for the conditional-branch.  */
1656       if (find_conditional_protection (insn1, bb_src))
1657 	return 1;
1658 
1659       /* Recursive step: search another insn1, "above" current insn1.  */
1660       return is_conditionally_protected (insn1, bb_src, bb_trg);
1661     }
1662 
1663   /* The chain does not exist.  */
1664   return 0;
1665 }				/* is_conditionally_protected */
1666 
1667 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1668    load_insn can move speculatively from bb_src to bb_trg.  All the
1669    following must hold:
1670 
1671    (1) both loads have 1 base register (PFREE_CANDIDATEs).
1672    (2) load_insn and load2 have a def-use dependence upon
1673    the same insn 'insn1'.
1674    (3) either load2 is in bb_trg, or:
1675    - there's only one split-block, and
1676    - load2 is on the escape path (in that single split-block).
1677 
1678    From all these we can conclude that the two loads access memory
1679    addresses that differ at most by a constant, and hence if moving
1680    load_insn would cause an exception, it would have been caused by
1681    insn2 anyhow.  */
1682 
1683 static int
1684 is_pfree (load_insn, bb_src, bb_trg)
1685      rtx load_insn;
1686      int bb_src, bb_trg;
1687 {
1688   rtx back_link;
1689   candidate *candp = candidate_table + bb_src;
1690 
1691   if (candp->split_bbs.nr_members != 1)
1692     /* Must have exactly one escape block.  */
1693     return 0;
1694 
1695   for (back_link = LOG_LINKS (load_insn);
1696        back_link; back_link = XEXP (back_link, 1))
1697     {
1698       rtx insn1 = XEXP (back_link, 0);
1699 
1700       if (GET_MODE (back_link) == VOIDmode)
1701 	{
1702 	  /* Found a DEF-USE dependence (insn1, load_insn).  */
1703 	  rtx fore_link;
1704 
1705 	  for (fore_link = INSN_DEPEND (insn1);
1706 	       fore_link; fore_link = XEXP (fore_link, 1))
1707 	    {
1708 	      rtx insn2 = XEXP (fore_link, 0);
1709 	      if (GET_MODE (fore_link) == VOIDmode)
1710 		{
1711 		  /* Found a DEF-USE dependence (insn1, insn2).  */
1712 		  if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1713 		    /* insn2 not guaranteed to be a 1 base reg load.  */
1714 		    continue;
1715 
1716 		  if (INSN_BB (insn2) == bb_trg)
1717 		    /* insn2 is the similar load, in the target block.  */
1718 		    return 1;
1719 
1720 		  if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
1721 		    /* insn2 is a similar load, in a split-block.  */
1722 		    return 1;
1723 		}
1724 	    }
1725 	}
1726     }
1727 
1728   /* Couldn't find a similar load.  */
1729   return 0;
1730 }				/* is_pfree */
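
/* A sketch of the situation is_pfree is looking for (hypothetical insns,
   register numbers invented): insn1 sets (reg 100); insn2, already in
   bb_trg, loads (mem (reg 100)); load_insn, in bb_src, loads
   (mem (plus (reg 100) (const_int 4))).  Both loads are PFREE_CANDIDATEs
   based on the same register, so their addresses differ only by a
   constant, and any fault load_insn could take would already have been
   taken by insn2 in the target block.  */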
1731 
1732 /* Returns a class that insn with GET_DEST(insn)=x may belong to,
1733    as found by analyzing insn's expression.  */
1734 
1735 static int
1736 may_trap_exp (x, is_store)
1737      rtx x;
1738      int is_store;
1739 {
1740   enum rtx_code code;
1741 
1742   if (x == 0)
1743     return TRAP_FREE;
1744   code = GET_CODE (x);
1745   if (is_store)
1746     {
1747       if (code == MEM && may_trap_p (x))
1748 	return TRAP_RISKY;
1749       else
1750 	return TRAP_FREE;
1751     }
1752   if (code == MEM)
1753     {
1754       /* The insn uses memory:  a volatile load.  */
1755       if (MEM_VOLATILE_P (x))
1756 	return IRISKY;
1757       /* An exception-free load.  */
1758       if (!may_trap_p (x))
1759 	return IFREE;
1760       /* A load with 1 base register, to be further checked.  */
1761       if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
1762 	return PFREE_CANDIDATE;
1763       /* No info on the load, to be further checked.  */
1764       return PRISKY_CANDIDATE;
1765     }
1766   else
1767     {
1768       const char *fmt;
1769       int i, insn_class = TRAP_FREE;
1770 
1771       /* Neither store nor load, check if it may cause a trap.  */
1772       if (may_trap_p (x))
1773 	return TRAP_RISKY;
1774       /* Recursive step: walk the insn...  */
1775       fmt = GET_RTX_FORMAT (code);
1776       for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1777 	{
1778 	  if (fmt[i] == 'e')
1779 	    {
1780 	      int tmp_class = may_trap_exp (XEXP (x, i), is_store);
1781 	      insn_class = WORST_CLASS (insn_class, tmp_class);
1782 	    }
1783 	  else if (fmt[i] == 'E')
1784 	    {
1785 	      int j;
1786 	      for (j = 0; j < XVECLEN (x, i); j++)
1787 		{
1788 		  int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store);
1789 		  insn_class = WORST_CLASS (insn_class, tmp_class);
1790 		  if (insn_class == TRAP_RISKY || insn_class == IRISKY)
1791 		    break;
1792 		}
1793 	    }
1794 	  if (insn_class == TRAP_RISKY || insn_class == IRISKY)
1795 	    break;
1796 	}
1797       return insn_class;
1798     }
1799 }
1800 
1801 /* Classifies insn for the purpose of verifying that it can be
1802    moved speculatively, by examining its pattern, returning:
1803    TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
1804    TRAP_FREE: non-load insn.
1805    IFREE: load from a globally safe location.
1806    IRISKY: volatile load.
1807    PFREE_CANDIDATE, PRISKY_CANDIDATE: loads that need to be checked for
1808    being either PFREE or PRISKY.  */
1809 
1810 static int
1811 haifa_classify_insn (insn)
1812      rtx insn;
1813 {
1814   rtx pat = PATTERN (insn);
1815   int tmp_class = TRAP_FREE;
1816   int insn_class = TRAP_FREE;
1817   enum rtx_code code;
1818 
1819   if (GET_CODE (pat) == PARALLEL)
1820     {
1821       int i, len = XVECLEN (pat, 0);
1822 
1823       for (i = len - 1; i >= 0; i--)
1824 	{
1825 	  code = GET_CODE (XVECEXP (pat, 0, i));
1826 	  switch (code)
1827 	    {
1828 	    case CLOBBER:
1829 	      /* Test if it is a 'store'.  */
1830 	      tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
1831 	      break;
1832 	    case SET:
1833 	      /* Test if it is a store.  */
1834 	      tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
1835 	      if (tmp_class == TRAP_RISKY)
1836 		break;
1837 	      /* Test if it is a load.  */
1838 	      tmp_class
1839 		= WORST_CLASS (tmp_class,
1840 			       may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)),
1841 					     0));
1842 	      break;
1843 	    case COND_EXEC:
1844 	    case TRAP_IF:
1845 	      tmp_class = TRAP_RISKY;
1846 	      break;
1847 	    default:
1848 	      ;
1849 	    }
1850 	  insn_class = WORST_CLASS (insn_class, tmp_class);
1851 	  if (insn_class == TRAP_RISKY || insn_class == IRISKY)
1852 	    break;
1853 	}
1854     }
1855   else
1856     {
1857       code = GET_CODE (pat);
1858       switch (code)
1859 	{
1860 	case CLOBBER:
1861 	  /* Test if it is a 'store'.  */
1862 	  tmp_class = may_trap_exp (XEXP (pat, 0), 1);
1863 	  break;
1864 	case SET:
1865 	  /* Test if it is a store.  */
1866 	  tmp_class = may_trap_exp (SET_DEST (pat), 1);
1867 	  if (tmp_class == TRAP_RISKY)
1868 	    break;
1869 	  /* Test if it is a load.  */
1870 	  tmp_class =
1871 	    WORST_CLASS (tmp_class,
1872 			 may_trap_exp (SET_SRC (pat), 0));
1873 	  break;
1874 	case COND_EXEC:
1875 	case TRAP_IF:
1876 	  tmp_class = TRAP_RISKY;
1877 	  break;
1878 	default:;
1879 	}
1880       insn_class = tmp_class;
1881     }
1882 
1883   return insn_class;
1884 }
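
/* Some hypothetical single-set examples of this classification (simplified;
   for a real MEM the outcome also depends on what may_trap_p reports):

     (set (reg) (plus (reg) (reg)))                   -> TRAP_FREE
     (set (mem (reg)) (reg)), MEM may trap            -> TRAP_RISKY
     (set (reg) (mem/v (reg)))   volatile load        -> IRISKY
     (set (reg) (mem (plus (reg) (const_int 4))))     -> PFREE_CANDIDATE
     (set (reg) (mem (plus (reg) (reg))))             -> PRISKY_CANDIDATE

   may_trap_exp examines the destination with is_store = 1 and the source
   with is_store = 0, and WORST_CLASS keeps the riskier of the two
   results.  */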
1885 
1886 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1887    a load moved speculatively, or if load_insn is protected by
1888    a compare on load_insn's address).  */
1889 
1890 static int
1891 is_prisky (load_insn, bb_src, bb_trg)
1892      rtx load_insn;
1893      int bb_src, bb_trg;
1894 {
1895   if (FED_BY_SPEC_LOAD (load_insn))
1896     return 1;
1897 
1898   if (LOG_LINKS (load_insn) == NULL)
1899     /* Dependence may 'hide' out of the region.  */
1900     return 1;
1901 
1902   if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1903     return 1;
1904 
1905   return 0;
1906 }
1907 
1908 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1909    Return 1 if insn is exception-free (and the motion is valid)
1910    and 0 otherwise.  */
1911 
1912 static int
1913 is_exception_free (insn, bb_src, bb_trg)
1914      rtx insn;
1915      int bb_src, bb_trg;
1916 {
1917   int insn_class = haifa_classify_insn (insn);
1918 
1919   /* Handle non-load insns.  */
1920   switch (insn_class)
1921     {
1922     case TRAP_FREE:
1923       return 1;
1924     case TRAP_RISKY:
1925       return 0;
1926     default:;
1927     }
1928 
1929   /* Handle loads.  */
1930   if (!flag_schedule_speculative_load)
1931     return 0;
1932   IS_LOAD_INSN (insn) = 1;
1933   switch (insn_class)
1934     {
1935     case IFREE:
1936       return (1);
1937     case IRISKY:
1938       return 0;
1939     case PFREE_CANDIDATE:
1940       if (is_pfree (insn, bb_src, bb_trg))
1941 	return 1;
1942       /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate.  */
1943     case PRISKY_CANDIDATE:
1944       if (!flag_schedule_speculative_load_dangerous
1945 	  || is_prisky (insn, bb_src, bb_trg))
1946 	return 0;
1947       break;
1948     default:;
1949     }
1950 
1951   return flag_schedule_speculative_load_dangerous;
1952 }
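
/* Summarizing the decision above (assuming the usual mapping of
   flag_schedule_speculative_load and
   flag_schedule_speculative_load_dangerous to the -fsched-spec-load and
   -fsched-spec-load-dangerous options):

     TRAP_FREE         always movable
     TRAP_RISKY        never movable
     IFREE             movable when speculative loads are enabled
     IRISKY            never movable
     PFREE_CANDIDATE   movable if is_pfree () holds; otherwise treated
                       like a PRISKY_CANDIDATE
     PRISKY_CANDIDATE  movable only with the "dangerous" option and when
                       is_prisky () does not object

   All of the load classes additionally require speculative loads to be
   enabled at all.  */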
1953 
1954 /* The number of insns from the current block scheduled so far.  */
1955 static int sched_target_n_insns;
1956 /* The number of insns from the current block to be scheduled in total.  */
1957 static int target_n_insns;
1958 /* The number of insns from the entire region scheduled so far.  */
1959 static int sched_n_insns;
1960 /* Nonzero if the last scheduled insn was a jump.  */
1961 static int last_was_jump;
1962 
1963 /* Implementations of the sched_info functions for region scheduling.  */
1964 static void init_ready_list PARAMS ((struct ready_list *));
1965 static int can_schedule_ready_p PARAMS ((rtx));
1966 static int new_ready PARAMS ((rtx));
1967 static int schedule_more_p PARAMS ((void));
1968 static const char *rgn_print_insn PARAMS ((rtx, int));
1969 static int rgn_rank PARAMS ((rtx, rtx));
1970 static int contributes_to_priority PARAMS ((rtx, rtx));
1971 static void compute_jump_reg_dependencies PARAMS ((rtx, regset, regset,
1972 						   regset));
1973 
1974 /* Return nonzero if there are more insns that should be scheduled.  */
1975 
1976 static int
1977 schedule_more_p ()
1978 {
1979   return ! last_was_jump && sched_target_n_insns < target_n_insns;
1980 }
1981 
1982 /* Add all insns that are initially ready to the ready list READY.  Called
1983    once before scheduling a set of insns.  */
1984 
1985 static void
1986 init_ready_list (ready)
1987      struct ready_list *ready;
1988 {
1989   rtx prev_head = current_sched_info->prev_head;
1990   rtx next_tail = current_sched_info->next_tail;
1991   int bb_src;
1992   rtx insn;
1993 
1994   target_n_insns = 0;
1995   sched_target_n_insns = 0;
1996   sched_n_insns = 0;
1997   last_was_jump = 0;
1998 
1999   /* Print debugging information.  */
2000   if (sched_verbose >= 5)
2001     debug_dependencies ();
2002 
2003   /* Prepare current target block info.  */
2004   if (current_nr_blocks > 1)
2005     {
2006       candidate_table = (candidate *) xmalloc (current_nr_blocks
2007 					       * sizeof (candidate));
2008 
2009       bblst_last = 0;
2010       /* bblst_table holds split blocks and update blocks for each block after
2011 	 the current one in the region.  Split blocks and update blocks are
2012 	 the TO blocks of region edges, so there can be at most rgn_nr_edges
2013 	 of them.  */
2014       bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
2015       bblst_table = (int *) xmalloc (bblst_size * sizeof (int));
2016 
2017       bitlst_table_last = 0;
2018       bitlst_table_size = rgn_nr_edges;
2019       bitlst_table = (int *) xmalloc (rgn_nr_edges * sizeof (int));
2020 
2021       compute_trg_info (target_bb);
2022     }
2023 
2024   /* Initialize ready list with all 'ready' insns in target block.
2025      Count number of insns in the target block being scheduled.  */
2026   for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
2027     {
2028       rtx next;
2029 
2030       if (! INSN_P (insn))
2031 	continue;
2032       next = NEXT_INSN (insn);
2033 
2034       if (INSN_DEP_COUNT (insn) == 0
2035 	  && (! INSN_P (next) || SCHED_GROUP_P (next) == 0))
2036 	ready_add (ready, insn);
2037       if (!(SCHED_GROUP_P (insn)))
2038 	target_n_insns++;
2039     }
2040 
2041   /* Add to ready list all 'ready' insns in valid source blocks.
2042      For speculative insns, check-live, exception-free, and
2043      issue-delay.  */
2044   for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
2045     if (IS_VALID (bb_src))
2046       {
2047 	rtx src_head;
2048 	rtx src_next_tail;
2049 	rtx tail, head;
2050 
2051 	get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
2052 	src_next_tail = NEXT_INSN (tail);
2053 	src_head = head;
2054 
2055 	for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
2056 	  {
2057 	    if (! INSN_P (insn))
2058 	      continue;
2059 
2060 	    if (!CANT_MOVE (insn)
2061 		&& (!IS_SPECULATIVE_INSN (insn)
2062 		    || ((((!targetm.sched.use_dfa_pipeline_interface
2063 			   || !(*targetm.sched.use_dfa_pipeline_interface) ())
2064 			  && insn_issue_delay (insn) <= 3)
2065 			 || (targetm.sched.use_dfa_pipeline_interface
2066 			     && (*targetm.sched.use_dfa_pipeline_interface) ()
2067 			     && (recog_memoized (insn) < 0
2068 			         || min_insn_conflict_delay (curr_state,
2069 							     insn, insn) <= 3)))
2070 			&& check_live (insn, bb_src)
2071 			&& is_exception_free (insn, bb_src, target_bb))))
2072 	      {
2073 		rtx next;
2074 
2075 		/* Note that we haven't squirreled away the notes for
2076 		   blocks other than the current.  So if this is a
2077 		   speculative insn, NEXT might otherwise be a note.  */
2078 		next = next_nonnote_insn (insn);
2079 		if (INSN_DEP_COUNT (insn) == 0
2080 		    && (! next
2081 			|| ! INSN_P (next)
2082 			|| SCHED_GROUP_P (next) == 0))
2083 		  ready_add (ready, insn);
2084 	      }
2085 	  }
2086       }
2087 }
2088 
2089 /* Called after taking INSN from the ready list.  Returns nonzero if this
2090    insn can be scheduled, zero if we should silently discard it.  */
2091 
2092 static int
2093 can_schedule_ready_p (insn)
2094      rtx insn;
2095 {
2096   if (GET_CODE (insn) == JUMP_INSN)
2097     last_was_jump = 1;
2098 
2099   /* An interblock motion?  */
2100   if (INSN_BB (insn) != target_bb)
2101     {
2102       rtx temp;
2103       basic_block b1;
2104 
2105       if (IS_SPECULATIVE_INSN (insn))
2106 	{
2107 	  if (!check_live (insn, INSN_BB (insn)))
2108 	    return 0;
2109 	  update_live (insn, INSN_BB (insn));
2110 
2111 	  /* For speculative load, mark insns fed by it.  */
2112 	  if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
2113 	    set_spec_fed (insn);
2114 
2115 	  nr_spec++;
2116 	}
2117       nr_inter++;
2118 
2119       /* Find the beginning of the scheduling group.  */
2120       /* ??? Ought to update basic block here, but later bits of
2121 	 schedule_block assume the original insn block is
2122 	 still intact.  */
2123 
2124       temp = insn;
2125       while (SCHED_GROUP_P (temp))
2126 	temp = PREV_INSN (temp);
2127 
2128       /* Update source block boundaries.  */
2129       b1 = BLOCK_FOR_INSN (temp);
2130       if (temp == b1->head && insn == b1->end)
2131 	{
2132 	  /* We moved all the insns in the basic block.
2133 	     Emit a note after the last insn and update the
2134 	     begin/end boundaries to point to the note.  */
2135 	  rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
2136 	  b1->head = note;
2137 	  b1->end = note;
2138 	}
2139       else if (insn == b1->end)
2140 	{
2141 	  /* We took insns from the end of the basic block,
2142 	     so update the end of block boundary so that it
2143 	     points to the first insn we did not move.  */
2144 	  b1->end = PREV_INSN (temp);
2145 	}
2146       else if (temp == b1->head)
2147 	{
2148 	  /* We took insns from the start of the basic block,
2149 	     so update the start of block boundary so that
2150 	     it points to the first insn we did not move.  */
2151 	  b1->head = NEXT_INSN (insn);
2152 	}
2153     }
2154   else
2155     {
2156       /* In block motion.  */
2157       sched_target_n_insns++;
2158     }
2159   sched_n_insns++;
2160 
2161   return 1;
2162 }
2163 
2164 /* Called after INSN has all its dependencies resolved.  Return nonzero
2165    if it should be moved to the ready list or the queue, or zero if we
2166    should silently discard it.  */
2167 static int
2168 new_ready (next)
2169      rtx next;
2170 {
2171   /* For speculative insns, before inserting to ready/queue,
2172      check live, exception-free, and issue-delay.  */
2173   if (INSN_BB (next) != target_bb
2174       && (!IS_VALID (INSN_BB (next))
2175 	  || CANT_MOVE (next)
2176 	  || (IS_SPECULATIVE_INSN (next)
2177 	      && (0
2178 		  || (targetm.sched.use_dfa_pipeline_interface
2179 		      && (*targetm.sched.use_dfa_pipeline_interface) ()
2180 		      && recog_memoized (next) >= 0
2181 		      && min_insn_conflict_delay (curr_state, next,
2182 						  next) > 3)
2183 		  || ((!targetm.sched.use_dfa_pipeline_interface
2184 		       || !(*targetm.sched.use_dfa_pipeline_interface) ())
2185 		      && insn_issue_delay (next) > 3)
2186 		  || !check_live (next, INSN_BB (next))
2187 		  || !is_exception_free (next, INSN_BB (next), target_bb)))))
2188     return 0;
2189   return 1;
2190 }
2191 
2192 /* Return a string that contains the insn uid and optionally anything else
2193    necessary to identify this insn in an output.  It's valid to use a
2194    static buffer for this.  The ALIGNED parameter should cause the string
2195    to be formatted so that multiple output lines will line up nicely.  */
2196 
2197 static const char *
2198 rgn_print_insn (insn, aligned)
2199      rtx insn;
2200      int aligned;
2201 {
2202   static char tmp[80];
2203 
2204   if (aligned)
2205     sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
2206   else
2207     {
2208       if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
2209 	sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
2210       else
2211 	sprintf (tmp, "%d", INSN_UID (insn));
2212     }
2213   return tmp;
2214 }
2215 
2216 /* Compare priority of two insns.  Return a positive number if the second
2217    insn is to be preferred for scheduling, and a negative one if the first
2218    is to be preferred.  Zero if they are equally good.  */
2219 
2220 static int
2221 rgn_rank (insn1, insn2)
2222      rtx insn1, insn2;
2223 {
2224   /* Some comparisons make sense only in interblock scheduling.  */
2225   if (INSN_BB (insn1) != INSN_BB (insn2))
2226     {
2227       int spec_val, prob_val;
2228 
2229       /* Prefer an in-block motion over an interblock motion.  */
2230       if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2231 	return 1;
2232       if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2233 	return -1;
2234 
2235       /* Prefer a useful motion over a speculative one.  */
2236       spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2237       if (spec_val)
2238 	return spec_val;
2239 
2240       /* Prefer a more probable (speculative) insn.  */
2241       prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
2242       if (prob_val)
2243 	return prob_val;
2244     }
2245   return 0;
2246 }
2247 
2248 /* NEXT is an instruction that depends on INSN (a backward dependence);
2249    return nonzero if we should include this dependence in priority
2250    calculations.  */
2251 
2252 static int
2253 contributes_to_priority (next, insn)
2254      rtx next, insn;
2255 {
2256   return BLOCK_NUM (next) == BLOCK_NUM (insn);
2257 }
2258 
2259 /* INSN is a JUMP_INSN, COND_SET is the set of registers that are
2260    conditionally set before INSN.  Store the set of registers that
2261    must be considered as used by this jump in USED and that of
2262    registers that must be considered as set in SET.  */
2263 
2264 static void
2265 compute_jump_reg_dependencies (insn, cond_set, used, set)
2266      rtx insn ATTRIBUTE_UNUSED;
2267      regset cond_set ATTRIBUTE_UNUSED;
2268      regset used ATTRIBUTE_UNUSED;
2269      regset set ATTRIBUTE_UNUSED;
2270 {
2271   /* Nothing to do here, since we postprocess jumps in
2272      add_branch_dependences.  */
2273 }
2274 
2275 /* Used in schedule_insns to initialize current_sched_info for scheduling
2276    regions (or single basic blocks).  */
2277 
2278 static struct sched_info region_sched_info =
2279 {
2280   init_ready_list,
2281   can_schedule_ready_p,
2282   schedule_more_p,
2283   new_ready,
2284   rgn_rank,
2285   rgn_print_insn,
2286   contributes_to_priority,
2287   compute_jump_reg_dependencies,
2288 
2289   NULL, NULL,
2290   NULL, NULL,
2291   0, 0
2292 };
2293 
2294 /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register.  */
2295 
2296 static bool
2297 sets_likely_spilled (pat)
2298      rtx pat;
2299 {
2300   bool ret = false;
2301   note_stores (pat, sets_likely_spilled_1, &ret);
2302   return ret;
2303 }
2304 
2305 static void
2306 sets_likely_spilled_1 (x, pat, data)
2307      rtx x, pat;
2308      void *data;
2309 {
2310   bool *ret = (bool *) data;
2311 
2312   if (GET_CODE (pat) == SET
2313       && REG_P (x)
2314       && REGNO (x) < FIRST_PSEUDO_REGISTER
2315       && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
2316     *ret = true;
2317 }
2318 
2319 /* Add dependences so that branches are scheduled to run last in their
2320    block.  */
2321 
2322 static void
2323 add_branch_dependences (head, tail)
2324      rtx head, tail;
2325 {
2326   rtx insn, last;
2327 
2328   /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
2329      that can throw exceptions, force them to remain in order at the end of
2330      the block by adding dependencies and giving the last a high priority.
2331      There may be notes present, and prev_head may also be a note.
2332 
2333      Branches must obviously remain at the end.  Calls should remain at the
2334      end since moving them results in worse register allocation.  Uses remain
2335      at the end to ensure proper register allocation.
2336 
2337      cc0 setters remain at the end because they can't be moved away from
2338      their cc0 user.
2339 
2340      Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
2341      are not moved before reload because we can wind up with register
2342      allocation failures.  */
2343 
2344   insn = tail;
2345   last = 0;
2346   while (GET_CODE (insn) == CALL_INSN
2347 	 || GET_CODE (insn) == JUMP_INSN
2348 	 || (GET_CODE (insn) == INSN
2349 	     && (GET_CODE (PATTERN (insn)) == USE
2350 		 || GET_CODE (PATTERN (insn)) == CLOBBER
2351 		 || can_throw_internal (insn)
2352 #ifdef HAVE_cc0
2353 		 || sets_cc0_p (PATTERN (insn))
2354 #endif
2355 		 || (!reload_completed
2356 		     && sets_likely_spilled (PATTERN (insn)))))
2357 	 || GET_CODE (insn) == NOTE)
2358     {
2359       if (GET_CODE (insn) != NOTE)
2360 	{
2361 	  if (last != 0 && !find_insn_list (insn, LOG_LINKS (last)))
2362 	    {
2363 	      add_dependence (last, insn, REG_DEP_ANTI);
2364 	      INSN_REF_COUNT (insn)++;
2365 	    }
2366 
2367 	  CANT_MOVE (insn) = 1;
2368 
2369 	  last = insn;
2370 	  /* Skip over insns that are part of a group.
2371 	     Make each insn explicitly depend on the previous insn.
2372 	     This ensures that only the group header will ever enter
2373 	     the ready queue (and, when scheduled, will automatically
2374 	     schedule the SCHED_GROUP_P block).  */
2375 	  while (SCHED_GROUP_P (insn))
2376 	    {
2377 	      rtx temp = prev_nonnote_insn (insn);
2378 	      add_dependence (insn, temp, REG_DEP_ANTI);
2379 	      insn = temp;
2380 	    }
2381 	}
2382 
2383       /* Don't overrun the bounds of the basic block.  */
2384       if (insn == head)
2385 	break;
2386 
2387       insn = PREV_INSN (insn);
2388     }
2389 
2390   /* Make sure these insns are scheduled last in their block.  */
2391   insn = last;
2392   if (insn != 0)
2393     while (insn != head)
2394       {
2395 	insn = prev_nonnote_insn (insn);
2396 
2397 	if (INSN_REF_COUNT (insn) != 0)
2398 	  continue;
2399 
2400 	add_dependence (last, insn, REG_DEP_ANTI);
2401 	INSN_REF_COUNT (insn) = 1;
2402 
2403 	/* Skip over insns that are part of a group.  */
2404 	while (SCHED_GROUP_P (insn))
2405 	  insn = prev_nonnote_insn (insn);
2406       }
2407 }
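
/* For a hypothetical block ending in

     A                  (ordinary insn)
     B                  (ordinary insn)
     U   use of r0
     J   jump

   the first loop above walks back from J, marks J and U CANT_MOVE and
   adds an anti dependence making J depend on U, so the tail group keeps
   its order; the second loop then makes U (the earliest insn of that
   group) depend on B and A, provided their reference counts are still
   zero, so that the body of the block is scheduled before the tail
   group.  */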
2408 
2409 /* Data structures for the computation of data dependences in a region.  We
2410    keep one `deps' structure for every basic block.  Before analyzing the
2411    data dependences for a bb, its variables are initialized as a function of
2412    the variables of its predecessors.  When the analysis for a bb completes,
2413    we save the contents to the corresponding bb_deps[bb] variable.  */
2414 
2415 static struct deps *bb_deps;
2416 
2417 /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD.  */
2418 
2419 static rtx
2420 concat_INSN_LIST (copy, old)
2421      rtx copy, old;
2422 {
2423   rtx new = old;
2424   for (; copy ; copy = XEXP (copy, 1))
2425     new = alloc_INSN_LIST (XEXP (copy, 0), new);
2426   return new;
2427 }
2428 
2429 static void
2430 concat_insn_mem_list (copy_insns, copy_mems, old_insns_p, old_mems_p)
2431      rtx copy_insns, copy_mems;
2432      rtx *old_insns_p, *old_mems_p;
2433 {
2434   rtx new_insns = *old_insns_p;
2435   rtx new_mems = *old_mems_p;
2436 
2437   while (copy_insns)
2438     {
2439       new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
2440       new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
2441       copy_insns = XEXP (copy_insns, 1);
2442       copy_mems = XEXP (copy_mems, 1);
2443     }
2444 
2445   *old_insns_p = new_insns;
2446   *old_mems_p = new_mems;
2447 }
2448 
2449 /* After computing the dependencies for block BB, propagate the dependencies
2450    found in PRED_DEPS to the successors of the block.  */
2451 static void
2452 propagate_deps (bb, pred_deps)
2453      int bb;
2454      struct deps *pred_deps;
2455 {
2456   int b = BB_TO_BLOCK (bb);
2457   int e, first_edge;
2458 
2459   /* bb's structures are inherited by its successors.  */
2460   first_edge = e = OUT_EDGES (b);
2461   if (e > 0)
2462     do
2463       {
2464 	int b_succ = TO_BLOCK (e);
2465 	int bb_succ = BLOCK_TO_BB (b_succ);
2466 	struct deps *succ_deps = bb_deps + bb_succ;
2467 	int reg;
2468 
2469 	/* Only bbs "below" bb, in the same region, are interesting.  */
2470 	if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
2471 	    || bb_succ <= bb)
2472 	  {
2473 	    e = NEXT_OUT (e);
2474 	    continue;
2475 	  }
2476 
2477 	/* The reg_last lists are inherited by bb_succ.  */
2478 	EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg,
2479 	  {
2480 	    struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2481 	    struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2482 
2483 	    succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2484 	    succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2485 	    succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2486 						  succ_rl->clobbers);
2487 	    succ_rl->uses_length += pred_rl->uses_length;
2488 	    succ_rl->clobbers_length += pred_rl->clobbers_length;
2489 	  });
2490 	IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2491 
2492 	/* Mem read/write lists are inherited by bb_succ.  */
2493 	concat_insn_mem_list (pred_deps->pending_read_insns,
2494 			      pred_deps->pending_read_mems,
2495 			      &succ_deps->pending_read_insns,
2496 			      &succ_deps->pending_read_mems);
2497 	concat_insn_mem_list (pred_deps->pending_write_insns,
2498 			      pred_deps->pending_write_mems,
2499 			      &succ_deps->pending_write_insns,
2500 			      &succ_deps->pending_write_mems);
2501 
2502 	succ_deps->last_pending_memory_flush
2503 	  = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2504 			      succ_deps->last_pending_memory_flush);
2505 
2506 	succ_deps->pending_lists_length += pred_deps->pending_lists_length;
2507 	succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2508 
2509 	/* last_function_call is inherited by bb_succ.  */
2510 	succ_deps->last_function_call
2511 	  = concat_INSN_LIST (pred_deps->last_function_call,
2512 			      succ_deps->last_function_call);
2513 
2514 	/* sched_before_next_call is inherited by bb_succ.  */
2515 	succ_deps->sched_before_next_call
2516 	  = concat_INSN_LIST (pred_deps->sched_before_next_call,
2517 			      succ_deps->sched_before_next_call);
2518 
2519 	e = NEXT_OUT (e);
2520       }
2521     while (e != first_edge);
2522 
2523   /* These lists should point to the right place, for correct
2524      freeing later.  */
2525   bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2526   bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2527   bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2528   bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2529 
2530   /* Can't allow these to be freed twice.  */
2531   pred_deps->pending_read_insns = 0;
2532   pred_deps->pending_read_mems = 0;
2533   pred_deps->pending_write_insns = 0;
2534   pred_deps->pending_write_mems = 0;
2535 }
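
/* The effect, sketched for a two-block region { bb0, bb1 } with an edge
   bb0 -> bb1 (register numbers invented for illustration): after bb0 is
   analyzed, an insn in bb0 that sets (reg 100) appears on
   bb_deps[1].reg_last[100].sets, and a pending store through (reg 101)
   appears on bb1's pending_write_insns list.  When sched_analyze ()
   later runs on bb1, its insns therefore acquire true/anti/output
   dependences on those bb0 insns too, which is what makes interblock
   motion within the region safe.  */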
2536 
2537 /* Compute backward dependences inside bb.  In a multiple blocks region:
2538    (1) a bb is analyzed after its predecessors, and (2) the lists in
2539    effect at the end of bb (after analyzing for bb) are inherited by
2540    bb's successors.
2541 
2542    Specifically for reg-reg data dependences, the block insns are
2543    scanned by sched_analyze () top-to-bottom.  Two lists are
2544    maintained by sched_analyze (): reg_last[].sets for register DEFs,
2545    and reg_last[].uses for register USEs.
2546 
2547    When analysis is completed for bb, we update for its successors:
2548    ;  - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2549    ;  - USES[succ] = Union (USES [succ], DEFS [bb])
2550 
2551    The mechanism for computing mem-mem data dependence is very
2552    similar, and the result is interblock dependences in the region.  */
2553 
2554 static void
2555 compute_block_backward_dependences (bb)
2556      int bb;
2557 {
2558   rtx head, tail;
2559   struct deps tmp_deps;
2560 
2561   tmp_deps = bb_deps[bb];
2562 
2563   /* Do the analysis for this block.  */
2564   get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2565   sched_analyze (&tmp_deps, head, tail);
2566   add_branch_dependences (head, tail);
2567 
2568   if (current_nr_blocks > 1)
2569     propagate_deps (bb, &tmp_deps);
2570 
2571   /* Free up the INSN_LISTs.  */
2572   free_deps (&tmp_deps);
2573 }
2574 
2575 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2576    them to the unused_*_list variables, so that they can be reused.  */
2577 
2578 static void
2579 free_pending_lists ()
2580 {
2581   int bb;
2582 
2583   for (bb = 0; bb < current_nr_blocks; bb++)
2584     {
2585       free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2586       free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2587       free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2588       free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2589     }
2590 }
2591 
2592 /* Print dependences for debugging, callable from debugger.  */
2593 
2594 void
2595 debug_dependencies ()
2596 {
2597   int bb;
2598 
2599   fprintf (sched_dump, ";;   --------------- forward dependences: ------------ \n");
2600   for (bb = 0; bb < current_nr_blocks; bb++)
2601     {
2602       if (1)
2603 	{
2604 	  rtx head, tail;
2605 	  rtx next_tail;
2606 	  rtx insn;
2607 
2608 	  get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2609 	  next_tail = NEXT_INSN (tail);
2610 	  fprintf (sched_dump, "\n;;   --- Region Dependences --- b %d bb %d \n",
2611 		   BB_TO_BLOCK (bb), bb);
2612 
2613 	  if (targetm.sched.use_dfa_pipeline_interface
2614 	      && (*targetm.sched.use_dfa_pipeline_interface) ())
2615 	    {
2616 	      fprintf (sched_dump, ";;   %7s%6s%6s%6s%6s%6s%14s\n",
2617 		       "insn", "code", "bb", "dep", "prio", "cost",
2618 		       "reservation");
2619 	      fprintf (sched_dump, ";;   %7s%6s%6s%6s%6s%6s%14s\n",
2620 		       "----", "----", "--", "---", "----", "----",
2621 		       "-----------");
2622 	    }
2623 	  else
2624 	    {
2625 	      fprintf (sched_dump, ";;   %7s%6s%6s%6s%6s%6s%11s%6s\n",
2626 	      "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
2627 	      fprintf (sched_dump, ";;   %7s%6s%6s%6s%6s%6s%11s%6s\n",
2628 	      "----", "----", "--", "---", "----", "----", "--------", "-----");
2629 	    }
2630 
2631 	  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2632 	    {
2633 	      rtx link;
2634 
2635 	      if (! INSN_P (insn))
2636 		{
2637 		  int n;
2638 		  fprintf (sched_dump, ";;   %6d ", INSN_UID (insn));
2639 		  if (GET_CODE (insn) == NOTE)
2640 		    {
2641 		      n = NOTE_LINE_NUMBER (insn);
2642 		      if (n < 0)
2643 			fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2644 		      else
2645 			fprintf (sched_dump, "line %d, file %s\n", n,
2646 				 NOTE_SOURCE_FILE (insn));
2647 		    }
2648 		  else
2649 		    fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2650 		  continue;
2651 		}
2652 
2653 	      if (targetm.sched.use_dfa_pipeline_interface
2654 		  && (*targetm.sched.use_dfa_pipeline_interface) ())
2655 		{
2656 		  fprintf (sched_dump,
2657 			   ";;   %s%5d%6d%6d%6d%6d%6d   ",
2658 			   (SCHED_GROUP_P (insn) ? "+" : " "),
2659 			   INSN_UID (insn),
2660 			   INSN_CODE (insn),
2661 			   INSN_BB (insn),
2662 			   INSN_DEP_COUNT (insn),
2663 			   INSN_PRIORITY (insn),
2664 			   insn_cost (insn, 0, 0));
2665 
2666 		  if (recog_memoized (insn) < 0)
2667 		    fprintf (sched_dump, "nothing");
2668 		  else
2669 		    print_reservation (sched_dump, insn);
2670 		}
2671 	      else
2672 		{
2673 		  int unit = insn_unit (insn);
2674 		  int range
2675 		    = (unit < 0
2676 		       || function_units[unit].blockage_range_function == 0
2677 		       ? 0
2678 		       : function_units[unit].blockage_range_function (insn));
2679 		  fprintf (sched_dump,
2680 			   ";;   %s%5d%6d%6d%6d%6d%6d  %3d -%3d   ",
2681 			   (SCHED_GROUP_P (insn) ? "+" : " "),
2682 			   INSN_UID (insn),
2683 			   INSN_CODE (insn),
2684 			   INSN_BB (insn),
2685 			   INSN_DEP_COUNT (insn),
2686 			   INSN_PRIORITY (insn),
2687 			   insn_cost (insn, 0, 0),
2688 			   (int) MIN_BLOCKAGE_COST (range),
2689 			   (int) MAX_BLOCKAGE_COST (range));
2690 		  insn_print_units (insn);
2691 		}
2692 
2693 	      fprintf (sched_dump, "\t: ");
2694 	      for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2695 		fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2696 	      fprintf (sched_dump, "\n");
2697 	    }
2698 	}
2699     }
2700   fprintf (sched_dump, "\n");
2701 }
2702 
2703 /* Schedule a region.  A region is either an inner loop, a loop-free
2704    subroutine, or a single basic block.  Each bb in the region is
2705    scheduled after its flow predecessors.  */
2706 
2707 static void
2708 schedule_region (rgn)
2709      int rgn;
2710 {
2711   int bb;
2712   int rgn_n_insns = 0;
2713   int sched_rgn_n_insns = 0;
2714 
2715   /* Set variables for the current region.  */
2716   current_nr_blocks = RGN_NR_BLOCKS (rgn);
2717   current_blocks = RGN_BLOCKS (rgn);
2718 
2719   init_deps_global ();
2720 
2721   /* Initializations for region data dependence analysis.  */
2722   bb_deps = (struct deps *) xmalloc (sizeof (struct deps) * current_nr_blocks);
2723   for (bb = 0; bb < current_nr_blocks; bb++)
2724     init_deps (bb_deps + bb);
2725 
2726   /* Compute LOG_LINKS.  */
2727   for (bb = 0; bb < current_nr_blocks; bb++)
2728     compute_block_backward_dependences (bb);
2729 
2730   /* Compute INSN_DEPEND.  */
2731   for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2732     {
2733       rtx head, tail;
2734       get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2735 
2736       compute_forward_dependences (head, tail);
2737     }
2738 
2739   /* Set priorities.  */
2740   for (bb = 0; bb < current_nr_blocks; bb++)
2741     {
2742       rtx head, tail;
2743       get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2744 
2745       rgn_n_insns += set_priorities (head, tail);
2746     }
2747 
2748   /* Compute interblock info: probabilities, split-edges, dominators, etc.  */
2749   if (current_nr_blocks > 1)
2750     {
2751       int i;
2752 
2753       prob = (float *) xmalloc ((current_nr_blocks) * sizeof (float));
2754 
2755       dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
2756       sbitmap_vector_zero (dom, current_nr_blocks);
2757       /* Edge to bit.  */
2758       rgn_nr_edges = 0;
2759       edge_to_bit = (int *) xmalloc (nr_edges * sizeof (int));
2760       for (i = 1; i < nr_edges; i++)
2761 	if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
2762 	  EDGE_TO_BIT (i) = rgn_nr_edges++;
2763       rgn_edges = (int *) xmalloc (rgn_nr_edges * sizeof (int));
2764 
2765       rgn_nr_edges = 0;
2766       for (i = 1; i < nr_edges; i++)
2767 	if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
2768 	  rgn_edges[rgn_nr_edges++] = i;
2769 
2770       /* Split edges.  */
2771       pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2772       sbitmap_vector_zero (pot_split, current_nr_blocks);
2773       ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2774       sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
2775 
2776       /* Compute probabilities, dominators, split_edges.  */
2777       for (bb = 0; bb < current_nr_blocks; bb++)
2778 	compute_dom_prob_ps (bb);
2779     }
2780 
2781   /* Now we can schedule all blocks.  */
2782   for (bb = 0; bb < current_nr_blocks; bb++)
2783     {
2784       rtx head, tail;
2785       int b = BB_TO_BLOCK (bb);
2786 
2787       get_block_head_tail (b, &head, &tail);
2788 
2789       if (no_real_insns_p (head, tail))
2790 	continue;
2791 
2792       current_sched_info->prev_head = PREV_INSN (head);
2793       current_sched_info->next_tail = NEXT_INSN (tail);
2794 
2795       if (write_symbols != NO_DEBUG)
2796 	{
2797 	  save_line_notes (b, head, tail);
2798 	  rm_line_notes (head, tail);
2799 	}
2800 
2801       /* rm_other_notes only removes notes which are _inside_ the
2802 	 block---that is, it won't remove notes before the first real insn
2803  	 or after the last real insn of the block.  So if the first insn
2804 	 has a REG_SAVE_NOTE which would otherwise be emitted before the
2805 	 insn, it is redundant with the note before the start of the
2806 	 block, and so we have to take it out.  */
2807       if (INSN_P (head))
2808 	{
2809 	  rtx note;
2810 
2811 	  for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2812 	    if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2813 	      {
2814 		remove_note (head, note);
2815 		note = XEXP (note, 1);
2816 		remove_note (head, note);
2817 	      }
2818 	}
2819 
2820       /* Remove remaining note insns from the block, save them in
2821 	 note_list.  These notes are restored at the end of
2822 	 schedule_block ().  */
2823       rm_other_notes (head, tail);
2824 
2825       target_bb = bb;
2826 
2827       current_sched_info->queue_must_finish_empty
2828 	= current_nr_blocks > 1 && !flag_schedule_interblock;
2829 
2830       schedule_block (b, rgn_n_insns);
2831       sched_rgn_n_insns += sched_n_insns;
2832 
2833       /* Update target block boundaries.  */
2834       if (head == BLOCK_HEAD (b))
2835 	BLOCK_HEAD (b) = current_sched_info->head;
2836       if (tail == BLOCK_END (b))
2837 	BLOCK_END (b) = current_sched_info->tail;
2838 
2839       /* Clean up.  */
2840       if (current_nr_blocks > 1)
2841 	{
2842 	  free (candidate_table);
2843 	  free (bblst_table);
2844 	  free (bitlst_table);
2845 	}
2846     }
2847 
2848   /* Sanity check: verify that all region insns were scheduled.  */
2849   if (sched_rgn_n_insns != rgn_n_insns)
2850     abort ();
2851 
2852   /* Restore line notes.  */
2853   if (write_symbols != NO_DEBUG)
2854     {
2855       for (bb = 0; bb < current_nr_blocks; bb++)
2856 	{
2857 	  rtx head, tail;
2858 	  get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2859 	  restore_line_notes (head, tail);
2860 	}
2861     }
2862 
2863   /* Done with this region.  */
2864   free_pending_lists ();
2865 
2866   finish_deps_global ();
2867 
2868   free (bb_deps);
2869 
2870   if (current_nr_blocks > 1)
2871     {
2872       free (prob);
2873       sbitmap_vector_free (dom);
2874       sbitmap_vector_free (pot_split);
2875       sbitmap_vector_free (ancestor_edges);
2876       free (edge_to_bit);
2877       free (rgn_edges);
2878     }
2879 }
2880 
2881 /* Indexed by region, holds the number of death notes found in that region.
2882    Used for consistency checks.  */
2883 static int *deaths_in_region;
2884 
2885 /* Initialize data structures for region scheduling.  */
2886 
2887 static void
2888 init_regions ()
2889 {
2890   sbitmap blocks;
2891   int rgn;
2892 
2893   nr_regions = 0;
2894   rgn_table = (region *) xmalloc ((n_basic_blocks) * sizeof (region));
2895   rgn_bb_table = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2896   block_to_bb = (int *) xmalloc ((last_basic_block) * sizeof (int));
2897   containing_rgn = (int *) xmalloc ((last_basic_block) * sizeof (int));
2898 
2899   /* Compute regions for scheduling.  */
2900   if (reload_completed
2901       || n_basic_blocks == 1
2902       || !flag_schedule_interblock)
2903     {
2904       find_single_block_region ();
2905     }
2906   else
2907     {
2908       /* Verify that a 'good' control flow graph can be built.  */
2909       if (is_cfg_nonregular ())
2910 	{
2911 	  find_single_block_region ();
2912 	}
2913       else
2914 	{
2915 	  dominance_info dom;
2916 	  struct edge_list *edge_list;
2917 
2918 	  /* The scheduler runs after flow; therefore, we can't blindly call
2919 	     back into find_basic_blocks since doing so could invalidate the
2920 	     info in global_live_at_start.
2921 
2922 	     Consider a block consisting entirely of dead stores; after life
2923 	     analysis it would be a block of NOTE_INSN_DELETED notes.  If
2924 	     we call find_basic_blocks again, then the block would be removed
2925 	     entirely and invalidate our register live information.
2926 
2927 	     We could (should?) recompute register live information.  Doing
2928 	     so may even be beneficial.  */
2929 	  edge_list = create_edge_list ();
2930 
2931 	  /* Compute the dominators and post dominators.  */
2932 	  dom = calculate_dominance_info (CDI_DOMINATORS);
2933 
2934 	  /* build_control_flow will return nonzero if it detects unreachable
2935 	     blocks or any other irregularity with the cfg which prevents
2936 	     cross block scheduling.  */
2937 	  if (build_control_flow (edge_list) != 0)
2938 	    find_single_block_region ();
2939 	  else
2940 	    find_rgns (edge_list, dom);
2941 
2942 	  if (sched_verbose >= 3)
2943 	    debug_regions ();
2944 
2945 	  /* We are done with flow's edge list.  */
2946 	  free_edge_list (edge_list);
2947 
2948 	  /* For now.  This will move as more and more of haifa is converted
2949 	     to using the cfg code in flow.c.  */
2950 	  free_dominance_info (dom);
2951 	}
2952     }
2953 
2954 
2955   if (CHECK_DEAD_NOTES)
2956     {
2957       blocks = sbitmap_alloc (last_basic_block);
2958       deaths_in_region = (int *) xmalloc (sizeof (int) * nr_regions);
2959       /* Remove all death notes from the subroutine.  */
2960       for (rgn = 0; rgn < nr_regions; rgn++)
2961 	{
2962 	  int b;
2963 
2964 	  sbitmap_zero (blocks);
2965 	  for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2966 	    SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
2967 
2968 	  deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2969 	}
2970       sbitmap_free (blocks);
2971     }
2972   else
2973     count_or_remove_death_notes (NULL, 1);
2974 }
2975 
2976 /* The one entry point in this file.  DUMP_FILE is the dump file for
2977    this pass.  */
2978 
2979 void
2980 schedule_insns (dump_file)
2981      FILE *dump_file;
2982 {
2983   sbitmap large_region_blocks, blocks;
2984   int rgn;
2985   int any_large_regions;
2986   basic_block bb;
2987 
2988   /* Taking care of this degenerate case makes the rest of
2989      this code simpler.  */
2990   if (n_basic_blocks == 0)
2991     return;
2992 
2993   nr_inter = 0;
2994   nr_spec = 0;
2995 
2996   sched_init (dump_file);
2997 
2998   init_regions ();
2999 
3000   current_sched_info = &region_sched_info;
3001 
3002   /* Schedule every region in the subroutine.  */
3003   for (rgn = 0; rgn < nr_regions; rgn++)
3004     schedule_region (rgn);
3005 
3006   /* Update life analysis for the subroutine.  Do single block regions
3007      first so that we can verify that live_at_start didn't change.  Then
3008      do all other blocks.  */
3009   /* ??? There is an outside possibility that update_life_info, or more
3010      to the point propagate_block, could get called with nonzero flags
3011      more than once for one basic block.  This would be kinda bad if it
3012      were to happen, since REG_INFO would be accumulated twice for the
3013      block, and we'd have twice the REG_DEAD notes.
3014 
3015      I'm fairly certain that this _shouldn't_ happen, since I don't think
3016      that live_at_start should change at region heads.  Not sure what the
3017      best way to test for this kind of thing would be.  */
3018 
3019   allocate_reg_life_data ();
3020   compute_bb_for_insn ();
3021 
3022   any_large_regions = 0;
3023   large_region_blocks = sbitmap_alloc (last_basic_block);
3024   sbitmap_zero (large_region_blocks);
3025   FOR_EACH_BB (bb)
3026     SET_BIT (large_region_blocks, bb->index);
3027 
3028   blocks = sbitmap_alloc (last_basic_block);
3029   sbitmap_zero (blocks);
3030 
3031   /* Update life information.  For regions consisting of multiple blocks
3032      we've possibly done interblock scheduling that affects global liveness.
3033      For regions consisting of single blocks we need to do only local
3034      liveness.  */
3035   for (rgn = 0; rgn < nr_regions; rgn++)
3036     if (RGN_NR_BLOCKS (rgn) > 1)
3037       any_large_regions = 1;
3038     else
3039       {
3040 	SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3041 	RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3042       }
3043 
3044   /* Don't update reg info after reload, since that affects
3045      regs_ever_live, which should not change after reload.  */
3046   update_life_info (blocks, UPDATE_LIFE_LOCAL,
3047 		    (reload_completed ? PROP_DEATH_NOTES
3048 		     : PROP_DEATH_NOTES | PROP_REG_INFO));
3049   if (any_large_regions)
3050     {
3051       update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
3052 			PROP_DEATH_NOTES | PROP_REG_INFO);
3053     }
3054 
3055   if (CHECK_DEAD_NOTES)
3056     {
3057       /* Verify the counts of death notes in the single basic block
3058          regions.  */
3059       for (rgn = 0; rgn < nr_regions; rgn++)
3060 	if (RGN_NR_BLOCKS (rgn) == 1)
3061 	  {
3062 	    sbitmap_zero (blocks);
3063 	    SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3064 
3065 	    if (deaths_in_region[rgn]
3066 		!= count_or_remove_death_notes (blocks, 0))
3067 	      abort ();
3068 	  }
3069       free (deaths_in_region);
3070     }
3071 
3072   /* Reposition the prologue and epilogue notes in case we moved the
3073      prologue/epilogue insns.  */
3074   if (reload_completed)
3075     reposition_prologue_and_epilogue_notes (get_insns ());
3076 
3077   /* Delete redundant line notes.  */
3078   if (write_symbols != NO_DEBUG)
3079     rm_redundant_line_notes ();
3080 
3081   if (sched_verbose)
3082     {
3083       if (reload_completed == 0 && flag_schedule_interblock)
3084 	{
3085 	  fprintf (sched_dump,
3086 		   "\n;; Procedure interblock/speculative motions == %d/%d \n",
3087 		   nr_inter, nr_spec);
3088 	}
3089       else
3090 	{
3091 	  if (nr_inter > 0)
3092 	    abort ();
3093 	}
3094       fprintf (sched_dump, "\n\n");
3095     }
3096 
3097   /* Clean up.  */
3098   free (rgn_table);
3099   free (rgn_bb_table);
3100   free (block_to_bb);
3101   free (containing_rgn);
3102 
3103   sched_finish ();
3104 
3105   if (edge_table)
3106     {
3107       free (edge_table);
3108       edge_table = NULL;
3109     }
3110 
3111   if (in_edges)
3112     {
3113       free (in_edges);
3114       in_edges = NULL;
3115     }
3116   if (out_edges)
3117     {
3118       free (out_edges);
3119       out_edges = NULL;
3120     }
3121 
3122   sbitmap_free (blocks);
3123   sbitmap_free (large_region_blocks);
3124 }
3125 #endif
3126