1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 2, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to the Free
21 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 02111-1307, USA. */
23
24 /* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
27
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
31
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
37
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
47
48 #include "config.h"
49 #include "system.h"
50 #include "toplev.h"
51 #include "rtl.h"
52 #include "tm_p.h"
53 #include "hard-reg-set.h"
54 #include "basic-block.h"
55 #include "regs.h"
56 #include "function.h"
57 #include "flags.h"
58 #include "insn-config.h"
59 #include "insn-attr.h"
60 #include "except.h"
61 #include "toplev.h"
62 #include "recog.h"
63 #include "cfglayout.h"
64 #include "sched-int.h"
65 #include "target.h"
66
67 /* Define when we want to count REG_DEAD notes before and after scheduling
68 for sanity checking. We can't do that when conditional execution is used,
69 as REG_DEAD notes exist only for unconditional deaths. */
70
71 #if !defined (HAVE_conditional_execution) && defined (ENABLE_CHECKING)
72 #define CHECK_DEAD_NOTES 1
73 #else
74 #define CHECK_DEAD_NOTES 0
75 #endif
76
77
78 #ifdef INSN_SCHEDULING
79 /* Some accessor macros for h_i_d members only used within this file. */
80 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
81 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
82 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
83
84 #define MAX_RGN_BLOCKS 10
85 #define MAX_RGN_INSNS 100
86
87 /* nr_inter/spec counts interblock/speculative motion for the function. */
88 static int nr_inter, nr_spec;
89
90 /* Control flow graph edges are kept in circular lists. */
91 typedef struct
92 {
93 int from_block;
94 int to_block;
95 int next_in;
96 int next_out;
97 }
98 haifa_edge;
99 static haifa_edge *edge_table;
100
101 #define NEXT_IN(edge) (edge_table[edge].next_in)
102 #define NEXT_OUT(edge) (edge_table[edge].next_out)
103 #define FROM_BLOCK(edge) (edge_table[edge].from_block)
104 #define TO_BLOCK(edge) (edge_table[edge].to_block)
105
106 /* Number of edges in the control flow graph. (In fact, larger than
107 that by 1, since edge 0 is unused.) */
108 static int nr_edges;
109
110 /* Circular list of incoming/outgoing edges of a block. */
111 static int *in_edges;
112 static int *out_edges;
113
114 #define IN_EDGES(block) (in_edges[block])
115 #define OUT_EDGES(block) (out_edges[block])
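
/* For illustration: since both lists are circular, a typical walk over the
   out edges of a block looks like the do/while loops in compute_trg_info
   and compute_dom_prob_ps below (an empty list is a zero entry, since
   edge 0 is unused):

     fst_edge = nxt_edge = OUT_EDGES (block);
     do
       {
         ... look at FROM_BLOCK (nxt_edge) and TO_BLOCK (nxt_edge) ...
         nxt_edge = NEXT_OUT (nxt_edge);
       }
     while (fst_edge != nxt_edge);  */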
116
117 static int is_cfg_nonregular PARAMS ((void));
118 static int build_control_flow PARAMS ((struct edge_list *));
119 static void new_edge PARAMS ((int, int));
120
121 /* A region is the main entity for interblock scheduling: insns
122 are allowed to move between blocks in the same region, along
123 control flow graph edges, in the 'up' direction. */
124 typedef struct
125 {
126 int rgn_nr_blocks; /* Number of blocks in region. */
127 int rgn_blocks; /* Index in rgn_bb_table of the region's first block. */
128 }
129 region;
130
131 /* Number of regions in the procedure. */
132 static int nr_regions;
133
134 /* Table of region descriptions. */
135 static region *rgn_table;
136
137 /* Array of lists of regions' blocks. */
138 static int *rgn_bb_table;
139
140 /* Topological order of blocks in the region (if b2 is reachable from
141 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
142 always referred to by either block or b, while its topological
143 order name (in the region) is referred to by bb. */
144 static int *block_to_bb;
145
146 /* The number of the region containing a block. */
147 static int *containing_rgn;
148
149 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
150 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
151 #define BLOCK_TO_BB(block) (block_to_bb[block])
152 #define CONTAINING_RGN(block) (containing_rgn[block])
153
154 void debug_regions PARAMS ((void));
155 static void find_single_block_region PARAMS ((void));
156 static void find_rgns PARAMS ((struct edge_list *, dominance_info));
157 static int too_large PARAMS ((int, int *, int *));
158
159 extern void debug_live PARAMS ((int, int));
160
161 /* Blocks of the current region being scheduled. */
162 static int current_nr_blocks;
163 static int current_blocks;
164
165 /* The mapping from bb to block. */
166 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
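
/* For example, bb 0 is always the region entry block, and for every bb in
   the current region BLOCK_TO_BB (BB_TO_BLOCK (bb)) == bb; this invariant
   is checked in debug_regions below.  */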
167
168 typedef struct
169 {
170 int *first_member; /* Pointer to the list start in bitlst_table. */
171 int nr_members; /* The number of members of the bit list. */
172 }
173 bitlst;
174
175 static int bitlst_table_last;
176 static int bitlst_table_size;
177 static int *bitlst_table;
178
179 static void extract_bitlst PARAMS ((sbitmap, bitlst *));
180
181 /* Target info declarations.
182
183 The block currently being scheduled is referred to as the "target" block,
184 while other blocks in the region from which insns can be moved to the
185 target are called "source" blocks. The candidate structure holds info
186 about such sources: are they valid? Speculative? Etc. */
187 typedef bitlst bblst;
188 typedef struct
189 {
190 char is_valid;
191 char is_speculative;
192 int src_prob;
193 bblst split_bbs;
194 bblst update_bbs;
195 }
196 candidate;
197
198 static candidate *candidate_table;
199
200 /* A speculative motion requires checking live information on the path
201 from 'source' to 'target'. The split blocks are those to be checked.
202 After a speculative motion, live information should be modified in
203 the 'update' blocks.
204
205 Lists of split and update blocks for each candidate of the current
206 target are in array bblst_table. */
207 static int *bblst_table, bblst_size, bblst_last;
208
209 #define IS_VALID(src) ( candidate_table[src].is_valid )
210 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
211 #define SRC_PROB(src) ( candidate_table[src].src_prob )
212
213 /* The bb being currently scheduled. */
214 static int target_bb;
215
216 /* List of edges. */
217 typedef bitlst edgelst;
218
219 /* Target info functions. */
220 static void split_edges PARAMS ((int, int, edgelst *));
221 static void compute_trg_info PARAMS ((int));
222 void debug_candidate PARAMS ((int));
223 void debug_candidates PARAMS ((int));
224
225 /* Dominators array: dom[i] contains the sbitmap of dominators of
226 bb i in the region. */
227 static sbitmap *dom;
228
229 /* bb 0 is the only region entry. */
230 #define IS_RGN_ENTRY(bb) (!bb)
231
232 /* Is bb_src dominated by bb_trg. */
233 #define IS_DOMINATED(bb_src, bb_trg) \
234 ( TEST_BIT (dom[bb_src], bb_trg) )
235
236 /* Probability: Prob[i] is a float in [0, 1] which is the probability
237 of bb i relative to the region entry. */
238 static float *prob;
239
240 /* The probability of bb_src, relative to bb_trg. Note, that while the
241 'prob[bb]' is a float in [0, 1], this macro returns an integer
242 in [0, 100]. */
243 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
244 prob[bb_trg])))
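
/* For example, if prob[bb_src] == 0.3 and prob[bb_trg] == 0.6, then
   GET_SRC_PROB (bb_src, bb_trg) is (int) (100.0 * 0.5) == 50, i.e.
   bb_src is expected to execute about half as often as bb_trg.  */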
245
246 /* Bit-set of edges, where bit i stands for edge i. */
247 typedef sbitmap edgeset;
248
249 /* Number of edges in the region. */
250 static int rgn_nr_edges;
251
252 /* Array of size rgn_nr_edges. */
253 static int *rgn_edges;
254
255
256 /* Mapping from each edge in the graph to its number in the rgn. */
257 static int *edge_to_bit;
258 #define EDGE_TO_BIT(edge) (edge_to_bit[edge])
259
260 /* The split edges of a source bb is different for each target
261 bb. In order to compute this efficiently, the 'potential-split edges'
262 are computed for each bb prior to scheduling a region. This is actually
263 the split edges of each bb relative to the region entry.
264
265 pot_split[bb] is the set of potential split edges of bb. */
266 static edgeset *pot_split;
267
268 /* For every bb, a set of its ancestor edges. */
269 static edgeset *ancestor_edges;
270
271 static void compute_dom_prob_ps PARAMS ((int));
272
273 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
274 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
275 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
276
277 /* Parameters affecting the decision of rank_for_schedule().
278 ??? Nope. But MIN_PROBABILITY is used in compute_trg_info. */
279 #define MIN_PROBABILITY 40
280
281 /* Speculative scheduling functions. */
282 static int check_live_1 PARAMS ((int, rtx));
283 static void update_live_1 PARAMS ((int, rtx));
284 static int check_live PARAMS ((rtx, int));
285 static void update_live PARAMS ((rtx, int));
286 static void set_spec_fed PARAMS ((rtx));
287 static int is_pfree PARAMS ((rtx, int, int));
288 static int find_conditional_protection PARAMS ((rtx, int));
289 static int is_conditionally_protected PARAMS ((rtx, int, int));
290 static int may_trap_exp PARAMS ((rtx, int));
291 static int haifa_classify_insn PARAMS ((rtx));
292 static int is_prisky PARAMS ((rtx, int, int));
293 static int is_exception_free PARAMS ((rtx, int, int));
294
295 static bool sets_likely_spilled PARAMS ((rtx));
296 static void sets_likely_spilled_1 PARAMS ((rtx, rtx, void *));
297 static void add_branch_dependences PARAMS ((rtx, rtx));
298 static void compute_block_backward_dependences PARAMS ((int));
299 void debug_dependencies PARAMS ((void));
300
301 static void init_regions PARAMS ((void));
302 static void schedule_region PARAMS ((int));
303 static rtx concat_INSN_LIST PARAMS ((rtx, rtx));
304 static void concat_insn_mem_list PARAMS ((rtx, rtx, rtx *, rtx *));
305 static void propagate_deps PARAMS ((int, struct deps *));
306 static void free_pending_lists PARAMS ((void));
307
308 /* Functions for construction of the control flow graph. */
309
310 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
311
312 We decide not to build the control flow graph if there is possibly more
313 than one entry to the function, if computed branches exist, or if we
314 have nonlocal gotos. */
315
316 static int
317 is_cfg_nonregular ()
318 {
319 basic_block b;
320 rtx insn;
321 RTX_CODE code;
322
323 /* If we have a label that could be the target of a nonlocal goto, then
324 the cfg is not well structured. */
325 if (nonlocal_goto_handler_labels)
326 return 1;
327
328 /* If we have any forced labels, then the cfg is not well structured. */
329 if (forced_labels)
330 return 1;
331
332 /* If this function has a computed jump, then we consider the cfg
333 not well structured. */
334 if (current_function_has_computed_jump)
335 return 1;
336
337 /* If we have exception handlers, then we consider the cfg not well
338 structured. ?!? We should be able to handle this now that flow.c
339 computes an accurate cfg for EH. */
340 if (current_function_has_exception_handlers ())
341 return 1;
342
343 /* If we have non-jumping insns which refer to labels, then we consider
344 the cfg not well structured. */
345 /* Check for labels referred to by something other than jumps. */
346 FOR_EACH_BB (b)
347 for (insn = b->head;; insn = NEXT_INSN (insn))
348 {
349 code = GET_CODE (insn);
350 if (GET_RTX_CLASS (code) == 'i' && code != JUMP_INSN)
351 {
352 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
353
354 if (note
355 && ! (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
356 && find_reg_note (NEXT_INSN (insn), REG_LABEL,
357 XEXP (note, 0))))
358 return 1;
359 }
360
361 if (insn == b->end)
362 break;
363 }
364
365 /* All the tests passed. Consider the cfg well structured. */
366 return 0;
367 }
368
369 /* Build the control flow graph and set nr_edges.
370
371 Instead of trying to build a cfg ourselves, we rely on flow to
372 do it for us. Stamp out useless code (and bug) duplication.
373
374 Return nonzero if an irregularity in the cfg is found which would
375 prevent cross block scheduling. */
376
377 static int
378 build_control_flow (edge_list)
379 struct edge_list *edge_list;
380 {
381 int i, unreachable, num_edges;
382 basic_block b;
383
384 /* This already accounts for entry/exit edges. */
385 num_edges = NUM_EDGES (edge_list);
386
387 /* Unreachable loops with more than one basic block are detected
388 during the DFS traversal in find_rgns.
389
390 Unreachable loops with a single block are detected here. This
391 test is redundant with the one in find_rgns, but it's much
392 cheaper to go ahead and catch the trivial case here. */
393 unreachable = 0;
394 FOR_EACH_BB (b)
395 {
396 if (b->pred == NULL
397 || (b->pred->src == b
398 && b->pred->pred_next == NULL))
399 unreachable = 1;
400 }
401
402 /* ??? We can kill these soon. */
403 in_edges = (int *) xcalloc (last_basic_block, sizeof (int));
404 out_edges = (int *) xcalloc (last_basic_block, sizeof (int));
405 edge_table = (haifa_edge *) xcalloc (num_edges, sizeof (haifa_edge));
406
407 nr_edges = 0;
408 for (i = 0; i < num_edges; i++)
409 {
410 edge e = INDEX_EDGE (edge_list, i);
411
412 if (e->dest != EXIT_BLOCK_PTR
413 && e->src != ENTRY_BLOCK_PTR)
414 new_edge (e->src->index, e->dest->index);
415 }
416
417 /* Increment by 1, since edge 0 is unused. */
418 nr_edges++;
419
420 return unreachable;
421 }
422
423 /* Record an edge in the control flow graph from SOURCE to TARGET.
424
425 In theory, this is redundant with the s_succs computed above, but
426 we have not converted all of haifa to use information from the
427 integer lists. */
428
429 static void
430 new_edge (source, target)
431 int source, target;
432 {
433 int e, next_edge;
434 int curr_edge, fst_edge;
435
436 /* Check for duplicates. */
437 fst_edge = curr_edge = OUT_EDGES (source);
438 while (curr_edge)
439 {
440 if (FROM_BLOCK (curr_edge) == source
441 && TO_BLOCK (curr_edge) == target)
442 {
443 return;
444 }
445
446 curr_edge = NEXT_OUT (curr_edge);
447
448 if (fst_edge == curr_edge)
449 break;
450 }
451
452 e = ++nr_edges;
453
454 FROM_BLOCK (e) = source;
455 TO_BLOCK (e) = target;
456
457 if (OUT_EDGES (source))
458 {
459 next_edge = NEXT_OUT (OUT_EDGES (source));
460 NEXT_OUT (OUT_EDGES (source)) = e;
461 NEXT_OUT (e) = next_edge;
462 }
463 else
464 {
465 OUT_EDGES (source) = e;
466 NEXT_OUT (e) = e;
467 }
468
469 if (IN_EDGES (target))
470 {
471 next_edge = NEXT_IN (IN_EDGES (target));
472 NEXT_IN (IN_EDGES (target)) = e;
473 NEXT_IN (e) = next_edge;
474 }
475 else
476 {
477 IN_EDGES (target) = e;
478 NEXT_IN (e) = e;
479 }
480 }
481
482 /* Translate a bit-set SET to a list BL of the bit-set members. */
483
484 static void
485 extract_bitlst (set, bl)
486 sbitmap set;
487 bitlst *bl;
488 {
489 int i;
490
491 /* bitlst_table space is reused in each call to extract_bitlst. */
492 bitlst_table_last = 0;
493
494 bl->first_member = &bitlst_table[bitlst_table_last];
495 bl->nr_members = 0;
496
497 /* Iterate over each set bit in the bitset. */
498 EXECUTE_IF_SET_IN_SBITMAP (set, 0, i,
499 {
500 bitlst_table[bitlst_table_last++] = i;
501 (bl->nr_members)++;
502 });
503
504 }
505
506 /* Functions for the construction of regions. */
507
508 /* Print the regions, for debugging purposes. Callable from debugger. */
509
510 void
511 debug_regions ()
512 {
513 int rgn, bb;
514
515 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
516 for (rgn = 0; rgn < nr_regions; rgn++)
517 {
518 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
519 rgn_table[rgn].rgn_nr_blocks);
520 fprintf (sched_dump, ";;\tbb/block: ");
521
522 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
523 {
524 current_blocks = RGN_BLOCKS (rgn);
525
526 if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
527 abort ();
528
529 fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
530 }
531
532 fprintf (sched_dump, "\n\n");
533 }
534 }
535
536 /* Build a single block region for each basic block in the function.
537 This allows for using the same code for interblock and basic block
538 scheduling. */
539
540 static void
541 find_single_block_region ()
542 {
543 basic_block bb;
544
545 nr_regions = 0;
546
547 FOR_EACH_BB (bb)
548 {
549 rgn_bb_table[nr_regions] = bb->index;
550 RGN_NR_BLOCKS (nr_regions) = 1;
551 RGN_BLOCKS (nr_regions) = nr_regions;
552 CONTAINING_RGN (bb->index) = nr_regions;
553 BLOCK_TO_BB (bb->index) = 0;
554 nr_regions++;
555 }
556 }
557
558 /* Update number of blocks and the estimate for number of insns
559 in the region. Return 1 if the region is "too large" for interblock
560 scheduling (compile time considerations), otherwise return 0. */
561
562 static int
563 too_large (block, num_bbs, num_insns)
564 int block, *num_bbs, *num_insns;
565 {
566 (*num_bbs)++;
567 (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
568 INSN_LUID (BLOCK_HEAD (block)));
569 if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
570 return 1;
571 else
572 return 0;
573 }
574
575 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
576 is still an inner loop. Put in max_hdr[blk] the header of the most inner
577 loop containing blk. */
578 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
579 { \
580 if (max_hdr[blk] == -1) \
581 max_hdr[blk] = hdr; \
582 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
583 RESET_BIT (inner, hdr); \
584 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
585 { \
586 RESET_BIT (inner,max_hdr[blk]); \
587 max_hdr[blk] = hdr; \
588 } \
589 }
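
/* In other words: of the two candidate headers, the one visited later in
   the DFS (the larger dfs_nr) is the more deeply nested loop header; the
   other one is no longer an inner loop, so its bit in INNER is cleared
   and max_hdr[blk] is left pointing at the deeper header.  */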
590
591 /* Find regions for interblock scheduling.
592
593 A region for scheduling can be:
594
595 * A loop-free procedure, or
596
597 * A reducible inner loop, or
598
599 * A basic block not contained in any other region.
600
601 ?!? In theory we could build other regions based on extended basic
602 blocks or reverse extended basic blocks. Is it worth the trouble?
603
604 Loop blocks that form a region are put into the region's block list
605 in topological order.
606
607 This procedure stores its results into the following global (ick) variables
608
609 * nr_regions
610 * rgn_table
611 * rgn_bb_table
612 * block_to_bb
613 * containing_rgn
614
615 We use dominator relationships to avoid making regions out of non-reducible
616 loops.
617
618 This procedure needs to be converted to work on pred/succ lists instead
619 of edge tables. That would simplify it somewhat. */
620
621 static void
622 find_rgns (edge_list, dom)
623 struct edge_list *edge_list;
624 dominance_info dom;
625 {
626 int *max_hdr, *dfs_nr, *stack, *degree;
627 char no_loops = 1;
628 int node, child, loop_head, i, head, tail;
629 int count = 0, sp, idx = 0;
630 int current_edge = out_edges[ENTRY_BLOCK_PTR->succ->dest->index];
631 int num_bbs, num_insns, unreachable;
632 int too_large_failure;
633 basic_block bb;
634
635 /* Note if an edge has been passed. */
636 sbitmap passed;
637
638 /* Note if a block is a natural loop header. */
639 sbitmap header;
640
641 /* Note if a block is a natural inner loop header. */
642 sbitmap inner;
643
644 /* Note if a block is in the block queue. */
645 sbitmap in_queue;
646
647 /* Note if a block is on the DFS stack. */
648 sbitmap in_stack;
649
650 int num_edges = NUM_EDGES (edge_list);
651
652 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
653 and a mapping from block to its loop header (if the block is contained
654 in a loop, else -1).
655
656 Store results in HEADER, INNER, and MAX_HDR respectively, these will
657 be used as inputs to the second traversal.
658
659 STACK, SP and DFS_NR are only used during the first traversal. */
660
661 /* Allocate and initialize variables for the first traversal. */
662 max_hdr = (int *) xmalloc (last_basic_block * sizeof (int));
663 dfs_nr = (int *) xcalloc (last_basic_block, sizeof (int));
664 stack = (int *) xmalloc (nr_edges * sizeof (int));
665
666 inner = sbitmap_alloc (last_basic_block);
667 sbitmap_ones (inner);
668
669 header = sbitmap_alloc (last_basic_block);
670 sbitmap_zero (header);
671
672 passed = sbitmap_alloc (nr_edges);
673 sbitmap_zero (passed);
674
675 in_queue = sbitmap_alloc (last_basic_block);
676 sbitmap_zero (in_queue);
677
678 in_stack = sbitmap_alloc (last_basic_block);
679 sbitmap_zero (in_stack);
680
681 for (i = 0; i < last_basic_block; i++)
682 max_hdr[i] = -1;
683
684 /* DFS traversal to find inner loops in the cfg. */
685
686 sp = -1;
687 while (1)
688 {
689 if (current_edge == 0 || TEST_BIT (passed, current_edge))
690 {
691 /* We have reached a leaf node or a node that was already
692 processed. Pop edges off the stack until we find
693 an edge that has not yet been processed. */
694 while (sp >= 0
695 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
696 {
697 /* Pop entry off the stack. */
698 current_edge = stack[sp--];
699 node = FROM_BLOCK (current_edge);
700 child = TO_BLOCK (current_edge);
701 RESET_BIT (in_stack, child);
702 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
703 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
704 current_edge = NEXT_OUT (current_edge);
705 }
706
707 /* See if we have finished the DFS tree traversal. */
708 if (sp < 0 && TEST_BIT (passed, current_edge))
709 break;
710
711 /* Nope, continue the traversal with the popped node. */
712 continue;
713 }
714
715 /* Process a node. */
716 node = FROM_BLOCK (current_edge);
717 child = TO_BLOCK (current_edge);
718 SET_BIT (in_stack, node);
719 dfs_nr[node] = ++count;
720
721 /* If the successor is in the stack, then we've found a loop.
722 Mark the loop, if it is not a natural loop, then it will
723 be rejected during the second traversal. */
724 if (TEST_BIT (in_stack, child))
725 {
726 no_loops = 0;
727 SET_BIT (header, child);
728 UPDATE_LOOP_RELATIONS (node, child);
729 SET_BIT (passed, current_edge);
730 current_edge = NEXT_OUT (current_edge);
731 continue;
732 }
733
734 /* If the child was already visited, then there is no need to visit
735 it again. Just update the loop relationships and restart
736 with a new edge. */
737 if (dfs_nr[child])
738 {
739 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
740 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
741 SET_BIT (passed, current_edge);
742 current_edge = NEXT_OUT (current_edge);
743 continue;
744 }
745
746 /* Push an entry on the stack and continue DFS traversal. */
747 stack[++sp] = current_edge;
748 SET_BIT (passed, current_edge);
749 current_edge = OUT_EDGES (child);
750
751 /* This is temporary until haifa is converted to use rth's new
752 cfg routines which have true entry/exit blocks and the
753 appropriate edges from/to those blocks.
754
755 Generally we update dfs_nr for a node when we process its
756 out edge. However, if the node has no out edge then we will
757 not set dfs_nr for that node. This can confuse the scheduler
758 into thinking that we have unreachable blocks, which in turn
759 disables cross block scheduling.
760
761 So, if we have a node with no out edges, go ahead and mark it
762 as reachable now. */
763 if (current_edge == 0)
764 dfs_nr[child] = ++count;
765 }
766
767 /* Another check for unreachable blocks. The earlier test in
768 is_cfg_nonregular only finds unreachable blocks that do not
769 form a loop.
770
771 The DFS traversal will mark every block that is reachable from
772 the entry node by placing a nonzero value in dfs_nr. Thus if
773 dfs_nr is zero for any block, then it must be unreachable. */
774 unreachable = 0;
775 FOR_EACH_BB (bb)
776 if (dfs_nr[bb->index] == 0)
777 {
778 unreachable = 1;
779 break;
780 }
781
782 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
783 to hold degree counts. */
784 degree = dfs_nr;
785
786 FOR_EACH_BB (bb)
787 degree[bb->index] = 0;
788 for (i = 0; i < num_edges; i++)
789 {
790 edge e = INDEX_EDGE (edge_list, i);
791
792 if (e->dest != EXIT_BLOCK_PTR)
793 degree[e->dest->index]++;
794 }
795
796 /* Do not perform region scheduling if there are any unreachable
797 blocks. */
798 if (!unreachable)
799 {
800 int *queue;
801
802 if (no_loops)
803 SET_BIT (header, 0);
804
805 /* Second traversal: find reducible inner loops and topologically sort
806 the blocks of each region. */
807
808 queue = (int *) xmalloc (n_basic_blocks * sizeof (int));
809
810 /* Find blocks which are inner loop headers. We still have non-reducible
811 loops to consider at this point. */
812 FOR_EACH_BB (bb)
813 {
814 if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
815 {
816 edge e;
817 basic_block jbb;
818
819 /* Now check that the loop is reducible. We do this separately
820 from finding inner loops so that we do not find a reducible
821 loop which contains an inner non-reducible loop.
822
823 A simple way to find reducible/natural loops is to verify
824 that each block in the loop is dominated by the loop
825 header.
826
827 If there exists a block that is not dominated by the loop
828 header, then the block is reachable from outside the loop
829 and thus the loop is not a natural loop. */
830 FOR_EACH_BB (jbb)
831 {
832 /* First identify blocks in the loop, except for the loop
833 entry block. */
834 if (bb->index == max_hdr[jbb->index] && bb != jbb)
835 {
836 /* Now verify that the block is dominated by the loop
837 header. */
838 if (!dominated_by_p (dom, jbb, bb))
839 break;
840 }
841 }
842
843 /* If we exited the loop early, then BB is the header of
844 a non-reducible loop and we should quit processing it
845 now. */
846 if (jbb != EXIT_BLOCK_PTR)
847 continue;
848
849 /* BB is a header of an inner loop, or block 0 in a subroutine
850 with no loops at all. */
851 head = tail = -1;
852 too_large_failure = 0;
853 loop_head = max_hdr[bb->index];
854
855 /* Decrease degree of all BB's successors for topological
856 ordering. */
857 for (e = bb->succ; e; e = e->succ_next)
858 if (e->dest != EXIT_BLOCK_PTR)
859 --degree[e->dest->index];
860
861 /* Estimate # insns, and count # blocks in the region. */
862 num_bbs = 1;
863 num_insns = (INSN_LUID (bb->end)
864 - INSN_LUID (bb->head));
865
866 /* Find all loop latches (blocks with back edges to the loop
867 header), or all the leaf blocks if the cfg has no loops.
868
869 Place those blocks into the queue. */
870 if (no_loops)
871 {
872 FOR_EACH_BB (jbb)
873 /* Leaf nodes have only a single successor which must
874 be EXIT_BLOCK. */
875 if (jbb->succ
876 && jbb->succ->dest == EXIT_BLOCK_PTR
877 && jbb->succ->succ_next == NULL)
878 {
879 queue[++tail] = jbb->index;
880 SET_BIT (in_queue, jbb->index);
881
882 if (too_large (jbb->index, &num_bbs, &num_insns))
883 {
884 too_large_failure = 1;
885 break;
886 }
887 }
888 }
889 else
890 {
891 edge e;
892
893 for (e = bb->pred; e; e = e->pred_next)
894 {
895 if (e->src == ENTRY_BLOCK_PTR)
896 continue;
897
898 node = e->src->index;
899
900 if (max_hdr[node] == loop_head && node != bb->index)
901 {
902 /* This is a loop latch. */
903 queue[++tail] = node;
904 SET_BIT (in_queue, node);
905
906 if (too_large (node, &num_bbs, &num_insns))
907 {
908 too_large_failure = 1;
909 break;
910 }
911 }
912 }
913 }
914
915 /* Now add all the blocks in the loop to the queue.
916
917 We know the loop is a natural loop; however the algorithm
918 above will not always mark certain blocks as being in the
919 loop. Consider:
920 node children
921 a b,c
922 b c
923 c a,d
924 d b
925
926 The algorithm in the DFS traversal may not mark B & D as part
927 of the loop (ie they will not have max_hdr set to A).
928
929 We know they can not be loop latches (else they would have
930 had max_hdr set since they'd have a backedge to a dominator
931 block). So we don't need them on the initial queue.
932
933 We know they are part of the loop because they are dominated
934 by the loop header and can be reached by a backwards walk of
935 the edges starting with nodes on the initial queue.
936
937 It is safe and desirable to include those nodes in the
938 loop/scheduling region. To do so we would need to decrease
939 the degree of a node if it is the target of a backedge
940 within the loop itself as the node is placed in the queue.
941
942 We do not do this because I'm not sure that the actual
943 scheduling code will properly handle this case. ?!? */
944
945 while (head < tail && !too_large_failure)
946 {
947 edge e;
948 child = queue[++head];
949
950 for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
951 {
952 node = e->src->index;
953
954 /* See discussion above about nodes not marked as in
955 this loop during the initial DFS traversal. */
956 if (e->src == ENTRY_BLOCK_PTR
957 || max_hdr[node] != loop_head)
958 {
959 tail = -1;
960 break;
961 }
962 else if (!TEST_BIT (in_queue, node) && node != bb->index)
963 {
964 queue[++tail] = node;
965 SET_BIT (in_queue, node);
966
967 if (too_large (node, &num_bbs, &num_insns))
968 {
969 too_large_failure = 1;
970 break;
971 }
972 }
973 }
974 }
975
976 if (tail >= 0 && !too_large_failure)
977 {
978 /* Place the loop header into list of region blocks. */
979 degree[bb->index] = -1;
980 rgn_bb_table[idx] = bb->index;
981 RGN_NR_BLOCKS (nr_regions) = num_bbs;
982 RGN_BLOCKS (nr_regions) = idx++;
983 CONTAINING_RGN (bb->index) = nr_regions;
984 BLOCK_TO_BB (bb->index) = count = 0;
985
986 /* Remove blocks from queue[] when their in degree
987 becomes zero. Repeat until no blocks are left on the
988 list. This produces a topological list of blocks in
989 the region. */
990 while (tail >= 0)
991 {
992 if (head < 0)
993 head = tail;
994 child = queue[head];
995 if (degree[child] == 0)
996 {
997 edge e;
998
999 degree[child] = -1;
1000 rgn_bb_table[idx++] = child;
1001 BLOCK_TO_BB (child) = ++count;
1002 CONTAINING_RGN (child) = nr_regions;
1003 queue[head] = queue[tail--];
1004
1005 for (e = BASIC_BLOCK (child)->succ;
1006 e;
1007 e = e->succ_next)
1008 if (e->dest != EXIT_BLOCK_PTR)
1009 --degree[e->dest->index];
1010 }
1011 else
1012 --head;
1013 }
1014 ++nr_regions;
1015 }
1016 }
1017 }
1018 free (queue);
1019 }
1020
1021 /* Any block that did not end up in a region is placed into a region
1022 by itself. */
1023 FOR_EACH_BB (bb)
1024 if (degree[bb->index] >= 0)
1025 {
1026 rgn_bb_table[idx] = bb->index;
1027 RGN_NR_BLOCKS (nr_regions) = 1;
1028 RGN_BLOCKS (nr_regions) = idx++;
1029 CONTAINING_RGN (bb->index) = nr_regions++;
1030 BLOCK_TO_BB (bb->index) = 0;
1031 }
1032
1033 free (max_hdr);
1034 free (dfs_nr);
1035 free (stack);
1036 sbitmap_free (passed);
1037 sbitmap_free (header);
1038 sbitmap_free (inner);
1039 sbitmap_free (in_queue);
1040 sbitmap_free (in_stack);
1041 }
1042
1043 /* Functions for regions scheduling information. */
1044
1045 /* Compute dominators, probability, and potential-split-edges of bb.
1046 Assume that these values were already computed for bb's predecessors. */
1047
1048 static void
1049 compute_dom_prob_ps (bb)
1050 int bb;
1051 {
1052 int nxt_in_edge, fst_in_edge, pred;
1053 int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;
1054
1055 prob[bb] = 0.0;
1056 if (IS_RGN_ENTRY (bb))
1057 {
1058 SET_BIT (dom[bb], 0);
1059 prob[bb] = 1.0;
1060 return;
1061 }
1062
1063 fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));
1064
1065 /* Initialize dom[bb] to '111..1'. */
1066 sbitmap_ones (dom[bb]);
1067
1068 do
1069 {
1070 pred = FROM_BLOCK (nxt_in_edge);
1071 sbitmap_a_and_b (dom[bb], dom[bb], dom[BLOCK_TO_BB (pred)]);
1072 sbitmap_a_or_b (ancestor_edges[bb], ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)]);
1073
1074 SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge));
1075
1076 nr_out_edges = 1;
1077 nr_rgn_out_edges = 0;
1078 fst_out_edge = OUT_EDGES (pred);
1079 nxt_out_edge = NEXT_OUT (fst_out_edge);
1080
1081 sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[BLOCK_TO_BB (pred)]);
1082
1083 SET_BIT (pot_split[bb], EDGE_TO_BIT (fst_out_edge));
1084
1085 /* The successor doesn't belong in the region? */
1086 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
1087 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1088 ++nr_rgn_out_edges;
1089
1090 while (fst_out_edge != nxt_out_edge)
1091 {
1092 ++nr_out_edges;
1093 /* The successor doesn't belong in the region? */
1094 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
1095 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1096 ++nr_rgn_out_edges;
1097 SET_BIT (pot_split[bb], EDGE_TO_BIT (nxt_out_edge));
1098 nxt_out_edge = NEXT_OUT (nxt_out_edge);
1099
1100 }
1101
1102 /* Now nr_rgn_out_edges is the number of region-exit edges from
1103 pred, and nr_out_edges will be the number of pred out edges
1104 not leaving the region. */
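/* Pred's probability is split evenly among its out edges that stay
   in the region; if pred also has region-exit edges, they are assumed
   to absorb 10% of its probability (hence the 0.9 factor below).  */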
1105 nr_out_edges -= nr_rgn_out_edges;
1106 if (nr_rgn_out_edges > 0)
1107 prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1108 else
1109 prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1110 nxt_in_edge = NEXT_IN (nxt_in_edge);
1111 }
1112 while (fst_in_edge != nxt_in_edge);
1113
1114 SET_BIT (dom[bb], bb);
1115 sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
1116
1117 if (sched_verbose >= 2)
1118 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1119 (int) (100.0 * prob[bb]));
1120 }
1121
1122 /* Functions for target info. */
1123
1124 /* Compute in BL the list of split-edges of bb_src relative to bb_trg.
1125 Note that bb_trg dominates bb_src. */
1126
1127 static void
1128 split_edges (bb_src, bb_trg, bl)
1129 int bb_src;
1130 int bb_trg;
1131 edgelst *bl;
1132 {
1133 sbitmap src = (edgeset) sbitmap_alloc (pot_split[bb_src]->n_bits);
1134 sbitmap_copy (src, pot_split[bb_src]);
1135
1136 sbitmap_difference (src, src, pot_split[bb_trg]);
1137 extract_bitlst (src, bl);
1138 sbitmap_free (src);
1139 }
1140
1141 /* Find the valid candidate-source-blocks for the target block TRG, compute
1142 their probability, and check if they are speculative or not.
1143 For speculative sources, compute their update-blocks and split-blocks. */
1144
1145 static void
1146 compute_trg_info (trg)
1147 int trg;
1148 {
1149 candidate *sp;
1150 edgelst el;
1151 int check_block, update_idx;
1152 int i, j, k, fst_edge, nxt_edge;
1153
1154 /* Define some of the fields for the target bb as well. */
1155 sp = candidate_table + trg;
1156 sp->is_valid = 1;
1157 sp->is_speculative = 0;
1158 sp->src_prob = 100;
1159
1160 for (i = trg + 1; i < current_nr_blocks; i++)
1161 {
1162 sp = candidate_table + i;
1163
1164 sp->is_valid = IS_DOMINATED (i, trg);
1165 if (sp->is_valid)
1166 {
1167 sp->src_prob = GET_SRC_PROB (i, trg);
1168 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1169 }
1170
1171 if (sp->is_valid)
1172 {
1173 split_edges (i, trg, &el);
1174 sp->is_speculative = (el.nr_members) ? 1 : 0;
1175 if (sp->is_speculative && !flag_schedule_speculative)
1176 sp->is_valid = 0;
1177 }
1178
1179 if (sp->is_valid)
1180 {
1181 char *update_blocks;
1182
1183 /* Compute split blocks and store them in bblst_table.
1184 The TO block of every split edge is a split block. */
1185 sp->split_bbs.first_member = &bblst_table[bblst_last];
1186 sp->split_bbs.nr_members = el.nr_members;
1187 for (j = 0; j < el.nr_members; bblst_last++, j++)
1188 bblst_table[bblst_last] =
1189 TO_BLOCK (rgn_edges[el.first_member[j]]);
1190 sp->update_bbs.first_member = &bblst_table[bblst_last];
1191
1192 /* Compute update blocks and store them in bblst_table.
1193 For every split edge, look at the FROM block, and check
1194 all out edges. For each out edge that is not a split edge,
1195 add the TO block to the update block list. This list can end
1196 up with a lot of duplicates. We need to weed them out to avoid
1197 overrunning the end of the bblst_table. */
1198 update_blocks = (char *) alloca (last_basic_block);
1199 memset (update_blocks, 0, last_basic_block);
1200
1201 update_idx = 0;
1202 for (j = 0; j < el.nr_members; j++)
1203 {
1204 check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
1205 fst_edge = nxt_edge = OUT_EDGES (check_block);
1206 do
1207 {
1208 if (! update_blocks[TO_BLOCK (nxt_edge)])
1209 {
1210 for (k = 0; k < el.nr_members; k++)
1211 if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
1212 break;
1213
1214 if (k >= el.nr_members)
1215 {
1216 bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
1217 update_blocks[TO_BLOCK (nxt_edge)] = 1;
1218 update_idx++;
1219 }
1220 }
1221
1222 nxt_edge = NEXT_OUT (nxt_edge);
1223 }
1224 while (fst_edge != nxt_edge);
1225 }
1226 sp->update_bbs.nr_members = update_idx;
1227
1228 /* Make sure we didn't overrun the end of bblst_table. */
1229 if (bblst_last > bblst_size)
1230 abort ();
1231 }
1232 else
1233 {
1234 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1235
1236 sp->is_speculative = 0;
1237 sp->src_prob = 0;
1238 }
1239 }
1240 }
1241
1242 /* Print candidates info, for debugging purposes. Callable from debugger. */
1243
1244 void
1245 debug_candidate (i)
1246 int i;
1247 {
1248 if (!candidate_table[i].is_valid)
1249 return;
1250
1251 if (candidate_table[i].is_speculative)
1252 {
1253 int j;
1254 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1255
1256 fprintf (sched_dump, "split path: ");
1257 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1258 {
1259 int b = candidate_table[i].split_bbs.first_member[j];
1260
1261 fprintf (sched_dump, " %d ", b);
1262 }
1263 fprintf (sched_dump, "\n");
1264
1265 fprintf (sched_dump, "update path: ");
1266 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1267 {
1268 int b = candidate_table[i].update_bbs.first_member[j];
1269
1270 fprintf (sched_dump, " %d ", b);
1271 }
1272 fprintf (sched_dump, "\n");
1273 }
1274 else
1275 {
1276 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1277 }
1278 }
1279
1280 /* Print candidates info, for debugging purposes. Callable from debugger. */
1281
1282 void
1283 debug_candidates (trg)
1284 int trg;
1285 {
1286 int i;
1287
1288 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1289 BB_TO_BLOCK (trg), trg);
1290 for (i = trg + 1; i < current_nr_blocks; i++)
1291 debug_candidate (i);
1292 }
1293
1294 /* Functions for speculative scheduling. */
1295
1296 /* Return 0 if x is a set of a register live at the beginning of one
1297 of the split-blocks of src, otherwise return 1. */
1298
1299 static int
1300 check_live_1 (src, x)
1301 int src;
1302 rtx x;
1303 {
1304 int i;
1305 int regno;
1306 rtx reg = SET_DEST (x);
1307
1308 if (reg == 0)
1309 return 1;
1310
1311 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1312 || GET_CODE (reg) == SIGN_EXTRACT
1313 || GET_CODE (reg) == STRICT_LOW_PART)
1314 reg = XEXP (reg, 0);
1315
1316 if (GET_CODE (reg) == PARALLEL)
1317 {
1318 int i;
1319
1320 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1321 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1322 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1323 return 1;
1324
1325 return 0;
1326 }
1327
1328 if (GET_CODE (reg) != REG)
1329 return 1;
1330
1331 regno = REGNO (reg);
1332
1333 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1334 {
1335 /* Global registers are assumed live. */
1336 return 0;
1337 }
1338 else
1339 {
1340 if (regno < FIRST_PSEUDO_REGISTER)
1341 {
1342 /* Check for hard registers. */
1343 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1344 while (--j >= 0)
1345 {
1346 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1347 {
1348 int b = candidate_table[src].split_bbs.first_member[i];
1349
1350 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
1351 regno + j))
1352 {
1353 return 0;
1354 }
1355 }
1356 }
1357 }
1358 else
1359 {
1360 /* Check for pseudo registers. */
1361 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1362 {
1363 int b = candidate_table[src].split_bbs.first_member[i];
1364
1365 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
1366 {
1367 return 0;
1368 }
1369 }
1370 }
1371 }
1372
1373 return 1;
1374 }
1375
1376 /* If x is a set of a register R, mark that R is alive in the beginning
1377 of every update-block of src. */
1378
1379 static void
1380 update_live_1 (src, x)
1381 int src;
1382 rtx x;
1383 {
1384 int i;
1385 int regno;
1386 rtx reg = SET_DEST (x);
1387
1388 if (reg == 0)
1389 return;
1390
1391 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1392 || GET_CODE (reg) == SIGN_EXTRACT
1393 || GET_CODE (reg) == STRICT_LOW_PART)
1394 reg = XEXP (reg, 0);
1395
1396 if (GET_CODE (reg) == PARALLEL)
1397 {
1398 int i;
1399
1400 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1401 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1402 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1403
1404 return;
1405 }
1406
1407 if (GET_CODE (reg) != REG)
1408 return;
1409
1410 /* Global registers are always live, so the code below does not apply
1411 to them. */
1412
1413 regno = REGNO (reg);
1414
1415 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1416 {
1417 if (regno < FIRST_PSEUDO_REGISTER)
1418 {
1419 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1420 while (--j >= 0)
1421 {
1422 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1423 {
1424 int b = candidate_table[src].update_bbs.first_member[i];
1425
1426 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
1427 regno + j);
1428 }
1429 }
1430 }
1431 else
1432 {
1433 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1434 {
1435 int b = candidate_table[src].update_bbs.first_member[i];
1436
1437 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
1438 }
1439 }
1440 }
1441 }
1442
1443 /* Return 1 if insn can be speculatively moved from block src to trg,
1444 otherwise return 0. Called before first insertion of insn to
1445 ready-list or before the scheduling. */
1446
1447 static int
1448 check_live (insn, src)
1449 rtx insn;
1450 int src;
1451 {
1452 /* Find the registers set by instruction. */
1453 if (GET_CODE (PATTERN (insn)) == SET
1454 || GET_CODE (PATTERN (insn)) == CLOBBER)
1455 return check_live_1 (src, PATTERN (insn));
1456 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1457 {
1458 int j;
1459 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1460 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1461 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1462 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1463 return 0;
1464
1465 return 1;
1466 }
1467
1468 return 1;
1469 }
1470
1471 /* Update the live registers info after insn was moved speculatively from
1472 block src to trg. */
1473
1474 static void
1475 update_live (insn, src)
1476 rtx insn;
1477 int src;
1478 {
1479 /* Find the registers set by instruction. */
1480 if (GET_CODE (PATTERN (insn)) == SET
1481 || GET_CODE (PATTERN (insn)) == CLOBBER)
1482 update_live_1 (src, PATTERN (insn));
1483 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1484 {
1485 int j;
1486 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1487 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1488 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1489 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1490 }
1491 }
1492
1493 /* Exception Free Loads:
1494
1495 We define five classes of speculative loads: IFREE, IRISKY,
1496 PFREE, PRISKY, and MFREE.
1497
1498 IFREE loads are loads that are proved to be exception-free, just
1499 by examining the load insn. Examples for such loads are loads
1500 from TOC and loads of global data.
1501
1502 IRISKY loads are loads that are proved to be exception-risky,
1503 just by examining the load insn. Examples for such loads are
1504 volatile loads and loads from shared memory.
1505
1506 PFREE loads are loads for which we can prove, by examining other
1507 insns, that they are exception-free. Currently, this class consists
1508 of loads for which we are able to find a "similar load", either in
1509 the target block, or, if only one split-block exists, in that split
1510 block. Load2 is similar to load1 if both have same single base
1511 register. We identify only part of the similar loads, by finding
1512 an insn upon which both load1 and load2 have a DEF-USE dependence.
1513
1514 PRISKY loads are loads for which we can prove, by examining other
1515 insns, that they are exception-risky. Currently we have two proofs for
1516 such loads. The first proof detects loads that are probably guarded by a
1517 test on the memory address. This proof is based on the
1518 backward and forward data dependence information for the region.
1519 Let load-insn be the examined load.
1520 Load-insn is PRISKY iff ALL the following hold:
1521
1522 - insn1 is not in the same block as load-insn
1523 - there is a DEF-USE dependence chain (insn1, ..., load-insn)
1524 - test-insn is either a compare or a branch, not in the same block
1525 as load-insn
1526 - load-insn is reachable from test-insn
1527 - there is a DEF-USE dependence chain (insn1, ..., test-insn)
1528
1529 This proof might fail when the compare and the load are fed
1530 by an insn not in the region. To solve this, we will add to this
1531 group all loads that have no input DEF-USE dependence.
1532
1533 The second proof detects loads that are directly or indirectly
1534 fed by a speculative load. This proof is affected by the
1535 scheduling process. We will use the flag fed_by_spec_load.
1536 Initially, all insns have this flag reset. After a speculative
1537 motion of an insn, if insn is either a load, or marked as
1538 fed_by_spec_load, we will also mark as fed_by_spec_load every
1539 insn1 for which a DEF-USE dependence (insn, insn1) exists. A
1540 load which is fed_by_spec_load is also PRISKY.
1541
1542 MFREE (maybe-free) loads are all the remaining loads. They may be
1543 exception-free, but we cannot prove it.
1544
1545 Now, all loads in IFREE and PFREE classes are considered
1546 exception-free, while all loads in IRISKY and PRISKY classes are
1547 considered exception-risky. As for loads in the MFREE class,
1548 these are considered either exception-free or exception-risky,
1549 depending on whether we are pessimistic or optimistic. We have
1550 to take the pessimistic approach to assure the safety of
1551 speculative scheduling, but we can take the optimistic approach
1552 by invoking the -fsched_spec_load_dangerous option. */
1553
1554 enum INSN_TRAP_CLASS
1555 {
1556 TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
1557 PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
1558 };
1559
1560 #define WORST_CLASS(class1, class2) \
1561 ((class1 > class2) ? class1 : class2)
1562
1563 /* Non-zero if block bb_to is equal to, or reachable from block bb_from. */
1564 #define IS_REACHABLE(bb_from, bb_to) \
1565 (bb_from == bb_to \
1566 || IS_RGN_ENTRY (bb_from) \
1567 || (TEST_BIT (ancestor_edges[bb_to], \
1568 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))))))
1569
1570 /* Non-zero iff the address is computed from at most one register. */
1571 #define CONST_BASED_ADDRESS_P(x) \
1572 (GET_CODE (x) == REG \
1573 || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS \
1574 || (GET_CODE (x) == LO_SUM)) \
1575 && (CONSTANT_P (XEXP (x, 0)) \
1576 || CONSTANT_P (XEXP (x, 1)))))
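
/* For example (using a hypothetical pseudo register 65): (reg 65),
   (plus (reg 65) (const_int 8)) and (lo_sum (reg 65) (symbol_ref "x"))
   all satisfy this predicate, while (plus (reg 65) (reg 66)) does not,
   since that address is based on two registers.  */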
1577
1578 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1579
1580 static void
1581 set_spec_fed (load_insn)
1582 rtx load_insn;
1583 {
1584 rtx link;
1585
1586 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
1587 if (GET_MODE (link) == VOIDmode)
1588 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
1589 } /* set_spec_fed */
1590
1591 /* On the path from the insn to load_insn_bb, find a conditional
1592 branch that depends on insn and guards the speculative load. */
1593
1594 static int
1595 find_conditional_protection (insn, load_insn_bb)
1596 rtx insn;
1597 int load_insn_bb;
1598 {
1599 rtx link;
1600
1601 /* Iterate through DEF-USE forward dependences. */
1602 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
1603 {
1604 rtx next = XEXP (link, 0);
1605 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1606 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1607 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1608 && load_insn_bb != INSN_BB (next)
1609 && GET_MODE (link) == VOIDmode
1610 && (GET_CODE (next) == JUMP_INSN
1611 || find_conditional_protection (next, load_insn_bb)))
1612 return 1;
1613 }
1614 return 0;
1615 } /* find_conditional_protection */
1616
1617 /* Returns 1 if the same insn1 that participates in the computation
1618 of load_insn's address is feeding a conditional branch that is
1619 guarding load_insn. This is true if we find the two DEF-USE
1620 chains:
1621 insn1 -> ... -> conditional-branch
1622 insn1 -> ... -> load_insn,
1623 and if a flow path exists:
1624 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1625 and if insn1 is on the path
1626 region-entry -> ... -> bb_trg -> ... load_insn.
1627
1628 Locate insn1 by climbing on LOG_LINKS from load_insn.
1629 Locate the branch by following INSN_DEPEND from insn1. */
1630
1631 static int
1632 is_conditionally_protected (load_insn, bb_src, bb_trg)
1633 rtx load_insn;
1634 int bb_src, bb_trg;
1635 {
1636 rtx link;
1637
1638 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
1639 {
1640 rtx insn1 = XEXP (link, 0);
1641
1642 /* Must be a DEF-USE dependence upon non-branch. */
1643 if (GET_MODE (link) != VOIDmode
1644 || GET_CODE (insn1) == JUMP_INSN)
1645 continue;
1646
1647 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1648 if (INSN_BB (insn1) == bb_src
1649 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1650 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1651 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1652 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1653 continue;
1654
1655 /* Now search for the conditional-branch. */
1656 if (find_conditional_protection (insn1, bb_src))
1657 return 1;
1658
1659 /* Recursive step: search another insn1, "above" current insn1. */
1660 return is_conditionally_protected (insn1, bb_src, bb_trg);
1661 }
1662
1663 /* The chain does not exist. */
1664 return 0;
1665 } /* is_conditionally_protected */
1666
1667 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1668 load_insn can move speculatively from bb_src to bb_trg. All the
1669 following must hold:
1670
1671 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1672 (2) load_insn and load1 have a def-use dependence upon
1673 the same insn 'insn1'.
1674 (3) either load2 is in bb_trg, or:
1675 - there's only one split-block, and
1676 - load1 is on the escape path, and
1677
1678 From all these we can conclude that the two loads access memory
1679 addresses that differ at most by a constant, and hence if moving
1680 load_insn would cause an exception, it would have been caused by
1681 load2 anyhow. */
1682
1683 static int
1684 is_pfree (load_insn, bb_src, bb_trg)
1685 rtx load_insn;
1686 int bb_src, bb_trg;
1687 {
1688 rtx back_link;
1689 candidate *candp = candidate_table + bb_src;
1690
1691 if (candp->split_bbs.nr_members != 1)
1692 /* Must have exactly one escape block. */
1693 return 0;
1694
1695 for (back_link = LOG_LINKS (load_insn);
1696 back_link; back_link = XEXP (back_link, 1))
1697 {
1698 rtx insn1 = XEXP (back_link, 0);
1699
1700 if (GET_MODE (back_link) == VOIDmode)
1701 {
1702 /* Found a DEF-USE dependence (insn1, load_insn). */
1703 rtx fore_link;
1704
1705 for (fore_link = INSN_DEPEND (insn1);
1706 fore_link; fore_link = XEXP (fore_link, 1))
1707 {
1708 rtx insn2 = XEXP (fore_link, 0);
1709 if (GET_MODE (fore_link) == VOIDmode)
1710 {
1711 /* Found a DEF-USE dependence (insn1, insn2). */
1712 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1713 /* insn2 not guaranteed to be a 1 base reg load. */
1714 continue;
1715
1716 if (INSN_BB (insn2) == bb_trg)
1717 /* insn2 is the similar load, in the target block. */
1718 return 1;
1719
1720 if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
1721 /* insn2 is a similar load, in a split-block. */
1722 return 1;
1723 }
1724 }
1725 }
1726 }
1727
1728 /* Couldn't find a similar load. */
1729 return 0;
1730 } /* is_pfree */
1731
1732 /* Returns a class that an insn containing the expression X (a store
1733 destination if IS_STORE is nonzero) may belong to, as found by analyzing X. */
1734
1735 static int
1736 may_trap_exp (x, is_store)
1737 rtx x;
1738 int is_store;
1739 {
1740 enum rtx_code code;
1741
1742 if (x == 0)
1743 return TRAP_FREE;
1744 code = GET_CODE (x);
1745 if (is_store)
1746 {
1747 if (code == MEM && may_trap_p (x))
1748 return TRAP_RISKY;
1749 else
1750 return TRAP_FREE;
1751 }
1752 if (code == MEM)
1753 {
1754 /* The insn uses memory: a volatile load. */
1755 if (MEM_VOLATILE_P (x))
1756 return IRISKY;
1757 /* An exception-free load. */
1758 if (!may_trap_p (x))
1759 return IFREE;
1760 /* A load with 1 base register, to be further checked. */
1761 if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
1762 return PFREE_CANDIDATE;
1763 /* No info on the load, to be further checked. */
1764 return PRISKY_CANDIDATE;
1765 }
1766 else
1767 {
1768 const char *fmt;
1769 int i, insn_class = TRAP_FREE;
1770
1771 /* Neither store nor load, check if it may cause a trap. */
1772 if (may_trap_p (x))
1773 return TRAP_RISKY;
1774 /* Recursive step: walk the insn... */
1775 fmt = GET_RTX_FORMAT (code);
1776 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1777 {
1778 if (fmt[i] == 'e')
1779 {
1780 int tmp_class = may_trap_exp (XEXP (x, i), is_store);
1781 insn_class = WORST_CLASS (insn_class, tmp_class);
1782 }
1783 else if (fmt[i] == 'E')
1784 {
1785 int j;
1786 for (j = 0; j < XVECLEN (x, i); j++)
1787 {
1788 int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store);
1789 insn_class = WORST_CLASS (insn_class, tmp_class);
1790 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
1791 break;
1792 }
1793 }
1794 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
1795 break;
1796 }
1797 return insn_class;
1798 }
1799 }
1800
1801 /* Classifies insn for the purpose of verifying that it can be
1802 moved speculatively, by examining its pattern, returning:
1803 TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
1804 TRAP_FREE: non-load insn.
1805 IFREE: load from a globally safe location.
1806 IRISKY: volatile load.
1807 PFREE_CANDIDATE, PRISKY_CANDIDATE: loads that need to be checked for
1808 being either PFREE or PRISKY. */
1809
1810 static int
1811 haifa_classify_insn (insn)
1812 rtx insn;
1813 {
1814 rtx pat = PATTERN (insn);
1815 int tmp_class = TRAP_FREE;
1816 int insn_class = TRAP_FREE;
1817 enum rtx_code code;
1818
1819 if (GET_CODE (pat) == PARALLEL)
1820 {
1821 int i, len = XVECLEN (pat, 0);
1822
1823 for (i = len - 1; i >= 0; i--)
1824 {
1825 code = GET_CODE (XVECEXP (pat, 0, i));
1826 switch (code)
1827 {
1828 case CLOBBER:
1829 /* Test if it is a 'store'. */
1830 tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
1831 break;
1832 case SET:
1833 /* Test if it is a store. */
1834 tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
1835 if (tmp_class == TRAP_RISKY)
1836 break;
1837 /* Test if it is a load. */
1838 tmp_class
1839 = WORST_CLASS (tmp_class,
1840 may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)),
1841 0));
1842 break;
1843 case COND_EXEC:
1844 case TRAP_IF:
1845 tmp_class = TRAP_RISKY;
1846 break;
1847 default:
1848 ;
1849 }
1850 insn_class = WORST_CLASS (insn_class, tmp_class);
1851 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
1852 break;
1853 }
1854 }
1855 else
1856 {
1857 code = GET_CODE (pat);
1858 switch (code)
1859 {
1860 case CLOBBER:
1861 /* Test if it is a 'store'. */
1862 tmp_class = may_trap_exp (XEXP (pat, 0), 1);
1863 break;
1864 case SET:
1865 /* Test if it is a store. */
1866 tmp_class = may_trap_exp (SET_DEST (pat), 1);
1867 if (tmp_class == TRAP_RISKY)
1868 break;
1869 /* Test if it is a load. */
1870 tmp_class =
1871 WORST_CLASS (tmp_class,
1872 may_trap_exp (SET_SRC (pat), 0));
1873 break;
1874 case COND_EXEC:
1875 case TRAP_IF:
1876 tmp_class = TRAP_RISKY;
1877 break;
1878 default:;
1879 }
1880 insn_class = tmp_class;
1881 }
1882
1883 return insn_class;
1884 }
1885
1886 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1887 a load moved speculatively, or if load_insn is protected by
1888 a compare on load_insn's address). */
1889
1890 static int
1891 is_prisky (load_insn, bb_src, bb_trg)
1892 rtx load_insn;
1893 int bb_src, bb_trg;
1894 {
1895 if (FED_BY_SPEC_LOAD (load_insn))
1896 return 1;
1897
1898 if (LOG_LINKS (load_insn) == NULL)
1899 /* Dependence may 'hide' out of the region. */
1900 return 1;
1901
1902 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1903 return 1;
1904
1905 return 0;
1906 }
1907
1908 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1909 Return 1 if insn is exception-free (and the motion is valid)
1910 and 0 otherwise. */
1911
1912 static int
1913 is_exception_free (insn, bb_src, bb_trg)
1914 rtx insn;
1915 int bb_src, bb_trg;
1916 {
1917 int insn_class = haifa_classify_insn (insn);
1918
1919 /* Handle non-load insns. */
1920 switch (insn_class)
1921 {
1922 case TRAP_FREE:
1923 return 1;
1924 case TRAP_RISKY:
1925 return 0;
1926 default:;
1927 }
1928
1929 /* Handle loads. */
1930 if (!flag_schedule_speculative_load)
1931 return 0;
1932 IS_LOAD_INSN (insn) = 1;
1933 switch (insn_class)
1934 {
1935 case IFREE:
1936 return (1);
1937 case IRISKY:
1938 return 0;
1939 case PFREE_CANDIDATE:
1940 if (is_pfree (insn, bb_src, bb_trg))
1941 return 1;
1942 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
1943 case PRISKY_CANDIDATE:
1944 if (!flag_schedule_speculative_load_dangerous
1945 || is_prisky (insn, bb_src, bb_trg))
1946 return 0;
1947 break;
1948 default:;
1949 }
1950
1951 return flag_schedule_speculative_load_dangerous;
1952 }
1953
1954 /* The number of insns from the current block scheduled so far. */
1955 static int sched_target_n_insns;
1956 /* The number of insns from the current block to be scheduled in total. */
1957 static int target_n_insns;
1958 /* The number of insns from the entire region scheduled so far. */
1959 static int sched_n_insns;
1960 /* Nonzero if the last scheduled insn was a jump. */
1961 static int last_was_jump;
1962
1963 /* Implementations of the sched_info functions for region scheduling. */
1964 static void init_ready_list PARAMS ((struct ready_list *));
1965 static int can_schedule_ready_p PARAMS ((rtx));
1966 static int new_ready PARAMS ((rtx));
1967 static int schedule_more_p PARAMS ((void));
1968 static const char *rgn_print_insn PARAMS ((rtx, int));
1969 static int rgn_rank PARAMS ((rtx, rtx));
1970 static int contributes_to_priority PARAMS ((rtx, rtx));
1971 static void compute_jump_reg_dependencies PARAMS ((rtx, regset, regset,
1972 regset));
1973
1974 /* Return nonzero if there are more insns that should be scheduled. */
1975
1976 static int
1977 schedule_more_p ()
1978 {
1979 return ! last_was_jump && sched_target_n_insns < target_n_insns;
1980 }
1981
1982 /* Add all insns that are initially ready to the ready list READY. Called
1983 once before scheduling a set of insns. */
1984
1985 static void
1986 init_ready_list (ready)
1987 struct ready_list *ready;
1988 {
1989 rtx prev_head = current_sched_info->prev_head;
1990 rtx next_tail = current_sched_info->next_tail;
1991 int bb_src;
1992 rtx insn;
1993
1994 target_n_insns = 0;
1995 sched_target_n_insns = 0;
1996 sched_n_insns = 0;
1997 last_was_jump = 0;
1998
1999 /* Print debugging information. */
2000 if (sched_verbose >= 5)
2001 debug_dependencies ();
2002
2003 /* Prepare current target block info. */
2004 if (current_nr_blocks > 1)
2005 {
2006 candidate_table = (candidate *) xmalloc (current_nr_blocks
2007 * sizeof (candidate));
2008
2009 bblst_last = 0;
2010 /* bblst_table holds split blocks and update blocks for each block after
2011 the current one in the region. split blocks and update blocks are
2012 the TO blocks of region edges, so there can be at most rgn_nr_edges
2013 of them. */
2014 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
2015 bblst_table = (int *) xmalloc (bblst_size * sizeof (int));
2016
2017 bitlst_table_last = 0;
2018 bitlst_table_size = rgn_nr_edges;
2019 bitlst_table = (int *) xmalloc (rgn_nr_edges * sizeof (int));
2020
2021 compute_trg_info (target_bb);
2022 }
2023
2024 /* Initialize ready list with all 'ready' insns in target block.
2025 Count number of insns in the target block being scheduled. */
2026 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
2027 {
2028 rtx next;
2029
2030 if (! INSN_P (insn))
2031 continue;
2032 next = NEXT_INSN (insn);
2033
2034 if (INSN_DEP_COUNT (insn) == 0
2035 && (! INSN_P (next) || SCHED_GROUP_P (next) == 0))
2036 ready_add (ready, insn);
2037 if (!(SCHED_GROUP_P (insn)))
2038 target_n_insns++;
2039 }
2040
2041 /* Add to ready list all 'ready' insns in valid source blocks.
2042 For speculative insns, check-live, exception-free, and
2043 issue-delay. */
2044 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
2045 if (IS_VALID (bb_src))
2046 {
2047 rtx src_head;
2048 rtx src_next_tail;
2049 rtx tail, head;
2050
2051 get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
2052 src_next_tail = NEXT_INSN (tail);
2053 src_head = head;
2054
2055 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
2056 {
2057 if (! INSN_P (insn))
2058 continue;
2059
2060 if (!CANT_MOVE (insn)
2061 && (!IS_SPECULATIVE_INSN (insn)
2062 || ((((!targetm.sched.use_dfa_pipeline_interface
2063 || !(*targetm.sched.use_dfa_pipeline_interface) ())
2064 && insn_issue_delay (insn) <= 3)
2065 || (targetm.sched.use_dfa_pipeline_interface
2066 && (*targetm.sched.use_dfa_pipeline_interface) ()
2067 && (recog_memoized (insn) < 0
2068 || min_insn_conflict_delay (curr_state,
2069 insn, insn) <= 3)))
2070 && check_live (insn, bb_src)
2071 && is_exception_free (insn, bb_src, target_bb))))
2072 {
2073 rtx next;
2074
2075 /* Note that we haven't squirreled away the notes for
2076 blocks other than the current. So if this is a
2077 speculative insn, NEXT might otherwise be a note. */
2078 next = next_nonnote_insn (insn);
2079 if (INSN_DEP_COUNT (insn) == 0
2080 && (! next
2081 || ! INSN_P (next)
2082 || SCHED_GROUP_P (next) == 0))
2083 ready_add (ready, insn);
2084 }
2085 }
2086 }
2087 }
2088
2089 /* Called after taking INSN from the ready list. Returns nonzero if this
2090 insn can be scheduled, zero if we should silently discard it. */
2091
2092 static int
2093 can_schedule_ready_p (insn)
2094 rtx insn;
2095 {
2096 if (GET_CODE (insn) == JUMP_INSN)
2097 last_was_jump = 1;
2098
2099 /* An interblock motion? */
2100 if (INSN_BB (insn) != target_bb)
2101 {
2102 rtx temp;
2103 basic_block b1;
2104
2105 if (IS_SPECULATIVE_INSN (insn))
2106 {
2107 if (!check_live (insn, INSN_BB (insn)))
2108 return 0;
2109 update_live (insn, INSN_BB (insn));
2110
2111 /* For speculative load, mark insns fed by it. */
2112 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
2113 set_spec_fed (insn);
2114
2115 nr_spec++;
2116 }
2117 nr_inter++;
2118
2119 /* Find the beginning of the scheduling group. */
2120 /* ??? Ought to update basic block here, but later bits of
2121 schedule_block assumes the original insn block is
2122 still intact. */
2123
2124 temp = insn;
2125 while (SCHED_GROUP_P (temp))
2126 temp = PREV_INSN (temp);
2127
2128 /* Update source block boundaries. */
2129 b1 = BLOCK_FOR_INSN (temp);
2130 if (temp == b1->head && insn == b1->end)
2131 {
2132 /* We moved all the insns in the basic block.
2133 Emit a note after the last insn and update the
2134 begin/end boundaries to point to the note. */
2135 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
2136 b1->head = note;
2137 b1->end = note;
2138 }
2139 else if (insn == b1->end)
2140 {
2141 /* We took insns from the end of the basic block,
2142 so update the end of block boundary so that it
2143 points to the first insn we did not move. */
2144 b1->end = PREV_INSN (temp);
2145 }
2146 else if (temp == b1->head)
2147 {
2148 /* We took insns from the start of the basic block,
2149 so update the start of block boundary so that
2150 it points to the first insn we did not move. */
2151 b1->head = NEXT_INSN (insn);
2152 }
2153 }
2154 else
2155 {
2156 /* In block motion. */
2157 sched_target_n_insns++;
2158 }
2159 sched_n_insns++;
2160
2161 return 1;
2162 }
2163
2164 /* Called after INSN has all its dependencies resolved. Return nonzero
2165 if it should be moved to the ready list or the queue, or zero if we
2166 should silently discard it. */
2167 static int
2168 new_ready (next)
2169 rtx next;
2170 {
2171 /* For speculative insns, before inserting to ready/queue,
2172 check live, exception-free, and issue-delay. */
2173 if (INSN_BB (next) != target_bb
2174 && (!IS_VALID (INSN_BB (next))
2175 || CANT_MOVE (next)
2176 || (IS_SPECULATIVE_INSN (next)
2177 && (0
2178 || (targetm.sched.use_dfa_pipeline_interface
2179 && (*targetm.sched.use_dfa_pipeline_interface) ()
2180 && recog_memoized (next) >= 0
2181 && min_insn_conflict_delay (curr_state, next,
2182 next) > 3)
2183 || ((!targetm.sched.use_dfa_pipeline_interface
2184 || !(*targetm.sched.use_dfa_pipeline_interface) ())
2185 && insn_issue_delay (next) > 3)
2186 || !check_live (next, INSN_BB (next))
2187 || !is_exception_free (next, INSN_BB (next), target_bb)))))
2188 return 0;
2189 return 1;
2190 }
2191
2192 /* Return a string that contains the insn uid and optionally anything else
2193 necessary to identify this insn in an output. It's valid to use a
2194 static buffer for this. The ALIGNED parameter should cause the string
2195 to be formatted so that multiple output lines will line up nicely. */
2196
2197 static const char *
2198 rgn_print_insn (insn, aligned)
2199 rtx insn;
2200 int aligned;
2201 {
2202 static char tmp[80];
2203
2204 if (aligned)
2205 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
2206 else
2207 {
2208 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
2209 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
2210 else
2211 sprintf (tmp, "%d", INSN_UID (insn));
2212 }
2213 return tmp;
2214 }
2215
2216 /* Compare priority of two insns. Return a positive number if the second
2217 insn is to be preferred for scheduling, and a negative one if the first
2218 is to be preferred. Zero if they are equally good. */
2219
2220 static int
2221 rgn_rank (insn1, insn2)
2222 rtx insn1, insn2;
2223 {
2224 /* Some comparisons make sense only in interblock scheduling. */
2225 if (INSN_BB (insn1) != INSN_BB (insn2))
2226 {
2227 int spec_val, prob_val;
2228
2229 /* Prefer an inblock motion over an interblock motion. */
2230 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2231 return 1;
2232 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2233 return -1;
2234
2235 /* Prefer a useful motion over a speculative one. */
2236 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2237 if (spec_val)
2238 return spec_val;
2239
2240 /* Prefer a more probable (speculative) insn. */
2241 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
2242 if (prob_val)
2243 return prob_val;
2244 }
2245 return 0;
2246 }
2247
2248 /* NEXT is an instruction that depends on INSN (a backward dependence);
2249 return nonzero if we should include this dependence in priority
2250 calculations. */
2251
2252 static int
2253 contributes_to_priority (next, insn)
2254 rtx next, insn;
2255 {
2256 return BLOCK_NUM (next) == BLOCK_NUM (insn);
2257 }
2258
2259 /* INSN is a JUMP_INSN, COND_SET is the set of registers that are
2260 conditionally set before INSN. Store the set of registers that
2261 must be considered as used by this jump in USED and that of
2262 registers that must be considered as set in SET. */
2263
2264 static void
2265 compute_jump_reg_dependencies (insn, cond_set, used, set)
2266 rtx insn ATTRIBUTE_UNUSED;
2267 regset cond_set ATTRIBUTE_UNUSED;
2268 regset used ATTRIBUTE_UNUSED;
2269 regset set ATTRIBUTE_UNUSED;
2270 {
2271 /* Nothing to do here, since we postprocess jumps in
2272 add_branch_dependences. */
2273 }
2274
2275 /* Used in schedule_insns to initialize current_sched_info for scheduling
2276 regions (or single basic blocks). */
2277
2278 static struct sched_info region_sched_info =
2279 {
2280 init_ready_list,
2281 can_schedule_ready_p,
2282 schedule_more_p,
2283 new_ready,
2284 rgn_rank,
2285 rgn_print_insn,
2286 contributes_to_priority,
2287 compute_jump_reg_dependencies,
2288
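/* The remaining members (the rtx fields prev_head, next_tail, head and
   tail, plus the flag bits) have no meaningful static value here; they
   are filled in around each block being scheduled, see schedule_region
   and schedule_block.  */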
2289 NULL, NULL,
2290 NULL, NULL,
2291 0, 0
2292 };
2293
2294 /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register. */
2295
2296 static bool
2297 sets_likely_spilled (pat)
2298 rtx pat;
2299 {
2300 bool ret = false;
2301 note_stores (pat, sets_likely_spilled_1, &ret);
2302 return ret;
2303 }
2304
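/* Helper for sets_likely_spilled, called from note_stores.  Set *DATA
   (a bool) to true if X is a hard register in a class that is likely to
   be spilled and PAT is a SET of it.  */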
2305 static void
2306 sets_likely_spilled_1 (x, pat, data)
2307 rtx x, pat;
2308 void *data;
2309 {
2310 bool *ret = (bool *) data;
2311
2312 if (GET_CODE (pat) == SET
2313 && REG_P (x)
2314 && REGNO (x) < FIRST_PSEUDO_REGISTER
2315 && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
2316 *ret = true;
2317 }
2318
2319 /* Add dependences so that branches are scheduled to run last in their
2320 block. */
2321
2322 static void
2323 add_branch_dependences (head, tail)
2324 rtx head, tail;
2325 {
2326 rtx insn, last;
2327
2328 /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
2329 that can throw exceptions, force them to remain in order at the end of
2330 the block by adding dependencies and giving the last a high priority.
2331 There may be notes present, and prev_head may also be a note.
2332
2333 Branches must obviously remain at the end. Calls should remain at the
2334 end since moving them results in worse register allocation. Uses remain
2335 at the end to ensure proper register allocation.
2336
2337 cc0 setters remain at the end because they can't be moved away from
2338 their cc0 user.
2339
2340 Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
2341 are not moved before reload because we can wind up with register
2342 allocation failures. */
2343
2344 insn = tail;
2345 last = 0;
2346 while (GET_CODE (insn) == CALL_INSN
2347 || GET_CODE (insn) == JUMP_INSN
2348 || (GET_CODE (insn) == INSN
2349 && (GET_CODE (PATTERN (insn)) == USE
2350 || GET_CODE (PATTERN (insn)) == CLOBBER
2351 || can_throw_internal (insn)
2352 #ifdef HAVE_cc0
2353 || sets_cc0_p (PATTERN (insn))
2354 #endif
2355 || (!reload_completed
2356 && sets_likely_spilled (PATTERN (insn)))))
2357 || GET_CODE (insn) == NOTE)
2358 {
2359 if (GET_CODE (insn) != NOTE)
2360 {
2361 if (last != 0 && !find_insn_list (insn, LOG_LINKS (last)))
2362 {
2363 add_dependence (last, insn, REG_DEP_ANTI);
2364 INSN_REF_COUNT (insn)++;
2365 }
2366
2367 CANT_MOVE (insn) = 1;
2368
2369 last = insn;
2370 /* Skip over insns that are part of a group.
2371 Make each insn explicitly depend on the previous insn.
2372 This ensures that only the group header will ever enter
2373 the ready queue (and, when scheduled, will automatically
2374 schedule the SCHED_GROUP_P block). */
2375 while (SCHED_GROUP_P (insn))
2376 {
2377 rtx temp = prev_nonnote_insn (insn);
2378 add_dependence (insn, temp, REG_DEP_ANTI);
2379 insn = temp;
2380 }
2381 }
2382
2383 /* Don't overrun the bounds of the basic block. */
2384 if (insn == head)
2385 break;
2386
2387 insn = PREV_INSN (insn);
2388 }
2389
2390 /* Make sure these insns are scheduled last in their block. */
2391 insn = last;
2392 if (insn != 0)
2393 while (insn != head)
2394 {
2395 insn = prev_nonnote_insn (insn);
2396
2397 if (INSN_REF_COUNT (insn) != 0)
2398 continue;
2399
2400 add_dependence (last, insn, REG_DEP_ANTI);
2401 INSN_REF_COUNT (insn) = 1;
2402
2403 /* Skip over insns that are part of a group. */
2404 while (SCHED_GROUP_P (insn))
2405 insn = prev_nonnote_insn (insn);
2406 }
2407 }
2408
2409 /* Data structures for the computation of data dependences in a region. We
2410 keep one `deps' structure for every basic block. Before analyzing the
2411 data dependences for a bb, its variables are initialized as a function of
2412 the variables of its predecessors. When the analysis for a bb completes,
2413 we save the contents to the corresponding bb_deps[bb] variable. */
2414
2415 static struct deps *bb_deps;
2416
2417 /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD. */
2418
2419 static rtx
2420 concat_INSN_LIST (copy, old)
2421 rtx copy, old;
2422 {
2423 rtx new = old;
2424 for (; copy ; copy = XEXP (copy, 1))
2425 new = alloc_INSN_LIST (XEXP (copy, 0), new);
2426 return new;
2427 }
2428
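/* Prepend the elements of COPY_INSNS and COPY_MEMS, walked in parallel,
   onto *OLD_INSNS_P and *OLD_MEMS_P.  The two copy lists must have the
   same length.  */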
2429 static void
2430 concat_insn_mem_list (copy_insns, copy_mems, old_insns_p, old_mems_p)
2431 rtx copy_insns, copy_mems;
2432 rtx *old_insns_p, *old_mems_p;
2433 {
2434 rtx new_insns = *old_insns_p;
2435 rtx new_mems = *old_mems_p;
2436
2437 while (copy_insns)
2438 {
2439 new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
2440 new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
2441 copy_insns = XEXP (copy_insns, 1);
2442 copy_mems = XEXP (copy_mems, 1);
2443 }
2444
2445 *old_insns_p = new_insns;
2446 *old_mems_p = new_mems;
2447 }
2448
2449 /* After computing the dependencies for block BB, propagate the dependencies
2450 found in PRED_DEPS to the successors of the block. */
2451 static void
2452 propagate_deps (bb, pred_deps)
2453 int bb;
2454 struct deps *pred_deps;
2455 {
2456 int b = BB_TO_BLOCK (bb);
2457 int e, first_edge;
2458
2459 /* bb's structures are inherited by its successors. */
2460 first_edge = e = OUT_EDGES (b);
2461 if (e > 0)
2462 do
2463 {
2464 int b_succ = TO_BLOCK (e);
2465 int bb_succ = BLOCK_TO_BB (b_succ);
2466 struct deps *succ_deps = bb_deps + bb_succ;
2467 int reg;
2468
2469 /* Only bbs "below" bb, in the same region, are interesting. */
2470 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
2471 || bb_succ <= bb)
2472 {
2473 e = NEXT_OUT (e);
2474 continue;
2475 }
2476
2477 /* The reg_last lists are inherited by bb_succ. */
2478 EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg,
2479 {
2480 struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2481 struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2482
2483 succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2484 succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2485 succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2486 succ_rl->clobbers);
2487 succ_rl->uses_length += pred_rl->uses_length;
2488 succ_rl->clobbers_length += pred_rl->clobbers_length;
2489 });
2490 IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2491
2492 /* Mem read/write lists are inherited by bb_succ. */
2493 concat_insn_mem_list (pred_deps->pending_read_insns,
2494 pred_deps->pending_read_mems,
2495 &succ_deps->pending_read_insns,
2496 &succ_deps->pending_read_mems);
2497 concat_insn_mem_list (pred_deps->pending_write_insns,
2498 pred_deps->pending_write_mems,
2499 &succ_deps->pending_write_insns,
2500 &succ_deps->pending_write_mems);
2501
2502 succ_deps->last_pending_memory_flush
2503 = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2504 succ_deps->last_pending_memory_flush);
2505
2506 succ_deps->pending_lists_length += pred_deps->pending_lists_length;
2507 succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2508
2509 /* last_function_call is inherited by bb_succ. */
2510 succ_deps->last_function_call
2511 = concat_INSN_LIST (pred_deps->last_function_call,
2512 succ_deps->last_function_call);
2513
2514 /* sched_before_next_call is inherited by bb_succ. */
2515 succ_deps->sched_before_next_call
2516 = concat_INSN_LIST (pred_deps->sched_before_next_call,
2517 succ_deps->sched_before_next_call);
2518
2519 e = NEXT_OUT (e);
2520 }
2521 while (e != first_edge);
2522
2523 /* These lists should point to the right place, for correct
2524 freeing later. */
2525 bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2526 bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2527 bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2528 bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2529
2530 /* Can't allow these to be freed twice. */
2531 pred_deps->pending_read_insns = 0;
2532 pred_deps->pending_read_mems = 0;
2533 pred_deps->pending_write_insns = 0;
2534 pred_deps->pending_write_mems = 0;
2535 }
2536
2537 /* Compute backward dependences inside bb. In a multiple blocks region:
2538 (1) a bb is analyzed after its predecessors, and (2) the lists in
2539 effect at the end of bb (after analyzing for bb) are inherited by
2540 bb's successors.
2541
2542 Specifically for reg-reg data dependences, the block insns are
2543 scanned by sched_analyze () top-to-bottom. Two lists are
2544 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2545 and reg_last[].uses for register USEs.
2546
2547 When analysis is completed for bb, we update for its successors:
2548 - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2549 - USES[succ] = Union (USES [succ], USES [bb])
2550
2551 The mechanism for computing mem-mem data dependence is very
2552 similar, and the result is interblock dependences in the region. */
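/* A concrete sketch of the reg-reg case: if one block sets a register,
   say r4, and a later block in the region uses r4, propagate_deps
   (above) copies the recorded set of r4 into the successor's reg_last
   lists before that successor is analyzed, so sched_analyze then adds
   the interblock dependence that keeps the use after the set.  */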
2553
2554 static void
2555 compute_block_backward_dependences (bb)
2556 int bb;
2557 {
2558 rtx head, tail;
2559 struct deps tmp_deps;
2560
2561 tmp_deps = bb_deps[bb];
2562
2563 /* Do the analysis for this block. */
2564 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2565 sched_analyze (&tmp_deps, head, tail);
2566 add_branch_dependences (head, tail);
2567
2568 if (current_nr_blocks > 1)
2569 propagate_deps (bb, &tmp_deps);
2570
2571 /* Free up the INSN_LISTs. */
2572 free_deps (&tmp_deps);
2573 }
2574
2575 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2576 them to the unused_*_list variables, so that they can be reused. */
2577
2578 static void
2579 free_pending_lists ()
2580 {
2581 int bb;
2582
2583 for (bb = 0; bb < current_nr_blocks; bb++)
2584 {
2585 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2586 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2587 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2588 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2589 }
2590 }
2591
2592 /* Print dependences for debugging, callable from debugger. */
2593
2594 void
2595 debug_dependencies ()
2596 {
2597 int bb;
2598
2599 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2600 for (bb = 0; bb < current_nr_blocks; bb++)
2601 {
2602 if (1)
2603 {
2604 rtx head, tail;
2605 rtx next_tail;
2606 rtx insn;
2607
2608 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2609 next_tail = NEXT_INSN (tail);
2610 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2611 BB_TO_BLOCK (bb), bb);
2612
2613 if (targetm.sched.use_dfa_pipeline_interface
2614 && (*targetm.sched.use_dfa_pipeline_interface) ())
2615 {
2616 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2617 "insn", "code", "bb", "dep", "prio", "cost",
2618 "reservation");
2619 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2620 "----", "----", "--", "---", "----", "----",
2621 "-----------");
2622 }
2623 else
2624 {
2625 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2626 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
2627 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2628 "----", "----", "--", "---", "----", "----", "--------", "-----");
2629 }
2630
2631 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2632 {
2633 rtx link;
2634
2635 if (! INSN_P (insn))
2636 {
2637 int n;
2638 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2639 if (GET_CODE (insn) == NOTE)
2640 {
2641 n = NOTE_LINE_NUMBER (insn);
2642 if (n < 0)
2643 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2644 else
2645 fprintf (sched_dump, "line %d, file %s\n", n,
2646 NOTE_SOURCE_FILE (insn));
2647 }
2648 else
2649 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2650 continue;
2651 }
2652
2653 if (targetm.sched.use_dfa_pipeline_interface
2654 && (*targetm.sched.use_dfa_pipeline_interface) ())
2655 {
2656 fprintf (sched_dump,
2657 ";; %s%5d%6d%6d%6d%6d%6d ",
2658 (SCHED_GROUP_P (insn) ? "+" : " "),
2659 INSN_UID (insn),
2660 INSN_CODE (insn),
2661 INSN_BB (insn),
2662 INSN_DEP_COUNT (insn),
2663 INSN_PRIORITY (insn),
2664 insn_cost (insn, 0, 0));
2665
2666 if (recog_memoized (insn) < 0)
2667 fprintf (sched_dump, "nothing");
2668 else
2669 print_reservation (sched_dump, insn);
2670 }
2671 else
2672 {
2673 int unit = insn_unit (insn);
2674 int range
2675 = (unit < 0
2676 || function_units[unit].blockage_range_function == 0
2677 ? 0
2678 : function_units[unit].blockage_range_function (insn));
2679 fprintf (sched_dump,
2680 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
2681 (SCHED_GROUP_P (insn) ? "+" : " "),
2682 INSN_UID (insn),
2683 INSN_CODE (insn),
2684 INSN_BB (insn),
2685 INSN_DEP_COUNT (insn),
2686 INSN_PRIORITY (insn),
2687 insn_cost (insn, 0, 0),
2688 (int) MIN_BLOCKAGE_COST (range),
2689 (int) MAX_BLOCKAGE_COST (range));
2690 insn_print_units (insn);
2691 }
2692
2693 fprintf (sched_dump, "\t: ");
2694 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2695 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2696 fprintf (sched_dump, "\n");
2697 }
2698 }
2699 }
2700 fprintf (sched_dump, "\n");
2701 }
2702
2703 /* Schedule a region. A region is either an inner loop, a loop-free
2704 subroutine, or a single basic block. Each bb in the region is
2705 scheduled after its flow predecessors. */
2706
2707 static void
2708 schedule_region (rgn)
2709 int rgn;
2710 {
2711 int bb;
2712 int rgn_n_insns = 0;
2713 int sched_rgn_n_insns = 0;
2714
2715 /* Set variables for the current region. */
2716 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2717 current_blocks = RGN_BLOCKS (rgn);
2718
2719 init_deps_global ();
2720
2721 /* Initializations for region data dependence analysis. */
2722 bb_deps = (struct deps *) xmalloc (sizeof (struct deps) * current_nr_blocks);
2723 for (bb = 0; bb < current_nr_blocks; bb++)
2724 init_deps (bb_deps + bb);
2725
2726 /* Compute LOG_LINKS. */
2727 for (bb = 0; bb < current_nr_blocks; bb++)
2728 compute_block_backward_dependences (bb);
2729
2730 /* Compute INSN_DEPEND. */
2731 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2732 {
2733 rtx head, tail;
2734 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2735
2736 compute_forward_dependences (head, tail);
2737 }
2738
2739 /* Set priorities. */
2740 for (bb = 0; bb < current_nr_blocks; bb++)
2741 {
2742 rtx head, tail;
2743 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2744
2745 rgn_n_insns += set_priorities (head, tail);
2746 }
2747
2748 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2749 if (current_nr_blocks > 1)
2750 {
2751 int i;
2752
2753 prob = (float *) xmalloc ((current_nr_blocks) * sizeof (float));
2754
2755 dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
2756 sbitmap_vector_zero (dom, current_nr_blocks);
2757 /* Edge to bit. */
2758 rgn_nr_edges = 0;
2759 edge_to_bit = (int *) xmalloc (nr_edges * sizeof (int));
2760 for (i = 1; i < nr_edges; i++)
2761 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
2762 EDGE_TO_BIT (i) = rgn_nr_edges++;
2763 rgn_edges = (int *) xmalloc (rgn_nr_edges * sizeof (int));
2764
2765 rgn_nr_edges = 0;
2766 for (i = 1; i < nr_edges; i++)
2767 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
2768 rgn_edges[rgn_nr_edges++] = i;
2769
2770 /* Split edges. */
2771 pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2772 sbitmap_vector_zero (pot_split, current_nr_blocks);
2773 ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2774 sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
2775
2776 /* Compute probabilities, dominators, split_edges. */
2777 for (bb = 0; bb < current_nr_blocks; bb++)
2778 compute_dom_prob_ps (bb);
2779 }
2780
2781 /* Now we can schedule all blocks. */
2782 for (bb = 0; bb < current_nr_blocks; bb++)
2783 {
2784 rtx head, tail;
2785 int b = BB_TO_BLOCK (bb);
2786
2787 get_block_head_tail (b, &head, &tail);
2788
2789 if (no_real_insns_p (head, tail))
2790 continue;
2791
2792 current_sched_info->prev_head = PREV_INSN (head);
2793 current_sched_info->next_tail = NEXT_INSN (tail);
2794
2795 if (write_symbols != NO_DEBUG)
2796 {
2797 save_line_notes (b, head, tail);
2798 rm_line_notes (head, tail);
2799 }
2800
2801 /* rm_other_notes only removes notes which are _inside_ the
2802 block---that is, it won't remove notes before the first real insn
2803 or after the last real insn of the block. So if the first insn
2804 has a REG_SAVE_NOTE which would otherwise be emitted before the
2805 insn, it is redundant with the note before the start of the
2806 block, and so we have to take it out. */
2807 if (INSN_P (head))
2808 {
2809 rtx note;
2810
2811 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2812 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2813 {
2814 remove_note (head, note);
2815 note = XEXP (note, 1);
2816 remove_note (head, note);
2817 }
2818 }
2819
2820 /* Remove remaining note insns from the block, save them in
2821 note_list. These notes are restored at the end of
2822 schedule_block (). */
2823 rm_other_notes (head, tail);
2824
2825 target_bb = bb;
2826
2827 current_sched_info->queue_must_finish_empty
2828 = current_nr_blocks > 1 && !flag_schedule_interblock;
2829
2830 schedule_block (b, rgn_n_insns);
2831 sched_rgn_n_insns += sched_n_insns;
2832
2833 /* Update target block boundaries. */
2834 if (head == BLOCK_HEAD (b))
2835 BLOCK_HEAD (b) = current_sched_info->head;
2836 if (tail == BLOCK_END (b))
2837 BLOCK_END (b) = current_sched_info->tail;
2838
2839 /* Clean up. */
2840 if (current_nr_blocks > 1)
2841 {
2842 free (candidate_table);
2843 free (bblst_table);
2844 free (bitlst_table);
2845 }
2846 }
2847
2848 /* Sanity check: verify that all region insns were scheduled. */
2849 if (sched_rgn_n_insns != rgn_n_insns)
2850 abort ();
2851
2852 /* Restore line notes. */
2853 if (write_symbols != NO_DEBUG)
2854 {
2855 for (bb = 0; bb < current_nr_blocks; bb++)
2856 {
2857 rtx head, tail;
2858 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2859 restore_line_notes (head, tail);
2860 }
2861 }
2862
2863 /* Done with this region. */
2864 free_pending_lists ();
2865
2866 finish_deps_global ();
2867
2868 free (bb_deps);
2869
2870 if (current_nr_blocks > 1)
2871 {
2872 free (prob);
2873 sbitmap_vector_free (dom);
2874 sbitmap_vector_free (pot_split);
2875 sbitmap_vector_free (ancestor_edges);
2876 free (edge_to_bit);
2877 free (rgn_edges);
2878 }
2879 }
2880
2881 /* Indexed by region, holds the number of death notes found in that region.
2882 Used for consistency checks. */
2883 static int *deaths_in_region;
2884
2885 /* Initialize data structures for region scheduling. */
2886
2887 static void
2888 init_regions ()
2889 {
2890 sbitmap blocks;
2891 int rgn;
2892
2893 nr_regions = 0;
2894 rgn_table = (region *) xmalloc ((n_basic_blocks) * sizeof (region));
2895 rgn_bb_table = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2896 block_to_bb = (int *) xmalloc ((last_basic_block) * sizeof (int));
2897 containing_rgn = (int *) xmalloc ((last_basic_block) * sizeof (int));
2898
2899 /* Compute regions for scheduling. */
2900 if (reload_completed
2901 || n_basic_blocks == 1
2902 || !flag_schedule_interblock)
2903 {
2904 find_single_block_region ();
2905 }
2906 else
2907 {
2908 /* Verify that a 'good' control flow graph can be built. */
2909 if (is_cfg_nonregular ())
2910 {
2911 find_single_block_region ();
2912 }
2913 else
2914 {
2915 dominance_info dom;
2916 struct edge_list *edge_list;
2917
2918 /* The scheduler runs after flow; therefore, we can't blindly call
2919 back into find_basic_blocks since doing so could invalidate the
2920 info in global_live_at_start.
2921
2922 Consider a block consisting entirely of dead stores; after life
2923 analysis it would be a block of NOTE_INSN_DELETED notes. If
2924 we call find_basic_blocks again, then the block would be removed
2925 entirely and invalidate our register live information.
2926
2927 We could (should?) recompute register live information. Doing
2928 so may even be beneficial. */
2929 edge_list = create_edge_list ();
2930
2931 /* Compute the dominators and post dominators. */
2932 dom = calculate_dominance_info (CDI_DOMINATORS);
2933
2934 /* build_control_flow will return nonzero if it detects unreachable
2935 blocks or any other irregularity with the cfg which prevents
2936 cross block scheduling. */
2937 if (build_control_flow (edge_list) != 0)
2938 find_single_block_region ();
2939 else
2940 find_rgns (edge_list, dom);
2941
2942 if (sched_verbose >= 3)
2943 debug_regions ();
2944
2945 /* We are done with flow's edge list. */
2946 free_edge_list (edge_list);
2947
2948 /* For now. This will move as more and more of haifa is converted
2949 to using the cfg code in flow.c. */
2950 free_dominance_info (dom);
2951 }
2952 }
2953
2954
2955 if (CHECK_DEAD_NOTES)
2956 {
2957 blocks = sbitmap_alloc (last_basic_block);
2958 deaths_in_region = (int *) xmalloc (sizeof (int) * nr_regions);
2959 /* Remove all death notes from the subroutine. */
2960 for (rgn = 0; rgn < nr_regions; rgn++)
2961 {
2962 int b;
2963
2964 sbitmap_zero (blocks);
2965 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2966 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
2967
2968 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2969 }
2970 sbitmap_free (blocks);
2971 }
2972 else
2973 count_or_remove_death_notes (NULL, 1);
2974 }
2975
2976 /* The one entry point in this file. DUMP_FILE is the dump file for
2977 this pass. */
2978
2979 void
2980 schedule_insns (dump_file)
2981 FILE *dump_file;
2982 {
2983 sbitmap large_region_blocks, blocks;
2984 int rgn;
2985 int any_large_regions;
2986 basic_block bb;
2987
2988 /* Taking care of this degenerate case makes the rest of
2989 this code simpler. */
2990 if (n_basic_blocks == 0)
2991 return;
2992
2993 nr_inter = 0;
2994 nr_spec = 0;
2995
2996 sched_init (dump_file);
2997
2998 init_regions ();
2999
3000 current_sched_info = &region_sched_info;
3001
3002 /* Schedule every region in the subroutine. */
3003 for (rgn = 0; rgn < nr_regions; rgn++)
3004 schedule_region (rgn);
3005
3006 /* Update life analysis for the subroutine. Do single block regions
3007 first so that we can verify that live_at_start didn't change. Then
3008 do all other blocks. */
3009 /* ??? There is an outside possibility that update_life_info, or more
3010 to the point propagate_block, could get called with nonzero flags
3011 more than once for one basic block. This would be kinda bad if it
3012 were to happen, since REG_INFO would be accumulated twice for the
3013 block, and we'd have twice the REG_DEAD notes.
3014
3015 I'm fairly certain that this _shouldn't_ happen, since I don't think
3016 that live_at_start should change at region heads. Not sure what the
3017 best way to test for this kind of thing... */
3018
3019 allocate_reg_life_data ();
3020 compute_bb_for_insn ();
3021
3022 any_large_regions = 0;
3023 large_region_blocks = sbitmap_alloc (last_basic_block);
3024 sbitmap_zero (large_region_blocks);
3025 FOR_EACH_BB (bb)
3026 SET_BIT (large_region_blocks, bb->index);
3027
3028 blocks = sbitmap_alloc (last_basic_block);
3029 sbitmap_zero (blocks);
3030
3031 /* Update life information. For regions consisting of multiple blocks
3032 we've possibly done interblock scheduling that affects global liveness.
3033 For regions consisting of single blocks we need to do only local
3034 liveness. */
3035 for (rgn = 0; rgn < nr_regions; rgn++)
3036 if (RGN_NR_BLOCKS (rgn) > 1)
3037 any_large_regions = 1;
3038 else
3039 {
3040 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3041 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3042 }
3043
3044 /* Don't update reg info after reload, since that affects
3045 regs_ever_live, which should not change after reload. */
3046 update_life_info (blocks, UPDATE_LIFE_LOCAL,
3047 (reload_completed ? PROP_DEATH_NOTES
3048 : PROP_DEATH_NOTES | PROP_REG_INFO));
3049 if (any_large_regions)
3050 {
3051 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
3052 PROP_DEATH_NOTES | PROP_REG_INFO);
3053 }
3054
3055 if (CHECK_DEAD_NOTES)
3056 {
3057 /* Verify the counts of basic block notes in the single basic block
3058 regions. */
3059 for (rgn = 0; rgn < nr_regions; rgn++)
3060 if (RGN_NR_BLOCKS (rgn) == 1)
3061 {
3062 sbitmap_zero (blocks);
3063 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3064
3065 if (deaths_in_region[rgn]
3066 != count_or_remove_death_notes (blocks, 0))
3067 abort ();
3068 }
3069 free (deaths_in_region);
3070 }
3071
3072 /* Reposition the prologue and epilogue notes in case we moved the
3073 prologue/epilogue insns. */
3074 if (reload_completed)
3075 reposition_prologue_and_epilogue_notes (get_insns ());
3076
3077 /* Delete redundant line notes. */
3078 if (write_symbols != NO_DEBUG)
3079 rm_redundant_line_notes ();
3080
3081 if (sched_verbose)
3082 {
3083 if (reload_completed == 0 && flag_schedule_interblock)
3084 {
3085 fprintf (sched_dump,
3086 "\n;; Procedure interblock/speculative motions == %d/%d \n",
3087 nr_inter, nr_spec);
3088 }
3089 else
3090 {
3091 if (nr_inter > 0)
3092 abort ();
3093 }
3094 fprintf (sched_dump, "\n\n");
3095 }
3096
3097 /* Clean up. */
3098 free (rgn_table);
3099 free (rgn_bb_table);
3100 free (block_to_bb);
3101 free (containing_rgn);
3102
3103 sched_finish ();
3104
3105 if (edge_table)
3106 {
3107 free (edge_table);
3108 edge_table = NULL;
3109 }
3110
3111 if (in_edges)
3112 {
3113 free (in_edges);
3114 in_edges = NULL;
3115 }
3116 if (out_edges)
3117 {
3118 free (out_edges);
3119 out_edges = NULL;
3120 }
3121
3122 sbitmap_free (blocks);
3123 sbitmap_free (large_region_blocks);
3124 }
3125 #endif
3126