xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/modulo-sched.c (revision 16dce51364ebe8aeafbae46bc5aa167b8115bc45)
1 /* Swing Modulo Scheduling implementation.
2    Copyright (C) 2004-2015 Free Software Foundation, Inc.
3    Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "tm_p.h"
29 #include "hard-reg-set.h"
30 #include "regs.h"
31 #include "hashtab.h"
32 #include "hash-set.h"
33 #include "vec.h"
34 #include "machmode.h"
35 #include "input.h"
36 #include "function.h"
37 #include "profile.h"
38 #include "flags.h"
39 #include "insn-config.h"
40 #include "insn-attr.h"
41 #include "except.h"
42 #include "recog.h"
43 #include "dominance.h"
44 #include "cfg.h"
45 #include "cfgrtl.h"
46 #include "predict.h"
47 #include "basic-block.h"
48 #include "sched-int.h"
49 #include "target.h"
50 #include "cfgloop.h"
51 #include "double-int.h"
52 #include "alias.h"
53 #include "symtab.h"
54 #include "wide-int.h"
55 #include "inchash.h"
56 #include "tree.h"
57 #include "insn-codes.h"
58 #include "optabs.h"
59 #include "statistics.h"
60 #include "real.h"
61 #include "fixed-value.h"
62 #include "expmed.h"
63 #include "dojump.h"
64 #include "explow.h"
65 #include "calls.h"
66 #include "emit-rtl.h"
67 #include "varasm.h"
68 #include "stmt.h"
69 #include "expr.h"
70 #include "params.h"
71 #include "gcov-io.h"
72 #include "sbitmap.h"
73 #include "df.h"
74 #include "ddg.h"
75 #include "tree-pass.h"
76 #include "dbgcnt.h"
77 #include "loop-unroll.h"
78 
79 #ifdef INSN_SCHEDULING
80 
81 /* This file contains the implementation of the Swing Modulo Scheduler,
82    described in the following references:
83    [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
84        Lifetime--sensitive modulo scheduling in a production environment.
85        IEEE Trans. on Comps., 50(3), March 2001
86    [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
87        Swing Modulo Scheduling: A Lifetime Sensitive Approach.
88        PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
89 
90    The basic structure is:
91    1. Build a data-dependence graph (DDG) for each loop.
92    2. Use the DDG to order the insns of a loop (not in topological order
93       necessarily, but rather) trying to place each insn after all its
94       predecessors _or_ after all its successors.
95    3. Compute MII: a lower bound on the number of cycles to schedule the loop.
96    4. Use the ordering to perform list-scheduling of the loop:
97       1. Set II = MII.  We will try to schedule the loop within II cycles.
98       2. Try to schedule the insns one by one according to the ordering.
99 	 For each insn compute an interval of cycles by considering already-
100 	 scheduled preds and succs (and associated latencies); try to place
101 	 the insn in the cycles of this window checking for potential
102 	 resource conflicts (using the DFA interface).
103 	 Note: this is different from the cycle-scheduling of schedule_insns;
104 	 here the insns are not scheduled monotonically top-down (nor bottom-
105 	 up).
106       3. If failed in scheduling all insns - bump II++ and try again, unless
107 	 II reaches an upper bound MaxII, in which case report failure.
108    5. If we succeeded in scheduling the loop within II cycles, we now
109       generate prolog and epilog, decrease the counter of the loop, and
110       perform modulo variable expansion for live ranges that span more than
111       II cycles (i.e. use register copies to prevent a def from overwriting
112       itself before reaching the use).
113 
114     SMS works with countable loops (1) whose control part can be easily
115     decoupled from the rest of the loop and (2) whose loop count can
116     be easily adjusted.  This is because we peel a constant number of
117     iterations into a prologue and epilogue for which we want to avoid
118     emitting the control part, and a kernel which is to iterate that
119     constant number of iterations less than the original loop.  So the
120     control part should be a set of insns clearly identified and having
121     its own iv, not otherwise used in the loop (at-least for now), which
122     initializes a register before the loop to the number of iterations.
123     Currently SMS relies on the do-loop pattern to recognize such loops,
124     where (1) the control part comprises of all insns defining and/or
125     using a certain 'count' register and (2) the loop count can be
126     adjusted by modifying this register prior to the loop.
127     TODO: Rely on cfgloop analysis instead.  */
128 
129 /* This page defines partial-schedule structures and functions for
130    modulo scheduling.  */
131 
132 typedef struct partial_schedule *partial_schedule_ptr;
133 typedef struct ps_insn *ps_insn_ptr;
134 
135 /* The minimum (absolute) cycle that a node of ps was scheduled in.  */
136 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
137 
138 /* The maximum (absolute) cycle that a node of ps was scheduled in.  */
139 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
140 
141 /* Perform signed modulo, always returning a non-negative value.  */
142 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
143 
144 /* The number of different iterations the nodes in ps span, assuming
145    the stage boundaries are placed efficiently.  */
146 #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
147                          + 1 + ii - 1) / ii)
148 /* The stage count of ps.  */
149 #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
150 
151 /* A single instruction in the partial schedule.  */
152 struct ps_insn
153 {
154   /* Identifies the instruction to be scheduled.  Values smaller than
155      the ddg's num_nodes refer directly to ddg nodes.  A value of
156      X - num_nodes refers to register move X.  */
157   int id;
158 
159   /* The (absolute) cycle in which the PS instruction is scheduled.
160      Same as SCHED_TIME (node).  */
161   int cycle;
162 
163   /* The next/prev PS_INSN in the same row.  */
164   ps_insn_ptr next_in_row,
165 	      prev_in_row;
166 
167 };
168 
169 /* Information about a register move that has been added to a partial
170    schedule.  */
171 struct ps_reg_move_info
172 {
173   /* The source of the move is defined by the ps_insn with id DEF.
174      The destination is used by the ps_insns with the ids in USES.  */
175   int def;
176   sbitmap uses;
177 
178   /* The original form of USES' instructions used OLD_REG, but they
179      should now use NEW_REG.  */
180   rtx old_reg;
181   rtx new_reg;
182 
183   /* The number of consecutive stages that the move occupies.  */
184   int num_consecutive_stages;
185 
186   /* An instruction that sets NEW_REG to the correct value.  The first
187      move associated with DEF will have an rhs of OLD_REG; later moves
188      use the result of the previous move.  */
189   rtx_insn *insn;
190 };
191 
192 typedef struct ps_reg_move_info ps_reg_move_info;
193 
194 /* Holds the partial schedule as an array of II rows.  Each entry of the
195    array points to a linked list of PS_INSNs, which represents the
196    instructions that are scheduled for that row.  */
197 struct partial_schedule
198 {
199   int ii;	/* Number of rows in the partial schedule.  */
200   int history;  /* Threshold for conflict checking using DFA.  */
201 
202   /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
203   ps_insn_ptr *rows;
204 
205   /* All the moves added for this partial schedule.  Index X has
206      a ps_insn id of X + g->num_nodes.  */
207   vec<ps_reg_move_info> reg_moves;
208 
209   /*  rows_length[i] holds the number of instructions in the row.
210       It is used only (as an optimization) to back off quickly from
211       trying to schedule a node in a full row; that is, to avoid running
212       through futile DFA state transitions.  */
213   int *rows_length;
214 
215   /* The earliest absolute cycle of an insn in the partial schedule.  */
216   int min_cycle;
217 
218   /* The latest absolute cycle of an insn in the partial schedule.  */
219   int max_cycle;
220 
221   ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */
222 
223   int stage_count;  /* The stage count of the partial schedule.  */
224 };
225 
226 
227 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
228 static void free_partial_schedule (partial_schedule_ptr);
229 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
230 void print_partial_schedule (partial_schedule_ptr, FILE *);
231 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
232 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
233 						int, int, sbitmap, sbitmap);
234 static void rotate_partial_schedule (partial_schedule_ptr, int);
235 void set_row_column_for_ps (partial_schedule_ptr);
236 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
237 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
238 
239 
240 /* This page defines constants and structures for the modulo scheduling
241    driver.  */
242 
243 static int sms_order_nodes (ddg_ptr, int, int *, int *);
244 static void set_node_sched_params (ddg_ptr);
245 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
246 static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
247 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
248                                     rtx, rtx);
249 static int calculate_stage_count (partial_schedule_ptr, int);
250 static void calculate_must_precede_follow (ddg_node_ptr, int, int,
251 					   int, int, sbitmap, sbitmap, sbitmap);
252 static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
253 			     sbitmap, int, int *, int *, int *);
254 static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
255 					  sbitmap, int *, sbitmap, sbitmap);
256 static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
257 
258 #define NODE_ASAP(node) ((node)->aux.count)
259 
260 #define SCHED_PARAMS(x) (&node_sched_param_vec[x])
261 #define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
262 #define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
263 #define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
264 #define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
265 
266 /* The scheduling parameters held for each node.  */
267 typedef struct node_sched_params
268 {
269   int time;	/* The absolute scheduling cycle.  */
270 
271   int row;    /* Holds time % ii.  */
272   int stage;  /* Holds time / ii.  */
273 
274   /* The column of a node inside the ps.  If nodes u, v are on the same row,
275      u will precede v if column (u) < column (v).  */
276   int column;
277 } *node_sched_params_ptr;
278 
279 typedef struct node_sched_params node_sched_params;
280 
281 /* The following three functions are copied from the current scheduler
282    code in order to use sched_analyze() for computing the dependencies.
283    They are used when initializing the sched_info structure.  */
284 static const char *
285 sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED)
286 {
287   static char tmp[80];
288 
289   sprintf (tmp, "i%4d", INSN_UID (insn));
290   return tmp;
291 }
292 
293 static void
294 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
295 			       regset used ATTRIBUTE_UNUSED)
296 {
297 }
298 
299 static struct common_sched_info_def sms_common_sched_info;
300 
301 static struct sched_deps_info_def sms_sched_deps_info =
302   {
303     compute_jump_reg_dependencies,
304     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
305     NULL,
306     0, 0, 0
307   };
308 
309 static struct haifa_sched_info sms_sched_info =
310 {
311   NULL,
312   NULL,
313   NULL,
314   NULL,
315   NULL,
316   sms_print_insn,
317   NULL,
318   NULL, /* insn_finishes_block_p */
319   NULL, NULL,
320   NULL, NULL,
321   0, 0,
322 
323   NULL, NULL, NULL, NULL,
324   NULL, NULL,
325   0
326 };
327 
328 /* Partial schedule instruction ID in PS is a register move.  Return
329    information about it.  */
330 static struct ps_reg_move_info *
331 ps_reg_move (partial_schedule_ptr ps, int id)
332 {
333   gcc_checking_assert (id >= ps->g->num_nodes);
334   return &ps->reg_moves[id - ps->g->num_nodes];
335 }
336 
337 /* Return the rtl instruction that is being scheduled by partial schedule
338    instruction ID, which belongs to schedule PS.  */
339 static rtx_insn *
340 ps_rtl_insn (partial_schedule_ptr ps, int id)
341 {
342   if (id < ps->g->num_nodes)
343     return ps->g->nodes[id].insn;
344   else
345     return ps_reg_move (ps, id)->insn;
346 }
347 
348 /* Partial schedule instruction ID, which belongs to PS, occurred in
349    the original (unscheduled) loop.  Return the first instruction
350    in the loop that was associated with ps_rtl_insn (PS, ID).
351    If the instruction had some notes before it, this is the first
352    of those notes.  */
353 static rtx_insn *
354 ps_first_note (partial_schedule_ptr ps, int id)
355 {
356   gcc_assert (id < ps->g->num_nodes);
357   return ps->g->nodes[id].first_note;
358 }
359 
360 /* Return the number of consecutive stages that are occupied by
361    partial schedule instruction ID in PS.  */
362 static int
363 ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
364 {
365   if (id < ps->g->num_nodes)
366     return 1;
367   else
368     return ps_reg_move (ps, id)->num_consecutive_stages;
369 }
370 
371 /* Given HEAD and TAIL which are the first and last insns in a loop;
372    return the register which controls the loop.  Return zero if it has
373    more than one occurrence in the loop besides the control part or the
374    do-loop pattern is not of the form we expect.  */
375 static rtx
376 doloop_register_get (rtx_insn *head ATTRIBUTE_UNUSED, rtx_insn *tail ATTRIBUTE_UNUSED)
377 {
378 #ifdef HAVE_doloop_end
379   rtx reg, condition;
380   rtx_insn *insn, *first_insn_not_to_check;
381 
382   if (!JUMP_P (tail))
383     return NULL_RTX;
384 
385   /* TODO: Free SMS's dependence on doloop_condition_get.  */
386   condition = doloop_condition_get (tail);
387   if (! condition)
388     return NULL_RTX;
389 
390   if (REG_P (XEXP (condition, 0)))
391     reg = XEXP (condition, 0);
392   else if (GET_CODE (XEXP (condition, 0)) == PLUS
393 	   && REG_P (XEXP (XEXP (condition, 0), 0)))
394     reg = XEXP (XEXP (condition, 0), 0);
395   else
396     gcc_unreachable ();
397 
398   /* Check that the COUNT_REG has no other occurrences in the loop
399      until the decrement.  We assume the control part consists of
400      either a single (parallel) branch-on-count or a (non-parallel)
401      branch immediately preceded by a single (decrement) insn.  */
402   first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
403                              : prev_nondebug_insn (tail));
404 
405   for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
406     if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
407       {
408         if (dump_file)
409         {
410           fprintf (dump_file, "SMS count_reg found ");
411           print_rtl_single (dump_file, reg);
412           fprintf (dump_file, " outside control in insn:\n");
413           print_rtl_single (dump_file, insn);
414         }
415 
416         return NULL_RTX;
417       }
418 
419   return reg;
420 #else
421   return NULL_RTX;
422 #endif
423 }
424 
425 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
426    that the number of iterations is a compile-time constant.  If so,
427    return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to
428    this constant.  Otherwise return 0.  */
429 static rtx_insn *
430 const_iteration_count (rtx count_reg, basic_block pre_header,
431 		       int64_t * count)
432 {
433   rtx_insn *insn;
434   rtx_insn *head, *tail;
435 
436   if (! pre_header)
437     return NULL;
438 
439   get_ebb_head_tail (pre_header, pre_header, &head, &tail);
440 
441   for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
442     if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
443 	rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
444       {
445 	rtx pat = single_set (insn);
446 
447 	if (CONST_INT_P (SET_SRC (pat)))
448 	  {
449 	    *count = INTVAL (SET_SRC (pat));
450 	    return insn;
451 	  }
452 
453 	return NULL;
454       }
455 
456   return NULL;
457 }
458 
459 /* A very simple resource-based lower bound on the initiation interval.
460    ??? Improve the accuracy of this bound by considering the
461    utilization of various units.  */
462 static int
463 res_MII (ddg_ptr g)
464 {
465   if (targetm.sched.sms_res_mii)
466     return targetm.sched.sms_res_mii (g);
467 
468   return ((g->num_nodes - g->num_debug) / issue_rate);
469 }
470 
471 
472 /* A vector that contains the sched data for each ps_insn.  */
473 static vec<node_sched_params> node_sched_param_vec;
474 
475 /* Allocate sched_params for each node and initialize it.  */
476 static void
477 set_node_sched_params (ddg_ptr g)
478 {
479   node_sched_param_vec.truncate (0);
480   node_sched_param_vec.safe_grow_cleared (g->num_nodes);
481 }
482 
483 /* Make sure that node_sched_param_vec has an entry for every move in PS.  */
484 static void
485 extend_node_sched_params (partial_schedule_ptr ps)
486 {
487   node_sched_param_vec.safe_grow_cleared (ps->g->num_nodes
488 					  + ps->reg_moves.length ());
489 }
490 
491 /* Update the sched_params (time, row and stage) for node U using the II,
492    the CYCLE of U and MIN_CYCLE.
493    We're not simply taking the following
494    SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
495    because the stages may not be aligned on cycle 0.  */
496 static void
497 update_node_sched_params (int u, int ii, int cycle, int min_cycle)
498 {
499   int sc_until_cycle_zero;
500   int stage;
501 
502   SCHED_TIME (u) = cycle;
503   SCHED_ROW (u) = SMODULO (cycle, ii);
504 
505   /* The calculation of stage count is done adding the number
506      of stages before cycle zero and after cycle zero.  */
507   sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
508 
509   if (SCHED_TIME (u) < 0)
510     {
511       stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
512       SCHED_STAGE (u) = sc_until_cycle_zero - stage;
513     }
514   else
515     {
516       stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
517       SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
518     }
519 }
520 
521 static void
522 print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
523 {
524   int i;
525 
526   if (! file)
527     return;
528   for (i = 0; i < num_nodes; i++)
529     {
530       node_sched_params_ptr nsp = SCHED_PARAMS (i);
531 
532       fprintf (file, "Node = %d; INSN = %d\n", i,
533 	       INSN_UID (ps_rtl_insn (ps, i)));
534       fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
535       fprintf (file, " time = %d:\n", nsp->time);
536       fprintf (file, " stage = %d:\n", nsp->stage);
537     }
538 }
539 
540 /* Set SCHED_COLUMN for each instruction in row ROW of PS.  */
541 static void
542 set_columns_for_row (partial_schedule_ptr ps, int row)
543 {
544   ps_insn_ptr cur_insn;
545   int column;
546 
547   column = 0;
548   for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
549     SCHED_COLUMN (cur_insn->id) = column++;
550 }
551 
552 /* Set SCHED_COLUMN for each instruction in PS.  */
553 static void
554 set_columns_for_ps (partial_schedule_ptr ps)
555 {
556   int row;
557 
558   for (row = 0; row < ps->ii; row++)
559     set_columns_for_row (ps, row);
560 }
561 
562 /* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
563    Its single predecessor has already been scheduled, as has its
564    ddg node successors.  (The move may have also another move as its
565    successor, in which case that successor will be scheduled later.)
566 
567    The move is part of a chain that satisfies register dependencies
568    between a producing ddg node and various consuming ddg nodes.
569    If some of these dependencies have a distance of 1 (meaning that
570    the use is upward-exposed) then DISTANCE1_USES is nonnull and
571    contains the set of uses with distance-1 dependencies.
572    DISTANCE1_USES is null otherwise.
573 
574    MUST_FOLLOW is a scratch bitmap that is big enough to hold
575    all current ps_insn ids.
576 
577    Return true on success.  */
578 static bool
579 schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
580 		   sbitmap distance1_uses, sbitmap must_follow)
581 {
582   unsigned int u;
583   int this_time, this_distance, this_start, this_end, this_latency;
584   int start, end, c, ii;
585   sbitmap_iterator sbi;
586   ps_reg_move_info *move;
587   rtx_insn *this_insn;
588   ps_insn_ptr psi;
589 
590   move = ps_reg_move (ps, i_reg_move);
591   ii = ps->ii;
592   if (dump_file)
593     {
594       fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
595 	       ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
596 	       PS_MIN_CYCLE (ps));
597       print_rtl_single (dump_file, move->insn);
598       fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
599       fprintf (dump_file, "=========== =========== =====\n");
600     }
601 
602   start = INT_MIN;
603   end = INT_MAX;
604 
605   /* For dependencies of distance 1 between a producer ddg node A
606      and consumer ddg node B, we have a chain of dependencies:
607 
608         A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
609 
610      where Mi is the ith move.  For dependencies of distance 0 between
611      a producer ddg node A and consumer ddg node C, we have a chain of
612      dependencies:
613 
614         A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
615 
616      where Mi' occupies the same position as Mi but occurs a stage later.
617      We can only schedule each move once, so if we have both types of
618      chain, we model the second as:
619 
620         A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
621 
622      First handle the dependencies between the previously-scheduled
623      predecessor and the move.  */
624   this_insn = ps_rtl_insn (ps, move->def);
625   this_latency = insn_latency (this_insn, move->insn);
626   this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
627   this_time = SCHED_TIME (move->def) - this_distance * ii;
628   this_start = this_time + this_latency;
629   this_end = this_time + ii;
630   if (dump_file)
631     fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
632 	     this_start, this_end, SCHED_TIME (move->def),
633 	     INSN_UID (this_insn), this_latency, this_distance,
634 	     INSN_UID (move->insn));
635 
636   if (start < this_start)
637     start = this_start;
638   if (end > this_end)
639     end = this_end;
640 
641   /* Handle the dependencies between the move and previously-scheduled
642      successors.  */
643   EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi)
644     {
645       this_insn = ps_rtl_insn (ps, u);
646       this_latency = insn_latency (move->insn, this_insn);
647       if (distance1_uses && !bitmap_bit_p (distance1_uses, u))
648 	this_distance = -1;
649       else
650 	this_distance = 0;
651       this_time = SCHED_TIME (u) + this_distance * ii;
652       this_start = this_time - ii;
653       this_end = this_time - this_latency;
654       if (dump_file)
655 	fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
656 		 this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
657 		 this_latency, this_distance, INSN_UID (this_insn));
658 
659       if (start < this_start)
660 	start = this_start;
661       if (end > this_end)
662 	end = this_end;
663     }
664 
665   if (dump_file)
666     {
667       fprintf (dump_file, "----------- ----------- -----\n");
668       fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
669     }
670 
671   bitmap_clear (must_follow);
672   bitmap_set_bit (must_follow, move->def);
673 
674   start = MAX (start, end - (ii - 1));
675   for (c = end; c >= start; c--)
676     {
677       psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
678 					 move->uses, must_follow);
679       if (psi)
680 	{
681 	  update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
682 	  if (dump_file)
683 	    fprintf (dump_file, "\nScheduled register move INSN %d at"
684 		     " time %d, row %d\n\n", INSN_UID (move->insn), c,
685 		     SCHED_ROW (i_reg_move));
686 	  return true;
687 	}
688     }
689 
690   if (dump_file)
691     fprintf (dump_file, "\nNo available slot\n\n");
692 
693   return false;
694 }
695 
696 /*
697    Breaking intra-loop register anti-dependences:
698    Each intra-loop register anti-dependence implies a cross-iteration true
699    dependence of distance 1. Therefore, we can remove such false dependencies
700    and figure out if the partial schedule broke them by checking if (for a
701    true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
702    if so generate a register move.   The number of such moves is equal to:
703               SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
704    nreg_moves = ----------------------------------- + 1 - {   dependence.
705                             ii                          { 1 if not.
706 */
707 static bool
708 schedule_reg_moves (partial_schedule_ptr ps)
709 {
710   ddg_ptr g = ps->g;
711   int ii = ps->ii;
712   int i;
713 
714   for (i = 0; i < g->num_nodes; i++)
715     {
716       ddg_node_ptr u = &g->nodes[i];
717       ddg_edge_ptr e;
718       int nreg_moves = 0, i_reg_move;
719       rtx prev_reg, old_reg;
720       int first_move;
721       int distances[2];
722       sbitmap must_follow;
723       sbitmap distance1_uses;
724       rtx set = single_set (u->insn);
725 
726       /* Skip instructions that do not set a register.  */
727       if ((set && !REG_P (SET_DEST (set))))
728         continue;
729 
730       /* Compute the number of reg_moves needed for u, by looking at life
731 	 ranges started at u (excluding self-loops).  */
732       distances[0] = distances[1] = false;
733       for (e = u->out; e; e = e->next_out)
734 	if (e->type == TRUE_DEP && e->dest != e->src)
735 	  {
736 	    int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
737 				- SCHED_TIME (e->src->cuid)) / ii;
738 
739             if (e->distance == 1)
740               nreg_moves4e = (SCHED_TIME (e->dest->cuid)
741 			      - SCHED_TIME (e->src->cuid) + ii) / ii;
742 
743 	    /* If dest precedes src in the schedule of the kernel, then dest
744 	       will read before src writes and we can save one reg_copy.  */
745 	    if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
746 		&& SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
747 	      nreg_moves4e--;
748 
749             if (nreg_moves4e >= 1)
750 	      {
751 		/* !single_set instructions are not supported yet and
752 		   thus we do not except to encounter them in the loop
753 		   except from the doloop part.  For the latter case
754 		   we assume no regmoves are generated as the doloop
755 		   instructions are tied to the branch with an edge.  */
756 		gcc_assert (set);
757 		/* If the instruction contains auto-inc register then
758 		   validate that the regmov is being generated for the
759 		   target regsiter rather then the inc'ed register.	*/
760 		gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
761 	      }
762 
763 	    if (nreg_moves4e)
764 	      {
765 		gcc_assert (e->distance < 2);
766 		distances[e->distance] = true;
767 	      }
768 	    nreg_moves = MAX (nreg_moves, nreg_moves4e);
769 	  }
770 
771       if (nreg_moves == 0)
772 	continue;
773 
774       /* Create NREG_MOVES register moves.  */
775       first_move = ps->reg_moves.length ();
776       ps->reg_moves.safe_grow_cleared (first_move + nreg_moves);
777       extend_node_sched_params (ps);
778 
779       /* Record the moves associated with this node.  */
780       first_move += ps->g->num_nodes;
781 
782       /* Generate each move.  */
783       old_reg = prev_reg = SET_DEST (single_set (u->insn));
784       for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
785 	{
786 	  ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
787 
788 	  move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
789 	  move->uses = sbitmap_alloc (first_move + nreg_moves);
790 	  move->old_reg = old_reg;
791 	  move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
792 	  move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
793 	  move->insn = as_a <rtx_insn *> (gen_move_insn (move->new_reg,
794 							 copy_rtx (prev_reg)));
795 	  bitmap_clear (move->uses);
796 
797 	  prev_reg = move->new_reg;
798 	}
799 
800       distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
801 
802       if (distance1_uses)
803 	bitmap_clear (distance1_uses);
804 
805       /* Every use of the register defined by node may require a different
806 	 copy of this register, depending on the time the use is scheduled.
807 	 Record which uses require which move results.  */
808       for (e = u->out; e; e = e->next_out)
809 	if (e->type == TRUE_DEP && e->dest != e->src)
810 	  {
811 	    int dest_copy = (SCHED_TIME (e->dest->cuid)
812 			     - SCHED_TIME (e->src->cuid)) / ii;
813 
814 	    if (e->distance == 1)
815 	      dest_copy = (SCHED_TIME (e->dest->cuid)
816 			   - SCHED_TIME (e->src->cuid) + ii) / ii;
817 
818 	    if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
819 		&& SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
820 	      dest_copy--;
821 
822 	    if (dest_copy)
823 	      {
824 		ps_reg_move_info *move;
825 
826 		move = ps_reg_move (ps, first_move + dest_copy - 1);
827 		bitmap_set_bit (move->uses, e->dest->cuid);
828 		if (e->distance == 1)
829 		  bitmap_set_bit (distance1_uses, e->dest->cuid);
830 	      }
831 	  }
832 
833       must_follow = sbitmap_alloc (first_move + nreg_moves);
834       for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
835 	if (!schedule_reg_move (ps, first_move + i_reg_move,
836 				distance1_uses, must_follow))
837 	  break;
838       sbitmap_free (must_follow);
839       if (distance1_uses)
840 	sbitmap_free (distance1_uses);
841       if (i_reg_move < nreg_moves)
842 	return false;
843     }
844   return true;
845 }
846 
847 /* Emit the moves associatied with PS.  Apply the substitutions
848    associated with them.  */
849 static void
850 apply_reg_moves (partial_schedule_ptr ps)
851 {
852   ps_reg_move_info *move;
853   int i;
854 
855   FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
856     {
857       unsigned int i_use;
858       sbitmap_iterator sbi;
859 
860       EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi)
861 	{
862 	  replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
863 	  df_insn_rescan (ps->g->nodes[i_use].insn);
864 	}
865     }
866 }
867 
868 /* Bump the SCHED_TIMEs of all nodes by AMOUNT.  Set the values of
869    SCHED_ROW and SCHED_STAGE.  Instruction scheduled on cycle AMOUNT
870    will move to cycle zero.  */
871 static void
872 reset_sched_times (partial_schedule_ptr ps, int amount)
873 {
874   int row;
875   int ii = ps->ii;
876   ps_insn_ptr crr_insn;
877 
878   for (row = 0; row < ii; row++)
879     for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
880       {
881 	int u = crr_insn->id;
882 	int normalized_time = SCHED_TIME (u) - amount;
883 	int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
884 
885         if (dump_file)
886           {
887             /* Print the scheduling times after the rotation.  */
888 	    rtx_insn *insn = ps_rtl_insn (ps, u);
889 
890             fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
891                      "crr_insn->cycle=%d, min_cycle=%d", u,
892                      INSN_UID (insn), normalized_time, new_min_cycle);
893             if (JUMP_P (insn))
894               fprintf (dump_file, " (branch)");
895             fprintf (dump_file, "\n");
896           }
897 
898 	gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
899 	gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
900 
901 	crr_insn->cycle = normalized_time;
902 	update_node_sched_params (u, ii, normalized_time, new_min_cycle);
903       }
904 }
905 
906 /* Permute the insns according to their order in PS, from row 0 to
907    row ii-1, and position them right before LAST.  This schedules
908    the insns of the loop kernel.  */
909 static void
910 permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last)
911 {
912   int ii = ps->ii;
913   int row;
914   ps_insn_ptr ps_ij;
915 
916   for (row = 0; row < ii ; row++)
917     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
918       {
919 	rtx_insn *insn = ps_rtl_insn (ps, ps_ij->id);
920 
921 	if (PREV_INSN (last) != insn)
922 	  {
923 	    if (ps_ij->id < ps->g->num_nodes)
924 	      reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
925 				  PREV_INSN (last));
926 	    else
927 	      add_insn_before (insn, last, NULL);
928 	  }
929       }
930 }
931 
932 /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
933    respectively only if cycle C falls on the border of the scheduling
934    window boundaries marked by START and END cycles.  STEP is the
935    direction of the window.  */
936 static inline void
937 set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
938 			 sbitmap *tmp_precede, sbitmap must_precede, int c,
939 			 int start, int end, int step)
940 {
941   *tmp_precede = NULL;
942   *tmp_follow = NULL;
943 
944   if (c == start)
945     {
946       if (step == 1)
947 	*tmp_precede = must_precede;
948       else			/* step == -1.  */
949 	*tmp_follow = must_follow;
950     }
951   if (c == end - step)
952     {
953       if (step == 1)
954 	*tmp_follow = must_follow;
955       else			/* step == -1.  */
956 	*tmp_precede = must_precede;
957     }
958 
959 }
960 
961 /* Return True if the branch can be moved to row ii-1 while
962    normalizing the partial schedule PS to start from cycle zero and thus
963    optimize the SC.  Otherwise return False.  */
964 static bool
965 optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
966 {
967   int amount = PS_MIN_CYCLE (ps);
968   sbitmap sched_nodes = sbitmap_alloc (g->num_nodes);
969   int start, end, step;
970   int ii = ps->ii;
971   bool ok = false;
972   int stage_count, stage_count_curr;
973 
974   /* Compare the SC after normalization and SC after bringing the branch
975      to row ii-1.  If they are equal just bail out.  */
976   stage_count = calculate_stage_count (ps, amount);
977   stage_count_curr =
978     calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
979 
980   if (stage_count == stage_count_curr)
981     {
982       if (dump_file)
983 	fprintf (dump_file, "SMS SC already optimized.\n");
984 
985       ok = false;
986       goto clear;
987     }
988 
989   if (dump_file)
990     {
991       fprintf (dump_file, "SMS Trying to optimize branch location\n");
992       fprintf (dump_file, "SMS partial schedule before trial:\n");
993       print_partial_schedule (ps, dump_file);
994     }
995 
996   /* First, normalize the partial scheduling.  */
997   reset_sched_times (ps, amount);
998   rotate_partial_schedule (ps, amount);
999   if (dump_file)
1000     {
1001       fprintf (dump_file,
1002 	       "SMS partial schedule after normalization (ii, %d, SC %d):\n",
1003 	       ii, stage_count);
1004       print_partial_schedule (ps, dump_file);
1005     }
1006 
1007   if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
1008     {
1009       ok = true;
1010       goto clear;
1011     }
1012 
1013   bitmap_ones (sched_nodes);
1014 
1015   /* Calculate the new placement of the branch.  It should be in row
1016      ii-1 and fall into it's scheduling window.  */
1017   if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
1018 			&step, &end) == 0)
1019     {
1020       bool success;
1021       ps_insn_ptr next_ps_i;
1022       int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
1023       int row = SMODULO (branch_cycle, ps->ii);
1024       int num_splits = 0;
1025       sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
1026       int min_cycle, c;
1027 
1028       if (dump_file)
1029 	fprintf (dump_file, "\nTrying to schedule node %d "
1030 		 "INSN = %d  in (%d .. %d) step %d\n",
1031 		 g->closing_branch->cuid,
1032 		 (INSN_UID (g->closing_branch->insn)), start, end, step);
1033 
1034       gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
1035       if (step == 1)
1036 	{
1037 	  c = start + ii - SMODULO (start, ii) - 1;
1038 	  gcc_assert (c >= start);
1039 	  if (c >= end)
1040 	    {
1041 	      ok = false;
1042 	      if (dump_file)
1043 		fprintf (dump_file,
1044 			 "SMS failed to schedule branch at cycle: %d\n", c);
1045 	      goto clear;
1046 	    }
1047 	}
1048       else
1049 	{
1050 	  c = start - SMODULO (start, ii) - 1;
1051 	  gcc_assert (c <= start);
1052 
1053 	  if (c <= end)
1054 	    {
1055 	      if (dump_file)
1056 		fprintf (dump_file,
1057 			 "SMS failed to schedule branch at cycle: %d\n", c);
1058 	      ok = false;
1059 	      goto clear;
1060 	    }
1061 	}
1062 
1063       must_precede = sbitmap_alloc (g->num_nodes);
1064       must_follow = sbitmap_alloc (g->num_nodes);
1065 
1066       /* Try to schedule the branch is it's new cycle.  */
1067       calculate_must_precede_follow (g->closing_branch, start, end,
1068 				     step, ii, sched_nodes,
1069 				     must_precede, must_follow);
1070 
1071       set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1072 			       must_precede, c, start, end, step);
1073 
1074       /* Find the element in the partial schedule related to the closing
1075          branch so we can remove it from it's current cycle.  */
1076       for (next_ps_i = ps->rows[row];
1077 	   next_ps_i; next_ps_i = next_ps_i->next_in_row)
1078 	if (next_ps_i->id == g->closing_branch->cuid)
1079 	  break;
1080 
1081       min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1082       remove_node_from_ps (ps, next_ps_i);
1083       success =
1084 	try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
1085 				      sched_nodes, &num_splits,
1086 				      tmp_precede, tmp_follow);
1087       gcc_assert (num_splits == 0);
1088       if (!success)
1089 	{
1090 	  if (dump_file)
1091 	    fprintf (dump_file,
1092 		     "SMS failed to schedule branch at cycle: %d, "
1093 		     "bringing it back to cycle %d\n", c, branch_cycle);
1094 
1095 	  /* The branch was failed to be placed in row ii - 1.
1096 	     Put it back in it's original place in the partial
1097 	     schedualing.  */
1098 	  set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1099 				   must_precede, branch_cycle, start, end,
1100 				   step);
1101 	  success =
1102 	    try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
1103 					  branch_cycle, sched_nodes,
1104 					  &num_splits, tmp_precede,
1105 					  tmp_follow);
1106 	  gcc_assert (success && (num_splits == 0));
1107 	  ok = false;
1108 	}
1109       else
1110 	{
1111 	  /* The branch is placed in row ii - 1.  */
1112 	  if (dump_file)
1113 	    fprintf (dump_file,
1114 		     "SMS success in moving branch to cycle %d\n", c);
1115 
1116 	  update_node_sched_params (g->closing_branch->cuid, ii, c,
1117 				    PS_MIN_CYCLE (ps));
1118 	  ok = true;
1119 	}
1120 
1121       /* This might have been added to a new first stage.  */
1122       if (PS_MIN_CYCLE (ps) < min_cycle)
1123 	reset_sched_times (ps, 0);
1124 
1125       free (must_precede);
1126       free (must_follow);
1127     }
1128 
1129 clear:
1130   free (sched_nodes);
1131   return ok;
1132 }
1133 
1134 static void
1135 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
1136 			   int to_stage, rtx count_reg)
1137 {
1138   int row;
1139   ps_insn_ptr ps_ij;
1140 
1141   for (row = 0; row < ps->ii; row++)
1142     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
1143       {
1144 	int u = ps_ij->id;
1145 	int first_u, last_u;
1146 	rtx_insn *u_insn;
1147 
1148         /* Do not duplicate any insn which refers to count_reg as it
1149            belongs to the control part.
1150            The closing branch is scheduled as well and thus should
1151            be ignored.
1152            TODO: This should be done by analyzing the control part of
1153            the loop.  */
1154 	u_insn = ps_rtl_insn (ps, u);
1155         if (reg_mentioned_p (count_reg, u_insn)
1156             || JUMP_P (u_insn))
1157           continue;
1158 
1159 	first_u = SCHED_STAGE (u);
1160 	last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
1161 	if (from_stage <= last_u && to_stage >= first_u)
1162 	  {
1163 	    if (u < ps->g->num_nodes)
1164 	      duplicate_insn_chain (ps_first_note (ps, u), u_insn);
1165 	    else
1166 	      emit_insn (copy_rtx (PATTERN (u_insn)));
1167 	  }
1168       }
1169 }
1170 
1171 
1172 /* Generate the instructions (including reg_moves) for prolog & epilog.  */
1173 static void
1174 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
1175                         rtx count_reg, rtx count_init)
1176 {
1177   int i;
1178   int last_stage = PS_STAGE_COUNT (ps) - 1;
1179   edge e;
1180 
1181   /* Generate the prolog, inserting its insns on the loop-entry edge.  */
1182   start_sequence ();
1183 
1184   if (!count_init)
1185     {
1186       /* Generate instructions at the beginning of the prolog to
1187          adjust the loop count by STAGE_COUNT.  If loop count is constant
1188          (count_init), this constant is adjusted by STAGE_COUNT in
1189          generate_prolog_epilog function.  */
1190       rtx sub_reg = NULL_RTX;
1191 
1192       sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
1193 				     gen_int_mode (last_stage,
1194 						   GET_MODE (count_reg)),
1195                                      count_reg, 1, OPTAB_DIRECT);
1196       gcc_assert (REG_P (sub_reg));
1197       if (REGNO (sub_reg) != REGNO (count_reg))
1198         emit_move_insn (count_reg, sub_reg);
1199     }
1200 
1201   for (i = 0; i < last_stage; i++)
1202     duplicate_insns_of_cycles (ps, 0, i, count_reg);
1203 
1204   /* Put the prolog on the entry edge.  */
1205   e = loop_preheader_edge (loop);
1206   split_edge_and_insert (e, get_insns ());
1207   if (!flag_resched_modulo_sched)
1208     e->dest->flags |= BB_DISABLE_SCHEDULE;
1209 
1210   end_sequence ();
1211 
1212   /* Generate the epilog, inserting its insns on the loop-exit edge.  */
1213   start_sequence ();
1214 
1215   for (i = 0; i < last_stage; i++)
1216     duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
1217 
1218   /* Put the epilogue on the exit edge.  */
1219   gcc_assert (single_exit (loop));
1220   e = single_exit (loop);
1221   split_edge_and_insert (e, get_insns ());
1222   if (!flag_resched_modulo_sched)
1223     e->dest->flags |= BB_DISABLE_SCHEDULE;
1224 
1225   end_sequence ();
1226 }
1227 
1228 /* Mark LOOP as software pipelined so the later
1229    scheduling passes don't touch it.  */
1230 static void
1231 mark_loop_unsched (struct loop *loop)
1232 {
1233   unsigned i;
1234   basic_block *bbs = get_loop_body (loop);
1235 
1236   for (i = 0; i < loop->num_nodes; i++)
1237     bbs[i]->flags |= BB_DISABLE_SCHEDULE;
1238 
1239   free (bbs);
1240 }
1241 
1242 /* Return true if all the BBs of the loop are empty except the
1243    loop header.  */
1244 static bool
1245 loop_single_full_bb_p (struct loop *loop)
1246 {
1247   unsigned i;
1248   basic_block *bbs = get_loop_body (loop);
1249 
1250   for (i = 0; i < loop->num_nodes ; i++)
1251     {
1252       rtx_insn *head, *tail;
1253       bool empty_bb = true;
1254 
1255       if (bbs[i] == loop->header)
1256         continue;
1257 
1258       /* Make sure that basic blocks other than the header
1259          have only notes labels or jumps.  */
1260       get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
1261       for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
1262         {
1263           if (NOTE_P (head) || LABEL_P (head)
1264  	      || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
1265  	    continue;
1266  	  empty_bb = false;
1267  	  break;
1268         }
1269 
1270       if (! empty_bb)
1271         {
1272           free (bbs);
1273           return false;
1274         }
1275     }
1276   free (bbs);
1277   return true;
1278 }
1279 
1280 /* Dump file:line from INSN's location info to dump_file.  */
1281 
1282 static void
1283 dump_insn_location (rtx_insn *insn)
1284 {
1285   if (dump_file && INSN_HAS_LOCATION (insn))
1286     {
1287       expanded_location xloc = insn_location (insn);
1288       fprintf (dump_file, " %s:%i", xloc.file, xloc.line);
1289     }
1290 }
1291 
1292 /* A simple loop from SMS point of view; it is a loop that is composed of
1293    either a single basic block or two BBs - a header and a latch.  */
1294 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) 		    \
1295 				  && (EDGE_COUNT (loop->latch->preds) == 1) \
1296                                   && (EDGE_COUNT (loop->latch->succs) == 1))
1297 
1298 /* Return true if the loop is in its canonical form and false if not.
1299    i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.  */
1300 static bool
1301 loop_canon_p (struct loop *loop)
1302 {
1303 
1304   if (loop->inner || !loop_outer (loop))
1305   {
1306     if (dump_file)
1307       fprintf (dump_file, "SMS loop inner or !loop_outer\n");
1308     return false;
1309   }
1310 
1311   if (!single_exit (loop))
1312     {
1313       if (dump_file)
1314 	{
1315 	  rtx_insn *insn = BB_END (loop->header);
1316 
1317 	  fprintf (dump_file, "SMS loop many exits");
1318 	  dump_insn_location (insn);
1319 	  fprintf (dump_file, "\n");
1320 	}
1321       return false;
1322     }
1323 
1324   if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
1325     {
1326       if (dump_file)
1327 	{
1328 	  rtx_insn *insn = BB_END (loop->header);
1329 
1330 	  fprintf (dump_file, "SMS loop many BBs.");
1331 	  dump_insn_location (insn);
1332 	  fprintf (dump_file, "\n");
1333 	}
1334       return false;
1335     }
1336 
1337     return true;
1338 }
1339 
1340 /* If there are more than one entry for the loop,
1341    make it one by splitting the first entry edge and
1342    redirecting the others to the new BB.  */
1343 static void
1344 canon_loop (struct loop *loop)
1345 {
1346   edge e;
1347   edge_iterator i;
1348 
1349   /* Avoid annoying special cases of edges going to exit
1350      block.  */
1351   FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1352     if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
1353       split_edge (e);
1354 
1355   if (loop->latch == loop->header
1356       || EDGE_COUNT (loop->latch->succs) > 1)
1357     {
1358       FOR_EACH_EDGE (e, i, loop->header->preds)
1359         if (e->src == loop->latch)
1360           break;
1361       split_edge (e);
1362     }
1363 }
1364 
1365 /* Setup infos.  */
1366 static void
1367 setup_sched_infos (void)
1368 {
1369   memcpy (&sms_common_sched_info, &haifa_common_sched_info,
1370 	  sizeof (sms_common_sched_info));
1371   sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
1372   common_sched_info = &sms_common_sched_info;
1373 
1374   sched_deps_info = &sms_sched_deps_info;
1375   current_sched_info = &sms_sched_info;
1376 }
1377 
1378 /* Probability in % that the sms-ed loop rolls enough so that optimized
1379    version may be entered.  Just a guess.  */
1380 #define PROB_SMS_ENOUGH_ITERATIONS 80
1381 
1382 /* Used to calculate the upper bound of ii.  */
1383 #define MAXII_FACTOR 2
1384 
1385 /* Main entry point, perform SMS scheduling on the loops of the function
1386    that consist of single basic blocks.  */
1387 static void
1388 sms_schedule (void)
1389 {
1390   rtx_insn *insn;
1391   ddg_ptr *g_arr, g;
1392   int * node_order;
1393   int maxii, max_asap;
1394   partial_schedule_ptr ps;
1395   basic_block bb = NULL;
1396   struct loop *loop;
1397   basic_block condition_bb = NULL;
1398   edge latch_edge;
1399   gcov_type trip_count = 0;
1400 
1401   loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1402 		       | LOOPS_HAVE_RECORDED_EXITS);
1403   if (number_of_loops (cfun) <= 1)
1404     {
1405       loop_optimizer_finalize ();
1406       return;  /* There are no loops to schedule.  */
1407     }
1408 
1409   /* Initialize issue_rate.  */
1410   if (targetm.sched.issue_rate)
1411     {
1412       int temp = reload_completed;
1413 
1414       reload_completed = 1;
1415       issue_rate = targetm.sched.issue_rate ();
1416       reload_completed = temp;
1417     }
1418   else
1419     issue_rate = 1;
1420 
1421   /* Initialize the scheduler.  */
1422   setup_sched_infos ();
1423   haifa_sched_init ();
1424 
1425   /* Allocate memory to hold the DDG array one entry for each loop.
1426      We use loop->num as index into this array.  */
1427   g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun));
1428 
1429   if (dump_file)
1430   {
1431     fprintf (dump_file, "\n\nSMS analysis phase\n");
1432     fprintf (dump_file, "===================\n\n");
1433   }
1434 
1435   /* Build DDGs for all the relevant loops and hold them in G_ARR
1436      indexed by the loop index.  */
1437   FOR_EACH_LOOP (loop, 0)
1438     {
1439       rtx_insn *head, *tail;
1440       rtx count_reg;
1441 
1442       /* For debugging.  */
1443       if (dbg_cnt (sms_sched_loop) == false)
1444         {
1445           if (dump_file)
1446             fprintf (dump_file, "SMS reached max limit... \n");
1447 
1448 	  break;
1449         }
1450 
1451       if (dump_file)
1452 	{
1453 	  rtx_insn *insn = BB_END (loop->header);
1454 
1455 	  fprintf (dump_file, "SMS loop num: %d", loop->num);
1456 	  dump_insn_location (insn);
1457 	  fprintf (dump_file, "\n");
1458 	}
1459 
1460       if (! loop_canon_p (loop))
1461         continue;
1462 
1463       if (! loop_single_full_bb_p (loop))
1464       {
1465         if (dump_file)
1466           fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
1467 	continue;
1468       }
1469 
1470       bb = loop->header;
1471 
1472       get_ebb_head_tail (bb, bb, &head, &tail);
1473       latch_edge = loop_latch_edge (loop);
1474       gcc_assert (single_exit (loop));
1475       if (single_exit (loop)->count)
1476 	trip_count = latch_edge->count / single_exit (loop)->count;
1477 
1478       /* Perform SMS only on loops that their average count is above threshold.  */
1479 
1480       if ( latch_edge->count
1481           && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
1482 	{
1483 	  if (dump_file)
1484 	    {
1485 	      dump_insn_location (tail);
1486 	      fprintf (dump_file, "\nSMS single-bb-loop\n");
1487 	      if (profile_info && flag_branch_probabilities)
1488 	    	{
1489 	      	  fprintf (dump_file, "SMS loop-count ");
1490 	      	  fprintf (dump_file, "%"PRId64,
1491 	             	   (int64_t) bb->count);
1492 	      	  fprintf (dump_file, "\n");
1493                   fprintf (dump_file, "SMS trip-count ");
1494                   fprintf (dump_file, "%"PRId64,
1495                            (int64_t) trip_count);
1496                   fprintf (dump_file, "\n");
1497 	      	  fprintf (dump_file, "SMS profile-sum-max ");
1498 	      	  fprintf (dump_file, "%"PRId64,
1499 	          	   (int64_t) profile_info->sum_max);
1500 	      	  fprintf (dump_file, "\n");
1501 	    	}
1502 	    }
1503           continue;
1504         }
1505 
1506       /* Make sure this is a doloop.  */
1507       if ( !(count_reg = doloop_register_get (head, tail)))
1508       {
1509         if (dump_file)
1510           fprintf (dump_file, "SMS doloop_register_get failed\n");
1511 	continue;
1512       }
1513 
1514       /* Don't handle BBs with calls or barriers
1515 	 or !single_set with the exception of instructions that include
1516 	 count_reg---these instructions are part of the control part
1517 	 that do-loop recognizes.
1518          ??? Should handle insns defining subregs.  */
1519      for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1520       {
1521          rtx set;
1522 
1523         if (CALL_P (insn)
1524             || BARRIER_P (insn)
1525             || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1526                 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1527                 && !reg_mentioned_p (count_reg, insn))
1528             || (INSN_P (insn) && (set = single_set (insn))
1529                 && GET_CODE (SET_DEST (set)) == SUBREG))
1530         break;
1531       }
1532 
1533       if (insn != NEXT_INSN (tail))
1534 	{
1535 	  if (dump_file)
1536 	    {
1537 	      if (CALL_P (insn))
1538 		fprintf (dump_file, "SMS loop-with-call\n");
1539 	      else if (BARRIER_P (insn))
1540 		fprintf (dump_file, "SMS loop-with-barrier\n");
1541               else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1542                 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1543                 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1544               else
1545                fprintf (dump_file, "SMS loop with subreg in lhs\n");
1546 	      print_rtl_single (dump_file, insn);
1547 	    }
1548 
1549 	  continue;
1550 	}
1551 
1552       /* Always schedule the closing branch with the rest of the
1553          instructions. The branch is rotated to be in row ii-1 at the
1554          end of the scheduling procedure to make sure it's the last
1555          instruction in the iteration.  */
1556       if (! (g = create_ddg (bb, 1)))
1557         {
1558           if (dump_file)
1559 	    fprintf (dump_file, "SMS create_ddg failed\n");
1560 	  continue;
1561         }
1562 
1563       g_arr[loop->num] = g;
1564       if (dump_file)
1565         fprintf (dump_file, "...OK\n");
1566 
1567     }
1568   if (dump_file)
1569   {
1570     fprintf (dump_file, "\nSMS transformation phase\n");
1571     fprintf (dump_file, "=========================\n\n");
1572   }
1573 
1574   /* We don't want to perform SMS on new loops - created by versioning.  */
1575   FOR_EACH_LOOP (loop, 0)
1576     {
1577       rtx_insn *head, *tail;
1578       rtx count_reg;
1579       rtx_insn *count_init;
1580       int mii, rec_mii, stage_count, min_cycle;
1581       int64_t loop_count = 0;
1582       bool opt_sc_p;
1583 
1584       if (! (g = g_arr[loop->num]))
1585         continue;
1586 
1587       if (dump_file)
1588 	{
1589 	  rtx_insn *insn = BB_END (loop->header);
1590 
1591 	  fprintf (dump_file, "SMS loop num: %d", loop->num);
1592 	  dump_insn_location (insn);
1593 	  fprintf (dump_file, "\n");
1594 
1595 	  print_ddg (dump_file, g);
1596 	}
1597 
1598       get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1599 
1600       latch_edge = loop_latch_edge (loop);
1601       gcc_assert (single_exit (loop));
1602       if (single_exit (loop)->count)
1603 	trip_count = latch_edge->count / single_exit (loop)->count;
1604 
1605       if (dump_file)
1606 	{
1607 	  dump_insn_location (tail);
1608 	  fprintf (dump_file, "\nSMS single-bb-loop\n");
1609 	  if (profile_info && flag_branch_probabilities)
1610 	    {
1611 	      fprintf (dump_file, "SMS loop-count ");
1612 	      fprintf (dump_file, "%"PRId64,
1613 	               (int64_t) bb->count);
1614 	      fprintf (dump_file, "\n");
1615 	      fprintf (dump_file, "SMS profile-sum-max ");
1616 	      fprintf (dump_file, "%"PRId64,
1617 	               (int64_t) profile_info->sum_max);
1618 	      fprintf (dump_file, "\n");
1619 	    }
1620 	  fprintf (dump_file, "SMS doloop\n");
1621 	  fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1622           fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1623           fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1624 	}
1625 
1626 
1627       /* In case of th loop have doloop register it gets special
1628 	 handling.  */
1629       count_init = NULL;
1630       if ((count_reg = doloop_register_get (head, tail)))
1631 	{
1632 	  basic_block pre_header;
1633 
1634 	  pre_header = loop_preheader_edge (loop)->src;
1635 	  count_init = const_iteration_count (count_reg, pre_header,
1636 					      &loop_count);
1637 	}
1638       gcc_assert (count_reg);
1639 
1640       if (dump_file && count_init)
1641         {
1642           fprintf (dump_file, "SMS const-doloop ");
1643           fprintf (dump_file, "%"PRId64,
1644 		     loop_count);
1645           fprintf (dump_file, "\n");
1646         }
1647 
1648       node_order = XNEWVEC (int, g->num_nodes);
1649 
1650       mii = 1; /* Need to pass some estimate of mii.  */
1651       rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1652       mii = MAX (res_MII (g), rec_mii);
1653       maxii = MAX (max_asap, MAXII_FACTOR * mii);
1654 
1655       if (dump_file)
1656 	fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1657 		 rec_mii, mii, maxii);
1658 
1659       for (;;)
1660 	{
1661 	  set_node_sched_params (g);
1662 
1663 	  stage_count = 0;
1664 	  opt_sc_p = false;
1665 	  ps = sms_schedule_by_order (g, mii, maxii, node_order);
1666 
1667 	  if (ps)
1668 	    {
1669 	      /* Try to achieve optimized SC by normalizing the partial
1670 		 schedule (having the cycles start from cycle zero).
1671 		 The branch location must be placed in row ii-1 in the
1672 		 final scheduling.	If failed, shift all instructions to
1673 		 position the branch in row ii-1.  */
1674 	      opt_sc_p = optimize_sc (ps, g);
1675 	      if (opt_sc_p)
1676 		stage_count = calculate_stage_count (ps, 0);
1677 	      else
1678 		{
1679 		  /* Bring the branch to cycle ii-1.  */
1680 		  int amount = (SCHED_TIME (g->closing_branch->cuid)
1681 				- (ps->ii - 1));
1682 
1683 		  if (dump_file)
1684 		    fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1685 
1686 		  stage_count = calculate_stage_count (ps, amount);
1687 		}
1688 
1689 	      gcc_assert (stage_count >= 1);
1690 	    }
1691 
1692 	  /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1693 	     1 means that there is no interleaving between iterations thus
1694 	     we let the scheduling passes do the job in this case.  */
1695 	  if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC)
1696 	      || (count_init && (loop_count <= stage_count))
1697 	      || (flag_branch_probabilities && (trip_count <= stage_count)))
1698 	    {
1699 	      if (dump_file)
1700 		{
1701 		  fprintf (dump_file, "SMS failed... \n");
1702 		  fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
1703 			   " loop-count=", stage_count);
1704 		  fprintf (dump_file, "%"PRId64, loop_count);
1705 		  fprintf (dump_file, ", trip-count=");
1706 		  fprintf (dump_file, "%"PRId64, trip_count);
1707 		  fprintf (dump_file, ")\n");
1708 		}
1709 	      break;
1710 	    }
1711 
1712           if (!opt_sc_p)
1713             {
1714 	      /* Rotate the partial schedule to have the branch in row ii-1.  */
1715               int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
1716 
1717               reset_sched_times (ps, amount);
1718               rotate_partial_schedule (ps, amount);
1719             }
1720 
1721 	  set_columns_for_ps (ps);
1722 
1723 	  min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1724 	  if (!schedule_reg_moves (ps))
1725 	    {
1726 	      mii = ps->ii + 1;
1727 	      free_partial_schedule (ps);
1728 	      continue;
1729 	    }
1730 
1731 	  /* Moves that handle incoming values might have been added
1732 	     to a new first stage.  Bump the stage count if so.
1733 
1734 	     ??? Perhaps we could consider rotating the schedule here
1735 	     instead?  */
1736 	  if (PS_MIN_CYCLE (ps) < min_cycle)
1737 	    {
1738 	      reset_sched_times (ps, 0);
1739 	      stage_count++;
1740 	    }
1741 
1742 	  /* The stage count should now be correct without rotation.  */
1743 	  gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
1744 	  PS_STAGE_COUNT (ps) = stage_count;
1745 
1746 	  canon_loop (loop);
1747 
1748           if (dump_file)
1749             {
1750 	      dump_insn_location (tail);
1751 	      fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n",
1752 		       ps->ii, stage_count);
1753 	      print_partial_schedule (ps, dump_file);
1754 	    }
1755 
1756           /* case the BCT count is not known , Do loop-versioning */
1757 	  if (count_reg && ! count_init)
1758             {
1759 	      rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
1760 					 gen_int_mode (stage_count,
1761 						       GET_MODE (count_reg)));
1762 	      unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1763 			       * REG_BR_PROB_BASE) / 100;
1764 
1765 	      loop_version (loop, comp_rtx, &condition_bb,
1766 	  		    prob, prob, REG_BR_PROB_BASE - prob,
1767 			    true);
1768 	     }
1769 
1770 	  /* Set new iteration count of loop kernel.  */
1771           if (count_reg && count_init)
1772 	    SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1773 						     - stage_count + 1);
1774 
1775 	  /* Now apply the scheduled kernel to the RTL of the loop.  */
1776 	  permute_partial_schedule (ps, g->closing_branch->first_note);
1777 
1778           /* Mark this loop as software pipelined so the later
1779 	     scheduling passes don't touch it.  */
1780 	  if (! flag_resched_modulo_sched)
1781 	    mark_loop_unsched (loop);
1782 
1783 	  /* The life-info is not valid any more.  */
1784 	  df_set_bb_dirty (g->bb);
1785 
1786 	  apply_reg_moves (ps);
1787 	  if (dump_file)
1788 	    print_node_sched_params (dump_file, g->num_nodes, ps);
1789 	  /* Generate prolog and epilog.  */
1790           generate_prolog_epilog (ps, loop, count_reg, count_init);
1791 	  break;
1792 	}
1793 
1794       free_partial_schedule (ps);
1795       node_sched_param_vec.release ();
1796       free (node_order);
1797       free_ddg (g);
1798     }
1799 
1800   free (g_arr);
1801 
1802   /* Release scheduler data, needed until now because of DFA.  */
1803   haifa_sched_finish ();
1804   loop_optimizer_finalize ();
1805 }
1806 
1807 /* The SMS scheduling algorithm itself
1808    -----------------------------------
1809    Input: 'O' an ordered list of insns of a loop.
1810    Output: A scheduling of the loop - kernel, prolog, and epilogue.
1811 
1812    'Q' is the empty Set
1813    'PS' is the partial schedule; it holds the currently scheduled nodes with
1814 	their cycle/slot.
1815    'PSP' previously scheduled predecessors.
1816    'PSS' previously scheduled successors.
1817    't(u)' the cycle where u is scheduled.
1818    'l(u)' is the latency of u.
1819    'd(v,u)' is the dependence distance from v to u.
1820    'ASAP(u)' the earliest time at which u could be scheduled as computed in
1821 	     the node ordering phase.
1822    'check_hardware_resources_conflicts(u, PS, c)'
1823 			     run a trace around cycle/slot through DFA model
1824 			     to check resource conflicts involving instruction u
1825 			     at cycle c given the partial schedule PS.
1826    'add_to_partial_schedule_at_time(u, PS, c)'
1827 			     Add the node/instruction u to the partial schedule
1828 			     PS at time c.
1829    'calculate_register_pressure(PS)'
1830 			     Given a schedule of instructions, calculate the register
1831 			     pressure it implies.  One implementation could be the
1832 			     maximum number of overlapping live ranges.
1833    'maxRP' The maximum allowed register pressure, it is usually derived from the number
1834 	   registers available in the hardware.
1835 
1836    1. II = MII.
1837    2. PS = empty list
1838    3. for each node u in O in pre-computed order
1839    4.   if (PSP(u) != Q && PSS(u) == Q) then
1840    5.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1841    6.     start = Early_start; end = Early_start + II - 1; step = 1
1842    11.  else if (PSP(u) == Q && PSS(u) != Q) then
1843    12.      Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1844    13.     start = Late_start; end = Late_start - II + 1; step = -1
1845    14.  else if (PSP(u) != Q && PSS(u) != Q) then
1846    15.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1847    16.     Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1848    17.     start = Early_start;
1849    18.     end = min(Early_start + II - 1 , Late_start);
1850    19.     step = 1
1851    20.     else "if (PSP(u) == Q && PSS(u) == Q)"
1852    21.	  start = ASAP(u); end = start + II - 1; step = 1
1853    22.  endif
1854 
1855    23.  success = false
1856    24.  for (c = start ; c != end ; c += step)
1857    25.     if check_hardware_resources_conflicts(u, PS, c) then
1858    26.       add_to_partial_schedule_at_time(u, PS, c)
1859    27.       success = true
1860    28.       break
1861    29.     endif
1862    30.  endfor
1863    31.  if (success == false) then
1864    32.    II = II + 1
1865    33.    if (II > maxII) then
1866    34.       finish - failed to schedule
1867    35.	 endif
1868    36.    goto 2.
1869    37.  endif
1870    38. endfor
1871    39. if (calculate_register_pressure(PS) > maxRP) then
1872    40.    goto 32.
1873    41. endif
1874    42. compute epilogue & prologue
1875    43. finish - succeeded to schedule
1876 
1877    ??? The algorithm restricts the scheduling window to II cycles.
1878    In rare cases, it may be better to allow windows of II+1 cycles.
1879    The window would then start and end on the same row, but with
1880    different "must precede" and "must follow" requirements.  */
1881 
1882 /* A limit on the number of cycles that resource conflicts can span.  ??? Should
1883    be provided by DFA, and be dependent on the type of insn scheduled.  Currently
1884    set to 0 to save compile time.  */
1885 #define DFA_HISTORY SMS_DFA_HISTORY
1886 
1887 /* A threshold for the number of repeated unsuccessful attempts to insert
1888    an empty row, before we flush the partial schedule and start over.  */
1889 #define MAX_SPLIT_NUM 10
1890 /* Given the partial schedule PS, this function calculates and returns the
1891    cycles in which we can schedule the node with the given index I.
1892    NOTE: Here we do the backtracking in SMS, in some special cases. We have
1893    noticed that there are several cases in which we fail    to SMS the loop
1894    because the sched window of a node is empty    due to tight data-deps. In
1895    such cases we want to unschedule    some of the predecessors/successors
1896    until we get non-empty    scheduling window.  It returns -1 if the
1897    scheduling window is empty and zero otherwise.  */
1898 
1899 static int
1900 get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
1901 		  sbitmap sched_nodes, int ii, int *start_p, int *step_p,
1902 		  int *end_p)
1903 {
1904   int start, step, end;
1905   int early_start, late_start;
1906   ddg_edge_ptr e;
1907   sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1908   sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1909   sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1910   sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1911   int psp_not_empty;
1912   int pss_not_empty;
1913   int count_preds;
1914   int count_succs;
1915 
1916   /* 1. compute sched window for u (start, end, step).  */
1917   bitmap_clear (psp);
1918   bitmap_clear (pss);
1919   psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes);
1920   pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes);
1921 
1922   /* We first compute a forward range (start <= end), then decide whether
1923      to reverse it.  */
1924   early_start = INT_MIN;
1925   late_start = INT_MAX;
1926   start = INT_MIN;
1927   end = INT_MAX;
1928   step = 1;
1929 
1930   count_preds = 0;
1931   count_succs = 0;
1932 
1933   if (dump_file && (psp_not_empty || pss_not_empty))
1934     {
1935       fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
1936 	       "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
1937       fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
1938 	       "start", "early start", "late start", "end", "time");
1939       fprintf (dump_file, "=========== =========== =========== ==========="
1940 	       " =====\n");
1941     }
1942   /* Calculate early_start and limit end.  Both bounds are inclusive.  */
1943   if (psp_not_empty)
1944     for (e = u_node->in; e != 0; e = e->next_in)
1945       {
1946 	int v = e->src->cuid;
1947 
1948 	if (bitmap_bit_p (sched_nodes, v))
1949 	  {
1950 	    int p_st = SCHED_TIME (v);
1951 	    int earliest = p_st + e->latency - (e->distance * ii);
1952 	    int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
1953 
1954 	    if (dump_file)
1955 	      {
1956 		fprintf (dump_file, "%11s %11d %11s %11d %5d",
1957 			 "", earliest, "", latest, p_st);
1958 		print_ddg_edge (dump_file, e);
1959 		fprintf (dump_file, "\n");
1960 	      }
1961 
1962 	    early_start = MAX (early_start, earliest);
1963 	    end = MIN (end, latest);
1964 
1965 	    if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1966 	      count_preds++;
1967 	  }
1968       }
1969 
1970   /* Calculate late_start and limit start.  Both bounds are inclusive.  */
1971   if (pss_not_empty)
1972     for (e = u_node->out; e != 0; e = e->next_out)
1973       {
1974 	int v = e->dest->cuid;
1975 
1976 	if (bitmap_bit_p (sched_nodes, v))
1977 	  {
1978 	    int s_st = SCHED_TIME (v);
1979 	    int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
1980 	    int latest = s_st - e->latency + (e->distance * ii);
1981 
1982 	    if (dump_file)
1983 	      {
1984 		fprintf (dump_file, "%11d %11s %11d %11s %5d",
1985 			 earliest, "", latest, "", s_st);
1986 		print_ddg_edge (dump_file, e);
1987 		fprintf (dump_file, "\n");
1988 	      }
1989 
1990 	    start = MAX (start, earliest);
1991 	    late_start = MIN (late_start, latest);
1992 
1993 	    if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1994 	      count_succs++;
1995 	  }
1996       }
1997 
1998   if (dump_file && (psp_not_empty || pss_not_empty))
1999     {
2000       fprintf (dump_file, "----------- ----------- ----------- -----------"
2001 	       " -----\n");
2002       fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
2003 	       start, early_start, late_start, end, "",
2004 	       "(max, max, min, min)");
2005     }
2006 
2007   /* Get a target scheduling window no bigger than ii.  */
2008   if (early_start == INT_MIN && late_start == INT_MAX)
2009     early_start = NODE_ASAP (u_node);
2010   else if (early_start == INT_MIN)
2011     early_start = late_start - (ii - 1);
2012   late_start = MIN (late_start, early_start + (ii - 1));
2013 
2014   /* Apply memory dependence limits.  */
2015   start = MAX (start, early_start);
2016   end = MIN (end, late_start);
2017 
2018   if (dump_file && (psp_not_empty || pss_not_empty))
2019     fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
2020 	     "", start, end, "", "");
2021 
2022   /* If there are at least as many successors as predecessors, schedule the
2023      node close to its successors.  */
2024   if (pss_not_empty && count_succs >= count_preds)
2025     {
2026       int tmp = end;
2027       end = start;
2028       start = tmp;
2029       step = -1;
2030     }
2031 
2032   /* Now that we've finalized the window, make END an exclusive rather
2033      than an inclusive bound.  */
2034   end += step;
2035 
2036   *start_p = start;
2037   *step_p = step;
2038   *end_p = end;
2039   sbitmap_free (psp);
2040   sbitmap_free (pss);
2041 
2042   if ((start >= end && step == 1) || (start <= end && step == -1))
2043     {
2044       if (dump_file)
2045 	fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
2046 		 start, end, step);
2047       return -1;
2048     }
2049 
2050   return 0;
2051 }
2052 
2053 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
2054    node currently been scheduled.  At the end of the calculation
2055    MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
2056    U_NODE which are (1) already scheduled in the first/last row of
2057    U_NODE's scheduling window, (2) whose dependence inequality with U
2058    becomes an equality when U is scheduled in this same row, and (3)
2059    whose dependence latency is zero.
2060 
2061    The first and last rows are calculated using the following parameters:
2062    START/END rows - The cycles that begins/ends the traversal on the window;
2063    searching for an empty cycle to schedule U_NODE.
2064    STEP - The direction in which we traverse the window.
2065    II - The initiation interval.  */
2066 
2067 static void
2068 calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
2069 			       int step, int ii, sbitmap sched_nodes,
2070 			       sbitmap must_precede, sbitmap must_follow)
2071 {
2072   ddg_edge_ptr e;
2073   int first_cycle_in_window, last_cycle_in_window;
2074 
2075   gcc_assert (must_precede && must_follow);
2076 
2077   /* Consider the following scheduling window:
2078      {first_cycle_in_window, first_cycle_in_window+1, ...,
2079      last_cycle_in_window}.  If step is 1 then the following will be
2080      the order we traverse the window: {start=first_cycle_in_window,
2081      first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
2082      or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
2083      end=first_cycle_in_window-1} if step is -1.  */
2084   first_cycle_in_window = (step == 1) ? start : end - step;
2085   last_cycle_in_window = (step == 1) ? end - step : start;
2086 
2087   bitmap_clear (must_precede);
2088   bitmap_clear (must_follow);
2089 
2090   if (dump_file)
2091     fprintf (dump_file, "\nmust_precede: ");
2092 
2093   /* Instead of checking if:
2094       (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
2095       && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
2096              first_cycle_in_window)
2097       && e->latency == 0
2098      we use the fact that latency is non-negative:
2099       SCHED_TIME (e->src) - (e->distance * ii) <=
2100       SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
2101       first_cycle_in_window
2102      and check only if
2103       SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window  */
2104   for (e = u_node->in; e != 0; e = e->next_in)
2105     if (bitmap_bit_p (sched_nodes, e->src->cuid)
2106 	&& ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
2107              first_cycle_in_window))
2108       {
2109 	if (dump_file)
2110 	  fprintf (dump_file, "%d ", e->src->cuid);
2111 
2112 	bitmap_set_bit (must_precede, e->src->cuid);
2113       }
2114 
2115   if (dump_file)
2116     fprintf (dump_file, "\nmust_follow: ");
2117 
2118   /* Instead of checking if:
2119       (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
2120       && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
2121              last_cycle_in_window)
2122       && e->latency == 0
2123      we use the fact that latency is non-negative:
2124       SCHED_TIME (e->dest) + (e->distance * ii) >=
2125       SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
2126       last_cycle_in_window
2127      and check only if
2128       SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window  */
2129   for (e = u_node->out; e != 0; e = e->next_out)
2130     if (bitmap_bit_p (sched_nodes, e->dest->cuid)
2131 	&& ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
2132              last_cycle_in_window))
2133       {
2134 	if (dump_file)
2135 	  fprintf (dump_file, "%d ", e->dest->cuid);
2136 
2137 	bitmap_set_bit (must_follow, e->dest->cuid);
2138       }
2139 
2140   if (dump_file)
2141     fprintf (dump_file, "\n");
2142 }
2143 
2144 /* Return 1 if U_NODE can be scheduled in CYCLE.  Use the following
2145    parameters to decide if that's possible:
2146    PS - The partial schedule.
2147    U - The serial number of U_NODE.
2148    NUM_SPLITS - The number of row splits made so far.
2149    MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
2150    the first row of the scheduling window)
2151    MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
2152    last row of the scheduling window)  */
2153 
2154 static bool
2155 try_scheduling_node_in_cycle (partial_schedule_ptr ps,
2156 			      int u, int cycle, sbitmap sched_nodes,
2157 			      int *num_splits, sbitmap must_precede,
2158 			      sbitmap must_follow)
2159 {
2160   ps_insn_ptr psi;
2161   bool success = 0;
2162 
2163   verify_partial_schedule (ps, sched_nodes);
2164   psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
2165   if (psi)
2166     {
2167       SCHED_TIME (u) = cycle;
2168       bitmap_set_bit (sched_nodes, u);
2169       success = 1;
2170       *num_splits = 0;
2171       if (dump_file)
2172 	fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
2173 
2174     }
2175 
2176   return success;
2177 }
2178 
2179 /* This function implements the scheduling algorithm for SMS according to the
2180    above algorithm.  */
2181 static partial_schedule_ptr
2182 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
2183 {
2184   int ii = mii;
2185   int i, c, success, num_splits = 0;
2186   int flush_and_start_over = true;
2187   int num_nodes = g->num_nodes;
2188   int start, end, step; /* Place together into one struct?  */
2189   sbitmap sched_nodes = sbitmap_alloc (num_nodes);
2190   sbitmap must_precede = sbitmap_alloc (num_nodes);
2191   sbitmap must_follow = sbitmap_alloc (num_nodes);
2192   sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
2193 
2194   partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
2195 
2196   bitmap_ones (tobe_scheduled);
2197   bitmap_clear (sched_nodes);
2198 
2199   while (flush_and_start_over && (ii < maxii))
2200     {
2201 
2202       if (dump_file)
2203 	fprintf (dump_file, "Starting with ii=%d\n", ii);
2204       flush_and_start_over = false;
2205       bitmap_clear (sched_nodes);
2206 
2207       for (i = 0; i < num_nodes; i++)
2208 	{
2209 	  int u = nodes_order[i];
2210   	  ddg_node_ptr u_node = &ps->g->nodes[u];
2211 	  rtx insn = u_node->insn;
2212 
2213 	  if (!NONDEBUG_INSN_P (insn))
2214 	    {
2215 	      bitmap_clear_bit (tobe_scheduled, u);
2216 	      continue;
2217 	    }
2218 
2219 	  if (bitmap_bit_p (sched_nodes, u))
2220 	    continue;
2221 
2222 	  /* Try to get non-empty scheduling window.  */
2223 	 success = 0;
2224          if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
2225                                 &step, &end) == 0)
2226             {
2227               if (dump_file)
2228                 fprintf (dump_file, "\nTrying to schedule node %d "
2229 			 "INSN = %d  in (%d .. %d) step %d\n", u, (INSN_UID
2230                         (g->nodes[u].insn)), start, end, step);
2231 
2232               gcc_assert ((step > 0 && start < end)
2233                           || (step < 0 && start > end));
2234 
2235               calculate_must_precede_follow (u_node, start, end, step, ii,
2236                                              sched_nodes, must_precede,
2237                                              must_follow);
2238 
2239               for (c = start; c != end; c += step)
2240                 {
2241 		  sbitmap tmp_precede, tmp_follow;
2242 
2243                   set_must_precede_follow (&tmp_follow, must_follow,
2244 		                           &tmp_precede, must_precede,
2245                                            c, start, end, step);
2246                   success =
2247                     try_scheduling_node_in_cycle (ps, u, c,
2248                                                   sched_nodes,
2249                                                   &num_splits, tmp_precede,
2250                                                   tmp_follow);
2251                   if (success)
2252                     break;
2253                 }
2254 
2255               verify_partial_schedule (ps, sched_nodes);
2256             }
2257             if (!success)
2258             {
2259               int split_row;
2260 
2261               if (ii++ == maxii)
2262                 break;
2263 
2264               if (num_splits >= MAX_SPLIT_NUM)
2265                 {
2266                   num_splits = 0;
2267                   flush_and_start_over = true;
2268                   verify_partial_schedule (ps, sched_nodes);
2269                   reset_partial_schedule (ps, ii);
2270                   verify_partial_schedule (ps, sched_nodes);
2271                   break;
2272                 }
2273 
2274               num_splits++;
2275               /* The scheduling window is exclusive of 'end'
2276                  whereas compute_split_window() expects an inclusive,
2277                  ordered range.  */
2278               if (step == 1)
2279                 split_row = compute_split_row (sched_nodes, start, end - 1,
2280                                                ps->ii, u_node);
2281               else
2282                 split_row = compute_split_row (sched_nodes, end + 1, start,
2283                                                ps->ii, u_node);
2284 
2285               ps_insert_empty_row (ps, split_row, sched_nodes);
2286               i--;              /* Go back and retry node i.  */
2287 
2288               if (dump_file)
2289                 fprintf (dump_file, "num_splits=%d\n", num_splits);
2290             }
2291 
2292           /* ??? If (success), check register pressure estimates.  */
2293         }                       /* Continue with next node.  */
2294     }                           /* While flush_and_start_over.  */
2295   if (ii >= maxii)
2296     {
2297       free_partial_schedule (ps);
2298       ps = NULL;
2299     }
2300   else
2301     gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes));
2302 
2303   sbitmap_free (sched_nodes);
2304   sbitmap_free (must_precede);
2305   sbitmap_free (must_follow);
2306   sbitmap_free (tobe_scheduled);
2307 
2308   return ps;
2309 }
2310 
2311 /* This function inserts a new empty row into PS at the position
2312    according to SPLITROW, keeping all already scheduled instructions
2313    intact and updating their SCHED_TIME and cycle accordingly.  */
2314 static void
2315 ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
2316 		     sbitmap sched_nodes)
2317 {
2318   ps_insn_ptr crr_insn;
2319   ps_insn_ptr *rows_new;
2320   int ii = ps->ii;
2321   int new_ii = ii + 1;
2322   int row;
2323   int *rows_length_new;
2324 
2325   verify_partial_schedule (ps, sched_nodes);
2326 
2327   /* We normalize sched_time and rotate ps to have only non-negative sched
2328      times, for simplicity of updating cycles after inserting new row.  */
2329   split_row -= ps->min_cycle;
2330   split_row = SMODULO (split_row, ii);
2331   if (dump_file)
2332     fprintf (dump_file, "split_row=%d\n", split_row);
2333 
2334   reset_sched_times (ps, PS_MIN_CYCLE (ps));
2335   rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
2336 
2337   rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
2338   rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
2339   for (row = 0; row < split_row; row++)
2340     {
2341       rows_new[row] = ps->rows[row];
2342       rows_length_new[row] = ps->rows_length[row];
2343       ps->rows[row] = NULL;
2344       for (crr_insn = rows_new[row];
2345 	   crr_insn; crr_insn = crr_insn->next_in_row)
2346 	{
2347 	  int u = crr_insn->id;
2348 	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
2349 
2350 	  SCHED_TIME (u) = new_time;
2351 	  crr_insn->cycle = new_time;
2352 	  SCHED_ROW (u) = new_time % new_ii;
2353 	  SCHED_STAGE (u) = new_time / new_ii;
2354 	}
2355 
2356     }
2357 
2358   rows_new[split_row] = NULL;
2359 
2360   for (row = split_row; row < ii; row++)
2361     {
2362       rows_new[row + 1] = ps->rows[row];
2363       rows_length_new[row + 1] = ps->rows_length[row];
2364       ps->rows[row] = NULL;
2365       for (crr_insn = rows_new[row + 1];
2366 	   crr_insn; crr_insn = crr_insn->next_in_row)
2367 	{
2368 	  int u = crr_insn->id;
2369 	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
2370 
2371 	  SCHED_TIME (u) = new_time;
2372 	  crr_insn->cycle = new_time;
2373 	  SCHED_ROW (u) = new_time % new_ii;
2374 	  SCHED_STAGE (u) = new_time / new_ii;
2375 	}
2376     }
2377 
2378   /* Updating ps.  */
2379   ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
2380     + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
2381   ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
2382     + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
2383   free (ps->rows);
2384   ps->rows = rows_new;
2385   free (ps->rows_length);
2386   ps->rows_length = rows_length_new;
2387   ps->ii = new_ii;
2388   gcc_assert (ps->min_cycle >= 0);
2389 
2390   verify_partial_schedule (ps, sched_nodes);
2391 
2392   if (dump_file)
2393     fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
2394 	     ps->max_cycle);
2395 }
2396 
2397 /* Given U_NODE which is the node that failed to be scheduled; LOW and
2398    UP which are the boundaries of it's scheduling window; compute using
2399    SCHED_NODES and II a row in the partial schedule that can be split
2400    which will separate a critical predecessor from a critical successor
2401    thereby expanding the window, and return it.  */
2402 static int
2403 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
2404 		   ddg_node_ptr u_node)
2405 {
2406   ddg_edge_ptr e;
2407   int lower = INT_MIN, upper = INT_MAX;
2408   int crit_pred = -1;
2409   int crit_succ = -1;
2410   int crit_cycle;
2411 
2412   for (e = u_node->in; e != 0; e = e->next_in)
2413     {
2414       int v = e->src->cuid;
2415 
2416       if (bitmap_bit_p (sched_nodes, v)
2417 	  && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
2418 	if (SCHED_TIME (v) > lower)
2419 	  {
2420 	    crit_pred = v;
2421 	    lower = SCHED_TIME (v);
2422 	  }
2423     }
2424 
2425   if (crit_pred >= 0)
2426     {
2427       crit_cycle = SCHED_TIME (crit_pred) + 1;
2428       return SMODULO (crit_cycle, ii);
2429     }
2430 
2431   for (e = u_node->out; e != 0; e = e->next_out)
2432     {
2433       int v = e->dest->cuid;
2434 
2435       if (bitmap_bit_p (sched_nodes, v)
2436 	  && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
2437 	if (SCHED_TIME (v) < upper)
2438 	  {
2439 	    crit_succ = v;
2440 	    upper = SCHED_TIME (v);
2441 	  }
2442     }
2443 
2444   if (crit_succ >= 0)
2445     {
2446       crit_cycle = SCHED_TIME (crit_succ);
2447       return SMODULO (crit_cycle, ii);
2448     }
2449 
2450   if (dump_file)
2451     fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2452 
2453   return SMODULO ((low + up + 1) / 2, ii);
2454 }
2455 
2456 static void
2457 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2458 {
2459   int row;
2460   ps_insn_ptr crr_insn;
2461 
2462   for (row = 0; row < ps->ii; row++)
2463     {
2464       int length = 0;
2465 
2466       for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2467 	{
2468 	  int u = crr_insn->id;
2469 
2470 	  length++;
2471 	  gcc_assert (bitmap_bit_p (sched_nodes, u));
2472 	  /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2473 	     popcount (sched_nodes) == number of insns in ps.  */
2474 	  gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2475 	  gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2476 	}
2477 
2478       gcc_assert (ps->rows_length[row] == length);
2479     }
2480 }
2481 
2482 
2483 /* This page implements the algorithm for ordering the nodes of a DDG
2484    for modulo scheduling, activated through the
2485    "int sms_order_nodes (ddg_ptr, int mii, int * result)" API.  */
2486 
2487 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2488 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2489 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2490 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2491 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2492 #define DEPTH(x) (ASAP ((x)))
2493 
2494 typedef struct node_order_params * nopa;
2495 
2496 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2497 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2498 static nopa  calculate_order_params (ddg_ptr, int, int *);
2499 static int find_max_asap (ddg_ptr, sbitmap);
2500 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2501 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2502 
2503 enum sms_direction {BOTTOMUP, TOPDOWN};
2504 
2505 struct node_order_params
2506 {
2507   int asap;
2508   int alap;
2509   int height;
2510 };
2511 
2512 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1.  */
2513 static void
2514 check_nodes_order (int *node_order, int num_nodes)
2515 {
2516   int i;
2517   sbitmap tmp = sbitmap_alloc (num_nodes);
2518 
2519   bitmap_clear (tmp);
2520 
2521   if (dump_file)
2522     fprintf (dump_file, "SMS final nodes order: \n");
2523 
2524   for (i = 0; i < num_nodes; i++)
2525     {
2526       int u = node_order[i];
2527 
2528       if (dump_file)
2529         fprintf (dump_file, "%d ", u);
2530       gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u));
2531 
2532       bitmap_set_bit (tmp, u);
2533     }
2534 
2535   if (dump_file)
2536     fprintf (dump_file, "\n");
2537 
2538   sbitmap_free (tmp);
2539 }
2540 
2541 /* Order the nodes of G for scheduling and pass the result in
2542    NODE_ORDER.  Also set aux.count of each node to ASAP.
2543    Put maximal ASAP to PMAX_ASAP.  Return the recMII for the given DDG.  */
2544 static int
2545 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2546 {
2547   int i;
2548   int rec_mii = 0;
2549   ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2550 
2551   nopa nops = calculate_order_params (g, mii, pmax_asap);
2552 
2553   if (dump_file)
2554     print_sccs (dump_file, sccs, g);
2555 
2556   order_nodes_of_sccs (sccs, node_order);
2557 
2558   if (sccs->num_sccs > 0)
2559     /* First SCC has the largest recurrence_length.  */
2560     rec_mii = sccs->sccs[0]->recurrence_length;
2561 
2562   /* Save ASAP before destroying node_order_params.  */
2563   for (i = 0; i < g->num_nodes; i++)
2564     {
2565       ddg_node_ptr v = &g->nodes[i];
2566       v->aux.count = ASAP (v);
2567     }
2568 
2569   free (nops);
2570   free_ddg_all_sccs (sccs);
2571   check_nodes_order (node_order, g->num_nodes);
2572 
2573   return rec_mii;
2574 }
2575 
2576 static void
2577 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2578 {
2579   int i, pos = 0;
2580   ddg_ptr g = all_sccs->ddg;
2581   int num_nodes = g->num_nodes;
2582   sbitmap prev_sccs = sbitmap_alloc (num_nodes);
2583   sbitmap on_path = sbitmap_alloc (num_nodes);
2584   sbitmap tmp = sbitmap_alloc (num_nodes);
2585   sbitmap ones = sbitmap_alloc (num_nodes);
2586 
2587   bitmap_clear (prev_sccs);
2588   bitmap_ones (ones);
2589 
2590   /* Perform the node ordering starting from the SCC with the highest recMII.
2591      For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
2592   for (i = 0; i < all_sccs->num_sccs; i++)
2593     {
2594       ddg_scc_ptr scc = all_sccs->sccs[i];
2595 
2596       /* Add nodes on paths from previous SCCs to the current SCC.  */
2597       find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2598       bitmap_ior (tmp, scc->nodes, on_path);
2599 
2600       /* Add nodes on paths from the current SCC to previous SCCs.  */
2601       find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2602       bitmap_ior (tmp, tmp, on_path);
2603 
2604       /* Remove nodes of previous SCCs from current extended SCC.  */
2605       bitmap_and_compl (tmp, tmp, prev_sccs);
2606 
2607       pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2608       /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
2609     }
2610 
2611   /* Handle the remaining nodes that do not belong to any scc.  Each call
2612      to order_nodes_in_scc handles a single connected component.  */
2613   while (pos < g->num_nodes)
2614     {
2615       bitmap_and_compl (tmp, ones, prev_sccs);
2616       pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2617     }
2618   sbitmap_free (prev_sccs);
2619   sbitmap_free (on_path);
2620   sbitmap_free (tmp);
2621   sbitmap_free (ones);
2622 }
2623 
2624 /* MII is needed if we consider backarcs (that do not close recursive cycles).  */
2625 static struct node_order_params *
2626 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2627 {
2628   int u;
2629   int max_asap;
2630   int num_nodes = g->num_nodes;
2631   ddg_edge_ptr e;
2632   /* Allocate a place to hold ordering params for each node in the DDG.  */
2633   nopa node_order_params_arr;
2634 
2635   /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
2636   node_order_params_arr = (nopa) xcalloc (num_nodes,
2637 					  sizeof (struct node_order_params));
2638 
2639   /* Set the aux pointer of each node to point to its order_params structure.  */
2640   for (u = 0; u < num_nodes; u++)
2641     g->nodes[u].aux.info = &node_order_params_arr[u];
2642 
2643   /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2644      calculate ASAP, ALAP, mobility, distance, and height for each node
2645      in the dependence (direct acyclic) graph.  */
2646 
2647   /* We assume that the nodes in the array are in topological order.  */
2648 
2649   max_asap = 0;
2650   for (u = 0; u < num_nodes; u++)
2651     {
2652       ddg_node_ptr u_node = &g->nodes[u];
2653 
2654       ASAP (u_node) = 0;
2655       for (e = u_node->in; e; e = e->next_in)
2656 	if (e->distance == 0)
2657 	  ASAP (u_node) = MAX (ASAP (u_node),
2658 			       ASAP (e->src) + e->latency);
2659       max_asap = MAX (max_asap, ASAP (u_node));
2660     }
2661 
2662   for (u = num_nodes - 1; u > -1; u--)
2663     {
2664       ddg_node_ptr u_node = &g->nodes[u];
2665 
2666       ALAP (u_node) = max_asap;
2667       HEIGHT (u_node) = 0;
2668       for (e = u_node->out; e; e = e->next_out)
2669 	if (e->distance == 0)
2670 	  {
2671 	    ALAP (u_node) = MIN (ALAP (u_node),
2672 				 ALAP (e->dest) - e->latency);
2673 	    HEIGHT (u_node) = MAX (HEIGHT (u_node),
2674 				   HEIGHT (e->dest) + e->latency);
2675 	  }
2676     }
2677   if (dump_file)
2678   {
2679     fprintf (dump_file, "\nOrder params\n");
2680     for (u = 0; u < num_nodes; u++)
2681       {
2682         ddg_node_ptr u_node = &g->nodes[u];
2683 
2684         fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2685                  ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2686       }
2687   }
2688 
2689   *pmax_asap = max_asap;
2690   return node_order_params_arr;
2691 }
2692 
2693 static int
2694 find_max_asap (ddg_ptr g, sbitmap nodes)
2695 {
2696   unsigned int u = 0;
2697   int max_asap = -1;
2698   int result = -1;
2699   sbitmap_iterator sbi;
2700 
2701   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2702     {
2703       ddg_node_ptr u_node = &g->nodes[u];
2704 
2705       if (max_asap < ASAP (u_node))
2706 	{
2707 	  max_asap = ASAP (u_node);
2708 	  result = u;
2709 	}
2710     }
2711   return result;
2712 }
2713 
2714 static int
2715 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2716 {
2717   unsigned int u = 0;
2718   int max_hv = -1;
2719   int min_mob = INT_MAX;
2720   int result = -1;
2721   sbitmap_iterator sbi;
2722 
2723   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2724     {
2725       ddg_node_ptr u_node = &g->nodes[u];
2726 
2727       if (max_hv < HEIGHT (u_node))
2728 	{
2729 	  max_hv = HEIGHT (u_node);
2730 	  min_mob = MOB (u_node);
2731 	  result = u;
2732 	}
2733       else if ((max_hv == HEIGHT (u_node))
2734 	       && (min_mob > MOB (u_node)))
2735 	{
2736 	  min_mob = MOB (u_node);
2737 	  result = u;
2738 	}
2739     }
2740   return result;
2741 }
2742 
2743 static int
2744 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2745 {
2746   unsigned int u = 0;
2747   int max_dv = -1;
2748   int min_mob = INT_MAX;
2749   int result = -1;
2750   sbitmap_iterator sbi;
2751 
2752   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2753     {
2754       ddg_node_ptr u_node = &g->nodes[u];
2755 
2756       if (max_dv < DEPTH (u_node))
2757 	{
2758 	  max_dv = DEPTH (u_node);
2759 	  min_mob = MOB (u_node);
2760 	  result = u;
2761 	}
2762       else if ((max_dv == DEPTH (u_node))
2763 	       && (min_mob > MOB (u_node)))
2764 	{
2765 	  min_mob = MOB (u_node);
2766 	  result = u;
2767 	}
2768     }
2769   return result;
2770 }
2771 
2772 /* Places the nodes of SCC into the NODE_ORDER array starting
2773    at position POS, according to the SMS ordering algorithm.
2774    NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2775    the NODE_ORDER array, starting from position zero.  */
2776 static int
2777 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2778 		    int * node_order, int pos)
2779 {
2780   enum sms_direction dir;
2781   int num_nodes = g->num_nodes;
2782   sbitmap workset = sbitmap_alloc (num_nodes);
2783   sbitmap tmp = sbitmap_alloc (num_nodes);
2784   sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2785   sbitmap predecessors = sbitmap_alloc (num_nodes);
2786   sbitmap successors = sbitmap_alloc (num_nodes);
2787 
2788   bitmap_clear (predecessors);
2789   find_predecessors (predecessors, g, nodes_ordered);
2790 
2791   bitmap_clear (successors);
2792   find_successors (successors, g, nodes_ordered);
2793 
2794   bitmap_clear (tmp);
2795   if (bitmap_and (tmp, predecessors, scc))
2796     {
2797       bitmap_copy (workset, tmp);
2798       dir = BOTTOMUP;
2799     }
2800   else if (bitmap_and (tmp, successors, scc))
2801     {
2802       bitmap_copy (workset, tmp);
2803       dir = TOPDOWN;
2804     }
2805   else
2806     {
2807       int u;
2808 
2809       bitmap_clear (workset);
2810       if ((u = find_max_asap (g, scc)) >= 0)
2811 	bitmap_set_bit (workset, u);
2812       dir = BOTTOMUP;
2813     }
2814 
2815   bitmap_clear (zero_bitmap);
2816   while (!bitmap_equal_p (workset, zero_bitmap))
2817     {
2818       int v;
2819       ddg_node_ptr v_node;
2820       sbitmap v_node_preds;
2821       sbitmap v_node_succs;
2822 
2823       if (dir == TOPDOWN)
2824 	{
2825 	  while (!bitmap_equal_p (workset, zero_bitmap))
2826 	    {
2827 	      v = find_max_hv_min_mob (g, workset);
2828 	      v_node = &g->nodes[v];
2829 	      node_order[pos++] = v;
2830 	      v_node_succs = NODE_SUCCESSORS (v_node);
2831 	      bitmap_and (tmp, v_node_succs, scc);
2832 
2833 	      /* Don't consider the already ordered successors again.  */
2834 	      bitmap_and_compl (tmp, tmp, nodes_ordered);
2835 	      bitmap_ior (workset, workset, tmp);
2836 	      bitmap_clear_bit (workset, v);
2837 	      bitmap_set_bit (nodes_ordered, v);
2838 	    }
2839 	  dir = BOTTOMUP;
2840 	  bitmap_clear (predecessors);
2841 	  find_predecessors (predecessors, g, nodes_ordered);
2842 	  bitmap_and (workset, predecessors, scc);
2843 	}
2844       else
2845 	{
2846 	  while (!bitmap_equal_p (workset, zero_bitmap))
2847 	    {
2848 	      v = find_max_dv_min_mob (g, workset);
2849 	      v_node = &g->nodes[v];
2850 	      node_order[pos++] = v;
2851 	      v_node_preds = NODE_PREDECESSORS (v_node);
2852 	      bitmap_and (tmp, v_node_preds, scc);
2853 
2854 	      /* Don't consider the already ordered predecessors again.  */
2855 	      bitmap_and_compl (tmp, tmp, nodes_ordered);
2856 	      bitmap_ior (workset, workset, tmp);
2857 	      bitmap_clear_bit (workset, v);
2858 	      bitmap_set_bit (nodes_ordered, v);
2859 	    }
2860 	  dir = TOPDOWN;
2861 	  bitmap_clear (successors);
2862 	  find_successors (successors, g, nodes_ordered);
2863 	  bitmap_and (workset, successors, scc);
2864 	}
2865     }
2866   sbitmap_free (tmp);
2867   sbitmap_free (workset);
2868   sbitmap_free (zero_bitmap);
2869   sbitmap_free (predecessors);
2870   sbitmap_free (successors);
2871   return pos;
2872 }
2873 
2874 
2875 /* This page contains functions for manipulating partial-schedules during
2876    modulo scheduling.  */
2877 
2878 /* Create a partial schedule and allocate a memory to hold II rows.  */
2879 
2880 static partial_schedule_ptr
2881 create_partial_schedule (int ii, ddg_ptr g, int history)
2882 {
2883   partial_schedule_ptr ps = XNEW (struct partial_schedule);
2884   ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2885   ps->rows_length = (int *) xcalloc (ii, sizeof (int));
2886   ps->reg_moves.create (0);
2887   ps->ii = ii;
2888   ps->history = history;
2889   ps->min_cycle = INT_MAX;
2890   ps->max_cycle = INT_MIN;
2891   ps->g = g;
2892 
2893   return ps;
2894 }
2895 
2896 /* Free the PS_INSNs in rows array of the given partial schedule.
2897    ??? Consider caching the PS_INSN's.  */
2898 static void
2899 free_ps_insns (partial_schedule_ptr ps)
2900 {
2901   int i;
2902 
2903   for (i = 0; i < ps->ii; i++)
2904     {
2905       while (ps->rows[i])
2906 	{
2907 	  ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2908 
2909 	  free (ps->rows[i]);
2910 	  ps->rows[i] = ps_insn;
2911 	}
2912       ps->rows[i] = NULL;
2913     }
2914 }
2915 
2916 /* Free all the memory allocated to the partial schedule.  */
2917 
2918 static void
2919 free_partial_schedule (partial_schedule_ptr ps)
2920 {
2921   ps_reg_move_info *move;
2922   unsigned int i;
2923 
2924   if (!ps)
2925     return;
2926 
2927   FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
2928     sbitmap_free (move->uses);
2929   ps->reg_moves.release ();
2930 
2931   free_ps_insns (ps);
2932   free (ps->rows);
2933   free (ps->rows_length);
2934   free (ps);
2935 }
2936 
2937 /* Clear the rows array with its PS_INSNs, and create a new one with
2938    NEW_II rows.  */
2939 
2940 static void
2941 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2942 {
2943   if (!ps)
2944     return;
2945   free_ps_insns (ps);
2946   if (new_ii == ps->ii)
2947     return;
2948   ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2949 						 * sizeof (ps_insn_ptr));
2950   memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2951   ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2952   memset (ps->rows_length, 0, new_ii * sizeof (int));
2953   ps->ii = new_ii;
2954   ps->min_cycle = INT_MAX;
2955   ps->max_cycle = INT_MIN;
2956 }
2957 
2958 /* Prints the partial schedule as an ii rows array, for each rows
2959    print the ids of the insns in it.  */
2960 void
2961 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2962 {
2963   int i;
2964 
2965   for (i = 0; i < ps->ii; i++)
2966     {
2967       ps_insn_ptr ps_i = ps->rows[i];
2968 
2969       fprintf (dump, "\n[ROW %d ]: ", i);
2970       while (ps_i)
2971 	{
2972 	  rtx_insn *insn = ps_rtl_insn (ps, ps_i->id);
2973 
2974 	  if (JUMP_P (insn))
2975 	    fprintf (dump, "%d (branch), ", INSN_UID (insn));
2976 	  else
2977 	    fprintf (dump, "%d, ", INSN_UID (insn));
2978 
2979 	  ps_i = ps_i->next_in_row;
2980 	}
2981     }
2982 }
2983 
2984 /* Creates an object of PS_INSN and initializes it to the given parameters.  */
2985 static ps_insn_ptr
2986 create_ps_insn (int id, int cycle)
2987 {
2988   ps_insn_ptr ps_i = XNEW (struct ps_insn);
2989 
2990   ps_i->id = id;
2991   ps_i->next_in_row = NULL;
2992   ps_i->prev_in_row = NULL;
2993   ps_i->cycle = cycle;
2994 
2995   return ps_i;
2996 }
2997 
2998 
2999 /* Removes the given PS_INSN from the partial schedule.  */
3000 static void
3001 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
3002 {
3003   int row;
3004 
3005   gcc_assert (ps && ps_i);
3006 
3007   row = SMODULO (ps_i->cycle, ps->ii);
3008   if (! ps_i->prev_in_row)
3009     {
3010       gcc_assert (ps_i == ps->rows[row]);
3011       ps->rows[row] = ps_i->next_in_row;
3012       if (ps->rows[row])
3013 	ps->rows[row]->prev_in_row = NULL;
3014     }
3015   else
3016     {
3017       ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
3018       if (ps_i->next_in_row)
3019 	ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
3020     }
3021 
3022   ps->rows_length[row] -= 1;
3023   free (ps_i);
3024   return;
3025 }
3026 
3027 /* Unlike what literature describes for modulo scheduling (which focuses
3028    on VLIW machines) the order of the instructions inside a cycle is
3029    important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
3030    where the current instruction should go relative to the already
3031    scheduled instructions in the given cycle.  Go over these
3032    instructions and find the first possible column to put it in.  */
3033 static bool
3034 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3035 		     sbitmap must_precede, sbitmap must_follow)
3036 {
3037   ps_insn_ptr next_ps_i;
3038   ps_insn_ptr first_must_follow = NULL;
3039   ps_insn_ptr last_must_precede = NULL;
3040   ps_insn_ptr last_in_row = NULL;
3041   int row;
3042 
3043   if (! ps_i)
3044     return false;
3045 
3046   row = SMODULO (ps_i->cycle, ps->ii);
3047 
3048   /* Find the first must follow and the last must precede
3049      and insert the node immediately after the must precede
3050      but make sure that it there is no must follow after it.  */
3051   for (next_ps_i = ps->rows[row];
3052        next_ps_i;
3053        next_ps_i = next_ps_i->next_in_row)
3054     {
3055       if (must_follow
3056 	  && bitmap_bit_p (must_follow, next_ps_i->id)
3057 	  && ! first_must_follow)
3058         first_must_follow = next_ps_i;
3059       if (must_precede && bitmap_bit_p (must_precede, next_ps_i->id))
3060         {
3061           /* If we have already met a node that must follow, then
3062 	     there is no possible column.  */
3063   	  if (first_must_follow)
3064             return false;
3065 	  else
3066             last_must_precede = next_ps_i;
3067         }
3068       /* The closing branch must be the last in the row.  */
3069       if (must_precede
3070 	  && bitmap_bit_p (must_precede, next_ps_i->id)
3071 	  && JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
3072 	return false;
3073 
3074        last_in_row = next_ps_i;
3075     }
3076 
3077   /* The closing branch is scheduled as well.  Make sure there is no
3078      dependent instruction after it as the branch should be the last
3079      instruction in the row.  */
3080   if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
3081     {
3082       if (first_must_follow)
3083 	return false;
3084       if (last_in_row)
3085 	{
3086 	  /* Make the branch the last in the row.  New instructions
3087 	     will be inserted at the beginning of the row or after the
3088 	     last must_precede instruction thus the branch is guaranteed
3089 	     to remain the last instruction in the row.  */
3090 	  last_in_row->next_in_row = ps_i;
3091 	  ps_i->prev_in_row = last_in_row;
3092 	  ps_i->next_in_row = NULL;
3093 	}
3094       else
3095 	ps->rows[row] = ps_i;
3096       return true;
3097     }
3098 
3099   /* Now insert the node after INSERT_AFTER_PSI.  */
3100 
3101   if (! last_must_precede)
3102     {
3103       ps_i->next_in_row = ps->rows[row];
3104       ps_i->prev_in_row = NULL;
3105       if (ps_i->next_in_row)
3106     	ps_i->next_in_row->prev_in_row = ps_i;
3107       ps->rows[row] = ps_i;
3108     }
3109   else
3110     {
3111       ps_i->next_in_row = last_must_precede->next_in_row;
3112       last_must_precede->next_in_row = ps_i;
3113       ps_i->prev_in_row = last_must_precede;
3114       if (ps_i->next_in_row)
3115         ps_i->next_in_row->prev_in_row = ps_i;
3116     }
3117 
3118   return true;
3119 }
3120 
3121 /* Advances the PS_INSN one column in its current row; returns false
3122    in failure and true in success.  Bit N is set in MUST_FOLLOW if
3123    the node with cuid N must be come after the node pointed to by
3124    PS_I when scheduled in the same cycle.  */
3125 static int
3126 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3127 			sbitmap must_follow)
3128 {
3129   ps_insn_ptr prev, next;
3130   int row;
3131 
3132   if (!ps || !ps_i)
3133     return false;
3134 
3135   row = SMODULO (ps_i->cycle, ps->ii);
3136 
3137   if (! ps_i->next_in_row)
3138     return false;
3139 
3140   /* Check if next_in_row is dependent on ps_i, both having same sched
3141      times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
3142   if (must_follow && bitmap_bit_p (must_follow, ps_i->next_in_row->id))
3143     return false;
3144 
3145   /* Advance PS_I over its next_in_row in the doubly linked list.  */
3146   prev = ps_i->prev_in_row;
3147   next = ps_i->next_in_row;
3148 
3149   if (ps_i == ps->rows[row])
3150     ps->rows[row] = next;
3151 
3152   ps_i->next_in_row = next->next_in_row;
3153 
3154   if (next->next_in_row)
3155     next->next_in_row->prev_in_row = ps_i;
3156 
3157   next->next_in_row = ps_i;
3158   ps_i->prev_in_row = next;
3159 
3160   next->prev_in_row = prev;
3161   if (prev)
3162     prev->next_in_row = next;
3163 
3164   return true;
3165 }
3166 
3167 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
3168    Returns 0 if this is not possible and a PS_INSN otherwise.  Bit N is
3169    set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
3170    before/after (respectively) the node pointed to by PS_I when scheduled
3171    in the same cycle.  */
3172 static ps_insn_ptr
3173 add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
3174 		sbitmap must_precede, sbitmap must_follow)
3175 {
3176   ps_insn_ptr ps_i;
3177   int row = SMODULO (cycle, ps->ii);
3178 
3179   if (ps->rows_length[row] >= issue_rate)
3180     return NULL;
3181 
3182   ps_i = create_ps_insn (id, cycle);
3183 
3184   /* Finds and inserts PS_I according to MUST_FOLLOW and
3185      MUST_PRECEDE.  */
3186   if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
3187     {
3188       free (ps_i);
3189       return NULL;
3190     }
3191 
3192   ps->rows_length[row] += 1;
3193   return ps_i;
3194 }
3195 
3196 /* Advance time one cycle.  Assumes DFA is being used.  */
3197 static void
3198 advance_one_cycle (void)
3199 {
3200   if (targetm.sched.dfa_pre_cycle_insn)
3201     state_transition (curr_state,
3202 		      targetm.sched.dfa_pre_cycle_insn ());
3203 
3204   state_transition (curr_state, NULL);
3205 
3206   if (targetm.sched.dfa_post_cycle_insn)
3207     state_transition (curr_state,
3208 		      targetm.sched.dfa_post_cycle_insn ());
3209 }
3210 
3211 
3212 
3213 /* Checks if PS has resource conflicts according to DFA, starting from
3214    FROM cycle to TO cycle; returns true if there are conflicts and false
3215    if there are no conflicts.  Assumes DFA is being used.  */
3216 static int
3217 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
3218 {
3219   int cycle;
3220 
3221   state_reset (curr_state);
3222 
3223   for (cycle = from; cycle <= to; cycle++)
3224     {
3225       ps_insn_ptr crr_insn;
3226       /* Holds the remaining issue slots in the current row.  */
3227       int can_issue_more = issue_rate;
3228 
3229       /* Walk through the DFA for the current row.  */
3230       for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
3231 	   crr_insn;
3232 	   crr_insn = crr_insn->next_in_row)
3233 	{
3234 	  rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
3235 
3236 	  if (!NONDEBUG_INSN_P (insn))
3237 	    continue;
3238 
3239 	  /* Check if there is room for the current insn.  */
3240 	  if (!can_issue_more || state_dead_lock_p (curr_state))
3241 	    return true;
3242 
3243 	  /* Update the DFA state and return with failure if the DFA found
3244 	     resource conflicts.  */
3245 	  if (state_transition (curr_state, insn) >= 0)
3246 	    return true;
3247 
3248 	  if (targetm.sched.variable_issue)
3249 	    can_issue_more =
3250 	      targetm.sched.variable_issue (sched_dump, sched_verbose,
3251 					    insn, can_issue_more);
3252 	  /* A naked CLOBBER or USE generates no instruction, so don't
3253 	     let them consume issue slots.  */
3254 	  else if (GET_CODE (PATTERN (insn)) != USE
3255 		   && GET_CODE (PATTERN (insn)) != CLOBBER)
3256 	    can_issue_more--;
3257 	}
3258 
3259       /* Advance the DFA to the next cycle.  */
3260       advance_one_cycle ();
3261     }
3262   return false;
3263 }
3264 
3265 /* Checks if the given node causes resource conflicts when added to PS at
3266    cycle C.  If not the node is added to PS and returned; otherwise zero
3267    is returned.  Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
3268    cuid N must be come before/after (respectively) the node pointed to by
3269    PS_I when scheduled in the same cycle.  */
3270 ps_insn_ptr
3271 ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
3272    			     int c, sbitmap must_precede,
3273 			     sbitmap must_follow)
3274 {
3275   int has_conflicts = 0;
3276   ps_insn_ptr ps_i;
3277 
3278   /* First add the node to the PS, if this succeeds check for
3279      conflicts, trying different issue slots in the same row.  */
3280   if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
3281     return NULL; /* Failed to insert the node at the given cycle.  */
3282 
3283   has_conflicts = ps_has_conflicts (ps, c, c)
3284 		  || (ps->history > 0
3285 		      && ps_has_conflicts (ps,
3286 					   c - ps->history,
3287 					   c + ps->history));
3288 
3289   /* Try different issue slots to find one that the given node can be
3290      scheduled in without conflicts.  */
3291   while (has_conflicts)
3292     {
3293       if (! ps_insn_advance_column (ps, ps_i, must_follow))
3294 	break;
3295       has_conflicts = ps_has_conflicts (ps, c, c)
3296 		      || (ps->history > 0
3297 			  && ps_has_conflicts (ps,
3298 					       c - ps->history,
3299 					       c + ps->history));
3300     }
3301 
3302   if (has_conflicts)
3303     {
3304       remove_node_from_ps (ps, ps_i);
3305       return NULL;
3306     }
3307 
3308   ps->min_cycle = MIN (ps->min_cycle, c);
3309   ps->max_cycle = MAX (ps->max_cycle, c);
3310   return ps_i;
3311 }
3312 
3313 /* Calculate the stage count of the partial schedule PS.  The calculation
3314    takes into account the rotation amount passed in ROTATION_AMOUNT.  */
3315 int
3316 calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
3317 {
3318   int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
3319   int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
3320   int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
3321 
3322   /* The calculation of stage count is done adding the number of stages
3323      before cycle zero and after cycle zero.  */
3324   stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
3325 
3326   return stage_count;
3327 }
3328 
3329 /* Rotate the rows of PS such that insns scheduled at time
3330    START_CYCLE will appear in row 0.  Updates max/min_cycles.  */
3331 void
3332 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
3333 {
3334   int i, row, backward_rotates;
3335   int last_row = ps->ii - 1;
3336 
3337   if (start_cycle == 0)
3338     return;
3339 
3340   backward_rotates = SMODULO (start_cycle, ps->ii);
3341 
3342   /* Revisit later and optimize this into a single loop.  */
3343   for (i = 0; i < backward_rotates; i++)
3344     {
3345       ps_insn_ptr first_row = ps->rows[0];
3346       int first_row_length = ps->rows_length[0];
3347 
3348       for (row = 0; row < last_row; row++)
3349 	{
3350 	  ps->rows[row] = ps->rows[row + 1];
3351 	  ps->rows_length[row] = ps->rows_length[row + 1];
3352 	}
3353 
3354       ps->rows[last_row] = first_row;
3355       ps->rows_length[last_row] = first_row_length;
3356     }
3357 
3358   ps->max_cycle -= start_cycle;
3359   ps->min_cycle -= start_cycle;
3360 }
3361 
3362 #endif /* INSN_SCHEDULING */
3363 
3364 /* Run instruction scheduler.  */
3365 /* Perform SMS module scheduling.  */
3366 
3367 namespace {
3368 
3369 const pass_data pass_data_sms =
3370 {
3371   RTL_PASS, /* type */
3372   "sms", /* name */
3373   OPTGROUP_NONE, /* optinfo_flags */
3374   TV_SMS, /* tv_id */
3375   0, /* properties_required */
3376   0, /* properties_provided */
3377   0, /* properties_destroyed */
3378   0, /* todo_flags_start */
3379   TODO_df_finish, /* todo_flags_finish */
3380 };
3381 
3382 class pass_sms : public rtl_opt_pass
3383 {
3384 public:
3385   pass_sms (gcc::context *ctxt)
3386     : rtl_opt_pass (pass_data_sms, ctxt)
3387   {}
3388 
3389   /* opt_pass methods: */
3390   virtual bool gate (function *)
3391 {
3392   return (optimize > 0 && flag_modulo_sched);
3393 }
3394 
3395   virtual unsigned int execute (function *);
3396 
3397 }; // class pass_sms
3398 
3399 unsigned int
3400 pass_sms::execute (function *fun ATTRIBUTE_UNUSED)
3401 {
3402 #ifdef INSN_SCHEDULING
3403   basic_block bb;
3404 
3405   /* Collect loop information to be used in SMS.  */
3406   cfg_layout_initialize (0);
3407   sms_schedule ();
3408 
3409   /* Update the life information, because we add pseudos.  */
3410   max_regno = max_reg_num ();
3411 
3412   /* Finalize layout changes.  */
3413   FOR_EACH_BB_FN (bb, fun)
3414     if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
3415       bb->aux = bb->next_bb;
3416   free_dominance_info (CDI_DOMINATORS);
3417   cfg_layout_finalize ();
3418 #endif /* INSN_SCHEDULING */
3419   return 0;
3420 }
3421 
3422 } // anon namespace
3423 
3424 rtl_opt_pass *
3425 make_pass_sms (gcc::context *ctxt)
3426 {
3427   return new pass_sms (ctxt);
3428 }
3429