xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/modulo-sched.c (revision 404ee5b9334f618040b6cdef96a0ff35a6fc4636)
1 /* Swing Modulo Scheduling implementation.
2    Copyright (C) 2004-2017 Free Software Foundation, Inc.
3    Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "cfghooks.h"
30 #include "df.h"
31 #include "memmodel.h"
32 #include "optabs.h"
33 #include "regs.h"
34 #include "emit-rtl.h"
35 #include "gcov-io.h"
36 #include "profile.h"
37 #include "insn-attr.h"
38 #include "cfgrtl.h"
39 #include "sched-int.h"
40 #include "cfgloop.h"
41 #include "expr.h"
42 #include "params.h"
43 #include "ddg.h"
44 #include "tree-pass.h"
45 #include "dbgcnt.h"
46 #include "loop-unroll.h"
47 
48 #ifdef INSN_SCHEDULING
49 
50 /* This file contains the implementation of the Swing Modulo Scheduler,
51    described in the following references:
52    [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
53        Lifetime--sensitive modulo scheduling in a production environment.
54        IEEE Trans. on Comps., 50(3), March 2001
55    [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
56        Swing Modulo Scheduling: A Lifetime Sensitive Approach.
57        PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
58 
59    The basic structure is:
60    1. Build a data-dependence graph (DDG) for each loop.
61    2. Use the DDG to order the insns of a loop (not in topological order
62       necessarily, but rather) trying to place each insn after all its
63       predecessors _or_ after all its successors.
64    3. Compute MII: a lower bound on the number of cycles to schedule the loop.
65    4. Use the ordering to perform list-scheduling of the loop:
66       1. Set II = MII.  We will try to schedule the loop within II cycles.
67       2. Try to schedule the insns one by one according to the ordering.
68 	 For each insn compute an interval of cycles by considering already-
69 	 scheduled preds and succs (and associated latencies); try to place
70 	 the insn in the cycles of this window checking for potential
71 	 resource conflicts (using the DFA interface).
72 	 Note: this is different from the cycle-scheduling of schedule_insns;
73 	 here the insns are not scheduled monotonically top-down (nor bottom-
74 	 up).
75       3. If failed in scheduling all insns - bump II++ and try again, unless
76 	 II reaches an upper bound MaxII, in which case report failure.
77    5. If we succeeded in scheduling the loop within II cycles, we now
78       generate prolog and epilog, decrease the counter of the loop, and
79       perform modulo variable expansion for live ranges that span more than
80       II cycles (i.e. use register copies to prevent a def from overwriting
81       itself before reaching the use).
82 
83     SMS works with countable loops (1) whose control part can be easily
84     decoupled from the rest of the loop and (2) whose loop count can
85     be easily adjusted.  This is because we peel a constant number of
86     iterations into a prologue and epilogue for which we want to avoid
87     emitting the control part, and a kernel which is to iterate that
88     constant number of iterations less than the original loop.  So the
89     control part should be a set of insns clearly identified and having
90     its own iv, not otherwise used in the loop (at-least for now), which
91     initializes a register before the loop to the number of iterations.
92     Currently SMS relies on the do-loop pattern to recognize such loops,
93     where (1) the control part comprises of all insns defining and/or
94     using a certain 'count' register and (2) the loop count can be
95     adjusted by modifying this register prior to the loop.
96     TODO: Rely on cfgloop analysis instead.  */
97 
98 /* This page defines partial-schedule structures and functions for
99    modulo scheduling.  */
100 
101 typedef struct partial_schedule *partial_schedule_ptr;
102 typedef struct ps_insn *ps_insn_ptr;
103 
104 /* The minimum (absolute) cycle that a node of ps was scheduled in.  */
105 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
106 
107 /* The maximum (absolute) cycle that a node of ps was scheduled in.  */
108 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
109 
110 /* Perform signed modulo, always returning a non-negative value.  */
111 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
112 
113 /* The number of different iterations the nodes in ps span, assuming
114    the stage boundaries are placed efficiently.  */
115 #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
116                          + 1 + ii - 1) / ii)
117 /* The stage count of ps.  */
118 #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
119 
120 /* A single instruction in the partial schedule.  */
121 struct ps_insn
122 {
123   /* Identifies the instruction to be scheduled.  Values smaller than
124      the ddg's num_nodes refer directly to ddg nodes.  A value of
125      X - num_nodes refers to register move X.  */
126   int id;
127 
128   /* The (absolute) cycle in which the PS instruction is scheduled.
129      Same as SCHED_TIME (node).  */
130   int cycle;
131 
132   /* The next/prev PS_INSN in the same row.  */
133   ps_insn_ptr next_in_row,
134 	      prev_in_row;
135 
136 };
137 
138 /* Information about a register move that has been added to a partial
139    schedule.  */
140 struct ps_reg_move_info
141 {
142   /* The source of the move is defined by the ps_insn with id DEF.
143      The destination is used by the ps_insns with the ids in USES.  */
144   int def;
145   sbitmap uses;
146 
147   /* The original form of USES' instructions used OLD_REG, but they
148      should now use NEW_REG.  */
149   rtx old_reg;
150   rtx new_reg;
151 
152   /* The number of consecutive stages that the move occupies.  */
153   int num_consecutive_stages;
154 
155   /* An instruction that sets NEW_REG to the correct value.  The first
156      move associated with DEF will have an rhs of OLD_REG; later moves
157      use the result of the previous move.  */
158   rtx_insn *insn;
159 };
160 
161 /* Holds the partial schedule as an array of II rows.  Each entry of the
162    array points to a linked list of PS_INSNs, which represents the
163    instructions that are scheduled for that row.  */
164 struct partial_schedule
165 {
166   int ii;	/* Number of rows in the partial schedule.  */
167   int history;  /* Threshold for conflict checking using DFA.  */
168 
169   /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
170   ps_insn_ptr *rows;
171 
172   /* All the moves added for this partial schedule.  Index X has
173      a ps_insn id of X + g->num_nodes.  */
174   vec<ps_reg_move_info> reg_moves;
175 
176   /*  rows_length[i] holds the number of instructions in the row.
177       It is used only (as an optimization) to back off quickly from
178       trying to schedule a node in a full row; that is, to avoid running
179       through futile DFA state transitions.  */
180   int *rows_length;
181 
182   /* The earliest absolute cycle of an insn in the partial schedule.  */
183   int min_cycle;
184 
185   /* The latest absolute cycle of an insn in the partial schedule.  */
186   int max_cycle;
187 
188   ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */
189 
190   int stage_count;  /* The stage count of the partial schedule.  */
191 };
192 
193 
194 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
195 static void free_partial_schedule (partial_schedule_ptr);
196 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
197 void print_partial_schedule (partial_schedule_ptr, FILE *);
198 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
199 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
200 						int, int, sbitmap, sbitmap);
201 static void rotate_partial_schedule (partial_schedule_ptr, int);
202 void set_row_column_for_ps (partial_schedule_ptr);
203 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
204 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
205 
206 
207 /* This page defines constants and structures for the modulo scheduling
208    driver.  */
209 
210 static int sms_order_nodes (ddg_ptr, int, int *, int *);
211 static void set_node_sched_params (ddg_ptr);
212 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
213 static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
214 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
215                                     rtx, rtx);
216 static int calculate_stage_count (partial_schedule_ptr, int);
217 static void calculate_must_precede_follow (ddg_node_ptr, int, int,
218 					   int, int, sbitmap, sbitmap, sbitmap);
219 static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
220 			     sbitmap, int, int *, int *, int *);
221 static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
222 					  sbitmap, int *, sbitmap, sbitmap);
223 static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
224 
225 #define NODE_ASAP(node) ((node)->aux.count)
226 
227 #define SCHED_PARAMS(x) (&node_sched_param_vec[x])
228 #define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
229 #define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
230 #define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
231 #define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
232 
233 /* The scheduling parameters held for each node.  */
234 typedef struct node_sched_params
235 {
236   int time;	/* The absolute scheduling cycle.  */
237 
238   int row;    /* Holds time % ii.  */
239   int stage;  /* Holds time / ii.  */
240 
241   /* The column of a node inside the ps.  If nodes u, v are on the same row,
242      u will precede v if column (u) < column (v).  */
243   int column;
244 } *node_sched_params_ptr;
245 
246 /* The following three functions are copied from the current scheduler
247    code in order to use sched_analyze() for computing the dependencies.
248    They are used when initializing the sched_info structure.  */
249 static const char *
250 sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED)
251 {
252   static char tmp[80];
253 
254   sprintf (tmp, "i%4d", INSN_UID (insn));
255   return tmp;
256 }
257 
258 static void
259 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
260 			       regset used ATTRIBUTE_UNUSED)
261 {
262 }
263 
264 static struct common_sched_info_def sms_common_sched_info;
265 
266 static struct sched_deps_info_def sms_sched_deps_info =
267   {
268     compute_jump_reg_dependencies,
269     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
270     NULL,
271     0, 0, 0
272   };
273 
274 static struct haifa_sched_info sms_sched_info =
275 {
276   NULL,
277   NULL,
278   NULL,
279   NULL,
280   NULL,
281   sms_print_insn,
282   NULL,
283   NULL, /* insn_finishes_block_p */
284   NULL, NULL,
285   NULL, NULL,
286   0, 0,
287 
288   NULL, NULL, NULL, NULL,
289   NULL, NULL,
290   0
291 };
292 
293 /* Partial schedule instruction ID in PS is a register move.  Return
294    information about it.  */
295 static struct ps_reg_move_info *
296 ps_reg_move (partial_schedule_ptr ps, int id)
297 {
298   gcc_checking_assert (id >= ps->g->num_nodes);
299   return &ps->reg_moves[id - ps->g->num_nodes];
300 }
301 
302 /* Return the rtl instruction that is being scheduled by partial schedule
303    instruction ID, which belongs to schedule PS.  */
304 static rtx_insn *
305 ps_rtl_insn (partial_schedule_ptr ps, int id)
306 {
307   if (id < ps->g->num_nodes)
308     return ps->g->nodes[id].insn;
309   else
310     return ps_reg_move (ps, id)->insn;
311 }
312 
313 /* Partial schedule instruction ID, which belongs to PS, occurred in
314    the original (unscheduled) loop.  Return the first instruction
315    in the loop that was associated with ps_rtl_insn (PS, ID).
316    If the instruction had some notes before it, this is the first
317    of those notes.  */
318 static rtx_insn *
319 ps_first_note (partial_schedule_ptr ps, int id)
320 {
321   gcc_assert (id < ps->g->num_nodes);
322   return ps->g->nodes[id].first_note;
323 }
324 
325 /* Return the number of consecutive stages that are occupied by
326    partial schedule instruction ID in PS.  */
327 static int
328 ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
329 {
330   if (id < ps->g->num_nodes)
331     return 1;
332   else
333     return ps_reg_move (ps, id)->num_consecutive_stages;
334 }
335 
336 /* Given HEAD and TAIL which are the first and last insns in a loop;
337    return the register which controls the loop.  Return zero if it has
338    more than one occurrence in the loop besides the control part or the
339    do-loop pattern is not of the form we expect.  */
340 static rtx
341 doloop_register_get (rtx_insn *head, rtx_insn *tail)
342 {
343   rtx reg, condition;
344   rtx_insn *insn, *first_insn_not_to_check;
345 
346   if (!JUMP_P (tail))
347     return NULL_RTX;
348 
349   if (!targetm.code_for_doloop_end)
350     return NULL_RTX;
351 
352   /* TODO: Free SMS's dependence on doloop_condition_get.  */
353   condition = doloop_condition_get (tail);
354   if (! condition)
355     return NULL_RTX;
356 
357   if (REG_P (XEXP (condition, 0)))
358     reg = XEXP (condition, 0);
359   else if (GET_CODE (XEXP (condition, 0)) == PLUS
360 	   && REG_P (XEXP (XEXP (condition, 0), 0)))
361     reg = XEXP (XEXP (condition, 0), 0);
362   else
363     gcc_unreachable ();
364 
365   /* Check that the COUNT_REG has no other occurrences in the loop
366      until the decrement.  We assume the control part consists of
367      either a single (parallel) branch-on-count or a (non-parallel)
368      branch immediately preceded by a single (decrement) insn.  */
369   first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
370                              : prev_nondebug_insn (tail));
371 
372   for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
373     if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
374       {
375         if (dump_file)
376         {
377           fprintf (dump_file, "SMS count_reg found ");
378           print_rtl_single (dump_file, reg);
379           fprintf (dump_file, " outside control in insn:\n");
380           print_rtl_single (dump_file, insn);
381         }
382 
383         return NULL_RTX;
384       }
385 
386   return reg;
387 }
388 
389 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
390    that the number of iterations is a compile-time constant.  If so,
391    return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to
392    this constant.  Otherwise return 0.  */
393 static rtx_insn *
394 const_iteration_count (rtx count_reg, basic_block pre_header,
395 		       int64_t * count)
396 {
397   rtx_insn *insn;
398   rtx_insn *head, *tail;
399 
400   if (! pre_header)
401     return NULL;
402 
403   get_ebb_head_tail (pre_header, pre_header, &head, &tail);
404 
405   for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
406     if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
407 	rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
408       {
409 	rtx pat = single_set (insn);
410 
411 	if (CONST_INT_P (SET_SRC (pat)))
412 	  {
413 	    *count = INTVAL (SET_SRC (pat));
414 	    return insn;
415 	  }
416 
417 	return NULL;
418       }
419 
420   return NULL;
421 }
422 
423 /* A very simple resource-based lower bound on the initiation interval.
424    ??? Improve the accuracy of this bound by considering the
425    utilization of various units.  */
426 static int
427 res_MII (ddg_ptr g)
428 {
429   if (targetm.sched.sms_res_mii)
430     return targetm.sched.sms_res_mii (g);
431 
432   return ((g->num_nodes - g->num_debug) / issue_rate);
433 }
434 
435 
436 /* A vector that contains the sched data for each ps_insn.  */
437 static vec<node_sched_params> node_sched_param_vec;
438 
439 /* Allocate sched_params for each node and initialize it.  */
440 static void
441 set_node_sched_params (ddg_ptr g)
442 {
443   node_sched_param_vec.truncate (0);
444   node_sched_param_vec.safe_grow_cleared (g->num_nodes);
445 }
446 
447 /* Make sure that node_sched_param_vec has an entry for every move in PS.  */
448 static void
449 extend_node_sched_params (partial_schedule_ptr ps)
450 {
451   node_sched_param_vec.safe_grow_cleared (ps->g->num_nodes
452 					  + ps->reg_moves.length ());
453 }
454 
455 /* Update the sched_params (time, row and stage) for node U using the II,
456    the CYCLE of U and MIN_CYCLE.
457    We're not simply taking the following
458    SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
459    because the stages may not be aligned on cycle 0.  */
460 static void
461 update_node_sched_params (int u, int ii, int cycle, int min_cycle)
462 {
463   int sc_until_cycle_zero;
464   int stage;
465 
466   SCHED_TIME (u) = cycle;
467   SCHED_ROW (u) = SMODULO (cycle, ii);
468 
469   /* The calculation of stage count is done adding the number
470      of stages before cycle zero and after cycle zero.  */
471   sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
472 
473   if (SCHED_TIME (u) < 0)
474     {
475       stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
476       SCHED_STAGE (u) = sc_until_cycle_zero - stage;
477     }
478   else
479     {
480       stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
481       SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
482     }
483 }
484 
485 static void
486 print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
487 {
488   int i;
489 
490   if (! file)
491     return;
492   for (i = 0; i < num_nodes; i++)
493     {
494       node_sched_params_ptr nsp = SCHED_PARAMS (i);
495 
496       fprintf (file, "Node = %d; INSN = %d\n", i,
497 	       INSN_UID (ps_rtl_insn (ps, i)));
498       fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
499       fprintf (file, " time = %d:\n", nsp->time);
500       fprintf (file, " stage = %d:\n", nsp->stage);
501     }
502 }
503 
504 /* Set SCHED_COLUMN for each instruction in row ROW of PS.  */
505 static void
506 set_columns_for_row (partial_schedule_ptr ps, int row)
507 {
508   ps_insn_ptr cur_insn;
509   int column;
510 
511   column = 0;
512   for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
513     SCHED_COLUMN (cur_insn->id) = column++;
514 }
515 
516 /* Set SCHED_COLUMN for each instruction in PS.  */
517 static void
518 set_columns_for_ps (partial_schedule_ptr ps)
519 {
520   int row;
521 
522   for (row = 0; row < ps->ii; row++)
523     set_columns_for_row (ps, row);
524 }
525 
526 /* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
527    Its single predecessor has already been scheduled, as has its
528    ddg node successors.  (The move may have also another move as its
529    successor, in which case that successor will be scheduled later.)
530 
531    The move is part of a chain that satisfies register dependencies
532    between a producing ddg node and various consuming ddg nodes.
533    If some of these dependencies have a distance of 1 (meaning that
534    the use is upward-exposed) then DISTANCE1_USES is nonnull and
535    contains the set of uses with distance-1 dependencies.
536    DISTANCE1_USES is null otherwise.
537 
538    MUST_FOLLOW is a scratch bitmap that is big enough to hold
539    all current ps_insn ids.
540 
541    Return true on success.  */
542 static bool
543 schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
544 		   sbitmap distance1_uses, sbitmap must_follow)
545 {
546   unsigned int u;
547   int this_time, this_distance, this_start, this_end, this_latency;
548   int start, end, c, ii;
549   sbitmap_iterator sbi;
550   ps_reg_move_info *move;
551   rtx_insn *this_insn;
552   ps_insn_ptr psi;
553 
554   move = ps_reg_move (ps, i_reg_move);
555   ii = ps->ii;
556   if (dump_file)
557     {
558       fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
559 	       ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
560 	       PS_MIN_CYCLE (ps));
561       print_rtl_single (dump_file, move->insn);
562       fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
563       fprintf (dump_file, "=========== =========== =====\n");
564     }
565 
566   start = INT_MIN;
567   end = INT_MAX;
568 
569   /* For dependencies of distance 1 between a producer ddg node A
570      and consumer ddg node B, we have a chain of dependencies:
571 
572         A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
573 
574      where Mi is the ith move.  For dependencies of distance 0 between
575      a producer ddg node A and consumer ddg node C, we have a chain of
576      dependencies:
577 
578         A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
579 
580      where Mi' occupies the same position as Mi but occurs a stage later.
581      We can only schedule each move once, so if we have both types of
582      chain, we model the second as:
583 
584         A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
585 
586      First handle the dependencies between the previously-scheduled
587      predecessor and the move.  */
588   this_insn = ps_rtl_insn (ps, move->def);
589   this_latency = insn_latency (this_insn, move->insn);
590   this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
591   this_time = SCHED_TIME (move->def) - this_distance * ii;
592   this_start = this_time + this_latency;
593   this_end = this_time + ii;
594   if (dump_file)
595     fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
596 	     this_start, this_end, SCHED_TIME (move->def),
597 	     INSN_UID (this_insn), this_latency, this_distance,
598 	     INSN_UID (move->insn));
599 
600   if (start < this_start)
601     start = this_start;
602   if (end > this_end)
603     end = this_end;
604 
605   /* Handle the dependencies between the move and previously-scheduled
606      successors.  */
607   EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi)
608     {
609       this_insn = ps_rtl_insn (ps, u);
610       this_latency = insn_latency (move->insn, this_insn);
611       if (distance1_uses && !bitmap_bit_p (distance1_uses, u))
612 	this_distance = -1;
613       else
614 	this_distance = 0;
615       this_time = SCHED_TIME (u) + this_distance * ii;
616       this_start = this_time - ii;
617       this_end = this_time - this_latency;
618       if (dump_file)
619 	fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
620 		 this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
621 		 this_latency, this_distance, INSN_UID (this_insn));
622 
623       if (start < this_start)
624 	start = this_start;
625       if (end > this_end)
626 	end = this_end;
627     }
628 
629   if (dump_file)
630     {
631       fprintf (dump_file, "----------- ----------- -----\n");
632       fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
633     }
634 
635   bitmap_clear (must_follow);
636   bitmap_set_bit (must_follow, move->def);
637 
638   start = MAX (start, end - (ii - 1));
639   for (c = end; c >= start; c--)
640     {
641       psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
642 					 move->uses, must_follow);
643       if (psi)
644 	{
645 	  update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
646 	  if (dump_file)
647 	    fprintf (dump_file, "\nScheduled register move INSN %d at"
648 		     " time %d, row %d\n\n", INSN_UID (move->insn), c,
649 		     SCHED_ROW (i_reg_move));
650 	  return true;
651 	}
652     }
653 
654   if (dump_file)
655     fprintf (dump_file, "\nNo available slot\n\n");
656 
657   return false;
658 }
659 
660 /*
661    Breaking intra-loop register anti-dependences:
662    Each intra-loop register anti-dependence implies a cross-iteration true
663    dependence of distance 1. Therefore, we can remove such false dependencies
664    and figure out if the partial schedule broke them by checking if (for a
665    true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
666    if so generate a register move.   The number of such moves is equal to:
667               SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
668    nreg_moves = ----------------------------------- + 1 - {   dependence.
669                             ii                          { 1 if not.
670 */
671 static bool
672 schedule_reg_moves (partial_schedule_ptr ps)
673 {
674   ddg_ptr g = ps->g;
675   int ii = ps->ii;
676   int i;
677 
678   for (i = 0; i < g->num_nodes; i++)
679     {
680       ddg_node_ptr u = &g->nodes[i];
681       ddg_edge_ptr e;
682       int nreg_moves = 0, i_reg_move;
683       rtx prev_reg, old_reg;
684       int first_move;
685       int distances[2];
686       sbitmap distance1_uses;
687       rtx set = single_set (u->insn);
688 
689       /* Skip instructions that do not set a register.  */
690       if ((set && !REG_P (SET_DEST (set))))
691         continue;
692 
693       /* Compute the number of reg_moves needed for u, by looking at life
694 	 ranges started at u (excluding self-loops).  */
695       distances[0] = distances[1] = false;
696       for (e = u->out; e; e = e->next_out)
697 	if (e->type == TRUE_DEP && e->dest != e->src)
698 	  {
699 	    int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
700 				- SCHED_TIME (e->src->cuid)) / ii;
701 
702             if (e->distance == 1)
703               nreg_moves4e = (SCHED_TIME (e->dest->cuid)
704 			      - SCHED_TIME (e->src->cuid) + ii) / ii;
705 
706 	    /* If dest precedes src in the schedule of the kernel, then dest
707 	       will read before src writes and we can save one reg_copy.  */
708 	    if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
709 		&& SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
710 	      nreg_moves4e--;
711 
712             if (nreg_moves4e >= 1)
713 	      {
714 		/* !single_set instructions are not supported yet and
715 		   thus we do not except to encounter them in the loop
716 		   except from the doloop part.  For the latter case
717 		   we assume no regmoves are generated as the doloop
718 		   instructions are tied to the branch with an edge.  */
719 		gcc_assert (set);
720 		/* If the instruction contains auto-inc register then
721 		   validate that the regmov is being generated for the
722 		   target regsiter rather then the inc'ed register.	*/
723 		gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
724 	      }
725 
726 	    if (nreg_moves4e)
727 	      {
728 		gcc_assert (e->distance < 2);
729 		distances[e->distance] = true;
730 	      }
731 	    nreg_moves = MAX (nreg_moves, nreg_moves4e);
732 	  }
733 
734       if (nreg_moves == 0)
735 	continue;
736 
737       /* Create NREG_MOVES register moves.  */
738       first_move = ps->reg_moves.length ();
739       ps->reg_moves.safe_grow_cleared (first_move + nreg_moves);
740       extend_node_sched_params (ps);
741 
742       /* Record the moves associated with this node.  */
743       first_move += ps->g->num_nodes;
744 
745       /* Generate each move.  */
746       old_reg = prev_reg = SET_DEST (single_set (u->insn));
747       for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
748 	{
749 	  ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
750 
751 	  move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
752 	  move->uses = sbitmap_alloc (first_move + nreg_moves);
753 	  move->old_reg = old_reg;
754 	  move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
755 	  move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
756 	  move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg));
757 	  bitmap_clear (move->uses);
758 
759 	  prev_reg = move->new_reg;
760 	}
761 
762       distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
763 
764       if (distance1_uses)
765 	bitmap_clear (distance1_uses);
766 
767       /* Every use of the register defined by node may require a different
768 	 copy of this register, depending on the time the use is scheduled.
769 	 Record which uses require which move results.  */
770       for (e = u->out; e; e = e->next_out)
771 	if (e->type == TRUE_DEP && e->dest != e->src)
772 	  {
773 	    int dest_copy = (SCHED_TIME (e->dest->cuid)
774 			     - SCHED_TIME (e->src->cuid)) / ii;
775 
776 	    if (e->distance == 1)
777 	      dest_copy = (SCHED_TIME (e->dest->cuid)
778 			   - SCHED_TIME (e->src->cuid) + ii) / ii;
779 
780 	    if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
781 		&& SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
782 	      dest_copy--;
783 
784 	    if (dest_copy)
785 	      {
786 		ps_reg_move_info *move;
787 
788 		move = ps_reg_move (ps, first_move + dest_copy - 1);
789 		bitmap_set_bit (move->uses, e->dest->cuid);
790 		if (e->distance == 1)
791 		  bitmap_set_bit (distance1_uses, e->dest->cuid);
792 	      }
793 	  }
794 
795       auto_sbitmap must_follow (first_move + nreg_moves);
796       for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
797 	if (!schedule_reg_move (ps, first_move + i_reg_move,
798 				distance1_uses, must_follow))
799 	  break;
800       if (distance1_uses)
801 	sbitmap_free (distance1_uses);
802       if (i_reg_move < nreg_moves)
803 	return false;
804     }
805   return true;
806 }
807 
808 /* Emit the moves associated with PS.  Apply the substitutions
809    associated with them.  */
810 static void
811 apply_reg_moves (partial_schedule_ptr ps)
812 {
813   ps_reg_move_info *move;
814   int i;
815 
816   FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
817     {
818       unsigned int i_use;
819       sbitmap_iterator sbi;
820 
821       EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi)
822 	{
823 	  replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
824 	  df_insn_rescan (ps->g->nodes[i_use].insn);
825 	}
826     }
827 }
828 
829 /* Bump the SCHED_TIMEs of all nodes by AMOUNT.  Set the values of
830    SCHED_ROW and SCHED_STAGE.  Instruction scheduled on cycle AMOUNT
831    will move to cycle zero.  */
832 static void
833 reset_sched_times (partial_schedule_ptr ps, int amount)
834 {
835   int row;
836   int ii = ps->ii;
837   ps_insn_ptr crr_insn;
838 
839   for (row = 0; row < ii; row++)
840     for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
841       {
842 	int u = crr_insn->id;
843 	int normalized_time = SCHED_TIME (u) - amount;
844 	int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
845 
846         if (dump_file)
847           {
848             /* Print the scheduling times after the rotation.  */
849 	    rtx_insn *insn = ps_rtl_insn (ps, u);
850 
851             fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
852                      "crr_insn->cycle=%d, min_cycle=%d", u,
853                      INSN_UID (insn), normalized_time, new_min_cycle);
854             if (JUMP_P (insn))
855               fprintf (dump_file, " (branch)");
856             fprintf (dump_file, "\n");
857           }
858 
859 	gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
860 	gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
861 
862 	crr_insn->cycle = normalized_time;
863 	update_node_sched_params (u, ii, normalized_time, new_min_cycle);
864       }
865 }
866 
867 /* Permute the insns according to their order in PS, from row 0 to
868    row ii-1, and position them right before LAST.  This schedules
869    the insns of the loop kernel.  */
870 static void
871 permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last)
872 {
873   int ii = ps->ii;
874   int row;
875   ps_insn_ptr ps_ij;
876 
877   for (row = 0; row < ii ; row++)
878     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
879       {
880 	rtx_insn *insn = ps_rtl_insn (ps, ps_ij->id);
881 
882 	if (PREV_INSN (last) != insn)
883 	  {
884 	    if (ps_ij->id < ps->g->num_nodes)
885 	      reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
886 				  PREV_INSN (last));
887 	    else
888 	      add_insn_before (insn, last, NULL);
889 	  }
890       }
891 }
892 
893 /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
894    respectively only if cycle C falls on the border of the scheduling
895    window boundaries marked by START and END cycles.  STEP is the
896    direction of the window.  */
897 static inline void
898 set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
899 			 sbitmap *tmp_precede, sbitmap must_precede, int c,
900 			 int start, int end, int step)
901 {
902   *tmp_precede = NULL;
903   *tmp_follow = NULL;
904 
905   if (c == start)
906     {
907       if (step == 1)
908 	*tmp_precede = must_precede;
909       else			/* step == -1.  */
910 	*tmp_follow = must_follow;
911     }
912   if (c == end - step)
913     {
914       if (step == 1)
915 	*tmp_follow = must_follow;
916       else			/* step == -1.  */
917 	*tmp_precede = must_precede;
918     }
919 
920 }
921 
922 /* Return True if the branch can be moved to row ii-1 while
923    normalizing the partial schedule PS to start from cycle zero and thus
924    optimize the SC.  Otherwise return False.  */
925 static bool
926 optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
927 {
928   int amount = PS_MIN_CYCLE (ps);
929   int start, end, step;
930   int ii = ps->ii;
931   bool ok = false;
932   int stage_count, stage_count_curr;
933 
934   /* Compare the SC after normalization and SC after bringing the branch
935      to row ii-1.  If they are equal just bail out.  */
936   stage_count = calculate_stage_count (ps, amount);
937   stage_count_curr =
938     calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
939 
940   if (stage_count == stage_count_curr)
941     {
942       if (dump_file)
943 	fprintf (dump_file, "SMS SC already optimized.\n");
944 
945       return false;
946     }
947 
948   if (dump_file)
949     {
950       fprintf (dump_file, "SMS Trying to optimize branch location\n");
951       fprintf (dump_file, "SMS partial schedule before trial:\n");
952       print_partial_schedule (ps, dump_file);
953     }
954 
955   /* First, normalize the partial scheduling.  */
956   reset_sched_times (ps, amount);
957   rotate_partial_schedule (ps, amount);
958   if (dump_file)
959     {
960       fprintf (dump_file,
961 	       "SMS partial schedule after normalization (ii, %d, SC %d):\n",
962 	       ii, stage_count);
963       print_partial_schedule (ps, dump_file);
964     }
965 
966   if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
967     return true;
968 
969   auto_sbitmap sched_nodes (g->num_nodes);
970   bitmap_ones (sched_nodes);
971 
972   /* Calculate the new placement of the branch.  It should be in row
973      ii-1 and fall into it's scheduling window.  */
974   if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
975 			&step, &end) == 0)
976     {
977       bool success;
978       ps_insn_ptr next_ps_i;
979       int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
980       int row = SMODULO (branch_cycle, ps->ii);
981       int num_splits = 0;
982       sbitmap tmp_precede, tmp_follow;
983       int min_cycle, c;
984 
985       if (dump_file)
986 	fprintf (dump_file, "\nTrying to schedule node %d "
987 		 "INSN = %d  in (%d .. %d) step %d\n",
988 		 g->closing_branch->cuid,
989 		 (INSN_UID (g->closing_branch->insn)), start, end, step);
990 
991       gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
992       if (step == 1)
993 	{
994 	  c = start + ii - SMODULO (start, ii) - 1;
995 	  gcc_assert (c >= start);
996 	  if (c >= end)
997 	    {
998 	      if (dump_file)
999 		fprintf (dump_file,
1000 			 "SMS failed to schedule branch at cycle: %d\n", c);
1001 	      return false;
1002 	    }
1003 	}
1004       else
1005 	{
1006 	  c = start - SMODULO (start, ii) - 1;
1007 	  gcc_assert (c <= start);
1008 
1009 	  if (c <= end)
1010 	    {
1011 	      if (dump_file)
1012 		fprintf (dump_file,
1013 			 "SMS failed to schedule branch at cycle: %d\n", c);
1014 	      return false;
1015 	    }
1016 	}
1017 
1018       auto_sbitmap must_precede (g->num_nodes);
1019       auto_sbitmap must_follow (g->num_nodes);
1020 
1021       /* Try to schedule the branch is it's new cycle.  */
1022       calculate_must_precede_follow (g->closing_branch, start, end,
1023 				     step, ii, sched_nodes,
1024 				     must_precede, must_follow);
1025 
1026       set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1027 			       must_precede, c, start, end, step);
1028 
1029       /* Find the element in the partial schedule related to the closing
1030          branch so we can remove it from it's current cycle.  */
1031       for (next_ps_i = ps->rows[row];
1032 	   next_ps_i; next_ps_i = next_ps_i->next_in_row)
1033 	if (next_ps_i->id == g->closing_branch->cuid)
1034 	  break;
1035 
1036       min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1037       remove_node_from_ps (ps, next_ps_i);
1038       success =
1039 	try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
1040 				      sched_nodes, &num_splits,
1041 				      tmp_precede, tmp_follow);
1042       gcc_assert (num_splits == 0);
1043       if (!success)
1044 	{
1045 	  if (dump_file)
1046 	    fprintf (dump_file,
1047 		     "SMS failed to schedule branch at cycle: %d, "
1048 		     "bringing it back to cycle %d\n", c, branch_cycle);
1049 
1050 	  /* The branch was failed to be placed in row ii - 1.
1051 	     Put it back in it's original place in the partial
1052 	     schedualing.  */
1053 	  set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1054 				   must_precede, branch_cycle, start, end,
1055 				   step);
1056 	  success =
1057 	    try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
1058 					  branch_cycle, sched_nodes,
1059 					  &num_splits, tmp_precede,
1060 					  tmp_follow);
1061 	  gcc_assert (success && (num_splits == 0));
1062 	  ok = false;
1063 	}
1064       else
1065 	{
1066 	  /* The branch is placed in row ii - 1.  */
1067 	  if (dump_file)
1068 	    fprintf (dump_file,
1069 		     "SMS success in moving branch to cycle %d\n", c);
1070 
1071 	  update_node_sched_params (g->closing_branch->cuid, ii, c,
1072 				    PS_MIN_CYCLE (ps));
1073 	  ok = true;
1074 	}
1075 
1076       /* This might have been added to a new first stage.  */
1077       if (PS_MIN_CYCLE (ps) < min_cycle)
1078 	reset_sched_times (ps, 0);
1079     }
1080 
1081   return ok;
1082 }
1083 
1084 static void
1085 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
1086 			   int to_stage, rtx count_reg)
1087 {
1088   int row;
1089   ps_insn_ptr ps_ij;
1090 
1091   for (row = 0; row < ps->ii; row++)
1092     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
1093       {
1094 	int u = ps_ij->id;
1095 	int first_u, last_u;
1096 	rtx_insn *u_insn;
1097 
1098         /* Do not duplicate any insn which refers to count_reg as it
1099            belongs to the control part.
1100            The closing branch is scheduled as well and thus should
1101            be ignored.
1102            TODO: This should be done by analyzing the control part of
1103            the loop.  */
1104 	u_insn = ps_rtl_insn (ps, u);
1105         if (reg_mentioned_p (count_reg, u_insn)
1106             || JUMP_P (u_insn))
1107           continue;
1108 
1109 	first_u = SCHED_STAGE (u);
1110 	last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
1111 	if (from_stage <= last_u && to_stage >= first_u)
1112 	  {
1113 	    if (u < ps->g->num_nodes)
1114 	      duplicate_insn_chain (ps_first_note (ps, u), u_insn);
1115 	    else
1116 	      emit_insn (copy_rtx (PATTERN (u_insn)));
1117 	  }
1118       }
1119 }
1120 
1121 
1122 /* Generate the instructions (including reg_moves) for prolog & epilog.  */
1123 static void
1124 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
1125                         rtx count_reg, rtx count_init)
1126 {
1127   int i;
1128   int last_stage = PS_STAGE_COUNT (ps) - 1;
1129   edge e;
1130 
1131   /* Generate the prolog, inserting its insns on the loop-entry edge.  */
1132   start_sequence ();
1133 
1134   if (!count_init)
1135     {
1136       /* Generate instructions at the beginning of the prolog to
1137          adjust the loop count by STAGE_COUNT.  If loop count is constant
1138          (count_init), this constant is adjusted by STAGE_COUNT in
1139          generate_prolog_epilog function.  */
1140       rtx sub_reg = NULL_RTX;
1141 
1142       sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
1143 				     gen_int_mode (last_stage,
1144 						   GET_MODE (count_reg)),
1145                                      count_reg, 1, OPTAB_DIRECT);
1146       gcc_assert (REG_P (sub_reg));
1147       if (REGNO (sub_reg) != REGNO (count_reg))
1148         emit_move_insn (count_reg, sub_reg);
1149     }
1150 
1151   for (i = 0; i < last_stage; i++)
1152     duplicate_insns_of_cycles (ps, 0, i, count_reg);
1153 
1154   /* Put the prolog on the entry edge.  */
1155   e = loop_preheader_edge (loop);
1156   split_edge_and_insert (e, get_insns ());
1157   if (!flag_resched_modulo_sched)
1158     e->dest->flags |= BB_DISABLE_SCHEDULE;
1159 
1160   end_sequence ();
1161 
1162   /* Generate the epilog, inserting its insns on the loop-exit edge.  */
1163   start_sequence ();
1164 
1165   for (i = 0; i < last_stage; i++)
1166     duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
1167 
1168   /* Put the epilogue on the exit edge.  */
1169   gcc_assert (single_exit (loop));
1170   e = single_exit (loop);
1171   split_edge_and_insert (e, get_insns ());
1172   if (!flag_resched_modulo_sched)
1173     e->dest->flags |= BB_DISABLE_SCHEDULE;
1174 
1175   end_sequence ();
1176 }
1177 
1178 /* Mark LOOP as software pipelined so the later
1179    scheduling passes don't touch it.  */
1180 static void
1181 mark_loop_unsched (struct loop *loop)
1182 {
1183   unsigned i;
1184   basic_block *bbs = get_loop_body (loop);
1185 
1186   for (i = 0; i < loop->num_nodes; i++)
1187     bbs[i]->flags |= BB_DISABLE_SCHEDULE;
1188 
1189   free (bbs);
1190 }
1191 
1192 /* Return true if all the BBs of the loop are empty except the
1193    loop header.  */
1194 static bool
1195 loop_single_full_bb_p (struct loop *loop)
1196 {
1197   unsigned i;
1198   basic_block *bbs = get_loop_body (loop);
1199 
1200   for (i = 0; i < loop->num_nodes ; i++)
1201     {
1202       rtx_insn *head, *tail;
1203       bool empty_bb = true;
1204 
1205       if (bbs[i] == loop->header)
1206         continue;
1207 
1208       /* Make sure that basic blocks other than the header
1209          have only notes labels or jumps.  */
1210       get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
1211       for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
1212         {
1213           if (NOTE_P (head) || LABEL_P (head)
1214  	      || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
1215  	    continue;
1216  	  empty_bb = false;
1217  	  break;
1218         }
1219 
1220       if (! empty_bb)
1221         {
1222           free (bbs);
1223           return false;
1224         }
1225     }
1226   free (bbs);
1227   return true;
1228 }
1229 
1230 /* Dump file:line from INSN's location info to dump_file.  */
1231 
1232 static void
1233 dump_insn_location (rtx_insn *insn)
1234 {
1235   if (dump_file && INSN_HAS_LOCATION (insn))
1236     {
1237       expanded_location xloc = insn_location (insn);
1238       fprintf (dump_file, " %s:%i", xloc.file, xloc.line);
1239     }
1240 }
1241 
1242 /* A simple loop from SMS point of view; it is a loop that is composed of
1243    either a single basic block or two BBs - a header and a latch.  */
1244 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) 		    \
1245 				  && (EDGE_COUNT (loop->latch->preds) == 1) \
1246                                   && (EDGE_COUNT (loop->latch->succs) == 1))
1247 
1248 /* Return true if the loop is in its canonical form and false if not.
1249    i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.  */
1250 static bool
1251 loop_canon_p (struct loop *loop)
1252 {
1253 
1254   if (loop->inner || !loop_outer (loop))
1255   {
1256     if (dump_file)
1257       fprintf (dump_file, "SMS loop inner or !loop_outer\n");
1258     return false;
1259   }
1260 
1261   if (!single_exit (loop))
1262     {
1263       if (dump_file)
1264 	{
1265 	  rtx_insn *insn = BB_END (loop->header);
1266 
1267 	  fprintf (dump_file, "SMS loop many exits");
1268 	  dump_insn_location (insn);
1269 	  fprintf (dump_file, "\n");
1270 	}
1271       return false;
1272     }
1273 
1274   if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
1275     {
1276       if (dump_file)
1277 	{
1278 	  rtx_insn *insn = BB_END (loop->header);
1279 
1280 	  fprintf (dump_file, "SMS loop many BBs.");
1281 	  dump_insn_location (insn);
1282 	  fprintf (dump_file, "\n");
1283 	}
1284       return false;
1285     }
1286 
1287     return true;
1288 }
1289 
1290 /* If there are more than one entry for the loop,
1291    make it one by splitting the first entry edge and
1292    redirecting the others to the new BB.  */
1293 static void
1294 canon_loop (struct loop *loop)
1295 {
1296   edge e;
1297   edge_iterator i;
1298 
1299   /* Avoid annoying special cases of edges going to exit
1300      block.  */
1301   FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1302     if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
1303       split_edge (e);
1304 
1305   if (loop->latch == loop->header
1306       || EDGE_COUNT (loop->latch->succs) > 1)
1307     {
1308       FOR_EACH_EDGE (e, i, loop->header->preds)
1309         if (e->src == loop->latch)
1310           break;
1311       split_edge (e);
1312     }
1313 }
1314 
1315 /* Setup infos.  */
1316 static void
1317 setup_sched_infos (void)
1318 {
1319   memcpy (&sms_common_sched_info, &haifa_common_sched_info,
1320 	  sizeof (sms_common_sched_info));
1321   sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
1322   common_sched_info = &sms_common_sched_info;
1323 
1324   sched_deps_info = &sms_sched_deps_info;
1325   current_sched_info = &sms_sched_info;
1326 }
1327 
1328 /* Probability in % that the sms-ed loop rolls enough so that optimized
1329    version may be entered.  Just a guess.  */
1330 #define PROB_SMS_ENOUGH_ITERATIONS 80
1331 
1332 /* Used to calculate the upper bound of ii.  */
1333 #define MAXII_FACTOR 2
1334 
1335 /* Main entry point, perform SMS scheduling on the loops of the function
1336    that consist of single basic blocks.  */
1337 static void
1338 sms_schedule (void)
1339 {
1340   rtx_insn *insn;
1341   ddg_ptr *g_arr, g;
1342   int * node_order;
1343   int maxii, max_asap;
1344   partial_schedule_ptr ps;
1345   basic_block bb = NULL;
1346   struct loop *loop;
1347   basic_block condition_bb = NULL;
1348   edge latch_edge;
1349   gcov_type trip_count = 0;
1350 
1351   loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1352 		       | LOOPS_HAVE_RECORDED_EXITS);
1353   if (number_of_loops (cfun) <= 1)
1354     {
1355       loop_optimizer_finalize ();
1356       return;  /* There are no loops to schedule.  */
1357     }
1358 
1359   /* Initialize issue_rate.  */
1360   if (targetm.sched.issue_rate)
1361     {
1362       int temp = reload_completed;
1363 
1364       reload_completed = 1;
1365       issue_rate = targetm.sched.issue_rate ();
1366       reload_completed = temp;
1367     }
1368   else
1369     issue_rate = 1;
1370 
1371   /* Initialize the scheduler.  */
1372   setup_sched_infos ();
1373   haifa_sched_init ();
1374 
1375   /* Allocate memory to hold the DDG array one entry for each loop.
1376      We use loop->num as index into this array.  */
1377   g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun));
1378 
1379   if (dump_file)
1380   {
1381     fprintf (dump_file, "\n\nSMS analysis phase\n");
1382     fprintf (dump_file, "===================\n\n");
1383   }
1384 
1385   /* Build DDGs for all the relevant loops and hold them in G_ARR
1386      indexed by the loop index.  */
1387   FOR_EACH_LOOP (loop, 0)
1388     {
1389       rtx_insn *head, *tail;
1390       rtx count_reg;
1391 
1392       /* For debugging.  */
1393       if (dbg_cnt (sms_sched_loop) == false)
1394         {
1395           if (dump_file)
1396             fprintf (dump_file, "SMS reached max limit... \n");
1397 
1398 	  break;
1399         }
1400 
1401       if (dump_file)
1402 	{
1403 	  rtx_insn *insn = BB_END (loop->header);
1404 
1405 	  fprintf (dump_file, "SMS loop num: %d", loop->num);
1406 	  dump_insn_location (insn);
1407 	  fprintf (dump_file, "\n");
1408 	}
1409 
1410       if (! loop_canon_p (loop))
1411         continue;
1412 
1413       if (! loop_single_full_bb_p (loop))
1414       {
1415         if (dump_file)
1416           fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
1417 	continue;
1418       }
1419 
1420       bb = loop->header;
1421 
1422       get_ebb_head_tail (bb, bb, &head, &tail);
1423       latch_edge = loop_latch_edge (loop);
1424       gcc_assert (single_exit (loop));
1425       if (single_exit (loop)->count)
1426 	trip_count = latch_edge->count / single_exit (loop)->count;
1427 
1428       /* Perform SMS only on loops that their average count is above threshold.  */
1429 
1430       if ( latch_edge->count
1431           && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
1432 	{
1433 	  if (dump_file)
1434 	    {
1435 	      dump_insn_location (tail);
1436 	      fprintf (dump_file, "\nSMS single-bb-loop\n");
1437 	      if (profile_info && flag_branch_probabilities)
1438 	    	{
1439 	      	  fprintf (dump_file, "SMS loop-count ");
1440 	      	  fprintf (dump_file, "%" PRId64,
1441 	             	   (int64_t) bb->count);
1442 	      	  fprintf (dump_file, "\n");
1443                   fprintf (dump_file, "SMS trip-count ");
1444                   fprintf (dump_file, "%" PRId64,
1445                            (int64_t) trip_count);
1446                   fprintf (dump_file, "\n");
1447 	      	  fprintf (dump_file, "SMS profile-sum-max ");
1448 	      	  fprintf (dump_file, "%" PRId64,
1449 	          	   (int64_t) profile_info->sum_max);
1450 	      	  fprintf (dump_file, "\n");
1451 	    	}
1452 	    }
1453           continue;
1454         }
1455 
1456       /* Make sure this is a doloop.  */
1457       if ( !(count_reg = doloop_register_get (head, tail)))
1458       {
1459         if (dump_file)
1460           fprintf (dump_file, "SMS doloop_register_get failed\n");
1461 	continue;
1462       }
1463 
1464       /* Don't handle BBs with calls or barriers
1465 	 or !single_set with the exception of instructions that include
1466 	 count_reg---these instructions are part of the control part
1467 	 that do-loop recognizes.
1468          ??? Should handle insns defining subregs.  */
1469      for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1470       {
1471          rtx set;
1472 
1473         if (CALL_P (insn)
1474             || BARRIER_P (insn)
1475             || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1476                 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1477                 && !reg_mentioned_p (count_reg, insn))
1478             || (INSN_P (insn) && (set = single_set (insn))
1479                 && GET_CODE (SET_DEST (set)) == SUBREG))
1480         break;
1481       }
1482 
1483       if (insn != NEXT_INSN (tail))
1484 	{
1485 	  if (dump_file)
1486 	    {
1487 	      if (CALL_P (insn))
1488 		fprintf (dump_file, "SMS loop-with-call\n");
1489 	      else if (BARRIER_P (insn))
1490 		fprintf (dump_file, "SMS loop-with-barrier\n");
1491               else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1492                 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1493                 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1494               else
1495                fprintf (dump_file, "SMS loop with subreg in lhs\n");
1496 	      print_rtl_single (dump_file, insn);
1497 	    }
1498 
1499 	  continue;
1500 	}
1501 
1502       /* Always schedule the closing branch with the rest of the
1503          instructions. The branch is rotated to be in row ii-1 at the
1504          end of the scheduling procedure to make sure it's the last
1505          instruction in the iteration.  */
1506       if (! (g = create_ddg (bb, 1)))
1507         {
1508           if (dump_file)
1509 	    fprintf (dump_file, "SMS create_ddg failed\n");
1510 	  continue;
1511         }
1512 
1513       g_arr[loop->num] = g;
1514       if (dump_file)
1515         fprintf (dump_file, "...OK\n");
1516 
1517     }
1518   if (dump_file)
1519   {
1520     fprintf (dump_file, "\nSMS transformation phase\n");
1521     fprintf (dump_file, "=========================\n\n");
1522   }
1523 
1524   /* We don't want to perform SMS on new loops - created by versioning.  */
1525   FOR_EACH_LOOP (loop, 0)
1526     {
1527       rtx_insn *head, *tail;
1528       rtx count_reg;
1529       rtx_insn *count_init;
1530       int mii, rec_mii, stage_count, min_cycle;
1531       int64_t loop_count = 0;
1532       bool opt_sc_p;
1533 
1534       if (! (g = g_arr[loop->num]))
1535         continue;
1536 
1537       if (dump_file)
1538 	{
1539 	  rtx_insn *insn = BB_END (loop->header);
1540 
1541 	  fprintf (dump_file, "SMS loop num: %d", loop->num);
1542 	  dump_insn_location (insn);
1543 	  fprintf (dump_file, "\n");
1544 
1545 	  print_ddg (dump_file, g);
1546 	}
1547 
1548       get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1549 
1550       latch_edge = loop_latch_edge (loop);
1551       gcc_assert (single_exit (loop));
1552       if (single_exit (loop)->count)
1553 	trip_count = latch_edge->count / single_exit (loop)->count;
1554 
1555       if (dump_file)
1556 	{
1557 	  dump_insn_location (tail);
1558 	  fprintf (dump_file, "\nSMS single-bb-loop\n");
1559 	  if (profile_info && flag_branch_probabilities)
1560 	    {
1561 	      fprintf (dump_file, "SMS loop-count ");
1562 	      fprintf (dump_file, "%" PRId64,
1563 	               (int64_t) bb->count);
1564 	      fprintf (dump_file, "\n");
1565 	      fprintf (dump_file, "SMS profile-sum-max ");
1566 	      fprintf (dump_file, "%" PRId64,
1567 	               (int64_t) profile_info->sum_max);
1568 	      fprintf (dump_file, "\n");
1569 	    }
1570 	  fprintf (dump_file, "SMS doloop\n");
1571 	  fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1572           fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1573           fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1574 	}
1575 
1576 
1577       /* In case of th loop have doloop register it gets special
1578 	 handling.  */
1579       count_init = NULL;
1580       if ((count_reg = doloop_register_get (head, tail)))
1581 	{
1582 	  basic_block pre_header;
1583 
1584 	  pre_header = loop_preheader_edge (loop)->src;
1585 	  count_init = const_iteration_count (count_reg, pre_header,
1586 					      &loop_count);
1587 	}
1588       gcc_assert (count_reg);
1589 
1590       if (dump_file && count_init)
1591         {
1592           fprintf (dump_file, "SMS const-doloop ");
1593           fprintf (dump_file, "%" PRId64,
1594 		     loop_count);
1595           fprintf (dump_file, "\n");
1596         }
1597 
1598       node_order = XNEWVEC (int, g->num_nodes);
1599 
1600       mii = 1; /* Need to pass some estimate of mii.  */
1601       rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1602       mii = MAX (res_MII (g), rec_mii);
1603       maxii = MAX (max_asap, MAXII_FACTOR * mii);
1604 
1605       if (dump_file)
1606 	fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1607 		 rec_mii, mii, maxii);
1608 
1609       for (;;)
1610 	{
1611 	  set_node_sched_params (g);
1612 
1613 	  stage_count = 0;
1614 	  opt_sc_p = false;
1615 	  ps = sms_schedule_by_order (g, mii, maxii, node_order);
1616 
1617 	  if (ps)
1618 	    {
1619 	      /* Try to achieve optimized SC by normalizing the partial
1620 		 schedule (having the cycles start from cycle zero).
1621 		 The branch location must be placed in row ii-1 in the
1622 		 final scheduling.	If failed, shift all instructions to
1623 		 position the branch in row ii-1.  */
1624 	      opt_sc_p = optimize_sc (ps, g);
1625 	      if (opt_sc_p)
1626 		stage_count = calculate_stage_count (ps, 0);
1627 	      else
1628 		{
1629 		  /* Bring the branch to cycle ii-1.  */
1630 		  int amount = (SCHED_TIME (g->closing_branch->cuid)
1631 				- (ps->ii - 1));
1632 
1633 		  if (dump_file)
1634 		    fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1635 
1636 		  stage_count = calculate_stage_count (ps, amount);
1637 		}
1638 
1639 	      gcc_assert (stage_count >= 1);
1640 	    }
1641 
1642 	  /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1643 	     1 means that there is no interleaving between iterations thus
1644 	     we let the scheduling passes do the job in this case.  */
1645 	  if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC)
1646 	      || (count_init && (loop_count <= stage_count))
1647 	      || (flag_branch_probabilities && (trip_count <= stage_count)))
1648 	    {
1649 	      if (dump_file)
1650 		{
1651 		  fprintf (dump_file, "SMS failed... \n");
1652 		  fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
1653 			   " loop-count=", stage_count);
1654 		  fprintf (dump_file, "%" PRId64, loop_count);
1655 		  fprintf (dump_file, ", trip-count=");
1656 		  fprintf (dump_file, "%" PRId64, trip_count);
1657 		  fprintf (dump_file, ")\n");
1658 		}
1659 	      break;
1660 	    }
1661 
1662           if (!opt_sc_p)
1663             {
1664 	      /* Rotate the partial schedule to have the branch in row ii-1.  */
1665               int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
1666 
1667               reset_sched_times (ps, amount);
1668               rotate_partial_schedule (ps, amount);
1669             }
1670 
1671 	  set_columns_for_ps (ps);
1672 
1673 	  min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1674 	  if (!schedule_reg_moves (ps))
1675 	    {
1676 	      mii = ps->ii + 1;
1677 	      free_partial_schedule (ps);
1678 	      continue;
1679 	    }
1680 
1681 	  /* Moves that handle incoming values might have been added
1682 	     to a new first stage.  Bump the stage count if so.
1683 
1684 	     ??? Perhaps we could consider rotating the schedule here
1685 	     instead?  */
1686 	  if (PS_MIN_CYCLE (ps) < min_cycle)
1687 	    {
1688 	      reset_sched_times (ps, 0);
1689 	      stage_count++;
1690 	    }
1691 
1692 	  /* The stage count should now be correct without rotation.  */
1693 	  gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
1694 	  PS_STAGE_COUNT (ps) = stage_count;
1695 
1696 	  canon_loop (loop);
1697 
1698           if (dump_file)
1699             {
1700 	      dump_insn_location (tail);
1701 	      fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n",
1702 		       ps->ii, stage_count);
1703 	      print_partial_schedule (ps, dump_file);
1704 	    }
1705 
1706           /* case the BCT count is not known , Do loop-versioning */
1707 	  if (count_reg && ! count_init)
1708             {
1709 	      rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
1710 					 gen_int_mode (stage_count,
1711 						       GET_MODE (count_reg)));
1712 	      unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1713 			       * REG_BR_PROB_BASE) / 100;
1714 
1715 	      loop_version (loop, comp_rtx, &condition_bb,
1716 	  		    prob, REG_BR_PROB_BASE - prob,
1717 			    prob, REG_BR_PROB_BASE - prob,
1718 			    true);
1719 	     }
1720 
1721 	  /* Set new iteration count of loop kernel.  */
1722           if (count_reg && count_init)
1723 	    SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1724 						     - stage_count + 1);
1725 
1726 	  /* Now apply the scheduled kernel to the RTL of the loop.  */
1727 	  permute_partial_schedule (ps, g->closing_branch->first_note);
1728 
1729           /* Mark this loop as software pipelined so the later
1730 	     scheduling passes don't touch it.  */
1731 	  if (! flag_resched_modulo_sched)
1732 	    mark_loop_unsched (loop);
1733 
1734 	  /* The life-info is not valid any more.  */
1735 	  df_set_bb_dirty (g->bb);
1736 
1737 	  apply_reg_moves (ps);
1738 	  if (dump_file)
1739 	    print_node_sched_params (dump_file, g->num_nodes, ps);
1740 	  /* Generate prolog and epilog.  */
1741           generate_prolog_epilog (ps, loop, count_reg, count_init);
1742 	  break;
1743 	}
1744 
1745       free_partial_schedule (ps);
1746       node_sched_param_vec.release ();
1747       free (node_order);
1748       free_ddg (g);
1749     }
1750 
1751   free (g_arr);
1752 
1753   /* Release scheduler data, needed until now because of DFA.  */
1754   haifa_sched_finish ();
1755   loop_optimizer_finalize ();
1756 }
1757 
1758 /* The SMS scheduling algorithm itself
1759    -----------------------------------
1760    Input: 'O' an ordered list of insns of a loop.
1761    Output: A scheduling of the loop - kernel, prolog, and epilogue.
1762 
1763    'Q' is the empty Set
1764    'PS' is the partial schedule; it holds the currently scheduled nodes with
1765 	their cycle/slot.
1766    'PSP' previously scheduled predecessors.
1767    'PSS' previously scheduled successors.
1768    't(u)' the cycle where u is scheduled.
1769    'l(u)' is the latency of u.
1770    'd(v,u)' is the dependence distance from v to u.
1771    'ASAP(u)' the earliest time at which u could be scheduled as computed in
1772 	     the node ordering phase.
1773    'check_hardware_resources_conflicts(u, PS, c)'
1774 			     run a trace around cycle/slot through DFA model
1775 			     to check resource conflicts involving instruction u
1776 			     at cycle c given the partial schedule PS.
1777    'add_to_partial_schedule_at_time(u, PS, c)'
1778 			     Add the node/instruction u to the partial schedule
1779 			     PS at time c.
1780    'calculate_register_pressure(PS)'
1781 			     Given a schedule of instructions, calculate the register
1782 			     pressure it implies.  One implementation could be the
1783 			     maximum number of overlapping live ranges.
1784    'maxRP' The maximum allowed register pressure, it is usually derived from the number
1785 	   registers available in the hardware.
1786 
1787    1. II = MII.
1788    2. PS = empty list
1789    3. for each node u in O in pre-computed order
1790    4.   if (PSP(u) != Q && PSS(u) == Q) then
1791    5.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1792    6.     start = Early_start; end = Early_start + II - 1; step = 1
1793    11.  else if (PSP(u) == Q && PSS(u) != Q) then
1794    12.      Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1795    13.     start = Late_start; end = Late_start - II + 1; step = -1
1796    14.  else if (PSP(u) != Q && PSS(u) != Q) then
1797    15.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1798    16.     Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1799    17.     start = Early_start;
1800    18.     end = min(Early_start + II - 1 , Late_start);
1801    19.     step = 1
1802    20.     else "if (PSP(u) == Q && PSS(u) == Q)"
1803    21.	  start = ASAP(u); end = start + II - 1; step = 1
1804    22.  endif
1805 
1806    23.  success = false
1807    24.  for (c = start ; c != end ; c += step)
1808    25.     if check_hardware_resources_conflicts(u, PS, c) then
1809    26.       add_to_partial_schedule_at_time(u, PS, c)
1810    27.       success = true
1811    28.       break
1812    29.     endif
1813    30.  endfor
1814    31.  if (success == false) then
1815    32.    II = II + 1
1816    33.    if (II > maxII) then
1817    34.       finish - failed to schedule
1818    35.	 endif
1819    36.    goto 2.
1820    37.  endif
1821    38. endfor
1822    39. if (calculate_register_pressure(PS) > maxRP) then
1823    40.    goto 32.
1824    41. endif
1825    42. compute epilogue & prologue
1826    43. finish - succeeded to schedule
1827 
1828    ??? The algorithm restricts the scheduling window to II cycles.
1829    In rare cases, it may be better to allow windows of II+1 cycles.
1830    The window would then start and end on the same row, but with
1831    different "must precede" and "must follow" requirements.  */
1832 
1833 /* A limit on the number of cycles that resource conflicts can span.  ??? Should
1834    be provided by DFA, and be dependent on the type of insn scheduled.  Currently
1835    set to 0 to save compile time.  */
1836 #define DFA_HISTORY SMS_DFA_HISTORY
1837 
1838 /* A threshold for the number of repeated unsuccessful attempts to insert
1839    an empty row, before we flush the partial schedule and start over.  */
1840 #define MAX_SPLIT_NUM 10
1841 /* Given the partial schedule PS, this function calculates and returns the
1842    cycles in which we can schedule the node with the given index I.
1843    NOTE: Here we do the backtracking in SMS, in some special cases. We have
1844    noticed that there are several cases in which we fail    to SMS the loop
1845    because the sched window of a node is empty    due to tight data-deps. In
1846    such cases we want to unschedule    some of the predecessors/successors
1847    until we get non-empty    scheduling window.  It returns -1 if the
1848    scheduling window is empty and zero otherwise.  */
1849 
1850 static int
1851 get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
1852 		  sbitmap sched_nodes, int ii, int *start_p, int *step_p,
1853 		  int *end_p)
1854 {
1855   int start, step, end;
1856   int early_start, late_start;
1857   ddg_edge_ptr e;
1858   auto_sbitmap psp (ps->g->num_nodes);
1859   auto_sbitmap pss (ps->g->num_nodes);
1860   sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1861   sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1862   int psp_not_empty;
1863   int pss_not_empty;
1864   int count_preds;
1865   int count_succs;
1866 
1867   /* 1. compute sched window for u (start, end, step).  */
1868   bitmap_clear (psp);
1869   bitmap_clear (pss);
1870   psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes);
1871   pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes);
1872 
1873   /* We first compute a forward range (start <= end), then decide whether
1874      to reverse it.  */
1875   early_start = INT_MIN;
1876   late_start = INT_MAX;
1877   start = INT_MIN;
1878   end = INT_MAX;
1879   step = 1;
1880 
1881   count_preds = 0;
1882   count_succs = 0;
1883 
1884   if (dump_file && (psp_not_empty || pss_not_empty))
1885     {
1886       fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
1887 	       "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
1888       fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
1889 	       "start", "early start", "late start", "end", "time");
1890       fprintf (dump_file, "=========== =========== =========== ==========="
1891 	       " =====\n");
1892     }
1893   /* Calculate early_start and limit end.  Both bounds are inclusive.  */
1894   if (psp_not_empty)
1895     for (e = u_node->in; e != 0; e = e->next_in)
1896       {
1897 	int v = e->src->cuid;
1898 
1899 	if (bitmap_bit_p (sched_nodes, v))
1900 	  {
1901 	    int p_st = SCHED_TIME (v);
1902 	    int earliest = p_st + e->latency - (e->distance * ii);
1903 	    int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
1904 
1905 	    if (dump_file)
1906 	      {
1907 		fprintf (dump_file, "%11s %11d %11s %11d %5d",
1908 			 "", earliest, "", latest, p_st);
1909 		print_ddg_edge (dump_file, e);
1910 		fprintf (dump_file, "\n");
1911 	      }
1912 
1913 	    early_start = MAX (early_start, earliest);
1914 	    end = MIN (end, latest);
1915 
1916 	    if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1917 	      count_preds++;
1918 	  }
1919       }
1920 
1921   /* Calculate late_start and limit start.  Both bounds are inclusive.  */
1922   if (pss_not_empty)
1923     for (e = u_node->out; e != 0; e = e->next_out)
1924       {
1925 	int v = e->dest->cuid;
1926 
1927 	if (bitmap_bit_p (sched_nodes, v))
1928 	  {
1929 	    int s_st = SCHED_TIME (v);
1930 	    int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
1931 	    int latest = s_st - e->latency + (e->distance * ii);
1932 
1933 	    if (dump_file)
1934 	      {
1935 		fprintf (dump_file, "%11d %11s %11d %11s %5d",
1936 			 earliest, "", latest, "", s_st);
1937 		print_ddg_edge (dump_file, e);
1938 		fprintf (dump_file, "\n");
1939 	      }
1940 
1941 	    start = MAX (start, earliest);
1942 	    late_start = MIN (late_start, latest);
1943 
1944 	    if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1945 	      count_succs++;
1946 	  }
1947       }
1948 
1949   if (dump_file && (psp_not_empty || pss_not_empty))
1950     {
1951       fprintf (dump_file, "----------- ----------- ----------- -----------"
1952 	       " -----\n");
1953       fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
1954 	       start, early_start, late_start, end, "",
1955 	       "(max, max, min, min)");
1956     }
1957 
1958   /* Get a target scheduling window no bigger than ii.  */
1959   if (early_start == INT_MIN && late_start == INT_MAX)
1960     early_start = NODE_ASAP (u_node);
1961   else if (early_start == INT_MIN)
1962     early_start = late_start - (ii - 1);
1963   late_start = MIN (late_start, early_start + (ii - 1));
1964 
1965   /* Apply memory dependence limits.  */
1966   start = MAX (start, early_start);
1967   end = MIN (end, late_start);
1968 
1969   if (dump_file && (psp_not_empty || pss_not_empty))
1970     fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
1971 	     "", start, end, "", "");
1972 
1973   /* If there are at least as many successors as predecessors, schedule the
1974      node close to its successors.  */
1975   if (pss_not_empty && count_succs >= count_preds)
1976     {
1977       std::swap (start, end);
1978       step = -1;
1979     }
1980 
1981   /* Now that we've finalized the window, make END an exclusive rather
1982      than an inclusive bound.  */
1983   end += step;
1984 
1985   *start_p = start;
1986   *step_p = step;
1987   *end_p = end;
1988 
1989   if ((start >= end && step == 1) || (start <= end && step == -1))
1990     {
1991       if (dump_file)
1992 	fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
1993 		 start, end, step);
1994       return -1;
1995     }
1996 
1997   return 0;
1998 }
1999 
2000 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
2001    node currently been scheduled.  At the end of the calculation
2002    MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
2003    U_NODE which are (1) already scheduled in the first/last row of
2004    U_NODE's scheduling window, (2) whose dependence inequality with U
2005    becomes an equality when U is scheduled in this same row, and (3)
2006    whose dependence latency is zero.
2007 
2008    The first and last rows are calculated using the following parameters:
2009    START/END rows - The cycles that begins/ends the traversal on the window;
2010    searching for an empty cycle to schedule U_NODE.
2011    STEP - The direction in which we traverse the window.
2012    II - The initiation interval.  */
2013 
2014 static void
2015 calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
2016 			       int step, int ii, sbitmap sched_nodes,
2017 			       sbitmap must_precede, sbitmap must_follow)
2018 {
2019   ddg_edge_ptr e;
2020   int first_cycle_in_window, last_cycle_in_window;
2021 
2022   gcc_assert (must_precede && must_follow);
2023 
2024   /* Consider the following scheduling window:
2025      {first_cycle_in_window, first_cycle_in_window+1, ...,
2026      last_cycle_in_window}.  If step is 1 then the following will be
2027      the order we traverse the window: {start=first_cycle_in_window,
2028      first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
2029      or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
2030      end=first_cycle_in_window-1} if step is -1.  */
2031   first_cycle_in_window = (step == 1) ? start : end - step;
2032   last_cycle_in_window = (step == 1) ? end - step : start;
2033 
2034   bitmap_clear (must_precede);
2035   bitmap_clear (must_follow);
2036 
2037   if (dump_file)
2038     fprintf (dump_file, "\nmust_precede: ");
2039 
2040   /* Instead of checking if:
2041       (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
2042       && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
2043              first_cycle_in_window)
2044       && e->latency == 0
2045      we use the fact that latency is non-negative:
2046       SCHED_TIME (e->src) - (e->distance * ii) <=
2047       SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
2048       first_cycle_in_window
2049      and check only if
2050       SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window  */
2051   for (e = u_node->in; e != 0; e = e->next_in)
2052     if (bitmap_bit_p (sched_nodes, e->src->cuid)
2053 	&& ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
2054              first_cycle_in_window))
2055       {
2056 	if (dump_file)
2057 	  fprintf (dump_file, "%d ", e->src->cuid);
2058 
2059 	bitmap_set_bit (must_precede, e->src->cuid);
2060       }
2061 
2062   if (dump_file)
2063     fprintf (dump_file, "\nmust_follow: ");
2064 
2065   /* Instead of checking if:
2066       (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
2067       && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
2068              last_cycle_in_window)
2069       && e->latency == 0
2070      we use the fact that latency is non-negative:
2071       SCHED_TIME (e->dest) + (e->distance * ii) >=
2072       SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
2073       last_cycle_in_window
2074      and check only if
2075       SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window  */
2076   for (e = u_node->out; e != 0; e = e->next_out)
2077     if (bitmap_bit_p (sched_nodes, e->dest->cuid)
2078 	&& ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
2079              last_cycle_in_window))
2080       {
2081 	if (dump_file)
2082 	  fprintf (dump_file, "%d ", e->dest->cuid);
2083 
2084 	bitmap_set_bit (must_follow, e->dest->cuid);
2085       }
2086 
2087   if (dump_file)
2088     fprintf (dump_file, "\n");
2089 }
2090 
2091 /* Return 1 if U_NODE can be scheduled in CYCLE.  Use the following
2092    parameters to decide if that's possible:
2093    PS - The partial schedule.
2094    U - The serial number of U_NODE.
2095    NUM_SPLITS - The number of row splits made so far.
2096    MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
2097    the first row of the scheduling window)
2098    MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
2099    last row of the scheduling window)  */
2100 
2101 static bool
2102 try_scheduling_node_in_cycle (partial_schedule_ptr ps,
2103 			      int u, int cycle, sbitmap sched_nodes,
2104 			      int *num_splits, sbitmap must_precede,
2105 			      sbitmap must_follow)
2106 {
2107   ps_insn_ptr psi;
2108   bool success = 0;
2109 
2110   verify_partial_schedule (ps, sched_nodes);
2111   psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
2112   if (psi)
2113     {
2114       SCHED_TIME (u) = cycle;
2115       bitmap_set_bit (sched_nodes, u);
2116       success = 1;
2117       *num_splits = 0;
2118       if (dump_file)
2119 	fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
2120 
2121     }
2122 
2123   return success;
2124 }
2125 
2126 /* This function implements the scheduling algorithm for SMS according to the
2127    above algorithm.  */
2128 static partial_schedule_ptr
2129 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
2130 {
2131   int ii = mii;
2132   int i, c, success, num_splits = 0;
2133   int flush_and_start_over = true;
2134   int num_nodes = g->num_nodes;
2135   int start, end, step; /* Place together into one struct?  */
2136   auto_sbitmap sched_nodes (num_nodes);
2137   auto_sbitmap must_precede (num_nodes);
2138   auto_sbitmap must_follow (num_nodes);
2139   auto_sbitmap tobe_scheduled (num_nodes);
2140 
2141   partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
2142 
2143   bitmap_ones (tobe_scheduled);
2144   bitmap_clear (sched_nodes);
2145 
2146   while (flush_and_start_over && (ii < maxii))
2147     {
2148 
2149       if (dump_file)
2150 	fprintf (dump_file, "Starting with ii=%d\n", ii);
2151       flush_and_start_over = false;
2152       bitmap_clear (sched_nodes);
2153 
2154       for (i = 0; i < num_nodes; i++)
2155 	{
2156 	  int u = nodes_order[i];
2157   	  ddg_node_ptr u_node = &ps->g->nodes[u];
2158 	  rtx_insn *insn = u_node->insn;
2159 
2160 	  if (!NONDEBUG_INSN_P (insn))
2161 	    {
2162 	      bitmap_clear_bit (tobe_scheduled, u);
2163 	      continue;
2164 	    }
2165 
2166 	  if (bitmap_bit_p (sched_nodes, u))
2167 	    continue;
2168 
2169 	  /* Try to get non-empty scheduling window.  */
2170 	 success = 0;
2171          if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
2172                                 &step, &end) == 0)
2173             {
2174               if (dump_file)
2175                 fprintf (dump_file, "\nTrying to schedule node %d "
2176 			 "INSN = %d  in (%d .. %d) step %d\n", u, (INSN_UID
2177                         (g->nodes[u].insn)), start, end, step);
2178 
2179               gcc_assert ((step > 0 && start < end)
2180                           || (step < 0 && start > end));
2181 
2182               calculate_must_precede_follow (u_node, start, end, step, ii,
2183                                              sched_nodes, must_precede,
2184                                              must_follow);
2185 
2186               for (c = start; c != end; c += step)
2187                 {
2188 		  sbitmap tmp_precede, tmp_follow;
2189 
2190                   set_must_precede_follow (&tmp_follow, must_follow,
2191 		                           &tmp_precede, must_precede,
2192                                            c, start, end, step);
2193                   success =
2194                     try_scheduling_node_in_cycle (ps, u, c,
2195                                                   sched_nodes,
2196                                                   &num_splits, tmp_precede,
2197                                                   tmp_follow);
2198                   if (success)
2199                     break;
2200                 }
2201 
2202               verify_partial_schedule (ps, sched_nodes);
2203             }
2204             if (!success)
2205             {
2206               int split_row;
2207 
2208               if (ii++ == maxii)
2209                 break;
2210 
2211               if (num_splits >= MAX_SPLIT_NUM)
2212                 {
2213                   num_splits = 0;
2214                   flush_and_start_over = true;
2215                   verify_partial_schedule (ps, sched_nodes);
2216                   reset_partial_schedule (ps, ii);
2217                   verify_partial_schedule (ps, sched_nodes);
2218                   break;
2219                 }
2220 
2221               num_splits++;
2222               /* The scheduling window is exclusive of 'end'
2223                  whereas compute_split_window() expects an inclusive,
2224                  ordered range.  */
2225               if (step == 1)
2226                 split_row = compute_split_row (sched_nodes, start, end - 1,
2227                                                ps->ii, u_node);
2228               else
2229                 split_row = compute_split_row (sched_nodes, end + 1, start,
2230                                                ps->ii, u_node);
2231 
2232               ps_insert_empty_row (ps, split_row, sched_nodes);
2233               i--;              /* Go back and retry node i.  */
2234 
2235               if (dump_file)
2236                 fprintf (dump_file, "num_splits=%d\n", num_splits);
2237             }
2238 
2239           /* ??? If (success), check register pressure estimates.  */
2240         }                       /* Continue with next node.  */
2241     }                           /* While flush_and_start_over.  */
2242   if (ii >= maxii)
2243     {
2244       free_partial_schedule (ps);
2245       ps = NULL;
2246     }
2247   else
2248     gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes));
2249 
2250   return ps;
2251 }
2252 
2253 /* This function inserts a new empty row into PS at the position
2254    according to SPLITROW, keeping all already scheduled instructions
2255    intact and updating their SCHED_TIME and cycle accordingly.  */
2256 static void
2257 ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
2258 		     sbitmap sched_nodes)
2259 {
2260   ps_insn_ptr crr_insn;
2261   ps_insn_ptr *rows_new;
2262   int ii = ps->ii;
2263   int new_ii = ii + 1;
2264   int row;
2265   int *rows_length_new;
2266 
2267   verify_partial_schedule (ps, sched_nodes);
2268 
2269   /* We normalize sched_time and rotate ps to have only non-negative sched
2270      times, for simplicity of updating cycles after inserting new row.  */
2271   split_row -= ps->min_cycle;
2272   split_row = SMODULO (split_row, ii);
2273   if (dump_file)
2274     fprintf (dump_file, "split_row=%d\n", split_row);
2275 
2276   reset_sched_times (ps, PS_MIN_CYCLE (ps));
2277   rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
2278 
2279   rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
2280   rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
2281   for (row = 0; row < split_row; row++)
2282     {
2283       rows_new[row] = ps->rows[row];
2284       rows_length_new[row] = ps->rows_length[row];
2285       ps->rows[row] = NULL;
2286       for (crr_insn = rows_new[row];
2287 	   crr_insn; crr_insn = crr_insn->next_in_row)
2288 	{
2289 	  int u = crr_insn->id;
2290 	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
2291 
2292 	  SCHED_TIME (u) = new_time;
2293 	  crr_insn->cycle = new_time;
2294 	  SCHED_ROW (u) = new_time % new_ii;
2295 	  SCHED_STAGE (u) = new_time / new_ii;
2296 	}
2297 
2298     }
2299 
2300   rows_new[split_row] = NULL;
2301 
2302   for (row = split_row; row < ii; row++)
2303     {
2304       rows_new[row + 1] = ps->rows[row];
2305       rows_length_new[row + 1] = ps->rows_length[row];
2306       ps->rows[row] = NULL;
2307       for (crr_insn = rows_new[row + 1];
2308 	   crr_insn; crr_insn = crr_insn->next_in_row)
2309 	{
2310 	  int u = crr_insn->id;
2311 	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
2312 
2313 	  SCHED_TIME (u) = new_time;
2314 	  crr_insn->cycle = new_time;
2315 	  SCHED_ROW (u) = new_time % new_ii;
2316 	  SCHED_STAGE (u) = new_time / new_ii;
2317 	}
2318     }
2319 
2320   /* Updating ps.  */
2321   ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
2322     + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
2323   ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
2324     + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
2325   free (ps->rows);
2326   ps->rows = rows_new;
2327   free (ps->rows_length);
2328   ps->rows_length = rows_length_new;
2329   ps->ii = new_ii;
2330   gcc_assert (ps->min_cycle >= 0);
2331 
2332   verify_partial_schedule (ps, sched_nodes);
2333 
2334   if (dump_file)
2335     fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
2336 	     ps->max_cycle);
2337 }
2338 
2339 /* Given U_NODE which is the node that failed to be scheduled; LOW and
2340    UP which are the boundaries of it's scheduling window; compute using
2341    SCHED_NODES and II a row in the partial schedule that can be split
2342    which will separate a critical predecessor from a critical successor
2343    thereby expanding the window, and return it.  */
2344 static int
2345 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
2346 		   ddg_node_ptr u_node)
2347 {
2348   ddg_edge_ptr e;
2349   int lower = INT_MIN, upper = INT_MAX;
2350   int crit_pred = -1;
2351   int crit_succ = -1;
2352   int crit_cycle;
2353 
2354   for (e = u_node->in; e != 0; e = e->next_in)
2355     {
2356       int v = e->src->cuid;
2357 
2358       if (bitmap_bit_p (sched_nodes, v)
2359 	  && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
2360 	if (SCHED_TIME (v) > lower)
2361 	  {
2362 	    crit_pred = v;
2363 	    lower = SCHED_TIME (v);
2364 	  }
2365     }
2366 
2367   if (crit_pred >= 0)
2368     {
2369       crit_cycle = SCHED_TIME (crit_pred) + 1;
2370       return SMODULO (crit_cycle, ii);
2371     }
2372 
2373   for (e = u_node->out; e != 0; e = e->next_out)
2374     {
2375       int v = e->dest->cuid;
2376 
2377       if (bitmap_bit_p (sched_nodes, v)
2378 	  && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
2379 	if (SCHED_TIME (v) < upper)
2380 	  {
2381 	    crit_succ = v;
2382 	    upper = SCHED_TIME (v);
2383 	  }
2384     }
2385 
2386   if (crit_succ >= 0)
2387     {
2388       crit_cycle = SCHED_TIME (crit_succ);
2389       return SMODULO (crit_cycle, ii);
2390     }
2391 
2392   if (dump_file)
2393     fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2394 
2395   return SMODULO ((low + up + 1) / 2, ii);
2396 }
2397 
2398 static void
2399 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2400 {
2401   int row;
2402   ps_insn_ptr crr_insn;
2403 
2404   for (row = 0; row < ps->ii; row++)
2405     {
2406       int length = 0;
2407 
2408       for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2409 	{
2410 	  int u = crr_insn->id;
2411 
2412 	  length++;
2413 	  gcc_assert (bitmap_bit_p (sched_nodes, u));
2414 	  /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2415 	     popcount (sched_nodes) == number of insns in ps.  */
2416 	  gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2417 	  gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2418 	}
2419 
2420       gcc_assert (ps->rows_length[row] == length);
2421     }
2422 }
2423 
2424 
2425 /* This page implements the algorithm for ordering the nodes of a DDG
2426    for modulo scheduling, activated through the
2427    "int sms_order_nodes (ddg_ptr, int mii, int * result)" API.  */
2428 
2429 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2430 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2431 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2432 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2433 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2434 #define DEPTH(x) (ASAP ((x)))
2435 
2436 typedef struct node_order_params * nopa;
2437 
2438 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2439 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2440 static nopa  calculate_order_params (ddg_ptr, int, int *);
2441 static int find_max_asap (ddg_ptr, sbitmap);
2442 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2443 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2444 
2445 enum sms_direction {BOTTOMUP, TOPDOWN};
2446 
2447 struct node_order_params
2448 {
2449   int asap;
2450   int alap;
2451   int height;
2452 };
2453 
2454 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1.  */
2455 static void
2456 check_nodes_order (int *node_order, int num_nodes)
2457 {
2458   int i;
2459   auto_sbitmap tmp (num_nodes);
2460 
2461   bitmap_clear (tmp);
2462 
2463   if (dump_file)
2464     fprintf (dump_file, "SMS final nodes order: \n");
2465 
2466   for (i = 0; i < num_nodes; i++)
2467     {
2468       int u = node_order[i];
2469 
2470       if (dump_file)
2471         fprintf (dump_file, "%d ", u);
2472       gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u));
2473 
2474       bitmap_set_bit (tmp, u);
2475     }
2476 
2477   if (dump_file)
2478     fprintf (dump_file, "\n");
2479 }
2480 
2481 /* Order the nodes of G for scheduling and pass the result in
2482    NODE_ORDER.  Also set aux.count of each node to ASAP.
2483    Put maximal ASAP to PMAX_ASAP.  Return the recMII for the given DDG.  */
2484 static int
2485 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2486 {
2487   int i;
2488   int rec_mii = 0;
2489   ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2490 
2491   nopa nops = calculate_order_params (g, mii, pmax_asap);
2492 
2493   if (dump_file)
2494     print_sccs (dump_file, sccs, g);
2495 
2496   order_nodes_of_sccs (sccs, node_order);
2497 
2498   if (sccs->num_sccs > 0)
2499     /* First SCC has the largest recurrence_length.  */
2500     rec_mii = sccs->sccs[0]->recurrence_length;
2501 
2502   /* Save ASAP before destroying node_order_params.  */
2503   for (i = 0; i < g->num_nodes; i++)
2504     {
2505       ddg_node_ptr v = &g->nodes[i];
2506       v->aux.count = ASAP (v);
2507     }
2508 
2509   free (nops);
2510   free_ddg_all_sccs (sccs);
2511   check_nodes_order (node_order, g->num_nodes);
2512 
2513   return rec_mii;
2514 }
2515 
2516 static void
2517 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2518 {
2519   int i, pos = 0;
2520   ddg_ptr g = all_sccs->ddg;
2521   int num_nodes = g->num_nodes;
2522   auto_sbitmap prev_sccs (num_nodes);
2523   auto_sbitmap on_path (num_nodes);
2524   auto_sbitmap tmp (num_nodes);
2525   auto_sbitmap ones (num_nodes);
2526 
2527   bitmap_clear (prev_sccs);
2528   bitmap_ones (ones);
2529 
2530   /* Perform the node ordering starting from the SCC with the highest recMII.
2531      For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
2532   for (i = 0; i < all_sccs->num_sccs; i++)
2533     {
2534       ddg_scc_ptr scc = all_sccs->sccs[i];
2535 
2536       /* Add nodes on paths from previous SCCs to the current SCC.  */
2537       find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2538       bitmap_ior (tmp, scc->nodes, on_path);
2539 
2540       /* Add nodes on paths from the current SCC to previous SCCs.  */
2541       find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2542       bitmap_ior (tmp, tmp, on_path);
2543 
2544       /* Remove nodes of previous SCCs from current extended SCC.  */
2545       bitmap_and_compl (tmp, tmp, prev_sccs);
2546 
2547       pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2548       /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
2549     }
2550 
2551   /* Handle the remaining nodes that do not belong to any scc.  Each call
2552      to order_nodes_in_scc handles a single connected component.  */
2553   while (pos < g->num_nodes)
2554     {
2555       bitmap_and_compl (tmp, ones, prev_sccs);
2556       pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2557     }
2558 }
2559 
2560 /* MII is needed if we consider backarcs (that do not close recursive cycles).  */
2561 static struct node_order_params *
2562 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2563 {
2564   int u;
2565   int max_asap;
2566   int num_nodes = g->num_nodes;
2567   ddg_edge_ptr e;
2568   /* Allocate a place to hold ordering params for each node in the DDG.  */
2569   nopa node_order_params_arr;
2570 
2571   /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
2572   node_order_params_arr = (nopa) xcalloc (num_nodes,
2573 					  sizeof (struct node_order_params));
2574 
2575   /* Set the aux pointer of each node to point to its order_params structure.  */
2576   for (u = 0; u < num_nodes; u++)
2577     g->nodes[u].aux.info = &node_order_params_arr[u];
2578 
2579   /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2580      calculate ASAP, ALAP, mobility, distance, and height for each node
2581      in the dependence (direct acyclic) graph.  */
2582 
2583   /* We assume that the nodes in the array are in topological order.  */
2584 
2585   max_asap = 0;
2586   for (u = 0; u < num_nodes; u++)
2587     {
2588       ddg_node_ptr u_node = &g->nodes[u];
2589 
2590       ASAP (u_node) = 0;
2591       for (e = u_node->in; e; e = e->next_in)
2592 	if (e->distance == 0)
2593 	  ASAP (u_node) = MAX (ASAP (u_node),
2594 			       ASAP (e->src) + e->latency);
2595       max_asap = MAX (max_asap, ASAP (u_node));
2596     }
2597 
2598   for (u = num_nodes - 1; u > -1; u--)
2599     {
2600       ddg_node_ptr u_node = &g->nodes[u];
2601 
2602       ALAP (u_node) = max_asap;
2603       HEIGHT (u_node) = 0;
2604       for (e = u_node->out; e; e = e->next_out)
2605 	if (e->distance == 0)
2606 	  {
2607 	    ALAP (u_node) = MIN (ALAP (u_node),
2608 				 ALAP (e->dest) - e->latency);
2609 	    HEIGHT (u_node) = MAX (HEIGHT (u_node),
2610 				   HEIGHT (e->dest) + e->latency);
2611 	  }
2612     }
2613   if (dump_file)
2614   {
2615     fprintf (dump_file, "\nOrder params\n");
2616     for (u = 0; u < num_nodes; u++)
2617       {
2618         ddg_node_ptr u_node = &g->nodes[u];
2619 
2620         fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2621                  ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2622       }
2623   }
2624 
2625   *pmax_asap = max_asap;
2626   return node_order_params_arr;
2627 }
2628 
2629 static int
2630 find_max_asap (ddg_ptr g, sbitmap nodes)
2631 {
2632   unsigned int u = 0;
2633   int max_asap = -1;
2634   int result = -1;
2635   sbitmap_iterator sbi;
2636 
2637   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2638     {
2639       ddg_node_ptr u_node = &g->nodes[u];
2640 
2641       if (max_asap < ASAP (u_node))
2642 	{
2643 	  max_asap = ASAP (u_node);
2644 	  result = u;
2645 	}
2646     }
2647   return result;
2648 }
2649 
2650 static int
2651 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2652 {
2653   unsigned int u = 0;
2654   int max_hv = -1;
2655   int min_mob = INT_MAX;
2656   int result = -1;
2657   sbitmap_iterator sbi;
2658 
2659   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2660     {
2661       ddg_node_ptr u_node = &g->nodes[u];
2662 
2663       if (max_hv < HEIGHT (u_node))
2664 	{
2665 	  max_hv = HEIGHT (u_node);
2666 	  min_mob = MOB (u_node);
2667 	  result = u;
2668 	}
2669       else if ((max_hv == HEIGHT (u_node))
2670 	       && (min_mob > MOB (u_node)))
2671 	{
2672 	  min_mob = MOB (u_node);
2673 	  result = u;
2674 	}
2675     }
2676   return result;
2677 }
2678 
2679 static int
2680 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2681 {
2682   unsigned int u = 0;
2683   int max_dv = -1;
2684   int min_mob = INT_MAX;
2685   int result = -1;
2686   sbitmap_iterator sbi;
2687 
2688   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2689     {
2690       ddg_node_ptr u_node = &g->nodes[u];
2691 
2692       if (max_dv < DEPTH (u_node))
2693 	{
2694 	  max_dv = DEPTH (u_node);
2695 	  min_mob = MOB (u_node);
2696 	  result = u;
2697 	}
2698       else if ((max_dv == DEPTH (u_node))
2699 	       && (min_mob > MOB (u_node)))
2700 	{
2701 	  min_mob = MOB (u_node);
2702 	  result = u;
2703 	}
2704     }
2705   return result;
2706 }
2707 
2708 /* Places the nodes of SCC into the NODE_ORDER array starting
2709    at position POS, according to the SMS ordering algorithm.
2710    NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2711    the NODE_ORDER array, starting from position zero.  */
2712 static int
2713 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2714 		    int * node_order, int pos)
2715 {
2716   enum sms_direction dir;
2717   int num_nodes = g->num_nodes;
2718   auto_sbitmap workset (num_nodes);
2719   auto_sbitmap tmp (num_nodes);
2720   sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2721   auto_sbitmap predecessors (num_nodes);
2722   auto_sbitmap successors (num_nodes);
2723 
2724   bitmap_clear (predecessors);
2725   find_predecessors (predecessors, g, nodes_ordered);
2726 
2727   bitmap_clear (successors);
2728   find_successors (successors, g, nodes_ordered);
2729 
2730   bitmap_clear (tmp);
2731   if (bitmap_and (tmp, predecessors, scc))
2732     {
2733       bitmap_copy (workset, tmp);
2734       dir = BOTTOMUP;
2735     }
2736   else if (bitmap_and (tmp, successors, scc))
2737     {
2738       bitmap_copy (workset, tmp);
2739       dir = TOPDOWN;
2740     }
2741   else
2742     {
2743       int u;
2744 
2745       bitmap_clear (workset);
2746       if ((u = find_max_asap (g, scc)) >= 0)
2747 	bitmap_set_bit (workset, u);
2748       dir = BOTTOMUP;
2749     }
2750 
2751   bitmap_clear (zero_bitmap);
2752   while (!bitmap_equal_p (workset, zero_bitmap))
2753     {
2754       int v;
2755       ddg_node_ptr v_node;
2756       sbitmap v_node_preds;
2757       sbitmap v_node_succs;
2758 
2759       if (dir == TOPDOWN)
2760 	{
2761 	  while (!bitmap_equal_p (workset, zero_bitmap))
2762 	    {
2763 	      v = find_max_hv_min_mob (g, workset);
2764 	      v_node = &g->nodes[v];
2765 	      node_order[pos++] = v;
2766 	      v_node_succs = NODE_SUCCESSORS (v_node);
2767 	      bitmap_and (tmp, v_node_succs, scc);
2768 
2769 	      /* Don't consider the already ordered successors again.  */
2770 	      bitmap_and_compl (tmp, tmp, nodes_ordered);
2771 	      bitmap_ior (workset, workset, tmp);
2772 	      bitmap_clear_bit (workset, v);
2773 	      bitmap_set_bit (nodes_ordered, v);
2774 	    }
2775 	  dir = BOTTOMUP;
2776 	  bitmap_clear (predecessors);
2777 	  find_predecessors (predecessors, g, nodes_ordered);
2778 	  bitmap_and (workset, predecessors, scc);
2779 	}
2780       else
2781 	{
2782 	  while (!bitmap_equal_p (workset, zero_bitmap))
2783 	    {
2784 	      v = find_max_dv_min_mob (g, workset);
2785 	      v_node = &g->nodes[v];
2786 	      node_order[pos++] = v;
2787 	      v_node_preds = NODE_PREDECESSORS (v_node);
2788 	      bitmap_and (tmp, v_node_preds, scc);
2789 
2790 	      /* Don't consider the already ordered predecessors again.  */
2791 	      bitmap_and_compl (tmp, tmp, nodes_ordered);
2792 	      bitmap_ior (workset, workset, tmp);
2793 	      bitmap_clear_bit (workset, v);
2794 	      bitmap_set_bit (nodes_ordered, v);
2795 	    }
2796 	  dir = TOPDOWN;
2797 	  bitmap_clear (successors);
2798 	  find_successors (successors, g, nodes_ordered);
2799 	  bitmap_and (workset, successors, scc);
2800 	}
2801     }
2802   sbitmap_free (zero_bitmap);
2803   return pos;
2804 }
2805 
2806 
2807 /* This page contains functions for manipulating partial-schedules during
2808    modulo scheduling.  */
2809 
2810 /* Create a partial schedule and allocate a memory to hold II rows.  */
2811 
2812 static partial_schedule_ptr
2813 create_partial_schedule (int ii, ddg_ptr g, int history)
2814 {
2815   partial_schedule_ptr ps = XNEW (struct partial_schedule);
2816   ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2817   ps->rows_length = (int *) xcalloc (ii, sizeof (int));
2818   ps->reg_moves.create (0);
2819   ps->ii = ii;
2820   ps->history = history;
2821   ps->min_cycle = INT_MAX;
2822   ps->max_cycle = INT_MIN;
2823   ps->g = g;
2824 
2825   return ps;
2826 }
2827 
2828 /* Free the PS_INSNs in rows array of the given partial schedule.
2829    ??? Consider caching the PS_INSN's.  */
2830 static void
2831 free_ps_insns (partial_schedule_ptr ps)
2832 {
2833   int i;
2834 
2835   for (i = 0; i < ps->ii; i++)
2836     {
2837       while (ps->rows[i])
2838 	{
2839 	  ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2840 
2841 	  free (ps->rows[i]);
2842 	  ps->rows[i] = ps_insn;
2843 	}
2844       ps->rows[i] = NULL;
2845     }
2846 }
2847 
2848 /* Free all the memory allocated to the partial schedule.  */
2849 
2850 static void
2851 free_partial_schedule (partial_schedule_ptr ps)
2852 {
2853   ps_reg_move_info *move;
2854   unsigned int i;
2855 
2856   if (!ps)
2857     return;
2858 
2859   FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
2860     sbitmap_free (move->uses);
2861   ps->reg_moves.release ();
2862 
2863   free_ps_insns (ps);
2864   free (ps->rows);
2865   free (ps->rows_length);
2866   free (ps);
2867 }
2868 
2869 /* Clear the rows array with its PS_INSNs, and create a new one with
2870    NEW_II rows.  */
2871 
2872 static void
2873 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2874 {
2875   if (!ps)
2876     return;
2877   free_ps_insns (ps);
2878   if (new_ii == ps->ii)
2879     return;
2880   ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2881 						 * sizeof (ps_insn_ptr));
2882   memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2883   ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2884   memset (ps->rows_length, 0, new_ii * sizeof (int));
2885   ps->ii = new_ii;
2886   ps->min_cycle = INT_MAX;
2887   ps->max_cycle = INT_MIN;
2888 }
2889 
2890 /* Prints the partial schedule as an ii rows array, for each rows
2891    print the ids of the insns in it.  */
2892 void
2893 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2894 {
2895   int i;
2896 
2897   for (i = 0; i < ps->ii; i++)
2898     {
2899       ps_insn_ptr ps_i = ps->rows[i];
2900 
2901       fprintf (dump, "\n[ROW %d ]: ", i);
2902       while (ps_i)
2903 	{
2904 	  rtx_insn *insn = ps_rtl_insn (ps, ps_i->id);
2905 
2906 	  if (JUMP_P (insn))
2907 	    fprintf (dump, "%d (branch), ", INSN_UID (insn));
2908 	  else
2909 	    fprintf (dump, "%d, ", INSN_UID (insn));
2910 
2911 	  ps_i = ps_i->next_in_row;
2912 	}
2913     }
2914 }
2915 
2916 /* Creates an object of PS_INSN and initializes it to the given parameters.  */
2917 static ps_insn_ptr
2918 create_ps_insn (int id, int cycle)
2919 {
2920   ps_insn_ptr ps_i = XNEW (struct ps_insn);
2921 
2922   ps_i->id = id;
2923   ps_i->next_in_row = NULL;
2924   ps_i->prev_in_row = NULL;
2925   ps_i->cycle = cycle;
2926 
2927   return ps_i;
2928 }
2929 
2930 
2931 /* Removes the given PS_INSN from the partial schedule.  */
2932 static void
2933 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2934 {
2935   int row;
2936 
2937   gcc_assert (ps && ps_i);
2938 
2939   row = SMODULO (ps_i->cycle, ps->ii);
2940   if (! ps_i->prev_in_row)
2941     {
2942       gcc_assert (ps_i == ps->rows[row]);
2943       ps->rows[row] = ps_i->next_in_row;
2944       if (ps->rows[row])
2945 	ps->rows[row]->prev_in_row = NULL;
2946     }
2947   else
2948     {
2949       ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2950       if (ps_i->next_in_row)
2951 	ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2952     }
2953 
2954   ps->rows_length[row] -= 1;
2955   free (ps_i);
2956   return;
2957 }
2958 
2959 /* Unlike what literature describes for modulo scheduling (which focuses
2960    on VLIW machines) the order of the instructions inside a cycle is
2961    important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2962    where the current instruction should go relative to the already
2963    scheduled instructions in the given cycle.  Go over these
2964    instructions and find the first possible column to put it in.  */
2965 static bool
2966 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2967 		     sbitmap must_precede, sbitmap must_follow)
2968 {
2969   ps_insn_ptr next_ps_i;
2970   ps_insn_ptr first_must_follow = NULL;
2971   ps_insn_ptr last_must_precede = NULL;
2972   ps_insn_ptr last_in_row = NULL;
2973   int row;
2974 
2975   if (! ps_i)
2976     return false;
2977 
2978   row = SMODULO (ps_i->cycle, ps->ii);
2979 
2980   /* Find the first must follow and the last must precede
2981      and insert the node immediately after the must precede
2982      but make sure that it there is no must follow after it.  */
2983   for (next_ps_i = ps->rows[row];
2984        next_ps_i;
2985        next_ps_i = next_ps_i->next_in_row)
2986     {
2987       if (must_follow
2988 	  && bitmap_bit_p (must_follow, next_ps_i->id)
2989 	  && ! first_must_follow)
2990         first_must_follow = next_ps_i;
2991       if (must_precede && bitmap_bit_p (must_precede, next_ps_i->id))
2992         {
2993           /* If we have already met a node that must follow, then
2994 	     there is no possible column.  */
2995   	  if (first_must_follow)
2996             return false;
2997 	  else
2998             last_must_precede = next_ps_i;
2999         }
3000       /* The closing branch must be the last in the row.  */
3001       if (must_precede
3002 	  && bitmap_bit_p (must_precede, next_ps_i->id)
3003 	  && JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
3004 	return false;
3005 
3006        last_in_row = next_ps_i;
3007     }
3008 
3009   /* The closing branch is scheduled as well.  Make sure there is no
3010      dependent instruction after it as the branch should be the last
3011      instruction in the row.  */
3012   if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
3013     {
3014       if (first_must_follow)
3015 	return false;
3016       if (last_in_row)
3017 	{
3018 	  /* Make the branch the last in the row.  New instructions
3019 	     will be inserted at the beginning of the row or after the
3020 	     last must_precede instruction thus the branch is guaranteed
3021 	     to remain the last instruction in the row.  */
3022 	  last_in_row->next_in_row = ps_i;
3023 	  ps_i->prev_in_row = last_in_row;
3024 	  ps_i->next_in_row = NULL;
3025 	}
3026       else
3027 	ps->rows[row] = ps_i;
3028       return true;
3029     }
3030 
3031   /* Now insert the node after INSERT_AFTER_PSI.  */
3032 
3033   if (! last_must_precede)
3034     {
3035       ps_i->next_in_row = ps->rows[row];
3036       ps_i->prev_in_row = NULL;
3037       if (ps_i->next_in_row)
3038     	ps_i->next_in_row->prev_in_row = ps_i;
3039       ps->rows[row] = ps_i;
3040     }
3041   else
3042     {
3043       ps_i->next_in_row = last_must_precede->next_in_row;
3044       last_must_precede->next_in_row = ps_i;
3045       ps_i->prev_in_row = last_must_precede;
3046       if (ps_i->next_in_row)
3047         ps_i->next_in_row->prev_in_row = ps_i;
3048     }
3049 
3050   return true;
3051 }
3052 
3053 /* Advances the PS_INSN one column in its current row; returns false
3054    in failure and true in success.  Bit N is set in MUST_FOLLOW if
3055    the node with cuid N must be come after the node pointed to by
3056    PS_I when scheduled in the same cycle.  */
3057 static int
3058 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3059 			sbitmap must_follow)
3060 {
3061   ps_insn_ptr prev, next;
3062   int row;
3063 
3064   if (!ps || !ps_i)
3065     return false;
3066 
3067   row = SMODULO (ps_i->cycle, ps->ii);
3068 
3069   if (! ps_i->next_in_row)
3070     return false;
3071 
3072   /* Check if next_in_row is dependent on ps_i, both having same sched
3073      times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
3074   if (must_follow && bitmap_bit_p (must_follow, ps_i->next_in_row->id))
3075     return false;
3076 
3077   /* Advance PS_I over its next_in_row in the doubly linked list.  */
3078   prev = ps_i->prev_in_row;
3079   next = ps_i->next_in_row;
3080 
3081   if (ps_i == ps->rows[row])
3082     ps->rows[row] = next;
3083 
3084   ps_i->next_in_row = next->next_in_row;
3085 
3086   if (next->next_in_row)
3087     next->next_in_row->prev_in_row = ps_i;
3088 
3089   next->next_in_row = ps_i;
3090   ps_i->prev_in_row = next;
3091 
3092   next->prev_in_row = prev;
3093   if (prev)
3094     prev->next_in_row = next;
3095 
3096   return true;
3097 }
3098 
3099 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
3100    Returns 0 if this is not possible and a PS_INSN otherwise.  Bit N is
3101    set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
3102    before/after (respectively) the node pointed to by PS_I when scheduled
3103    in the same cycle.  */
3104 static ps_insn_ptr
3105 add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
3106 		sbitmap must_precede, sbitmap must_follow)
3107 {
3108   ps_insn_ptr ps_i;
3109   int row = SMODULO (cycle, ps->ii);
3110 
3111   if (ps->rows_length[row] >= issue_rate)
3112     return NULL;
3113 
3114   ps_i = create_ps_insn (id, cycle);
3115 
3116   /* Finds and inserts PS_I according to MUST_FOLLOW and
3117      MUST_PRECEDE.  */
3118   if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
3119     {
3120       free (ps_i);
3121       return NULL;
3122     }
3123 
3124   ps->rows_length[row] += 1;
3125   return ps_i;
3126 }
3127 
3128 /* Advance time one cycle.  Assumes DFA is being used.  */
3129 static void
3130 advance_one_cycle (void)
3131 {
3132   if (targetm.sched.dfa_pre_cycle_insn)
3133     state_transition (curr_state,
3134 		      targetm.sched.dfa_pre_cycle_insn ());
3135 
3136   state_transition (curr_state, NULL);
3137 
3138   if (targetm.sched.dfa_post_cycle_insn)
3139     state_transition (curr_state,
3140 		      targetm.sched.dfa_post_cycle_insn ());
3141 }
3142 
3143 
3144 
3145 /* Checks if PS has resource conflicts according to DFA, starting from
3146    FROM cycle to TO cycle; returns true if there are conflicts and false
3147    if there are no conflicts.  Assumes DFA is being used.  */
3148 static int
3149 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
3150 {
3151   int cycle;
3152 
3153   state_reset (curr_state);
3154 
3155   for (cycle = from; cycle <= to; cycle++)
3156     {
3157       ps_insn_ptr crr_insn;
3158       /* Holds the remaining issue slots in the current row.  */
3159       int can_issue_more = issue_rate;
3160 
3161       /* Walk through the DFA for the current row.  */
3162       for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
3163 	   crr_insn;
3164 	   crr_insn = crr_insn->next_in_row)
3165 	{
3166 	  rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
3167 
3168 	  if (!NONDEBUG_INSN_P (insn))
3169 	    continue;
3170 
3171 	  /* Check if there is room for the current insn.  */
3172 	  if (!can_issue_more || state_dead_lock_p (curr_state))
3173 	    return true;
3174 
3175 	  /* Update the DFA state and return with failure if the DFA found
3176 	     resource conflicts.  */
3177 	  if (state_transition (curr_state, insn) >= 0)
3178 	    return true;
3179 
3180 	  if (targetm.sched.variable_issue)
3181 	    can_issue_more =
3182 	      targetm.sched.variable_issue (sched_dump, sched_verbose,
3183 					    insn, can_issue_more);
3184 	  /* A naked CLOBBER or USE generates no instruction, so don't
3185 	     let them consume issue slots.  */
3186 	  else if (GET_CODE (PATTERN (insn)) != USE
3187 		   && GET_CODE (PATTERN (insn)) != CLOBBER)
3188 	    can_issue_more--;
3189 	}
3190 
3191       /* Advance the DFA to the next cycle.  */
3192       advance_one_cycle ();
3193     }
3194   return false;
3195 }
3196 
3197 /* Checks if the given node causes resource conflicts when added to PS at
3198    cycle C.  If not the node is added to PS and returned; otherwise zero
3199    is returned.  Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
3200    cuid N must be come before/after (respectively) the node pointed to by
3201    PS_I when scheduled in the same cycle.  */
3202 ps_insn_ptr
3203 ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
3204    			     int c, sbitmap must_precede,
3205 			     sbitmap must_follow)
3206 {
3207   int has_conflicts = 0;
3208   ps_insn_ptr ps_i;
3209 
3210   /* First add the node to the PS, if this succeeds check for
3211      conflicts, trying different issue slots in the same row.  */
3212   if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
3213     return NULL; /* Failed to insert the node at the given cycle.  */
3214 
3215   has_conflicts = ps_has_conflicts (ps, c, c)
3216 		  || (ps->history > 0
3217 		      && ps_has_conflicts (ps,
3218 					   c - ps->history,
3219 					   c + ps->history));
3220 
3221   /* Try different issue slots to find one that the given node can be
3222      scheduled in without conflicts.  */
3223   while (has_conflicts)
3224     {
3225       if (! ps_insn_advance_column (ps, ps_i, must_follow))
3226 	break;
3227       has_conflicts = ps_has_conflicts (ps, c, c)
3228 		      || (ps->history > 0
3229 			  && ps_has_conflicts (ps,
3230 					       c - ps->history,
3231 					       c + ps->history));
3232     }
3233 
3234   if (has_conflicts)
3235     {
3236       remove_node_from_ps (ps, ps_i);
3237       return NULL;
3238     }
3239 
3240   ps->min_cycle = MIN (ps->min_cycle, c);
3241   ps->max_cycle = MAX (ps->max_cycle, c);
3242   return ps_i;
3243 }
3244 
3245 /* Calculate the stage count of the partial schedule PS.  The calculation
3246    takes into account the rotation amount passed in ROTATION_AMOUNT.  */
3247 int
3248 calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
3249 {
3250   int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
3251   int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
3252   int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
3253 
3254   /* The calculation of stage count is done adding the number of stages
3255      before cycle zero and after cycle zero.  */
3256   stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
3257 
3258   return stage_count;
3259 }
3260 
3261 /* Rotate the rows of PS such that insns scheduled at time
3262    START_CYCLE will appear in row 0.  Updates max/min_cycles.  */
3263 void
3264 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
3265 {
3266   int i, row, backward_rotates;
3267   int last_row = ps->ii - 1;
3268 
3269   if (start_cycle == 0)
3270     return;
3271 
3272   backward_rotates = SMODULO (start_cycle, ps->ii);
3273 
3274   /* Revisit later and optimize this into a single loop.  */
3275   for (i = 0; i < backward_rotates; i++)
3276     {
3277       ps_insn_ptr first_row = ps->rows[0];
3278       int first_row_length = ps->rows_length[0];
3279 
3280       for (row = 0; row < last_row; row++)
3281 	{
3282 	  ps->rows[row] = ps->rows[row + 1];
3283 	  ps->rows_length[row] = ps->rows_length[row + 1];
3284 	}
3285 
3286       ps->rows[last_row] = first_row;
3287       ps->rows_length[last_row] = first_row_length;
3288     }
3289 
3290   ps->max_cycle -= start_cycle;
3291   ps->min_cycle -= start_cycle;
3292 }
3293 
3294 #endif /* INSN_SCHEDULING */
3295 
3296 /* Run instruction scheduler.  */
3297 /* Perform SMS module scheduling.  */
3298 
3299 namespace {
3300 
3301 const pass_data pass_data_sms =
3302 {
3303   RTL_PASS, /* type */
3304   "sms", /* name */
3305   OPTGROUP_NONE, /* optinfo_flags */
3306   TV_SMS, /* tv_id */
3307   0, /* properties_required */
3308   0, /* properties_provided */
3309   0, /* properties_destroyed */
3310   0, /* todo_flags_start */
3311   TODO_df_finish, /* todo_flags_finish */
3312 };
3313 
3314 class pass_sms : public rtl_opt_pass
3315 {
3316 public:
3317   pass_sms (gcc::context *ctxt)
3318     : rtl_opt_pass (pass_data_sms, ctxt)
3319   {}
3320 
3321   /* opt_pass methods: */
3322   virtual bool gate (function *)
3323 {
3324   return (optimize > 0 && flag_modulo_sched);
3325 }
3326 
3327   virtual unsigned int execute (function *);
3328 
3329 }; // class pass_sms
3330 
3331 unsigned int
3332 pass_sms::execute (function *fun ATTRIBUTE_UNUSED)
3333 {
3334 #ifdef INSN_SCHEDULING
3335   basic_block bb;
3336 
3337   /* Collect loop information to be used in SMS.  */
3338   cfg_layout_initialize (0);
3339   sms_schedule ();
3340 
3341   /* Update the life information, because we add pseudos.  */
3342   max_regno = max_reg_num ();
3343 
3344   /* Finalize layout changes.  */
3345   FOR_EACH_BB_FN (bb, fun)
3346     if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
3347       bb->aux = bb->next_bb;
3348   free_dominance_info (CDI_DOMINATORS);
3349   cfg_layout_finalize ();
3350 #endif /* INSN_SCHEDULING */
3351   return 0;
3352 }
3353 
3354 } // anon namespace
3355 
3356 rtl_opt_pass *
3357 make_pass_sms (gcc::context *ctxt)
3358 {
3359   return new pass_sms (ctxt);
3360 }
3361