1 /* Swing Modulo Scheduling implementation.
2    Copyright (C) 2004-2018 Free Software Foundation, Inc.
3    Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "cfghooks.h"
30 #include "df.h"
31 #include "memmodel.h"
32 #include "optabs.h"
33 #include "regs.h"
34 #include "emit-rtl.h"
35 #include "gcov-io.h"
36 #include "profile.h"
37 #include "insn-attr.h"
38 #include "cfgrtl.h"
39 #include "sched-int.h"
40 #include "cfgloop.h"
41 #include "expr.h"
42 #include "params.h"
43 #include "ddg.h"
44 #include "tree-pass.h"
45 #include "dbgcnt.h"
46 #include "loop-unroll.h"
47 
48 #ifdef INSN_SCHEDULING
49 
50 /* This file contains the implementation of the Swing Modulo Scheduler,
51    described in the following references:
52    [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero, and J. Eckhardt.
53        Lifetime-sensitive modulo scheduling in a production environment.
54        IEEE Trans. on Comps., 50(3), March 2001
55    [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
56        Swing Modulo Scheduling: A Lifetime Sensitive Approach.
57        PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
58 
59    The basic structure is:
60    1. Build a data-dependence graph (DDG) for each loop.
61    2. Use the DDG to order the insns of a loop (not in topological order
62       necessarily, but rather) trying to place each insn after all its
63       predecessors _or_ after all its successors.
64    3. Compute MII: a lower bound on the number of cycles to schedule the loop.
65    4. Use the ordering to perform list-scheduling of the loop:
66       1. Set II = MII.  We will try to schedule the loop within II cycles.
67       2. Try to schedule the insns one by one according to the ordering.
68 	 For each insn compute an interval of cycles by considering already-
69 	 scheduled preds and succs (and associated latencies); try to place
70 	 the insn in the cycles of this window checking for potential
71 	 resource conflicts (using the DFA interface).
72 	 Note: this is different from the cycle-scheduling of schedule_insns;
73 	 here the insns are not scheduled monotonically top-down (nor bottom-
74 	 up).
75       3. If we fail to schedule all the insns, bump II and try again, unless
76 	 II reaches an upper bound MaxII, in which case report failure.
77    5. If we succeeded in scheduling the loop within II cycles, we now
78       generate prolog and epilog, decrease the counter of the loop, and
79       perform modulo variable expansion for live ranges that span more than
80       II cycles (i.e. use register copies to prevent a def from overwriting
81       itself before reaching the use).
82 
83     SMS works with countable loops (1) whose control part can be easily
84     decoupled from the rest of the loop and (2) whose loop count can
85     be easily adjusted.  This is because we peel a constant number of
86     iterations into a prologue and epilogue for which we want to avoid
87     emitting the control part, and a kernel which is to iterate that
88     constant number of iterations less than the original loop.  So the
89     control part should be a set of insns clearly identified and having
90     its own iv, not otherwise used in the loop (at least for now), which
91     initializes a register before the loop to the number of iterations.
92     Currently SMS relies on the do-loop pattern to recognize such loops,
93     where (1) the control part comprises all insns defining and/or
94     using a certain 'count' register and (2) the loop count can be
95     adjusted by modifying this register prior to the loop.
96     TODO: Rely on cfgloop analysis instead.  */
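/* Illustrative sketch (not from the original sources): for a countable loop
   such as

     for (i = 0; i < N; i++)
       a[i] = a[i] * b + c;

   a successful SMS with stage count SC = 3 roughly produces

     prolog : the early stages of the first SC - 1 = 2 iterations,
	      emitted without the control part;
     kernel : II cycles per iteration, executed N - 2 times, with stages
	      of different original iterations overlapped;
     epilog : the remaining late stages of the last 2 iterations;

   plus register copies (modulo variable expansion) for any value whose
   lifetime exceeds II cycles.  */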
97 
98 /* This page defines partial-schedule structures and functions for
99    modulo scheduling.  */
100 
101 typedef struct partial_schedule *partial_schedule_ptr;
102 typedef struct ps_insn *ps_insn_ptr;
103 
104 /* The minimum (absolute) cycle that a node of ps was scheduled in.  */
105 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
106 
107 /* The maximum (absolute) cycle that a node of ps was scheduled in.  */
108 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
109 
110 /* Perform signed modulo, always returning a non-negative value.  */
111 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
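/* For example, SMODULO (-1, 4) is 3 and SMODULO (-8, 4) is 0, whereas the
   plain expressions -1 % 4 and -8 % 4 evaluate to -1 and 0 under the usual
   truncating division.  */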
112 
113 /* The number of different iterations the nodes in ps span, assuming
114    the stage boundaries are placed efficiently.  */
115 #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
116                          + 1 + ii - 1) / ii)
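/* This is a ceiling division: for example, cycles -2 .. 5 with ii = 4 give
   CALC_STAGE_COUNT (5, -2, 4) = (5 - (-2) + 1 + 4 - 1) / 4 = 11 / 4 = 2
   stages.  */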
117 /* The stage count of ps.  */
118 #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
119 
120 /* A single instruction in the partial schedule.  */
121 struct ps_insn
122 {
123   /* Identifies the instruction to be scheduled.  Values smaller than
124      the ddg's num_nodes refer directly to ddg nodes.  A value of
125      X + num_nodes refers to register move X.  */
126   int id;
127 
128   /* The (absolute) cycle in which the PS instruction is scheduled.
129      Same as SCHED_TIME (node).  */
130   int cycle;
131 
132   /* The next/prev PS_INSN in the same row.  */
133   ps_insn_ptr next_in_row,
134 	      prev_in_row;
135 
136 };
137 
138 /* Information about a register move that has been added to a partial
139    schedule.  */
140 struct ps_reg_move_info
141 {
142   /* The source of the move is defined by the ps_insn with id DEF.
143      The destination is used by the ps_insns with the ids in USES.  */
144   int def;
145   sbitmap uses;
146 
147   /* The original form of USES' instructions used OLD_REG, but they
148      should now use NEW_REG.  */
149   rtx old_reg;
150   rtx new_reg;
151 
152   /* The number of consecutive stages that the move occupies.  */
153   int num_consecutive_stages;
154 
155   /* An instruction that sets NEW_REG to the correct value.  The first
156      move associated with DEF will have an rhs of OLD_REG; later moves
157      use the result of the previous move.  */
158   rtx_insn *insn;
159 };
160 
161 /* Holds the partial schedule as an array of II rows.  Each entry of the
162    array points to a linked list of PS_INSNs, which represents the
163    instructions that are scheduled for that row.  */
164 struct partial_schedule
165 {
166   int ii;	/* Number of rows in the partial schedule.  */
167   int history;  /* Threshold for conflict checking using DFA.  */
168 
169   /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
170   ps_insn_ptr *rows;
171 
172   /* All the moves added for this partial schedule.  Index X has
173      a ps_insn id of X + g->num_nodes.  */
174   vec<ps_reg_move_info> reg_moves;
175 
176   /*  rows_length[i] holds the number of instructions in the row.
177       It is used only (as an optimization) to back off quickly from
178       trying to schedule a node in a full row; that is, to avoid running
179       through futile DFA state transitions.  */
180   int *rows_length;
181 
182   /* The earliest absolute cycle of an insn in the partial schedule.  */
183   int min_cycle;
184 
185   /* The latest absolute cycle of an insn in the partial schedule.  */
186   int max_cycle;
187 
188   ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */
189 
190   int stage_count;  /* The stage count of the partial schedule.  */
191 };
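/* Note (as used by the code below): an insn scheduled at absolute cycle C
   is kept on the linked list rows[SMODULO (C, ii)], and its stage is the
   cycle divided by ii once the schedule has been normalized to start at
   cycle zero.  */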
192 
193 
194 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
195 static void free_partial_schedule (partial_schedule_ptr);
196 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
197 void print_partial_schedule (partial_schedule_ptr, FILE *);
198 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
199 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
200 						int, int, sbitmap, sbitmap);
201 static void rotate_partial_schedule (partial_schedule_ptr, int);
202 void set_row_column_for_ps (partial_schedule_ptr);
203 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
204 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
205 
206 
207 /* This page defines constants and structures for the modulo scheduling
208    driver.  */
209 
210 static int sms_order_nodes (ddg_ptr, int, int *, int *);
211 static void set_node_sched_params (ddg_ptr);
212 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
213 static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
214 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
215                                     rtx, rtx);
216 static int calculate_stage_count (partial_schedule_ptr, int);
217 static void calculate_must_precede_follow (ddg_node_ptr, int, int,
218 					   int, int, sbitmap, sbitmap, sbitmap);
219 static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
220 			     sbitmap, int, int *, int *, int *);
221 static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
222 					  sbitmap, int *, sbitmap, sbitmap);
223 static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
224 
225 #define NODE_ASAP(node) ((node)->aux.count)
226 
227 #define SCHED_PARAMS(x) (&node_sched_param_vec[x])
228 #define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
229 #define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
230 #define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
231 #define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
232 
233 /* The scheduling parameters held for each node.  */
234 typedef struct node_sched_params
235 {
236   int time;	/* The absolute scheduling cycle.  */
237 
238   int row;    /* Holds time % ii.  */
239   int stage;  /* Holds time / ii.  */
240 
241   /* The column of a node inside the ps.  If nodes u, v are on the same row,
242      u will precede v if column (u) < column (v).  */
243   int column;
244 } *node_sched_params_ptr;
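/* For example, a node with time = 9 under ii = 4 has row 1 and stage 2
   (once the schedule has been normalized to start at cycle 0).  */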
245 
246 /* The following three functions are copied from the current scheduler
247    code in order to use sched_analyze() for computing the dependencies.
248    They are used when initializing the sched_info structure.  */
249 static const char *
250 sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED)
251 {
252   static char tmp[80];
253 
254   sprintf (tmp, "i%4d", INSN_UID (insn));
255   return tmp;
256 }
257 
258 static void
259 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
260 			       regset used ATTRIBUTE_UNUSED)
261 {
262 }
263 
264 static struct common_sched_info_def sms_common_sched_info;
265 
266 static struct sched_deps_info_def sms_sched_deps_info =
267   {
268     compute_jump_reg_dependencies,
269     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
270     NULL,
271     0, 0, 0
272   };
273 
274 static struct haifa_sched_info sms_sched_info =
275 {
276   NULL,
277   NULL,
278   NULL,
279   NULL,
280   NULL,
281   sms_print_insn,
282   NULL,
283   NULL, /* insn_finishes_block_p */
284   NULL, NULL,
285   NULL, NULL,
286   0, 0,
287 
288   NULL, NULL, NULL, NULL,
289   NULL, NULL,
290   0
291 };
292 
293 /* Partial schedule instruction ID in PS is a register move.  Return
294    information about it.  */
295 static struct ps_reg_move_info *
296 ps_reg_move (partial_schedule_ptr ps, int id)
297 {
298   gcc_checking_assert (id >= ps->g->num_nodes);
299   return &ps->reg_moves[id - ps->g->num_nodes];
300 }
301 
302 /* Return the rtl instruction that is being scheduled by partial schedule
303    instruction ID, which belongs to schedule PS.  */
304 static rtx_insn *
305 ps_rtl_insn (partial_schedule_ptr ps, int id)
306 {
307   if (id < ps->g->num_nodes)
308     return ps->g->nodes[id].insn;
309   else
310     return ps_reg_move (ps, id)->insn;
311 }
312 
313 /* Partial schedule instruction ID, which belongs to PS, occurred in
314    the original (unscheduled) loop.  Return the first instruction
315    in the loop that was associated with ps_rtl_insn (PS, ID).
316    If the instruction had some notes before it, this is the first
317    of those notes.  */
318 static rtx_insn *
319 ps_first_note (partial_schedule_ptr ps, int id)
320 {
321   gcc_assert (id < ps->g->num_nodes);
322   return ps->g->nodes[id].first_note;
323 }
324 
325 /* Return the number of consecutive stages that are occupied by
326    partial schedule instruction ID in PS.  */
327 static int
328 ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
329 {
330   if (id < ps->g->num_nodes)
331     return 1;
332   else
333     return ps_reg_move (ps, id)->num_consecutive_stages;
334 }
335 
336 /* Given HEAD and TAIL, the first and last insns in a loop, return the
337    register that controls the loop.  Return zero if that register has
338    more than one occurrence in the loop besides the control part, or if
339    the do-loop pattern is not of the form we expect.  */
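/* A note on the shapes handled below, for illustration: the condition
   returned by doloop_condition_get is expected to test either the count
   register itself or a (plus count_reg ...) expression; any other shape
   hits the gcc_unreachable below.  */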
340 static rtx
341 doloop_register_get (rtx_insn *head, rtx_insn *tail)
342 {
343   rtx reg, condition;
344   rtx_insn *insn, *first_insn_not_to_check;
345 
346   if (!JUMP_P (tail))
347     return NULL_RTX;
348 
349   if (!targetm.code_for_doloop_end)
350     return NULL_RTX;
351 
352   /* TODO: Free SMS's dependence on doloop_condition_get.  */
353   condition = doloop_condition_get (tail);
354   if (! condition)
355     return NULL_RTX;
356 
357   if (REG_P (XEXP (condition, 0)))
358     reg = XEXP (condition, 0);
359   else if (GET_CODE (XEXP (condition, 0)) == PLUS
360 	   && REG_P (XEXP (XEXP (condition, 0), 0)))
361     reg = XEXP (XEXP (condition, 0), 0);
362   else
363     gcc_unreachable ();
364 
365   /* Check that the COUNT_REG has no other occurrences in the loop
366      until the decrement.  We assume the control part consists of
367      either a single (parallel) branch-on-count or a (non-parallel)
368      branch immediately preceded by a single (decrement) insn.  */
369   first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
370                              : prev_nondebug_insn (tail));
371 
372   for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
373     if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
374       {
375         if (dump_file)
376         {
377           fprintf (dump_file, "SMS count_reg found ");
378           print_rtl_single (dump_file, reg);
379           fprintf (dump_file, " outside control in insn:\n");
380           print_rtl_single (dump_file, insn);
381         }
382 
383         return NULL_RTX;
384       }
385 
386   return reg;
387 }
388 
389 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
390    that the number of iterations is a compile-time constant.  If so,
391    return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to
392    this constant.  Otherwise return 0.  */
393 static rtx_insn *
394 const_iteration_count (rtx count_reg, basic_block pre_header,
395 		       int64_t * count)
396 {
397   rtx_insn *insn;
398   rtx_insn *head, *tail;
399 
400   if (! pre_header)
401     return NULL;
402 
403   get_ebb_head_tail (pre_header, pre_header, &head, &tail);
404 
405   for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
406     if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
407 	rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
408       {
409 	rtx pat = single_set (insn);
410 
411 	if (CONST_INT_P (SET_SRC (pat)))
412 	  {
413 	    *count = INTVAL (SET_SRC (pat));
414 	    return insn;
415 	  }
416 
417 	return NULL;
418       }
419 
420   return NULL;
421 }
422 
423 /* A very simple resource-based lower bound on the initiation interval.
424    ??? Improve the accuracy of this bound by considering the
425    utilization of various units.  */
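/* As a minimal illustration of the default bound below: a DDG with 8
   non-debug nodes on a target that can issue 2 insns per cycle yields
   res_MII = (8 - 0) / 2 = 4.  */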
426 static int
427 res_MII (ddg_ptr g)
428 {
429   if (targetm.sched.sms_res_mii)
430     return targetm.sched.sms_res_mii (g);
431 
432   return ((g->num_nodes - g->num_debug) / issue_rate);
433 }
434 
435 
436 /* A vector that contains the sched data for each ps_insn.  */
437 static vec<node_sched_params> node_sched_param_vec;
438 
439 /* Allocate sched_params for each node and initialize it.  */
440 static void
441 set_node_sched_params (ddg_ptr g)
442 {
443   node_sched_param_vec.truncate (0);
444   node_sched_param_vec.safe_grow_cleared (g->num_nodes);
445 }
446 
447 /* Make sure that node_sched_param_vec has an entry for every move in PS.  */
448 static void
449 extend_node_sched_params (partial_schedule_ptr ps)
450 {
451   node_sched_param_vec.safe_grow_cleared (ps->g->num_nodes
452 					  + ps->reg_moves.length ());
453 }
454 
455 /* Update the sched_params (time, row and stage) for node U using the II,
456    the CYCLE of U and MIN_CYCLE.
457    We're not simply taking the following
458    SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
459    because the stages may not be aligned on cycle 0.  */
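/* Worked example (illustrative values): with II = 4 and MIN_CYCLE = -6,
   sc_until_cycle_zero is CALC_STAGE_COUNT (-1, -6, 4) = 2, so cycles
   -6, -3, 0 and 5 map to stages 0, 1, 2 and 3, and to rows 2, 1, 0
   and 1 respectively.  */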
460 static void
461 update_node_sched_params (int u, int ii, int cycle, int min_cycle)
462 {
463   int sc_until_cycle_zero;
464   int stage;
465 
466   SCHED_TIME (u) = cycle;
467   SCHED_ROW (u) = SMODULO (cycle, ii);
468 
469   /* The calculation of stage count is done by adding the number
470      of stages before cycle zero and after cycle zero.  */
471   sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
472 
473   if (SCHED_TIME (u) < 0)
474     {
475       stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
476       SCHED_STAGE (u) = sc_until_cycle_zero - stage;
477     }
478   else
479     {
480       stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
481       SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
482     }
483 }
484 
485 static void
486 print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
487 {
488   int i;
489 
490   if (! file)
491     return;
492   for (i = 0; i < num_nodes; i++)
493     {
494       node_sched_params_ptr nsp = SCHED_PARAMS (i);
495 
496       fprintf (file, "Node = %d; INSN = %d\n", i,
497 	       INSN_UID (ps_rtl_insn (ps, i)));
498       fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
499       fprintf (file, " time = %d:\n", nsp->time);
500       fprintf (file, " stage = %d:\n", nsp->stage);
501     }
502 }
503 
504 /* Set SCHED_COLUMN for each instruction in row ROW of PS.  */
505 static void
506 set_columns_for_row (partial_schedule_ptr ps, int row)
507 {
508   ps_insn_ptr cur_insn;
509   int column;
510 
511   column = 0;
512   for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
513     SCHED_COLUMN (cur_insn->id) = column++;
514 }
515 
516 /* Set SCHED_COLUMN for each instruction in PS.  */
517 static void
518 set_columns_for_ps (partial_schedule_ptr ps)
519 {
520   int row;
521 
522   for (row = 0; row < ps->ii; row++)
523     set_columns_for_row (ps, row);
524 }
525 
526 /* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
527    Its single predecessor has already been scheduled, as have its
528    ddg node successors.  (The move may also have another move as its
529    successor, in which case that successor will be scheduled later.)
530 
531    The move is part of a chain that satisfies register dependencies
532    between a producing ddg node and various consuming ddg nodes.
533    If some of these dependencies have a distance of 1 (meaning that
534    the use is upward-exposed) then DISTANCE1_USES is nonnull and
535    contains the set of uses with distance-1 dependencies.
536    DISTANCE1_USES is null otherwise.
537 
538    MUST_FOLLOW is a scratch bitmap that is big enough to hold
539    all current ps_insn ids.
540 
541    Return true on success.  */
542 static bool
543 schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
544 		   sbitmap distance1_uses, sbitmap must_follow)
545 {
546   unsigned int u;
547   int this_time, this_distance, this_start, this_end, this_latency;
548   int start, end, c, ii;
549   sbitmap_iterator sbi;
550   ps_reg_move_info *move;
551   rtx_insn *this_insn;
552   ps_insn_ptr psi;
553 
554   move = ps_reg_move (ps, i_reg_move);
555   ii = ps->ii;
556   if (dump_file)
557     {
558       fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
559 	       ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
560 	       PS_MIN_CYCLE (ps));
561       print_rtl_single (dump_file, move->insn);
562       fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
563       fprintf (dump_file, "=========== =========== =====\n");
564     }
565 
566   start = INT_MIN;
567   end = INT_MAX;
568 
569   /* For dependencies of distance 1 between a producer ddg node A
570      and consumer ddg node B, we have a chain of dependencies:
571 
572         A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
573 
574      where Mi is the ith move.  For dependencies of distance 0 between
575      a producer ddg node A and consumer ddg node C, we have a chain of
576      dependencies:
577 
578         A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
579 
580      where Mi' occupies the same position as Mi but occurs a stage later.
581      We can only schedule each move once, so if we have both types of
582      chain, we model the second as:
583 
584         A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
585 
586      First handle the dependencies between the previously-scheduled
587      predecessor and the move.  */
588   this_insn = ps_rtl_insn (ps, move->def);
589   this_latency = insn_latency (this_insn, move->insn);
590   this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
591   this_time = SCHED_TIME (move->def) - this_distance * ii;
592   this_start = this_time + this_latency;
593   this_end = this_time + ii;
594   if (dump_file)
595     fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
596 	     this_start, this_end, SCHED_TIME (move->def),
597 	     INSN_UID (this_insn), this_latency, this_distance,
598 	     INSN_UID (move->insn));
599 
600   if (start < this_start)
601     start = this_start;
602   if (end > this_end)
603     end = this_end;
604 
605   /* Handle the dependencies between the move and previously-scheduled
606      successors.  */
607   EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi)
608     {
609       this_insn = ps_rtl_insn (ps, u);
610       this_latency = insn_latency (move->insn, this_insn);
611       if (distance1_uses && !bitmap_bit_p (distance1_uses, u))
612 	this_distance = -1;
613       else
614 	this_distance = 0;
615       this_time = SCHED_TIME (u) + this_distance * ii;
616       this_start = this_time - ii;
617       this_end = this_time - this_latency;
618       if (dump_file)
619 	fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
620 		 this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
621 		 this_latency, this_distance, INSN_UID (this_insn));
622 
623       if (start < this_start)
624 	start = this_start;
625       if (end > this_end)
626 	end = this_end;
627     }
628 
629   if (dump_file)
630     {
631       fprintf (dump_file, "----------- ----------- -----\n");
632       fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
633     }
634 
635   bitmap_clear (must_follow);
636   bitmap_set_bit (must_follow, move->def);
637 
638   start = MAX (start, end - (ii - 1));
639   for (c = end; c >= start; c--)
640     {
641       psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
642 					 move->uses, must_follow);
643       if (psi)
644 	{
645 	  update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
646 	  if (dump_file)
647 	    fprintf (dump_file, "\nScheduled register move INSN %d at"
648 		     " time %d, row %d\n\n", INSN_UID (move->insn), c,
649 		     SCHED_ROW (i_reg_move));
650 	  return true;
651 	}
652     }
653 
654   if (dump_file)
655     fprintf (dump_file, "\nNo available slot\n\n");
656 
657   return false;
658 }
659 
660 /*
661    Breaking intra-loop register anti-dependences:
662    Each intra-loop register anti-dependence implies a cross-iteration true
663    dependence of distance 1. Therefore, we can remove such false dependencies
664    and figure out if the partial schedule broke them by checking if (for a
665    true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
666    if so generate a register move.   The number of such moves is equal to:
667               SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
668    nreg_moves = ----------------------------------- + 1 - {   dependence.
669                             ii                          { 1 if not.
670 */
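/* For instance (illustrative schedule), with ii = 4, a value defined at
   cycle 1 and read through a distance-0 true dependence at cycle 9 lives
   across (9 - 1) / 4 = 2 initiation intervals, so two register copies are
   generated for it below (assuming the same-row column adjustment does
   not apply).  */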
671 static bool
672 schedule_reg_moves (partial_schedule_ptr ps)
673 {
674   ddg_ptr g = ps->g;
675   int ii = ps->ii;
676   int i;
677 
678   for (i = 0; i < g->num_nodes; i++)
679     {
680       ddg_node_ptr u = &g->nodes[i];
681       ddg_edge_ptr e;
682       int nreg_moves = 0, i_reg_move;
683       rtx prev_reg, old_reg;
684       int first_move;
685       int distances[2];
686       sbitmap distance1_uses;
687       rtx set = single_set (u->insn);
688 
689       /* Skip instructions that do not set a register.  */
690       if (set && !REG_P (SET_DEST (set)))
691         continue;
692 
693       /* Compute the number of reg_moves needed for u, by looking at life
694 	 ranges started at u (excluding self-loops).  */
695       distances[0] = distances[1] = false;
696       for (e = u->out; e; e = e->next_out)
697 	if (e->type == TRUE_DEP && e->dest != e->src)
698 	  {
699 	    int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
700 				- SCHED_TIME (e->src->cuid)) / ii;
701 
702             if (e->distance == 1)
703               nreg_moves4e = (SCHED_TIME (e->dest->cuid)
704 			      - SCHED_TIME (e->src->cuid) + ii) / ii;
705 
706 	    /* If dest precedes src in the schedule of the kernel, then dest
707 	       will read before src writes and we can save one reg_copy.  */
708 	    if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
709 		&& SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
710 	      nreg_moves4e--;
711 
712             if (nreg_moves4e >= 1)
713 	      {
714 		/* !single_set instructions are not supported yet and
715 		   thus we do not expect to encounter them in the loop
716 		   except in the doloop part.  For the latter case
717 		   we assume no regmoves are generated as the doloop
718 		   instructions are tied to the branch with an edge.  */
719 		gcc_assert (set);
720 		/* If the instruction contains an auto-inc register then
721 		   validate that the regmove is being generated for the
722 		   target register rather than the inc'ed register.  */
723 		gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
724 	      }
725 
726 	    if (nreg_moves4e)
727 	      {
728 		gcc_assert (e->distance < 2);
729 		distances[e->distance] = true;
730 	      }
731 	    nreg_moves = MAX (nreg_moves, nreg_moves4e);
732 	  }
733 
734       if (nreg_moves == 0)
735 	continue;
736 
737       /* Create NREG_MOVES register moves.  */
738       first_move = ps->reg_moves.length ();
739       ps->reg_moves.safe_grow_cleared (first_move + nreg_moves);
740       extend_node_sched_params (ps);
741 
742       /* Record the moves associated with this node.  */
743       first_move += ps->g->num_nodes;
744 
745       /* Generate each move.  */
746       old_reg = prev_reg = SET_DEST (set);
747       if (HARD_REGISTER_P (old_reg))
748 	return false;
749 
750       for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
751 	{
752 	  ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
753 
754 	  move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
755 	  move->uses = sbitmap_alloc (first_move + nreg_moves);
756 	  move->old_reg = old_reg;
757 	  move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
758 	  move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
759 	  move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg));
760 	  bitmap_clear (move->uses);
761 
762 	  prev_reg = move->new_reg;
763 	}
764 
765       distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
766 
767       if (distance1_uses)
768 	bitmap_clear (distance1_uses);
769 
770       /* Every use of the register defined by node may require a different
771 	 copy of this register, depending on the time the use is scheduled.
772 	 Record which uses require which move results.  */
773       for (e = u->out; e; e = e->next_out)
774 	if (e->type == TRUE_DEP && e->dest != e->src)
775 	  {
776 	    int dest_copy = (SCHED_TIME (e->dest->cuid)
777 			     - SCHED_TIME (e->src->cuid)) / ii;
778 
779 	    if (e->distance == 1)
780 	      dest_copy = (SCHED_TIME (e->dest->cuid)
781 			   - SCHED_TIME (e->src->cuid) + ii) / ii;
782 
783 	    if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
784 		&& SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
785 	      dest_copy--;
786 
787 	    if (dest_copy)
788 	      {
789 		ps_reg_move_info *move;
790 
791 		move = ps_reg_move (ps, first_move + dest_copy - 1);
792 		bitmap_set_bit (move->uses, e->dest->cuid);
793 		if (e->distance == 1)
794 		  bitmap_set_bit (distance1_uses, e->dest->cuid);
795 	      }
796 	  }
797 
798       auto_sbitmap must_follow (first_move + nreg_moves);
799       for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
800 	if (!schedule_reg_move (ps, first_move + i_reg_move,
801 				distance1_uses, must_follow))
802 	  break;
803       if (distance1_uses)
804 	sbitmap_free (distance1_uses);
805       if (i_reg_move < nreg_moves)
806 	return false;
807     }
808   return true;
809 }
810 
811 /* Emit the moves associated with PS.  Apply the substitutions
812    associated with them.  */
813 static void
814 apply_reg_moves (partial_schedule_ptr ps)
815 {
816   ps_reg_move_info *move;
817   int i;
818 
819   FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
820     {
821       unsigned int i_use;
822       sbitmap_iterator sbi;
823 
824       EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi)
825 	{
826 	  replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
827 	  df_insn_rescan (ps->g->nodes[i_use].insn);
828 	}
829     }
830 }
831 
832 /* Shift the SCHED_TIMEs of all nodes down by AMOUNT and set the values of
833    SCHED_ROW and SCHED_STAGE.  An instruction scheduled at cycle AMOUNT
834    will move to cycle zero.  */
835 static void
836 reset_sched_times (partial_schedule_ptr ps, int amount)
837 {
838   int row;
839   int ii = ps->ii;
840   ps_insn_ptr crr_insn;
841 
842   for (row = 0; row < ii; row++)
843     for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
844       {
845 	int u = crr_insn->id;
846 	int normalized_time = SCHED_TIME (u) - amount;
847 	int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
848 
849         if (dump_file)
850           {
851             /* Print the scheduling times after the rotation.  */
852 	    rtx_insn *insn = ps_rtl_insn (ps, u);
853 
854             fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
855                      "crr_insn->cycle=%d, min_cycle=%d", u,
856                      INSN_UID (insn), normalized_time, new_min_cycle);
857             if (JUMP_P (insn))
858               fprintf (dump_file, " (branch)");
859             fprintf (dump_file, "\n");
860           }
861 
862 	gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
863 	gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
864 
865 	crr_insn->cycle = normalized_time;
866 	update_node_sched_params (u, ii, normalized_time, new_min_cycle);
867       }
868 }
869 
870 /* Permute the insns according to their order in PS, from row 0 to
871    row ii-1, and position them right before LAST.  This schedules
872    the insns of the loop kernel.  */
873 static void
874 permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last)
875 {
876   int ii = ps->ii;
877   int row;
878   ps_insn_ptr ps_ij;
879 
880   for (row = 0; row < ii ; row++)
881     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
882       {
883 	rtx_insn *insn = ps_rtl_insn (ps, ps_ij->id);
884 
885 	if (PREV_INSN (last) != insn)
886 	  {
887 	    if (ps_ij->id < ps->g->num_nodes)
888 	      reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
889 				  PREV_INSN (last));
890 	    else
891 	      add_insn_before (insn, last, NULL);
892 	  }
893       }
894 }
895 
896 /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
897    respectively, but only if cycle C falls on a boundary of the scheduling
898    window marked by the START and END cycles.  STEP is the
899    direction of the window.  */
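/* For example (hypothetical window), with START = 2, END = 6 and STEP = 1,
   only cycle 2 receives MUST_PRECEDE and only cycle 5 (= END - STEP)
   receives MUST_FOLLOW; every other cycle gets neither bitmap.  */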
900 static inline void
901 set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
902 			 sbitmap *tmp_precede, sbitmap must_precede, int c,
903 			 int start, int end, int step)
904 {
905   *tmp_precede = NULL;
906   *tmp_follow = NULL;
907 
908   if (c == start)
909     {
910       if (step == 1)
911 	*tmp_precede = must_precede;
912       else			/* step == -1.  */
913 	*tmp_follow = must_follow;
914     }
915   if (c == end - step)
916     {
917       if (step == 1)
918 	*tmp_follow = must_follow;
919       else			/* step == -1.  */
920 	*tmp_precede = must_precede;
921     }
922 
923 }
924 
925 /* Return True if the branch can be moved to row ii-1 while
926    normalizing the partial schedule PS to start from cycle zero and thus
927    optimize the SC.  Otherwise return False.  */
928 static bool
929 optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
930 {
931   int amount = PS_MIN_CYCLE (ps);
932   int start, end, step;
933   int ii = ps->ii;
934   bool ok = false;
935   int stage_count, stage_count_curr;
936 
937   /* Compare the SC after normalization and SC after bringing the branch
938      to row ii-1.  If they are equal just bail out.  */
939   stage_count = calculate_stage_count (ps, amount);
940   stage_count_curr =
941     calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
942 
943   if (stage_count == stage_count_curr)
944     {
945       if (dump_file)
946 	fprintf (dump_file, "SMS SC already optimized.\n");
947 
948       return false;
949     }
950 
951   if (dump_file)
952     {
953       fprintf (dump_file, "SMS Trying to optimize branch location\n");
954       fprintf (dump_file, "SMS partial schedule before trial:\n");
955       print_partial_schedule (ps, dump_file);
956     }
957 
958   /* First, normalize the partial schedule.  */
959   reset_sched_times (ps, amount);
960   rotate_partial_schedule (ps, amount);
961   if (dump_file)
962     {
963       fprintf (dump_file,
964 	       "SMS partial schedule after normalization (ii %d, SC %d):\n",
965 	       ii, stage_count);
966       print_partial_schedule (ps, dump_file);
967     }
968 
969   if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
970     return true;
971 
972   auto_sbitmap sched_nodes (g->num_nodes);
973   bitmap_ones (sched_nodes);
974 
975   /* Calculate the new placement of the branch.  It should be in row
976      ii-1 and fall into its scheduling window.  */
977   if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
978 			&step, &end) == 0)
979     {
980       bool success;
981       ps_insn_ptr next_ps_i;
982       int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
983       int row = SMODULO (branch_cycle, ps->ii);
984       int num_splits = 0;
985       sbitmap tmp_precede, tmp_follow;
986       int min_cycle, c;
987 
988       if (dump_file)
989 	fprintf (dump_file, "\nTrying to schedule node %d "
990 		 "INSN = %d  in (%d .. %d) step %d\n",
991 		 g->closing_branch->cuid,
992 		 (INSN_UID (g->closing_branch->insn)), start, end, step);
993 
994       gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
995       if (step == 1)
996 	{
997 	  c = start + ii - SMODULO (start, ii) - 1;
998 	  gcc_assert (c >= start);
999 	  if (c >= end)
1000 	    {
1001 	      if (dump_file)
1002 		fprintf (dump_file,
1003 			 "SMS failed to schedule branch at cycle: %d\n", c);
1004 	      return false;
1005 	    }
1006 	}
1007       else
1008 	{
1009 	  c = start - SMODULO (start, ii) - 1;
1010 	  gcc_assert (c <= start);
1011 
1012 	  if (c <= end)
1013 	    {
1014 	      if (dump_file)
1015 		fprintf (dump_file,
1016 			 "SMS failed to schedule branch at cycle: %d\n", c);
1017 	      return false;
1018 	    }
1019 	}
1020 
1021       auto_sbitmap must_precede (g->num_nodes);
1022       auto_sbitmap must_follow (g->num_nodes);
1023 
1024       /* Try to schedule the branch in its new cycle.  */
1025       calculate_must_precede_follow (g->closing_branch, start, end,
1026 				     step, ii, sched_nodes,
1027 				     must_precede, must_follow);
1028 
1029       set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1030 			       must_precede, c, start, end, step);
1031 
1032       /* Find the element in the partial schedule related to the closing
1033          branch so we can remove it from its current cycle.  */
1034       for (next_ps_i = ps->rows[row];
1035 	   next_ps_i; next_ps_i = next_ps_i->next_in_row)
1036 	if (next_ps_i->id == g->closing_branch->cuid)
1037 	  break;
1038 
1039       min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1040       remove_node_from_ps (ps, next_ps_i);
1041       success =
1042 	try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
1043 				      sched_nodes, &num_splits,
1044 				      tmp_precede, tmp_follow);
1045       gcc_assert (num_splits == 0);
1046       if (!success)
1047 	{
1048 	  if (dump_file)
1049 	    fprintf (dump_file,
1050 		     "SMS failed to schedule branch at cycle: %d, "
1051 		     "bringing it back to cycle %d\n", c, branch_cycle);
1052 
1053 	  /* The branch failed to be placed in row ii - 1.
1054 	     Put it back in its original place in the partial
1055 	     schedule.  */
1056 	  set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1057 				   must_precede, branch_cycle, start, end,
1058 				   step);
1059 	  success =
1060 	    try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
1061 					  branch_cycle, sched_nodes,
1062 					  &num_splits, tmp_precede,
1063 					  tmp_follow);
1064 	  gcc_assert (success && (num_splits == 0));
1065 	  ok = false;
1066 	}
1067       else
1068 	{
1069 	  /* The branch is placed in row ii - 1.  */
1070 	  if (dump_file)
1071 	    fprintf (dump_file,
1072 		     "SMS success in moving branch to cycle %d\n", c);
1073 
1074 	  update_node_sched_params (g->closing_branch->cuid, ii, c,
1075 				    PS_MIN_CYCLE (ps));
1076 	  ok = true;
1077 	}
1078 
1079       /* This might have been added to a new first stage.  */
1080       if (PS_MIN_CYCLE (ps) < min_cycle)
1081 	reset_sched_times (ps, 0);
1082     }
1083 
1084   return ok;
1085 }
1086 
1087 static void
1088 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
1089 			   int to_stage, rtx count_reg)
1090 {
1091   int row;
1092   ps_insn_ptr ps_ij;
1093 
1094   for (row = 0; row < ps->ii; row++)
1095     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
1096       {
1097 	int u = ps_ij->id;
1098 	int first_u, last_u;
1099 	rtx_insn *u_insn;
1100 
1101         /* Do not duplicate any insn which refers to count_reg as it
1102            belongs to the control part.
1103            The closing branch is scheduled as well and thus should
1104            be ignored.
1105            TODO: This should be done by analyzing the control part of
1106            the loop.  */
1107 	u_insn = ps_rtl_insn (ps, u);
1108         if (reg_mentioned_p (count_reg, u_insn)
1109             || JUMP_P (u_insn))
1110           continue;
1111 
1112 	first_u = SCHED_STAGE (u);
1113 	last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
1114 	if (from_stage <= last_u && to_stage >= first_u)
1115 	  {
1116 	    if (u < ps->g->num_nodes)
1117 	      duplicate_insn_chain (ps_first_note (ps, u), u_insn);
1118 	    else
1119 	      emit_insn (copy_rtx (PATTERN (u_insn)));
1120 	  }
1121       }
1122 }
1123 
1124 
1125 /* Generate the instructions (including reg_moves) for prolog & epilog.  */
1126 static void
1127 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
1128                         rtx count_reg, rtx count_init)
1129 {
1130   int i;
1131   int last_stage = PS_STAGE_COUNT (ps) - 1;
1132   edge e;
1133 
1134   /* Generate the prolog, inserting its insns on the loop-entry edge.  */
1135   start_sequence ();
1136 
1137   if (!count_init)
1138     {
1139       /* Generate instructions at the beginning of the prolog to
1140          adjust the loop count by STAGE_COUNT.  If loop count is constant
1141          (count_init), this constant is adjusted by STAGE_COUNT in
1142          generate_prolog_epilog function.  */
1143       rtx sub_reg = NULL_RTX;
1144 
1145       sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
1146 				     gen_int_mode (last_stage,
1147 						   GET_MODE (count_reg)),
1148                                      count_reg, 1, OPTAB_DIRECT);
1149       gcc_assert (REG_P (sub_reg));
1150       if (REGNO (sub_reg) != REGNO (count_reg))
1151         emit_move_insn (count_reg, sub_reg);
1152     }
1153 
1154   for (i = 0; i < last_stage; i++)
1155     duplicate_insns_of_cycles (ps, 0, i, count_reg);
1156 
1157   /* Put the prolog on the entry edge.  */
1158   e = loop_preheader_edge (loop);
1159   split_edge_and_insert (e, get_insns ());
1160   if (!flag_resched_modulo_sched)
1161     e->dest->flags |= BB_DISABLE_SCHEDULE;
1162 
1163   end_sequence ();
1164 
1165   /* Generate the epilog, inserting its insns on the loop-exit edge.  */
1166   start_sequence ();
1167 
1168   for (i = 0; i < last_stage; i++)
1169     duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
1170 
1171   /* Put the epilogue on the exit edge.  */
1172   gcc_assert (single_exit (loop));
1173   e = single_exit (loop);
1174   split_edge_and_insert (e, get_insns ());
1175   if (!flag_resched_modulo_sched)
1176     e->dest->flags |= BB_DISABLE_SCHEDULE;
1177 
1178   end_sequence ();
1179 }
1180 
1181 /* Mark LOOP as software pipelined so the later
1182    scheduling passes don't touch it.  */
1183 static void
1184 mark_loop_unsched (struct loop *loop)
1185 {
1186   unsigned i;
1187   basic_block *bbs = get_loop_body (loop);
1188 
1189   for (i = 0; i < loop->num_nodes; i++)
1190     bbs[i]->flags |= BB_DISABLE_SCHEDULE;
1191 
1192   free (bbs);
1193 }
1194 
1195 /* Return true if all the BBs of the loop are empty except the
1196    loop header.  */
1197 static bool
1198 loop_single_full_bb_p (struct loop *loop)
1199 {
1200   unsigned i;
1201   basic_block *bbs = get_loop_body (loop);
1202 
1203   for (i = 0; i < loop->num_nodes ; i++)
1204     {
1205       rtx_insn *head, *tail;
1206       bool empty_bb = true;
1207 
1208       if (bbs[i] == loop->header)
1209         continue;
1210 
1211       /* Make sure that basic blocks other than the header
1212          have only notes, labels or jumps.  */
1213       get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
1214       for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
1215         {
1216           if (NOTE_P (head) || LABEL_P (head)
1217  	      || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
1218  	    continue;
1219  	  empty_bb = false;
1220  	  break;
1221         }
1222 
1223       if (! empty_bb)
1224         {
1225           free (bbs);
1226           return false;
1227         }
1228     }
1229   free (bbs);
1230   return true;
1231 }
1232 
1233 /* Dump file:line from INSN's location info to dump_file.  */
1234 
1235 static void
1236 dump_insn_location (rtx_insn *insn)
1237 {
1238   if (dump_file && INSN_HAS_LOCATION (insn))
1239     {
1240       expanded_location xloc = insn_location (insn);
1241       fprintf (dump_file, " %s:%i", xloc.file, xloc.line);
1242     }
1243 }
1244 
1245 /* A simple loop from SMS point of view; it is a loop that is composed of
1246    either a single basic block or two BBs - a header and a latch.  */
1247 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) 		    \
1248 				  && (EDGE_COUNT (loop->latch->preds) == 1) \
1249                                   && (EDGE_COUNT (loop->latch->succs) == 1))
1250 
1251 /* Return true if the loop is in its canonical form and false if not.
1252    i.e. it satisfies SIMPLE_SMS_LOOP_P, has one preheader block, and a single exit.  */
1253 static bool
1254 loop_canon_p (struct loop *loop)
1255 {
1256 
1257   if (loop->inner || !loop_outer (loop))
1258   {
1259     if (dump_file)
1260       fprintf (dump_file, "SMS loop inner or !loop_outer\n");
1261     return false;
1262   }
1263 
1264   if (!single_exit (loop))
1265     {
1266       if (dump_file)
1267 	{
1268 	  rtx_insn *insn = BB_END (loop->header);
1269 
1270 	  fprintf (dump_file, "SMS loop many exits");
1271 	  dump_insn_location (insn);
1272 	  fprintf (dump_file, "\n");
1273 	}
1274       return false;
1275     }
1276 
1277   if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
1278     {
1279       if (dump_file)
1280 	{
1281 	  rtx_insn *insn = BB_END (loop->header);
1282 
1283 	  fprintf (dump_file, "SMS loop many BBs.");
1284 	  dump_insn_location (insn);
1285 	  fprintf (dump_file, "\n");
1286 	}
1287       return false;
1288     }
1289 
1290     return true;
1291 }
1292 
1293 /* If there is more than one entry to the loop,
1294    make it one by splitting the first entry edge and
1295    redirecting the others to the new BB.  */
1296 static void
1297 canon_loop (struct loop *loop)
1298 {
1299   edge e;
1300   edge_iterator i;
1301 
1302   /* Avoid annoying special cases of edges going to exit
1303      block.  */
1304   FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1305     if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
1306       split_edge (e);
1307 
1308   if (loop->latch == loop->header
1309       || EDGE_COUNT (loop->latch->succs) > 1)
1310     {
1311       FOR_EACH_EDGE (e, i, loop->header->preds)
1312         if (e->src == loop->latch)
1313           break;
1314       split_edge (e);
1315     }
1316 }
1317 
1318 /* Setup infos.  */
1319 static void
1320 setup_sched_infos (void)
1321 {
1322   memcpy (&sms_common_sched_info, &haifa_common_sched_info,
1323 	  sizeof (sms_common_sched_info));
1324   sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
1325   common_sched_info = &sms_common_sched_info;
1326 
1327   sched_deps_info = &sms_sched_deps_info;
1328   current_sched_info = &sms_sched_info;
1329 }
1330 
1331 /* Probability in % that the sms-ed loop rolls enough so that the optimized
1332    version may be entered.  Just a guess.  */
1333 #define PROB_SMS_ENOUGH_ITERATIONS 80
1334 
1335 /* Used to calculate the upper bound of ii.  */
1336 #define MAXII_FACTOR 2
1337 
1338 /* Main entry point, perform SMS scheduling on the loops of the function
1339    that consist of single basic blocks.  */
1340 static void
1341 sms_schedule (void)
1342 {
1343   rtx_insn *insn;
1344   ddg_ptr *g_arr, g;
1345   int * node_order;
1346   int maxii, max_asap;
1347   partial_schedule_ptr ps;
1348   basic_block bb = NULL;
1349   struct loop *loop;
1350   basic_block condition_bb = NULL;
1351   edge latch_edge;
1352   HOST_WIDE_INT trip_count, max_trip_count;
1353 
1354   loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1355 		       | LOOPS_HAVE_RECORDED_EXITS);
1356   if (number_of_loops (cfun) <= 1)
1357     {
1358       loop_optimizer_finalize ();
1359       return;  /* There are no loops to schedule.  */
1360     }
1361 
1362   /* Initialize issue_rate.  */
1363   if (targetm.sched.issue_rate)
1364     {
1365       int temp = reload_completed;
1366 
1367       reload_completed = 1;
1368       issue_rate = targetm.sched.issue_rate ();
1369       reload_completed = temp;
1370     }
1371   else
1372     issue_rate = 1;
1373 
1374   /* Initialize the scheduler.  */
1375   setup_sched_infos ();
1376   haifa_sched_init ();
1377 
1378   /* Allocate memory to hold the DDG array, one entry for each loop.
1379      We use loop->num as index into this array.  */
1380   g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun));
1381 
1382   if (dump_file)
1383   {
1384     fprintf (dump_file, "\n\nSMS analysis phase\n");
1385     fprintf (dump_file, "===================\n\n");
1386   }
1387 
1388   /* Build DDGs for all the relevant loops and hold them in G_ARR
1389      indexed by the loop index.  */
1390   FOR_EACH_LOOP (loop, 0)
1391     {
1392       rtx_insn *head, *tail;
1393       rtx count_reg;
1394 
1395       /* For debugging.  */
1396       if (dbg_cnt (sms_sched_loop) == false)
1397         {
1398           if (dump_file)
1399             fprintf (dump_file, "SMS reached max limit... \n");
1400 
1401 	  break;
1402         }
1403 
1404       if (dump_file)
1405 	{
1406 	  rtx_insn *insn = BB_END (loop->header);
1407 
1408 	  fprintf (dump_file, "SMS loop num: %d", loop->num);
1409 	  dump_insn_location (insn);
1410 	  fprintf (dump_file, "\n");
1411 	}
1412 
1413       if (! loop_canon_p (loop))
1414         continue;
1415 
1416       if (! loop_single_full_bb_p (loop))
1417       {
1418         if (dump_file)
1419           fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
1420 	continue;
1421       }
1422 
1423       bb = loop->header;
1424 
1425       get_ebb_head_tail (bb, bb, &head, &tail);
1426       latch_edge = loop_latch_edge (loop);
1427       gcc_assert (single_exit (loop));
1428       trip_count = get_estimated_loop_iterations_int (loop);
1429       max_trip_count = get_max_loop_iterations_int (loop);
1430 
1431       /* Perform SMS only on loops whose average count is above the threshold.  */
1432 
1433       if ( latch_edge->count () > profile_count::zero ()
1434           && (latch_edge->count()
1435 	      < single_exit (loop)->count ().apply_scale
1436 				 (SMS_LOOP_AVERAGE_COUNT_THRESHOLD, 1)))
1437 	{
1438 	  if (dump_file)
1439 	    {
1440 	      dump_insn_location (tail);
1441 	      fprintf (dump_file, "\nSMS single-bb-loop\n");
1442 	      if (profile_info && flag_branch_probabilities)
1443 	    	{
1444 	      	  fprintf (dump_file, "SMS loop-count ");
1445 	      	  fprintf (dump_file, "%" PRId64,
1446 	             	   (int64_t) bb->count.to_gcov_type ());
1447 	      	  fprintf (dump_file, "\n");
1448                   fprintf (dump_file, "SMS trip-count ");
1449                   fprintf (dump_file, "%" PRId64 "max %" PRId64,
1450                            (int64_t) trip_count, (int64_t) max_trip_count);
1451                   fprintf (dump_file, "\n");
1452 	      	  fprintf (dump_file, "SMS profile-sum-max ");
1453 	      	  fprintf (dump_file, "%" PRId64,
1454 	          	   (int64_t) profile_info->sum_max);
1455 	      	  fprintf (dump_file, "\n");
1456 	    	}
1457 	    }
1458           continue;
1459         }
1460 
1461       /* Make sure this is a doloop.  */
1462       if ( !(count_reg = doloop_register_get (head, tail)))
1463       {
1464         if (dump_file)
1465           fprintf (dump_file, "SMS doloop_register_get failed\n");
1466 	continue;
1467       }
1468 
1469       /* Don't handle BBs with calls or barriers
1470 	 or !single_set instructions, with the exception of instructions that
1471 	 include count_reg---these instructions are part of the control part
1472 	 that the do-loop pattern recognizes.
1473          ??? Should handle insns defining subregs.  */
1474      for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1475       {
1476          rtx set;
1477 
1478         if (CALL_P (insn)
1479             || BARRIER_P (insn)
1480             || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1481                 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1482                 && !reg_mentioned_p (count_reg, insn))
1483             || (INSN_P (insn) && (set = single_set (insn))
1484                 && GET_CODE (SET_DEST (set)) == SUBREG))
1485         break;
1486       }
1487 
1488       if (insn != NEXT_INSN (tail))
1489 	{
1490 	  if (dump_file)
1491 	    {
1492 	      if (CALL_P (insn))
1493 		fprintf (dump_file, "SMS loop-with-call\n");
1494 	      else if (BARRIER_P (insn))
1495 		fprintf (dump_file, "SMS loop-with-barrier\n");
1496               else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1497                 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1498                 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1499               else
1500                fprintf (dump_file, "SMS loop with subreg in lhs\n");
1501 	      print_rtl_single (dump_file, insn);
1502 	    }
1503 
1504 	  continue;
1505 	}
1506 
1507       /* Always schedule the closing branch with the rest of the
1508          instructions. The branch is rotated to be in row ii-1 at the
1509          end of the scheduling procedure to make sure it's the last
1510          instruction in the iteration.  */
1511       if (! (g = create_ddg (bb, 1)))
1512         {
1513           if (dump_file)
1514 	    fprintf (dump_file, "SMS create_ddg failed\n");
1515 	  continue;
1516         }
1517 
1518       g_arr[loop->num] = g;
1519       if (dump_file)
1520         fprintf (dump_file, "...OK\n");
1521 
1522     }
1523   if (dump_file)
1524   {
1525     fprintf (dump_file, "\nSMS transformation phase\n");
1526     fprintf (dump_file, "=========================\n\n");
1527   }
1528 
1529   /* We don't want to perform SMS on new loops - created by versioning.  */
1530   FOR_EACH_LOOP (loop, 0)
1531     {
1532       rtx_insn *head, *tail;
1533       rtx count_reg;
1534       rtx_insn *count_init;
1535       int mii, rec_mii, stage_count, min_cycle;
1536       int64_t loop_count = 0;
1537       bool opt_sc_p;
1538 
1539       if (! (g = g_arr[loop->num]))
1540         continue;
1541 
1542       if (dump_file)
1543 	{
1544 	  rtx_insn *insn = BB_END (loop->header);
1545 
1546 	  fprintf (dump_file, "SMS loop num: %d", loop->num);
1547 	  dump_insn_location (insn);
1548 	  fprintf (dump_file, "\n");
1549 
1550 	  print_ddg (dump_file, g);
1551 	}
1552 
1553       get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1554 
1555       latch_edge = loop_latch_edge (loop);
1556       gcc_assert (single_exit (loop));
1557       trip_count = get_estimated_loop_iterations_int (loop);
1558       max_trip_count = get_max_loop_iterations_int (loop);
1559 
1560       if (dump_file)
1561 	{
1562 	  dump_insn_location (tail);
1563 	  fprintf (dump_file, "\nSMS single-bb-loop\n");
1564 	  if (profile_info && flag_branch_probabilities)
1565 	    {
1566 	      fprintf (dump_file, "SMS loop-count ");
1567 	      fprintf (dump_file, "%" PRId64,
1568 	               (int64_t) bb->count.to_gcov_type ());
1569 	      fprintf (dump_file, "\n");
1570 	      fprintf (dump_file, "SMS profile-sum-max ");
1571 	      fprintf (dump_file, "%" PRId64,
1572 	               (int64_t) profile_info->sum_max);
1573 	      fprintf (dump_file, "\n");
1574 	    }
1575 	  fprintf (dump_file, "SMS doloop\n");
1576 	  fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1577           fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1578           fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1579 	}
1580 
1581 
1582       /* If the loop has a doloop register, it gets special
1583 	 handling.  */
1584       count_init = NULL;
1585       if ((count_reg = doloop_register_get (head, tail)))
1586 	{
1587 	  basic_block pre_header;
1588 
1589 	  pre_header = loop_preheader_edge (loop)->src;
1590 	  count_init = const_iteration_count (count_reg, pre_header,
1591 					      &loop_count);
1592 	}
1593       gcc_assert (count_reg);
1594 
1595       if (dump_file && count_init)
1596         {
1597           fprintf (dump_file, "SMS const-doloop ");
1598           fprintf (dump_file, "%" PRId64,
1599 		     loop_count);
1600           fprintf (dump_file, "\n");
1601         }
1602 
1603       node_order = XNEWVEC (int, g->num_nodes);
1604 
1605       mii = 1; /* Need to pass some estimate of mii.  */
1606       rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1607       mii = MAX (res_MII (g), rec_mii);
1608       mii = MAX (mii, 1);
1609       maxii = MAX (max_asap, MAXII_FACTOR * mii);
1610 
1611       if (dump_file)
1612 	fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1613 		 rec_mii, mii, maxii);
1614 
1615       for (;;)
1616 	{
1617 	  set_node_sched_params (g);
1618 
1619 	  stage_count = 0;
1620 	  opt_sc_p = false;
1621 	  ps = sms_schedule_by_order (g, mii, maxii, node_order);
1622 
1623 	  if (ps)
1624 	    {
1625 	      /* Try to achieve optimized SC by normalizing the partial
1626 		 schedule (having the cycles start from cycle zero).
1627 		 The branch location must be placed in row ii-1 in the
1628 		 final scheduling.	If failed, shift all instructions to
1629 		 position the branch in row ii-1.  */
1630 	      opt_sc_p = optimize_sc (ps, g);
1631 	      if (opt_sc_p)
1632 		stage_count = calculate_stage_count (ps, 0);
1633 	      else
1634 		{
1635 		  /* Bring the branch to cycle ii-1.  */
1636 		  int amount = (SCHED_TIME (g->closing_branch->cuid)
1637 				- (ps->ii - 1));
1638 
1639 		  if (dump_file)
1640 		    fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1641 
1642 		  stage_count = calculate_stage_count (ps, amount);
1643 		}
1644 
1645 	      gcc_assert (stage_count >= 1);
1646 	    }
1647 
1648 	  /* The default value of PARAM_SMS_MIN_SC is 2, as a stage count of
1649 	     1 means that there is no interleaving between iterations, thus
1650 	     we let the scheduling passes do the job in this case.  */
1651 	  if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC)
1652 	      || (count_init && (loop_count <= stage_count))
1653 	      || (max_trip_count >= 0 && max_trip_count <= stage_count)
1654 	      || (trip_count >= 0 && trip_count <= stage_count))
1655 	    {
1656 	      if (dump_file)
1657 		{
1658 		  fprintf (dump_file, "SMS failed... \n");
1659 		  fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
1660 			   " loop-count=", stage_count);
1661 		  fprintf (dump_file, "%" PRId64, loop_count);
1662 		  fprintf (dump_file, ", trip-count=");
1663 		  fprintf (dump_file, "%" PRId64 " max %" PRId64,
1664 			   (int64_t) trip_count, (int64_t) max_trip_count);
1665 		  fprintf (dump_file, ")\n");
1666 		}
1667 	      break;
1668 	    }
1669 
1670           if (!opt_sc_p)
1671             {
1672 	      /* Rotate the partial schedule to have the branch in row ii-1.  */
1673               int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
1674 
1675               reset_sched_times (ps, amount);
1676               rotate_partial_schedule (ps, amount);
1677             }
1678 
1679 	  set_columns_for_ps (ps);
1680 
1681 	  min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1682 	  if (!schedule_reg_moves (ps))
1683 	    {
1684 	      mii = ps->ii + 1;
1685 	      free_partial_schedule (ps);
1686 	      continue;
1687 	    }
1688 
1689 	  /* Moves that handle incoming values might have been added
1690 	     to a new first stage.  Bump the stage count if so.
1691 
1692 	     ??? Perhaps we could consider rotating the schedule here
1693 	     instead?  */
1694 	  if (PS_MIN_CYCLE (ps) < min_cycle)
1695 	    {
1696 	      reset_sched_times (ps, 0);
1697 	      stage_count++;
1698 	    }
1699 
1700 	  /* The stage count should now be correct without rotation.  */
1701 	  gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
1702 	  PS_STAGE_COUNT (ps) = stage_count;
1703 
1704 	  canon_loop (loop);
1705 
1706           if (dump_file)
1707             {
1708 	      dump_insn_location (tail);
1709 	      fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n",
1710 		       ps->ii, stage_count);
1711 	      print_partial_schedule (ps, dump_file);
1712 	    }
1713 
1714           /* In case the BCT count is not known, do loop-versioning.  */
1715 	  if (count_reg && ! count_init)
1716             {
1717 	      rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
1718 					 gen_int_mode (stage_count,
1719 						       GET_MODE (count_reg)));
1720 	      profile_probability prob = profile_probability::guessed_always ()
1721 				.apply_scale (PROB_SMS_ENOUGH_ITERATIONS, 100);
1722 
1723 	      loop_version (loop, comp_rtx, &condition_bb,
1724 	  		    prob, prob.invert (),
1725 			    prob, prob.invert (), true);
1726 	     }
1727 
1728 	  /* Set new iteration count of loop kernel.  */
1729           if (count_reg && count_init)
1730 	    SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1731 						     - stage_count + 1);
1732 
1733 	  /* Now apply the scheduled kernel to the RTL of the loop.  */
1734 	  permute_partial_schedule (ps, g->closing_branch->first_note);
1735 
1736           /* Mark this loop as software pipelined so the later
1737 	     scheduling passes don't touch it.  */
1738 	  if (! flag_resched_modulo_sched)
1739 	    mark_loop_unsched (loop);
1740 
1741 	  /* The life-info is not valid any more.  */
1742 	  df_set_bb_dirty (g->bb);
1743 
1744 	  apply_reg_moves (ps);
1745 	  if (dump_file)
1746 	    print_node_sched_params (dump_file, g->num_nodes, ps);
1747 	  /* Generate prolog and epilog.  */
1748           generate_prolog_epilog (ps, loop, count_reg, count_init);
1749 	  break;
1750 	}
1751 
1752       free_partial_schedule (ps);
1753       node_sched_param_vec.release ();
1754       free (node_order);
1755       free_ddg (g);
1756     }
1757 
1758   free (g_arr);
1759 
1760   /* Release scheduler data, needed until now because of DFA.  */
1761   haifa_sched_finish ();
1762   loop_optimizer_finalize ();
1763 }
1764 
1765 /* The SMS scheduling algorithm itself
1766    -----------------------------------
1767    Input: 'O' an ordered list of insns of a loop.
1768    Output: A scheduling of the loop - kernel, prolog, and epilogue.
1769 
1770    'Q' is the empty Set
1771    'PS' is the partial schedule; it holds the currently scheduled nodes with
1772 	their cycle/slot.
1773    'PSP' previously scheduled predecessors.
1774    'PSS' previously scheduled successors.
1775    't(u)' the cycle where u is scheduled.
1776    'l(u)' is the latency of u.
1777    'd(v,u)' is the dependence distance from v to u.
1778    'ASAP(u)' the earliest time at which u could be scheduled as computed in
1779 	     the node ordering phase.
1780    'check_hardware_resources_conflicts(u, PS, c)'
1781 			     run a trace around cycle/slot through DFA model
1782 			     to check resource conflicts involving instruction u
1783 			     at cycle c given the partial schedule PS.
1784    'add_to_partial_schedule_at_time(u, PS, c)'
1785 			     Add the node/instruction u to the partial schedule
1786 			     PS at time c.
1787    'calculate_register_pressure(PS)'
1788 			     Given a schedule of instructions, calculate the register
1789 			     pressure it implies.  One implementation could be the
1790 			     maximum number of overlapping live ranges.
1791    'maxRP' The maximum allowed register pressure, it is usually derived from the number
1792 	   registers available in the hardware.
1793 
1794    1. II = MII.
1795    2. PS = empty list
1796    3. for each node u in O in pre-computed order
1797    4.   if (PSP(u) != Q && PSS(u) == Q) then
1798    5.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over every v in PSP(u).
1799    6.     start = Early_start; end = Early_start + II - 1; step = 1
1800    11.  else if (PSP(u) == Q && PSS(u) != Q) then
1801    12.     Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over every v in PSS(u).
1802    13.     start = Late_start; end = Late_start - II + 1; step = -1
1803    14.  else if (PSP(u) != Q && PSS(u) != Q) then
1804    15.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over every v in PSP(u).
1805    16.     Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over every v in PSS(u).
1806    17.     start = Early_start;
1807    18.     end = min(Early_start + II - 1 , Late_start);
1808    19.     step = 1
1809    20.     else "if (PSP(u) == Q && PSS(u) == Q)"
1810    21.	  start = ASAP(u); end = start + II - 1; step = 1
1811    22.  endif
1812 
1813    23.  success = false
1814    24.  for (c = start ; c != end ; c += step)
1815    25.     if check_hardware_resources_conflicts(u, PS, c) then
1816    26.       add_to_partial_schedule_at_time(u, PS, c)
1817    27.       success = true
1818    28.       break
1819    29.     endif
1820    30.  endfor
1821    31.  if (success == false) then
1822    32.    II = II + 1
1823    33.    if (II > maxII) then
1824    34.       finish - failed to schedule
1825    35.	 endif
1826    36.    goto 2.
1827    37.  endif
1828    38. endfor
1829    39. if (calculate_register_pressure(PS) > maxRP) then
1830    40.    goto 32.
1831    41. endif
1832    42. compute epilogue & prologue
1833    43. finish - succeeded to schedule
1834 
1835    ??? The algorithm restricts the scheduling window to II cycles.
1836    In rare cases, it may be better to allow windows of II+1 cycles.
1837    The window would then start and end on the same row, but with
1838    different "must precede" and "must follow" requirements.  */
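
/* A minimal, self-contained sketch (not part of the pass) of how the
   scheduling window of steps 4-22 above could be derived from a node's
   already-scheduled neighbors.  struct sketch_dep and sketch_sched_window
   are hypothetical stand-ins for the DDG edge lists used by the real
   get_sched_window below; here START and END are both inclusive and STEP
   gives the traversal direction.  */

struct sketch_dep
{
  int time;		/* t(v): cycle at which the neighbor is scheduled.  */
  int latency;		/* l(v): latency of the dependence.  */
  int distance;		/* d: iteration distance of the dependence.  */
};

static void
sketch_sched_window (const struct sketch_dep *psp, int n_psp,
		     const struct sketch_dep *pss, int n_pss,
		     int ii, int asap, int *start, int *end, int *step)
{
  int early = INT_MIN, late = INT_MAX, i;

  for (i = 0; i < n_psp; i++)
    early = MAX (early, psp[i].time + psp[i].latency - psp[i].distance * ii);
  for (i = 0; i < n_pss; i++)
    late = MIN (late, pss[i].time - pss[i].latency + pss[i].distance * ii);

  if (n_psp > 0 && n_pss == 0)
    {
      /* Only predecessors scheduled: search forward from Early_start.  */
      *start = early;
      *end = early + ii - 1;
      *step = 1;
    }
  else if (n_psp == 0 && n_pss > 0)
    {
      /* Only successors scheduled: search backward from Late_start.  */
      *start = late;
      *end = late - ii + 1;
      *step = -1;
    }
  else if (n_psp > 0 && n_pss > 0)
    {
      /* Both: forward window clipped by Late_start.  */
      *start = early;
      *end = MIN (early + ii - 1, late);
      *step = 1;
    }
  else
    {
      /* Neither: fall back to the ASAP time from the ordering phase.  */
      *start = asap;
      *end = asap + ii - 1;
      *step = 1;
    }
}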
1839 
1840 /* A limit on the number of cycles that resource conflicts can span.  ??? Should
1841    be provided by DFA, and be dependent on the type of insn scheduled.  Currently
1842    set to 0 to save compile time.  */
1843 #define DFA_HISTORY SMS_DFA_HISTORY
1844 
1845 /* A threshold for the number of repeated unsuccessful attempts to insert
1846    an empty row, before we flush the partial schedule and start over.  */
1847 #define MAX_SPLIT_NUM 10
1848 /* Given the partial schedule PS, this function calculates and returns the
1849    cycles in which we can schedule the node U_NODE.
1850    NOTE: Here we do the backtracking in SMS, in some special cases.  We have
1851    noticed that there are several cases in which we fail to SMS the loop
1852    because the scheduling window of a node is empty due to tight data-deps.
1853    In such cases we want to unschedule some of the predecessors/successors
1854    until we get a non-empty scheduling window.  It returns -1 if the
1855    scheduling window is empty and zero otherwise.  */
1856 
1857 static int
1858 get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
1859 		  sbitmap sched_nodes, int ii, int *start_p, int *step_p,
1860 		  int *end_p)
1861 {
1862   int start, step, end;
1863   int early_start, late_start;
1864   ddg_edge_ptr e;
1865   auto_sbitmap psp (ps->g->num_nodes);
1866   auto_sbitmap pss (ps->g->num_nodes);
1867   sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1868   sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1869   int psp_not_empty;
1870   int pss_not_empty;
1871   int count_preds;
1872   int count_succs;
1873 
1874   /* 1. compute sched window for u (start, end, step).  */
1875   bitmap_clear (psp);
1876   bitmap_clear (pss);
1877   psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes);
1878   pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes);
1879 
1880   /* We first compute a forward range (start <= end), then decide whether
1881      to reverse it.  */
1882   early_start = INT_MIN;
1883   late_start = INT_MAX;
1884   start = INT_MIN;
1885   end = INT_MAX;
1886   step = 1;
1887 
1888   count_preds = 0;
1889   count_succs = 0;
1890 
1891   if (dump_file && (psp_not_empty || pss_not_empty))
1892     {
1893       fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
1894 	       "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
1895       fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
1896 	       "start", "early start", "late start", "end", "time");
1897       fprintf (dump_file, "=========== =========== =========== ==========="
1898 	       " =====\n");
1899     }
1900   /* Calculate early_start and limit end.  Both bounds are inclusive.  */
1901   if (psp_not_empty)
1902     for (e = u_node->in; e != 0; e = e->next_in)
1903       {
1904 	int v = e->src->cuid;
1905 
1906 	if (bitmap_bit_p (sched_nodes, v))
1907 	  {
1908 	    int p_st = SCHED_TIME (v);
1909 	    int earliest = p_st + e->latency - (e->distance * ii);
1910 	    int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
1911 
1912 	    if (dump_file)
1913 	      {
1914 		fprintf (dump_file, "%11s %11d %11s %11d %5d",
1915 			 "", earliest, "", latest, p_st);
1916 		print_ddg_edge (dump_file, e);
1917 		fprintf (dump_file, "\n");
1918 	      }
1919 
1920 	    early_start = MAX (early_start, earliest);
1921 	    end = MIN (end, latest);
1922 
1923 	    if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1924 	      count_preds++;
1925 	  }
1926       }
1927 
1928   /* Calculate late_start and limit start.  Both bounds are inclusive.  */
1929   if (pss_not_empty)
1930     for (e = u_node->out; e != 0; e = e->next_out)
1931       {
1932 	int v = e->dest->cuid;
1933 
1934 	if (bitmap_bit_p (sched_nodes, v))
1935 	  {
1936 	    int s_st = SCHED_TIME (v);
1937 	    int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
1938 	    int latest = s_st - e->latency + (e->distance * ii);
1939 
1940 	    if (dump_file)
1941 	      {
1942 		fprintf (dump_file, "%11d %11s %11d %11s %5d",
1943 			 earliest, "", latest, "", s_st);
1944 		print_ddg_edge (dump_file, e);
1945 		fprintf (dump_file, "\n");
1946 	      }
1947 
1948 	    start = MAX (start, earliest);
1949 	    late_start = MIN (late_start, latest);
1950 
1951 	    if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1952 	      count_succs++;
1953 	  }
1954       }
1955 
1956   if (dump_file && (psp_not_empty || pss_not_empty))
1957     {
1958       fprintf (dump_file, "----------- ----------- ----------- -----------"
1959 	       " -----\n");
1960       fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
1961 	       start, early_start, late_start, end, "",
1962 	       "(max, max, min, min)");
1963     }
1964 
1965   /* Get a target scheduling window no bigger than ii.  */
1966   if (early_start == INT_MIN && late_start == INT_MAX)
1967     early_start = NODE_ASAP (u_node);
1968   else if (early_start == INT_MIN)
1969     early_start = late_start - (ii - 1);
1970   late_start = MIN (late_start, early_start + (ii - 1));
1971 
1972   /* Apply memory dependence limits.  */
1973   start = MAX (start, early_start);
1974   end = MIN (end, late_start);
1975 
1976   if (dump_file && (psp_not_empty || pss_not_empty))
1977     fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
1978 	     "", start, end, "", "");
1979 
1980   /* If there are at least as many successors as predecessors, schedule the
1981      node close to its successors.  */
1982   if (pss_not_empty && count_succs >= count_preds)
1983     {
1984       std::swap (start, end);
1985       step = -1;
1986     }
1987 
1988   /* Now that we've finalized the window, make END an exclusive rather
1989      than an inclusive bound.  */
1990   end += step;
1991 
1992   *start_p = start;
1993   *step_p = step;
1994   *end_p = end;
1995 
1996   if ((start >= end && step == 1) || (start <= end && step == -1))
1997     {
1998       if (dump_file)
1999 	fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
2000 		 start, end, step);
2001       return -1;
2002     }
2003 
2004   return 0;
2005 }
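
/* Illustrative sketch only of the calling convention established above:
   END is exclusive and STEP may be -1, so a caller walks the window as
   below.  The TRY_CYCLE callback is hypothetical; the real caller is
   sms_schedule_by_order further down.  */

static bool
sketch_walk_window (int start, int end, int step,
		    bool (*try_cycle) (int cycle), int *scheduled_cycle)
{
  int c;

  for (c = start; c != end; c += step)
    if (try_cycle (c))
      {
	*scheduled_cycle = c;
	return true;
      }
  return false;
}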
2006 
2007 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE, which is the
2008    node currently being scheduled.  At the end of the calculation
2009    MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
2010    U_NODE which are (1) already scheduled in the first/last row of
2011    U_NODE's scheduling window, (2) whose dependence inequality with U
2012    becomes an equality when U is scheduled in this same row, and (3)
2013    whose dependence latency is zero.
2014 
2015    The first and last rows are calculated using the following parameters:
2016    START/END rows - The cycles that begin/end the traversal of the window
2017    when searching for an empty cycle to schedule U_NODE.
2018    STEP - The direction in which we traverse the window.
2019    II - The initiation interval.  */
2020 
2021 static void
2022 calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
2023 			       int step, int ii, sbitmap sched_nodes,
2024 			       sbitmap must_precede, sbitmap must_follow)
2025 {
2026   ddg_edge_ptr e;
2027   int first_cycle_in_window, last_cycle_in_window;
2028 
2029   gcc_assert (must_precede && must_follow);
2030 
2031   /* Consider the following scheduling window:
2032      {first_cycle_in_window, first_cycle_in_window+1, ...,
2033      last_cycle_in_window}.  If step is 1 then the following will be
2034      the order we traverse the window: {start=first_cycle_in_window,
2035      first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
2036      or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
2037      end=first_cycle_in_window-1} if step is -1.  */
2038   first_cycle_in_window = (step == 1) ? start : end - step;
2039   last_cycle_in_window = (step == 1) ? end - step : start;
2040 
2041   bitmap_clear (must_precede);
2042   bitmap_clear (must_follow);
2043 
2044   if (dump_file)
2045     fprintf (dump_file, "\nmust_precede: ");
2046 
2047   /* Instead of checking if:
2048       (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
2049       && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
2050              first_cycle_in_window)
2051       && e->latency == 0
2052      we use the fact that latency is non-negative:
2053       SCHED_TIME (e->src) - (e->distance * ii) <=
2054       SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
2055       first_cycle_in_window
2056      and check only if
2057       SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window  */
2058   for (e = u_node->in; e != 0; e = e->next_in)
2059     if (bitmap_bit_p (sched_nodes, e->src->cuid)
2060 	&& ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
2061              first_cycle_in_window))
2062       {
2063 	if (dump_file)
2064 	  fprintf (dump_file, "%d ", e->src->cuid);
2065 
2066 	bitmap_set_bit (must_precede, e->src->cuid);
2067       }
2068 
2069   if (dump_file)
2070     fprintf (dump_file, "\nmust_follow: ");
2071 
2072   /* Instead of checking if:
2073       (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
2074       && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
2075              last_cycle_in_window)
2076       && e->latency == 0
2077      we use the fact that latency is non-negative:
2078       SCHED_TIME (e->dest) + (e->distance * ii) >=
2079       SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
2080       last_cycle_in_window
2081      and check only if
2082       SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window  */
2083   for (e = u_node->out; e != 0; e = e->next_out)
2084     if (bitmap_bit_p (sched_nodes, e->dest->cuid)
2085 	&& ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
2086              last_cycle_in_window))
2087       {
2088 	if (dump_file)
2089 	  fprintf (dump_file, "%d ", e->dest->cuid);
2090 
2091 	bitmap_set_bit (must_follow, e->dest->cuid);
2092       }
2093 
2094   if (dump_file)
2095     fprintf (dump_file, "\n");
2096 }
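
/* Illustrative only: the simplified membership tests derived in the two
   comments above, valid because dependence latency is non-negative.
   The parameters mirror the fields used in the real checks: the
   neighbor's scheduled time, the dependence distance, II and the
   first/last cycle of the window.  */

static bool
sketch_must_precede_p (int src_time, int distance, int ii,
		       int first_cycle_in_window)
{
  return src_time - distance * ii == first_cycle_in_window;
}

static bool
sketch_must_follow_p (int dest_time, int distance, int ii,
		      int last_cycle_in_window)
{
  return dest_time + distance * ii == last_cycle_in_window;
}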
2097 
2098 /* Return 1 if U_NODE can be scheduled in CYCLE.  Use the following
2099    parameters to decide if that's possible:
2100    PS - The partial schedule.
2101    U - The serial number of U_NODE.
2102    NUM_SPLITS - The number of row splits made so far.
2103    MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
2104    the first row of the scheduling window)
2105    MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
2106    last row of the scheduling window)  */
2107 
2108 static bool
2109 try_scheduling_node_in_cycle (partial_schedule_ptr ps,
2110 			      int u, int cycle, sbitmap sched_nodes,
2111 			      int *num_splits, sbitmap must_precede,
2112 			      sbitmap must_follow)
2113 {
2114   ps_insn_ptr psi;
2115   bool success = false;
2116 
2117   verify_partial_schedule (ps, sched_nodes);
2118   psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
2119   if (psi)
2120     {
2121       SCHED_TIME (u) = cycle;
2122       bitmap_set_bit (sched_nodes, u);
2123       success = true;
2124       *num_splits = 0;
2125       if (dump_file)
2126 	fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
2127 
2128     }
2129 
2130   return success;
2131 }
2132 
2133 /* This function implements the scheduling algorithm for SMS according to the
2134    above algorithm.  */
2135 static partial_schedule_ptr
2136 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
2137 {
2138   int ii = mii;
2139   int i, c, success, num_splits = 0;
2140   int flush_and_start_over = true;
2141   int num_nodes = g->num_nodes;
2142   int start, end, step; /* Place together into one struct?  */
2143   auto_sbitmap sched_nodes (num_nodes);
2144   auto_sbitmap must_precede (num_nodes);
2145   auto_sbitmap must_follow (num_nodes);
2146   auto_sbitmap tobe_scheduled (num_nodes);
2147 
2148   partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
2149 
2150   bitmap_ones (tobe_scheduled);
2151   bitmap_clear (sched_nodes);
2152 
2153   while (flush_and_start_over && (ii < maxii))
2154     {
2155 
2156       if (dump_file)
2157 	fprintf (dump_file, "Starting with ii=%d\n", ii);
2158       flush_and_start_over = false;
2159       bitmap_clear (sched_nodes);
2160 
2161       for (i = 0; i < num_nodes; i++)
2162 	{
2163 	  int u = nodes_order[i];
2164   	  ddg_node_ptr u_node = &ps->g->nodes[u];
2165 	  rtx_insn *insn = u_node->insn;
2166 
2167 	  if (!NONDEBUG_INSN_P (insn))
2168 	    {
2169 	      bitmap_clear_bit (tobe_scheduled, u);
2170 	      continue;
2171 	    }
2172 
2173 	  if (bitmap_bit_p (sched_nodes, u))
2174 	    continue;
2175 
2176 	  /* Try to get non-empty scheduling window.  */
2177 	 success = 0;
2178          if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
2179                                 &step, &end) == 0)
2180             {
2181               if (dump_file)
2182                 fprintf (dump_file, "\nTrying to schedule node %d "
2183 			 "INSN = %d  in (%d .. %d) step %d\n", u, (INSN_UID
2184                         (g->nodes[u].insn)), start, end, step);
2185 
2186               gcc_assert ((step > 0 && start < end)
2187                           || (step < 0 && start > end));
2188 
2189               calculate_must_precede_follow (u_node, start, end, step, ii,
2190                                              sched_nodes, must_precede,
2191                                              must_follow);
2192 
2193               for (c = start; c != end; c += step)
2194                 {
2195 		  sbitmap tmp_precede, tmp_follow;
2196 
2197                   set_must_precede_follow (&tmp_follow, must_follow,
2198 		                           &tmp_precede, must_precede,
2199                                            c, start, end, step);
2200                   success =
2201                     try_scheduling_node_in_cycle (ps, u, c,
2202                                                   sched_nodes,
2203                                                   &num_splits, tmp_precede,
2204                                                   tmp_follow);
2205                   if (success)
2206                     break;
2207                 }
2208 
2209               verify_partial_schedule (ps, sched_nodes);
2210             }
2211             if (!success)
2212             {
2213               int split_row;
2214 
2215               if (ii++ == maxii)
2216                 break;
2217 
2218               if (num_splits >= MAX_SPLIT_NUM)
2219                 {
2220                   num_splits = 0;
2221                   flush_and_start_over = true;
2222                   verify_partial_schedule (ps, sched_nodes);
2223                   reset_partial_schedule (ps, ii);
2224                   verify_partial_schedule (ps, sched_nodes);
2225                   break;
2226                 }
2227 
2228               num_splits++;
2229               /* The scheduling window is exclusive of 'end'
2230                  whereas compute_split_row () expects an inclusive,
2231                  ordered range.  */
2232               if (step == 1)
2233                 split_row = compute_split_row (sched_nodes, start, end - 1,
2234                                                ps->ii, u_node);
2235               else
2236                 split_row = compute_split_row (sched_nodes, end + 1, start,
2237                                                ps->ii, u_node);
2238 
2239               ps_insert_empty_row (ps, split_row, sched_nodes);
2240               i--;              /* Go back and retry node i.  */
2241 
2242               if (dump_file)
2243                 fprintf (dump_file, "num_splits=%d\n", num_splits);
2244             }
2245 
2246           /* ??? If (success), check register pressure estimates.  */
2247         }                       /* Continue with next node.  */
2248     }                           /* While flush_and_start_over.  */
2249   if (ii >= maxii)
2250     {
2251       free_partial_schedule (ps);
2252       ps = NULL;
2253     }
2254   else
2255     gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes));
2256 
2257   return ps;
2258 }
2259 
2260 /* This function inserts a new empty row into PS at the position
2261    according to SPLIT_ROW, keeping all already scheduled instructions
2262    intact and updating their SCHED_TIME and cycle accordingly.  */
2263 static void
2264 ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
2265 		     sbitmap sched_nodes)
2266 {
2267   ps_insn_ptr crr_insn;
2268   ps_insn_ptr *rows_new;
2269   int ii = ps->ii;
2270   int new_ii = ii + 1;
2271   int row;
2272   int *rows_length_new;
2273 
2274   verify_partial_schedule (ps, sched_nodes);
2275 
2276   /* We normalize sched_time and rotate ps to have only non-negative sched
2277      times, for simplicity of updating cycles after inserting the new row.  */
2278   split_row -= ps->min_cycle;
2279   split_row = SMODULO (split_row, ii);
2280   if (dump_file)
2281     fprintf (dump_file, "split_row=%d\n", split_row);
2282 
2283   reset_sched_times (ps, PS_MIN_CYCLE (ps));
2284   rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
2285 
2286   rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
2287   rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
2288   for (row = 0; row < split_row; row++)
2289     {
2290       rows_new[row] = ps->rows[row];
2291       rows_length_new[row] = ps->rows_length[row];
2292       ps->rows[row] = NULL;
2293       for (crr_insn = rows_new[row];
2294 	   crr_insn; crr_insn = crr_insn->next_in_row)
2295 	{
2296 	  int u = crr_insn->id;
2297 	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
2298 
2299 	  SCHED_TIME (u) = new_time;
2300 	  crr_insn->cycle = new_time;
2301 	  SCHED_ROW (u) = new_time % new_ii;
2302 	  SCHED_STAGE (u) = new_time / new_ii;
2303 	}
2304 
2305     }
2306 
2307   rows_new[split_row] = NULL;
2308 
2309   for (row = split_row; row < ii; row++)
2310     {
2311       rows_new[row + 1] = ps->rows[row];
2312       rows_length_new[row + 1] = ps->rows_length[row];
2313       ps->rows[row] = NULL;
2314       for (crr_insn = rows_new[row + 1];
2315 	   crr_insn; crr_insn = crr_insn->next_in_row)
2316 	{
2317 	  int u = crr_insn->id;
2318 	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
2319 
2320 	  SCHED_TIME (u) = new_time;
2321 	  crr_insn->cycle = new_time;
2322 	  SCHED_ROW (u) = new_time % new_ii;
2323 	  SCHED_STAGE (u) = new_time / new_ii;
2324 	}
2325     }
2326 
2327   /* Updating ps.  */
2328   ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
2329     + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
2330   ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
2331     + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
2332   free (ps->rows);
2333   ps->rows = rows_new;
2334   free (ps->rows_length);
2335   ps->rows_length = rows_length_new;
2336   ps->ii = new_ii;
2337   gcc_assert (ps->min_cycle >= 0);
2338 
2339   verify_partial_schedule (ps, sched_nodes);
2340 
2341   if (dump_file)
2342     fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
2343 	     ps->max_cycle);
2344 }
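
/* Illustrative only: the cycle remapping performed above, expressed for
   a single non-negative normalized cycle.  Every full turn of the old II
   adds one extra row, and cycles whose row is at or after SPLIT_ROW are
   pushed down by one more to make room for the empty row.  */

static int
sketch_remap_cycle (int time, int ii, int split_row)
{
  int new_time = time + time / ii;

  if (time % ii >= split_row)
    new_time++;
  return new_time;
}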
2345 
2346 /* Given U_NODE, the node that failed to be scheduled, and LOW and UP,
2347    the boundaries of its scheduling window, use SCHED_NODES and II to
2348    compute a row in the partial schedule that can be split so as to
2349    separate a critical predecessor from a critical successor, thereby
2350    expanding the window; return that row.  */
2351 static int
2352 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
2353 		   ddg_node_ptr u_node)
2354 {
2355   ddg_edge_ptr e;
2356   int lower = INT_MIN, upper = INT_MAX;
2357   int crit_pred = -1;
2358   int crit_succ = -1;
2359   int crit_cycle;
2360 
2361   for (e = u_node->in; e != 0; e = e->next_in)
2362     {
2363       int v = e->src->cuid;
2364 
2365       if (bitmap_bit_p (sched_nodes, v)
2366 	  && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
2367 	if (SCHED_TIME (v) > lower)
2368 	  {
2369 	    crit_pred = v;
2370 	    lower = SCHED_TIME (v);
2371 	  }
2372     }
2373 
2374   if (crit_pred >= 0)
2375     {
2376       crit_cycle = SCHED_TIME (crit_pred) + 1;
2377       return SMODULO (crit_cycle, ii);
2378     }
2379 
2380   for (e = u_node->out; e != 0; e = e->next_out)
2381     {
2382       int v = e->dest->cuid;
2383 
2384       if (bitmap_bit_p (sched_nodes, v)
2385 	  && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
2386 	if (SCHED_TIME (v) < upper)
2387 	  {
2388 	    crit_succ = v;
2389 	    upper = SCHED_TIME (v);
2390 	  }
2391     }
2392 
2393   if (crit_succ >= 0)
2394     {
2395       crit_cycle = SCHED_TIME (crit_succ);
2396       return SMODULO (crit_cycle, ii);
2397     }
2398 
2399   if (dump_file)
2400     fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2401 
2402   return SMODULO ((low + up + 1) / 2, ii);
2403 }
2404 
2405 static void
2406 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2407 {
2408   int row;
2409   ps_insn_ptr crr_insn;
2410 
2411   for (row = 0; row < ps->ii; row++)
2412     {
2413       int length = 0;
2414 
2415       for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2416 	{
2417 	  int u = crr_insn->id;
2418 
2419 	  length++;
2420 	  gcc_assert (bitmap_bit_p (sched_nodes, u));
2421 	  /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2422 	     popcount (sched_nodes) == number of insns in ps.  */
2423 	  gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2424 	  gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2425 	}
2426 
2427       gcc_assert (ps->rows_length[row] == length);
2428     }
2429 }
2430 
2431 
2432 /* This page implements the algorithm for ordering the nodes of a DDG
2433    for modulo scheduling, activated through the
2434    "int sms_order_nodes (ddg_ptr, int mii, int *result, int *pmax_asap)" API.  */
2435 
2436 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2437 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2438 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2439 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2440 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2441 #define DEPTH(x) (ASAP ((x)))
2442 
2443 typedef struct node_order_params * nopa;
2444 
2445 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2446 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2447 static nopa  calculate_order_params (ddg_ptr, int, int *);
2448 static int find_max_asap (ddg_ptr, sbitmap);
2449 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2450 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2451 
2452 enum sms_direction {BOTTOMUP, TOPDOWN};
2453 
2454 struct node_order_params
2455 {
2456   int asap;
2457   int alap;
2458   int height;
2459 };
2460 
2461 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1.  */
2462 static void
2463 check_nodes_order (int *node_order, int num_nodes)
2464 {
2465   int i;
2466   auto_sbitmap tmp (num_nodes);
2467 
2468   bitmap_clear (tmp);
2469 
2470   if (dump_file)
2471     fprintf (dump_file, "SMS final nodes order: \n");
2472 
2473   for (i = 0; i < num_nodes; i++)
2474     {
2475       int u = node_order[i];
2476 
2477       if (dump_file)
2478         fprintf (dump_file, "%d ", u);
2479       gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u));
2480 
2481       bitmap_set_bit (tmp, u);
2482     }
2483 
2484   if (dump_file)
2485     fprintf (dump_file, "\n");
2486 }
2487 
2488 /* Order the nodes of G for scheduling and pass the result in
2489    NODE_ORDER.  Also set aux.count of each node to ASAP.
2490    Put maximal ASAP to PMAX_ASAP.  Return the recMII for the given DDG.  */
2491 static int
2492 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2493 {
2494   int i;
2495   int rec_mii = 0;
2496   ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2497 
2498   nopa nops = calculate_order_params (g, mii, pmax_asap);
2499 
2500   if (dump_file)
2501     print_sccs (dump_file, sccs, g);
2502 
2503   order_nodes_of_sccs (sccs, node_order);
2504 
2505   if (sccs->num_sccs > 0)
2506     /* First SCC has the largest recurrence_length.  */
2507     rec_mii = sccs->sccs[0]->recurrence_length;
2508 
2509   /* Save ASAP before destroying node_order_params.  */
2510   for (i = 0; i < g->num_nodes; i++)
2511     {
2512       ddg_node_ptr v = &g->nodes[i];
2513       v->aux.count = ASAP (v);
2514     }
2515 
2516   free (nops);
2517   free_ddg_all_sccs (sccs);
2518   check_nodes_order (node_order, g->num_nodes);
2519 
2520   return rec_mii;
2521 }
2522 
2523 static void
2524 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2525 {
2526   int i, pos = 0;
2527   ddg_ptr g = all_sccs->ddg;
2528   int num_nodes = g->num_nodes;
2529   auto_sbitmap prev_sccs (num_nodes);
2530   auto_sbitmap on_path (num_nodes);
2531   auto_sbitmap tmp (num_nodes);
2532   auto_sbitmap ones (num_nodes);
2533 
2534   bitmap_clear (prev_sccs);
2535   bitmap_ones (ones);
2536 
2537   /* Perform the node ordering starting from the SCC with the highest recMII.
2538      For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
2539   for (i = 0; i < all_sccs->num_sccs; i++)
2540     {
2541       ddg_scc_ptr scc = all_sccs->sccs[i];
2542 
2543       /* Add nodes on paths from previous SCCs to the current SCC.  */
2544       find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2545       bitmap_ior (tmp, scc->nodes, on_path);
2546 
2547       /* Add nodes on paths from the current SCC to previous SCCs.  */
2548       find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2549       bitmap_ior (tmp, tmp, on_path);
2550 
2551       /* Remove nodes of previous SCCs from current extended SCC.  */
2552       bitmap_and_compl (tmp, tmp, prev_sccs);
2553 
2554       pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2555       /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
2556     }
2557 
2558   /* Handle the remaining nodes that do not belong to any scc.  Each call
2559      to order_nodes_in_scc handles a single connected component.  */
2560   while (pos < g->num_nodes)
2561     {
2562       bitmap_and_compl (tmp, ones, prev_sccs);
2563       pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2564     }
2565 }
2566 
2567 /* MII is needed if we consider backarcs (that do not close recursive cycles).  */
2568 static struct node_order_params *
2569 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2570 {
2571   int u;
2572   int max_asap;
2573   int num_nodes = g->num_nodes;
2574   ddg_edge_ptr e;
2575   /* Allocate a place to hold ordering params for each node in the DDG.  */
2576   nopa node_order_params_arr;
2577 
2578   /* Initialize ASAP/ALAP/HEIGHT to zero.  */
2579   node_order_params_arr = (nopa) xcalloc (num_nodes,
2580 					  sizeof (struct node_order_params));
2581 
2582   /* Set the aux pointer of each node to point to its order_params structure.  */
2583   for (u = 0; u < num_nodes; u++)
2584     g->nodes[u].aux.info = &node_order_params_arr[u];
2585 
2586   /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2587      calculate ASAP, ALAP, mobility, distance, and height for each node
2588    in the dependence (directed acyclic) graph.  */
2589 
2590   /* We assume that the nodes in the array are in topological order.  */
2591 
2592   max_asap = 0;
2593   for (u = 0; u < num_nodes; u++)
2594     {
2595       ddg_node_ptr u_node = &g->nodes[u];
2596 
2597       ASAP (u_node) = 0;
2598       for (e = u_node->in; e; e = e->next_in)
2599 	if (e->distance == 0)
2600 	  ASAP (u_node) = MAX (ASAP (u_node),
2601 			       ASAP (e->src) + e->latency);
2602       max_asap = MAX (max_asap, ASAP (u_node));
2603     }
2604 
2605   for (u = num_nodes - 1; u > -1; u--)
2606     {
2607       ddg_node_ptr u_node = &g->nodes[u];
2608 
2609       ALAP (u_node) = max_asap;
2610       HEIGHT (u_node) = 0;
2611       for (e = u_node->out; e; e = e->next_out)
2612 	if (e->distance == 0)
2613 	  {
2614 	    ALAP (u_node) = MIN (ALAP (u_node),
2615 				 ALAP (e->dest) - e->latency);
2616 	    HEIGHT (u_node) = MAX (HEIGHT (u_node),
2617 				   HEIGHT (e->dest) + e->latency);
2618 	  }
2619     }
2620   if (dump_file)
2621   {
2622     fprintf (dump_file, "\nOrder params\n");
2623     for (u = 0; u < num_nodes; u++)
2624       {
2625         ddg_node_ptr u_node = &g->nodes[u];
2626 
2627         fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2628                  ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2629       }
2630   }
2631 
2632   *pmax_asap = max_asap;
2633   return node_order_params_arr;
2634 }
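
/* Illustrative, self-contained sketch (not used by the pass) of the two
   passes above, over a hypothetical DAG given as an edge list sorted by
   the topological position of the source node; back-arcs are assumed to
   have been dropped already, as in the real code.  Mobility of node U is
   then alap[U] - asap[U], matching the MOB macro.  */

struct sketch_edge
{
  int src, dest, latency;
};

static void
sketch_order_params (const struct sketch_edge *edges, int n_edges,
		     int n_nodes, int *asap, int *alap, int *height)
{
  int i, max_asap = 0;

  for (i = 0; i < n_nodes; i++)
    asap[i] = height[i] = 0;

  /* Forward pass: ASAP is the longest-latency path from any source.  */
  for (i = 0; i < n_edges; i++)
    asap[edges[i].dest] = MAX (asap[edges[i].dest],
			       asap[edges[i].src] + edges[i].latency);
  for (i = 0; i < n_nodes; i++)
    max_asap = MAX (max_asap, asap[i]);

  /* Backward pass: ALAP and HEIGHT, walking the edges in reverse.  */
  for (i = 0; i < n_nodes; i++)
    alap[i] = max_asap;
  for (i = n_edges - 1; i >= 0; i--)
    {
      alap[edges[i].src] = MIN (alap[edges[i].src],
				alap[edges[i].dest] - edges[i].latency);
      height[edges[i].src] = MAX (height[edges[i].src],
				  height[edges[i].dest] + edges[i].latency);
    }
}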
2635 
2636 static int
2637 find_max_asap (ddg_ptr g, sbitmap nodes)
2638 {
2639   unsigned int u = 0;
2640   int max_asap = -1;
2641   int result = -1;
2642   sbitmap_iterator sbi;
2643 
2644   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2645     {
2646       ddg_node_ptr u_node = &g->nodes[u];
2647 
2648       if (max_asap < ASAP (u_node))
2649 	{
2650 	  max_asap = ASAP (u_node);
2651 	  result = u;
2652 	}
2653     }
2654   return result;
2655 }
2656 
2657 static int
2658 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2659 {
2660   unsigned int u = 0;
2661   int max_hv = -1;
2662   int min_mob = INT_MAX;
2663   int result = -1;
2664   sbitmap_iterator sbi;
2665 
2666   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2667     {
2668       ddg_node_ptr u_node = &g->nodes[u];
2669 
2670       if (max_hv < HEIGHT (u_node))
2671 	{
2672 	  max_hv = HEIGHT (u_node);
2673 	  min_mob = MOB (u_node);
2674 	  result = u;
2675 	}
2676       else if ((max_hv == HEIGHT (u_node))
2677 	       && (min_mob > MOB (u_node)))
2678 	{
2679 	  min_mob = MOB (u_node);
2680 	  result = u;
2681 	}
2682     }
2683   return result;
2684 }
2685 
2686 static int
2687 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2688 {
2689   unsigned int u = 0;
2690   int max_dv = -1;
2691   int min_mob = INT_MAX;
2692   int result = -1;
2693   sbitmap_iterator sbi;
2694 
2695   EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2696     {
2697       ddg_node_ptr u_node = &g->nodes[u];
2698 
2699       if (max_dv < DEPTH (u_node))
2700 	{
2701 	  max_dv = DEPTH (u_node);
2702 	  min_mob = MOB (u_node);
2703 	  result = u;
2704 	}
2705       else if ((max_dv == DEPTH (u_node))
2706 	       && (min_mob > MOB (u_node)))
2707 	{
2708 	  min_mob = MOB (u_node);
2709 	  result = u;
2710 	}
2711     }
2712   return result;
2713 }
2714 
2715 /* Places the nodes of SCC into the NODE_ORDER array starting
2716    at position POS, according to the SMS ordering algorithm.
2717    NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2718    the NODE_ORDER array, starting from position zero.  */
2719 static int
2720 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2721 		    int * node_order, int pos)
2722 {
2723   enum sms_direction dir;
2724   int num_nodes = g->num_nodes;
2725   auto_sbitmap workset (num_nodes);
2726   auto_sbitmap tmp (num_nodes);
2727   sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2728   auto_sbitmap predecessors (num_nodes);
2729   auto_sbitmap successors (num_nodes);
2730 
2731   bitmap_clear (predecessors);
2732   find_predecessors (predecessors, g, nodes_ordered);
2733 
2734   bitmap_clear (successors);
2735   find_successors (successors, g, nodes_ordered);
2736 
2737   bitmap_clear (tmp);
2738   if (bitmap_and (tmp, predecessors, scc))
2739     {
2740       bitmap_copy (workset, tmp);
2741       dir = BOTTOMUP;
2742     }
2743   else if (bitmap_and (tmp, successors, scc))
2744     {
2745       bitmap_copy (workset, tmp);
2746       dir = TOPDOWN;
2747     }
2748   else
2749     {
2750       int u;
2751 
2752       bitmap_clear (workset);
2753       if ((u = find_max_asap (g, scc)) >= 0)
2754 	bitmap_set_bit (workset, u);
2755       dir = BOTTOMUP;
2756     }
2757 
2758   bitmap_clear (zero_bitmap);
2759   while (!bitmap_equal_p (workset, zero_bitmap))
2760     {
2761       int v;
2762       ddg_node_ptr v_node;
2763       sbitmap v_node_preds;
2764       sbitmap v_node_succs;
2765 
2766       if (dir == TOPDOWN)
2767 	{
2768 	  while (!bitmap_equal_p (workset, zero_bitmap))
2769 	    {
2770 	      v = find_max_hv_min_mob (g, workset);
2771 	      v_node = &g->nodes[v];
2772 	      node_order[pos++] = v;
2773 	      v_node_succs = NODE_SUCCESSORS (v_node);
2774 	      bitmap_and (tmp, v_node_succs, scc);
2775 
2776 	      /* Don't consider the already ordered successors again.  */
2777 	      bitmap_and_compl (tmp, tmp, nodes_ordered);
2778 	      bitmap_ior (workset, workset, tmp);
2779 	      bitmap_clear_bit (workset, v);
2780 	      bitmap_set_bit (nodes_ordered, v);
2781 	    }
2782 	  dir = BOTTOMUP;
2783 	  bitmap_clear (predecessors);
2784 	  find_predecessors (predecessors, g, nodes_ordered);
2785 	  bitmap_and (workset, predecessors, scc);
2786 	}
2787       else
2788 	{
2789 	  while (!bitmap_equal_p (workset, zero_bitmap))
2790 	    {
2791 	      v = find_max_dv_min_mob (g, workset);
2792 	      v_node = &g->nodes[v];
2793 	      node_order[pos++] = v;
2794 	      v_node_preds = NODE_PREDECESSORS (v_node);
2795 	      bitmap_and (tmp, v_node_preds, scc);
2796 
2797 	      /* Don't consider the already ordered predecessors again.  */
2798 	      bitmap_and_compl (tmp, tmp, nodes_ordered);
2799 	      bitmap_ior (workset, workset, tmp);
2800 	      bitmap_clear_bit (workset, v);
2801 	      bitmap_set_bit (nodes_ordered, v);
2802 	    }
2803 	  dir = TOPDOWN;
2804 	  bitmap_clear (successors);
2805 	  find_successors (successors, g, nodes_ordered);
2806 	  bitmap_and (workset, successors, scc);
2807 	}
2808     }
2809   sbitmap_free (zero_bitmap);
2810   return pos;
2811 }
2812 
2813 
2814 /* This page contains functions for manipulating partial-schedules during
2815    modulo scheduling.  */
2816 
2817 /* Create a partial schedule and allocate memory to hold II rows.  */
2818 
2819 static partial_schedule_ptr
2820 create_partial_schedule (int ii, ddg_ptr g, int history)
2821 {
2822   partial_schedule_ptr ps = XNEW (struct partial_schedule);
2823   ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2824   ps->rows_length = (int *) xcalloc (ii, sizeof (int));
2825   ps->reg_moves.create (0);
2826   ps->ii = ii;
2827   ps->history = history;
2828   ps->min_cycle = INT_MAX;
2829   ps->max_cycle = INT_MIN;
2830   ps->g = g;
2831 
2832   return ps;
2833 }
2834 
2835 /* Free the PS_INSNs in the rows array of the given partial schedule.
2836    ??? Consider caching the PS_INSNs.  */
2837 static void
2838 free_ps_insns (partial_schedule_ptr ps)
2839 {
2840   int i;
2841 
2842   for (i = 0; i < ps->ii; i++)
2843     {
2844       while (ps->rows[i])
2845 	{
2846 	  ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2847 
2848 	  free (ps->rows[i]);
2849 	  ps->rows[i] = ps_insn;
2850 	}
2851       ps->rows[i] = NULL;
2852     }
2853 }
2854 
2855 /* Free all the memory allocated to the partial schedule.  */
2856 
2857 static void
2858 free_partial_schedule (partial_schedule_ptr ps)
2859 {
2860   ps_reg_move_info *move;
2861   unsigned int i;
2862 
2863   if (!ps)
2864     return;
2865 
2866   FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
2867     sbitmap_free (move->uses);
2868   ps->reg_moves.release ();
2869 
2870   free_ps_insns (ps);
2871   free (ps->rows);
2872   free (ps->rows_length);
2873   free (ps);
2874 }
2875 
2876 /* Clear the rows array with its PS_INSNs, and create a new one with
2877    NEW_II rows.  */
2878 
2879 static void
2880 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2881 {
2882   if (!ps)
2883     return;
2884   free_ps_insns (ps);
2885   if (new_ii == ps->ii)
2886     return;
2887   ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2888 						 * sizeof (ps_insn_ptr));
2889   memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2890   ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2891   memset (ps->rows_length, 0, new_ii * sizeof (int));
2892   ps->ii = new_ii;
2893   ps->min_cycle = INT_MAX;
2894   ps->max_cycle = INT_MIN;
2895 }
2896 
2897 /* Print the partial schedule as an array of II rows; for each row,
2898    print the ids of the insns in it.  */
2899 void
2900 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2901 {
2902   int i;
2903 
2904   for (i = 0; i < ps->ii; i++)
2905     {
2906       ps_insn_ptr ps_i = ps->rows[i];
2907 
2908       fprintf (dump, "\n[ROW %d ]: ", i);
2909       while (ps_i)
2910 	{
2911 	  rtx_insn *insn = ps_rtl_insn (ps, ps_i->id);
2912 
2913 	  if (JUMP_P (insn))
2914 	    fprintf (dump, "%d (branch), ", INSN_UID (insn));
2915 	  else
2916 	    fprintf (dump, "%d, ", INSN_UID (insn));
2917 
2918 	  ps_i = ps_i->next_in_row;
2919 	}
2920     }
2921 }
2922 
2923 /* Creates an object of PS_INSN and initializes it to the given parameters.  */
2924 static ps_insn_ptr
2925 create_ps_insn (int id, int cycle)
2926 {
2927   ps_insn_ptr ps_i = XNEW (struct ps_insn);
2928 
2929   ps_i->id = id;
2930   ps_i->next_in_row = NULL;
2931   ps_i->prev_in_row = NULL;
2932   ps_i->cycle = cycle;
2933 
2934   return ps_i;
2935 }
2936 
2937 
2938 /* Removes the given PS_INSN from the partial schedule.  */
2939 static void
2940 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2941 {
2942   int row;
2943 
2944   gcc_assert (ps && ps_i);
2945 
2946   row = SMODULO (ps_i->cycle, ps->ii);
2947   if (! ps_i->prev_in_row)
2948     {
2949       gcc_assert (ps_i == ps->rows[row]);
2950       ps->rows[row] = ps_i->next_in_row;
2951       if (ps->rows[row])
2952 	ps->rows[row]->prev_in_row = NULL;
2953     }
2954   else
2955     {
2956       ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2957       if (ps_i->next_in_row)
2958 	ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2959     }
2960 
2961   ps->rows_length[row] -= 1;
2962   free (ps_i);
2963   return;
2964 }
2965 
2966 /* Unlike what the literature describes for modulo scheduling (which focuses
2967    on VLIW machines), the order of the instructions inside a cycle is
2968    important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2969    where the current instruction should go relative to the already
2970    scheduled instructions in the given cycle.  Go over these
2971    instructions and find the first possible column to put it in.  */
2972 static bool
2973 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2974 		     sbitmap must_precede, sbitmap must_follow)
2975 {
2976   ps_insn_ptr next_ps_i;
2977   ps_insn_ptr first_must_follow = NULL;
2978   ps_insn_ptr last_must_precede = NULL;
2979   ps_insn_ptr last_in_row = NULL;
2980   int row;
2981 
2982   if (! ps_i)
2983     return false;
2984 
2985   row = SMODULO (ps_i->cycle, ps->ii);
2986 
2987   /* Find the first must_follow and the last must_precede
2988      and insert the node immediately after the must_precede,
2989      but make sure that there is no must_follow after it.  */
2990   for (next_ps_i = ps->rows[row];
2991        next_ps_i;
2992        next_ps_i = next_ps_i->next_in_row)
2993     {
2994       if (must_follow
2995 	  && bitmap_bit_p (must_follow, next_ps_i->id)
2996 	  && ! first_must_follow)
2997         first_must_follow = next_ps_i;
2998       if (must_precede && bitmap_bit_p (must_precede, next_ps_i->id))
2999         {
3000           /* If we have already met a node that must follow, then
3001 	     there is no possible column.  */
3002   	  if (first_must_follow)
3003             return false;
3004 	  else
3005             last_must_precede = next_ps_i;
3006         }
3007       /* The closing branch must be the last in the row.  */
3008       if (JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
3009 	return false;
3010 
3011        last_in_row = next_ps_i;
3012     }
3013 
3014   /* The closing branch is scheduled as well.  Make sure there is no
3015      dependent instruction after it as the branch should be the last
3016      instruction in the row.  */
3017   if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
3018     {
3019       if (first_must_follow)
3020 	return false;
3021       if (last_in_row)
3022 	{
3023 	  /* Make the branch the last in the row.  New instructions
3024 	     will be inserted at the beginning of the row or after the
3025 	     last must_precede instruction thus the branch is guaranteed
3026 	     to remain the last instruction in the row.  */
3027 	  last_in_row->next_in_row = ps_i;
3028 	  ps_i->prev_in_row = last_in_row;
3029 	  ps_i->next_in_row = NULL;
3030 	}
3031       else
3032 	ps->rows[row] = ps_i;
3033       return true;
3034     }
3035 
3036   /* Now insert the node after LAST_MUST_PRECEDE, if any.  */
3037 
3038   if (! last_must_precede)
3039     {
3040       ps_i->next_in_row = ps->rows[row];
3041       ps_i->prev_in_row = NULL;
3042       if (ps_i->next_in_row)
3043     	ps_i->next_in_row->prev_in_row = ps_i;
3044       ps->rows[row] = ps_i;
3045     }
3046   else
3047     {
3048       ps_i->next_in_row = last_must_precede->next_in_row;
3049       last_must_precede->next_in_row = ps_i;
3050       ps_i->prev_in_row = last_must_precede;
3051       if (ps_i->next_in_row)
3052         ps_i->next_in_row->prev_in_row = ps_i;
3053     }
3054 
3055   return true;
3056 }
3057 
3058 /* Advance the PS_INSN one column in its current row; return false
3059    on failure and true on success.  Bit N is set in MUST_FOLLOW if
3060    the node with cuid N must come after the node pointed to by
3061    PS_I when scheduled in the same cycle.  */
3062 static int
3063 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3064 			sbitmap must_follow)
3065 {
3066   ps_insn_ptr prev, next;
3067   int row;
3068 
3069   if (!ps || !ps_i)
3070     return false;
3071 
3072   row = SMODULO (ps_i->cycle, ps->ii);
3073 
3074   if (! ps_i->next_in_row)
3075     return false;
3076 
3077   /* Check if next_in_row is dependent on ps_i, both having same sched
3078      times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
3079   if (must_follow && bitmap_bit_p (must_follow, ps_i->next_in_row->id))
3080     return false;
3081 
3082   /* Advance PS_I over its next_in_row in the doubly linked list.  */
3083   prev = ps_i->prev_in_row;
3084   next = ps_i->next_in_row;
3085 
3086   if (ps_i == ps->rows[row])
3087     ps->rows[row] = next;
3088 
3089   ps_i->next_in_row = next->next_in_row;
3090 
3091   if (next->next_in_row)
3092     next->next_in_row->prev_in_row = ps_i;
3093 
3094   next->next_in_row = ps_i;
3095   ps_i->prev_in_row = next;
3096 
3097   next->prev_in_row = prev;
3098   if (prev)
3099     prev->next_in_row = next;
3100 
3101   return true;
3102 }
3103 
3104 /* Insert a DDG_NODE into the given partial schedule at the given cycle.
3105    Return 0 if this is not possible and a PS_INSN otherwise.  Bit N is
3106    set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come
3107    before/after (respectively) the node pointed to by PS_I when scheduled
3108    in the same cycle.  */
3109 static ps_insn_ptr
3110 add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
3111 		sbitmap must_precede, sbitmap must_follow)
3112 {
3113   ps_insn_ptr ps_i;
3114   int row = SMODULO (cycle, ps->ii);
3115 
3116   if (ps->rows_length[row] >= issue_rate)
3117     return NULL;
3118 
3119   ps_i = create_ps_insn (id, cycle);
3120 
3121   /* Finds and inserts PS_I according to MUST_FOLLOW and
3122      MUST_PRECEDE.  */
3123   if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
3124     {
3125       free (ps_i);
3126       return NULL;
3127     }
3128 
3129   ps->rows_length[row] += 1;
3130   return ps_i;
3131 }
3132 
3133 /* Advance time one cycle.  Assumes DFA is being used.  */
3134 static void
3135 advance_one_cycle (void)
3136 {
3137   if (targetm.sched.dfa_pre_cycle_insn)
3138     state_transition (curr_state,
3139 		      targetm.sched.dfa_pre_cycle_insn ());
3140 
3141   state_transition (curr_state, NULL);
3142 
3143   if (targetm.sched.dfa_post_cycle_insn)
3144     state_transition (curr_state,
3145 		      targetm.sched.dfa_post_cycle_insn ());
3146 }
3147 
3148 
3149 
3150 /* Checks if PS has resource conflicts according to DFA, starting from
3151    FROM cycle to TO cycle; returns true if there are conflicts and false
3152    if there are no conflicts.  Assumes DFA is being used.  */
3153 static int
3154 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
3155 {
3156   int cycle;
3157 
3158   state_reset (curr_state);
3159 
3160   for (cycle = from; cycle <= to; cycle++)
3161     {
3162       ps_insn_ptr crr_insn;
3163       /* Holds the remaining issue slots in the current row.  */
3164       int can_issue_more = issue_rate;
3165 
3166       /* Walk through the DFA for the current row.  */
3167       for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
3168 	   crr_insn;
3169 	   crr_insn = crr_insn->next_in_row)
3170 	{
3171 	  rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
3172 
3173 	  if (!NONDEBUG_INSN_P (insn))
3174 	    continue;
3175 
3176 	  /* Check if there is room for the current insn.  */
3177 	  if (!can_issue_more || state_dead_lock_p (curr_state))
3178 	    return true;
3179 
3180 	  /* Update the DFA state and return with failure if the DFA found
3181 	     resource conflicts.  */
3182 	  if (state_transition (curr_state, insn) >= 0)
3183 	    return true;
3184 
3185 	  if (targetm.sched.variable_issue)
3186 	    can_issue_more =
3187 	      targetm.sched.variable_issue (sched_dump, sched_verbose,
3188 					    insn, can_issue_more);
3189 	  /* A naked CLOBBER or USE generates no instruction, so don't
3190 	     let them consume issue slots.  */
3191 	  else if (GET_CODE (PATTERN (insn)) != USE
3192 		   && GET_CODE (PATTERN (insn)) != CLOBBER)
3193 	    can_issue_more--;
3194 	}
3195 
3196       /* Advance the DFA to the next cycle.  */
3197       advance_one_cycle ();
3198     }
3199   return false;
3200 }
3201 
3202 /* Checks if the given node causes resource conflicts when added to PS at
3203    cycle C.  If not, the node is added to PS and returned; otherwise zero
3204    is returned.  Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
3205    cuid N must come before/after (respectively) the node pointed to by
3206    PS_I when scheduled in the same cycle.  */
3207 ps_insn_ptr
3208 ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
3209    			     int c, sbitmap must_precede,
3210 			     sbitmap must_follow)
3211 {
3212   int i, first, amount, has_conflicts = 0;
3213   ps_insn_ptr ps_i;
3214 
3215   /* First add the node to the PS, if this succeeds check for
3216      conflicts, trying different issue slots in the same row.  */
3217   if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
3218     return NULL; /* Failed to insert the node at the given cycle.  */
3219 
3220   while (1)
3221     {
3222       has_conflicts = ps_has_conflicts (ps, c, c);
3223       if (ps->history > 0 && !has_conflicts)
3224 	{
3225 	  /* Check all 2h+1 intervals, starting from c-2h..c up to c..c+2h,
3226 	     but not more than ii intervals.  */
	  first = c - ps->history;
	  amount = 2 * ps->history + 1;
	  if (amount > ps->ii)
	    amount = ps->ii;
	  for (i = first; i < first + amount; i++)
	    {
	      has_conflicts = ps_has_conflicts (ps,
						i - ps->history,
						i + ps->history);
	      if (has_conflicts)
		break;
	    }
	}
      if (!has_conflicts)
	break;
      /* Try different issue slots to find one that the given node can be
	 scheduled in without conflicts.  */
      if (! ps_insn_advance_column (ps, ps_i, must_follow))
	break;
    }

  if (has_conflicts)
    {
      remove_node_from_ps (ps, ps_i);
      return NULL;
    }

  ps->min_cycle = MIN (ps->min_cycle, c);
  ps->max_cycle = MAX (ps->max_cycle, c);
  return ps_i;
}

/* Calculate the stage count of the partial schedule PS.  The calculation
   takes into account the rotation amount passed in ROTATION_AMOUNT.  */
int
calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
{
  int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
  int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
  int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);

  /* The stage count is computed by adding the number of stages before
     cycle zero to the number of stages from cycle zero onwards.  */
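  /* For example, with ps->ii == 4 and rotated cycles spanning -3..5, the
     cycles -3..-1 contribute one stage and the cycles 0..5 contribute two,
     giving a stage count of 3 (assuming CALC_STAGE_COUNT rounds the cycle
     span up to whole ii-cycle stages).  */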
  stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);

  return stage_count;
}

/* Rotate the rows of PS such that insns scheduled at time
   START_CYCLE will appear in row 0.  Updates max/min_cycles.  */
void
rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
{
  int i, row, backward_rotates;
  int last_row = ps->ii - 1;

  if (start_cycle == 0)
    return;

  backward_rotates = SMODULO (start_cycle, ps->ii);
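  /* For example, with ps->ii == 4 and start_cycle == 6 this rotates twice:
     the old rows 2, 3, 0, 1 become the new rows 0, 1, 2, 3, so insns from
     cycle 6 (old row 2) end up in row 0.  */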

  /* Revisit later and optimize this into a single loop.  */
  for (i = 0; i < backward_rotates; i++)
    {
      ps_insn_ptr first_row = ps->rows[0];
      int first_row_length = ps->rows_length[0];

      for (row = 0; row < last_row; row++)
	{
	  ps->rows[row] = ps->rows[row + 1];
	  ps->rows_length[row] = ps->rows_length[row + 1];
	}

      ps->rows[last_row] = first_row;
      ps->rows_length[last_row] = first_row_length;
    }

  ps->max_cycle -= start_cycle;
  ps->min_cycle -= start_cycle;
}

#endif /* INSN_SCHEDULING */

/* Run the instruction scheduler: perform SMS modulo scheduling.  */

namespace {

const pass_data pass_data_sms =
{
  RTL_PASS, /* type */
  "sms", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_SMS, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_sms : public rtl_opt_pass
{
public:
  pass_sms (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_sms, ctxt)
  {}

  /* opt_pass methods: */
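  /* SMS is enabled by the -fmodulo-sched command-line option
     (flag_modulo_sched) and requires optimization to be enabled.  */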
  virtual bool gate (function *)
  {
    return (optimize > 0 && flag_modulo_sched);
  }

  virtual unsigned int execute (function *);

}; // class pass_sms

unsigned int
pass_sms::execute (function *fun ATTRIBUTE_UNUSED)
{
#ifdef INSN_SCHEDULING
  basic_block bb;

  /* Collect loop information to be used in SMS.  */
  cfg_layout_initialize (0);
  sms_schedule ();

  /* Update the life information, because we add pseudos.  */
  max_regno = max_reg_num ();

  /* Finalize layout changes.  */
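  /* Chain the blocks through bb->aux so cfg_layout_finalize can restore
     the intended block order when it rebuilds the insn chain.  */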
  FOR_EACH_BB_FN (bb, fun)
    if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
      bb->aux = bb->next_bb;
  free_dominance_info (CDI_DOMINATORS);
  cfg_layout_finalize ();
#endif /* INSN_SCHEDULING */
  return 0;
}

} // anon namespace

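/* Factory function through which the pass manager creates the SMS pass
   (typically referenced from the pass_sms entry in passes.def).  */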
rtl_opt_pass *
make_pass_sms (gcc::context *ctxt)
{
  return new pass_sms (ctxt);
}