1*e4b17023SJohn Marino /* Swing Modulo Scheduling implementation.
2*e4b17023SJohn Marino Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3*e4b17023SJohn Marino Free Software Foundation, Inc.
4*e4b17023SJohn Marino Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
5*e4b17023SJohn Marino
6*e4b17023SJohn Marino This file is part of GCC.
7*e4b17023SJohn Marino
8*e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it under
9*e4b17023SJohn Marino the terms of the GNU General Public License as published by the Free
10*e4b17023SJohn Marino Software Foundation; either version 3, or (at your option) any later
11*e4b17023SJohn Marino version.
12*e4b17023SJohn Marino
13*e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14*e4b17023SJohn Marino WARRANTY; without even the implied warranty of MERCHANTABILITY or
15*e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16*e4b17023SJohn Marino for more details.
17*e4b17023SJohn Marino
18*e4b17023SJohn Marino You should have received a copy of the GNU General Public License
19*e4b17023SJohn Marino along with GCC; see the file COPYING3. If not see
20*e4b17023SJohn Marino <http://www.gnu.org/licenses/>. */
21*e4b17023SJohn Marino
22*e4b17023SJohn Marino
23*e4b17023SJohn Marino #include "config.h"
24*e4b17023SJohn Marino #include "system.h"
25*e4b17023SJohn Marino #include "coretypes.h"
26*e4b17023SJohn Marino #include "tm.h"
27*e4b17023SJohn Marino #include "diagnostic-core.h"
28*e4b17023SJohn Marino #include "rtl.h"
29*e4b17023SJohn Marino #include "tm_p.h"
30*e4b17023SJohn Marino #include "hard-reg-set.h"
31*e4b17023SJohn Marino #include "regs.h"
32*e4b17023SJohn Marino #include "function.h"
33*e4b17023SJohn Marino #include "flags.h"
34*e4b17023SJohn Marino #include "insn-config.h"
35*e4b17023SJohn Marino #include "insn-attr.h"
36*e4b17023SJohn Marino #include "except.h"
37*e4b17023SJohn Marino #include "recog.h"
38*e4b17023SJohn Marino #include "sched-int.h"
39*e4b17023SJohn Marino #include "target.h"
40*e4b17023SJohn Marino #include "cfglayout.h"
41*e4b17023SJohn Marino #include "cfgloop.h"
42*e4b17023SJohn Marino #include "cfghooks.h"
43*e4b17023SJohn Marino #include "expr.h"
44*e4b17023SJohn Marino #include "params.h"
45*e4b17023SJohn Marino #include "gcov-io.h"
46*e4b17023SJohn Marino #include "ddg.h"
47*e4b17023SJohn Marino #include "timevar.h"
48*e4b17023SJohn Marino #include "tree-pass.h"
49*e4b17023SJohn Marino #include "dbgcnt.h"
50*e4b17023SJohn Marino #include "df.h"
51*e4b17023SJohn Marino
52*e4b17023SJohn Marino #ifdef INSN_SCHEDULING
53*e4b17023SJohn Marino
54*e4b17023SJohn Marino /* This file contains the implementation of the Swing Modulo Scheduler,
55*e4b17023SJohn Marino described in the following references:
56*e4b17023SJohn Marino [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
57*e4b17023SJohn Marino Lifetime--sensitive modulo scheduling in a production environment.
58*e4b17023SJohn Marino IEEE Trans. on Comps., 50(3), March 2001
59*e4b17023SJohn Marino [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
60*e4b17023SJohn Marino Swing Modulo Scheduling: A Lifetime Sensitive Approach.
61*e4b17023SJohn Marino PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
62*e4b17023SJohn Marino
63*e4b17023SJohn Marino The basic structure is:
64*e4b17023SJohn Marino 1. Build a data-dependence graph (DDG) for each loop.
65*e4b17023SJohn Marino 2. Use the DDG to order the insns of a loop (not in topological order
66*e4b17023SJohn Marino necessarily, but rather) trying to place each insn after all its
67*e4b17023SJohn Marino predecessors _or_ after all its successors.
68*e4b17023SJohn Marino 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
69*e4b17023SJohn Marino 4. Use the ordering to perform list-scheduling of the loop:
70*e4b17023SJohn Marino 1. Set II = MII. We will try to schedule the loop within II cycles.
71*e4b17023SJohn Marino 2. Try to schedule the insns one by one according to the ordering.
72*e4b17023SJohn Marino For each insn compute an interval of cycles by considering already-
73*e4b17023SJohn Marino scheduled preds and succs (and associated latencies); try to place
74*e4b17023SJohn Marino the insn in the cycles of this window checking for potential
75*e4b17023SJohn Marino resource conflicts (using the DFA interface).
76*e4b17023SJohn Marino Note: this is different from the cycle-scheduling of schedule_insns;
77*e4b17023SJohn Marino here the insns are not scheduled monotonically top-down (nor bottom-
78*e4b17023SJohn Marino up).
79*e4b17023SJohn Marino 3. If failed in scheduling all insns - bump II++ and try again, unless
80*e4b17023SJohn Marino II reaches an upper bound MaxII, in which case report failure.
81*e4b17023SJohn Marino 5. If we succeeded in scheduling the loop within II cycles, we now
82*e4b17023SJohn Marino generate prolog and epilog, decrease the counter of the loop, and
83*e4b17023SJohn Marino perform modulo variable expansion for live ranges that span more than
84*e4b17023SJohn Marino II cycles (i.e. use register copies to prevent a def from overwriting
85*e4b17023SJohn Marino itself before reaching the use).
86*e4b17023SJohn Marino
87*e4b17023SJohn Marino SMS works with countable loops (1) whose control part can be easily
88*e4b17023SJohn Marino decoupled from the rest of the loop and (2) whose loop count can
89*e4b17023SJohn Marino be easily adjusted. This is because we peel a constant number of
90*e4b17023SJohn Marino iterations into a prologue and epilogue for which we want to avoid
91*e4b17023SJohn Marino emitting the control part, and a kernel which is to iterate that
92*e4b17023SJohn Marino constant number of iterations less than the original loop. So the
93*e4b17023SJohn Marino control part should be a set of insns clearly identified and having
94*e4b17023SJohn Marino its own iv, not otherwise used in the loop (at-least for now), which
95*e4b17023SJohn Marino initializes a register before the loop to the number of iterations.
96*e4b17023SJohn Marino Currently SMS relies on the do-loop pattern to recognize such loops,
97*e4b17023SJohn Marino where (1) the control part comprises of all insns defining and/or
98*e4b17023SJohn Marino using a certain 'count' register and (2) the loop count can be
99*e4b17023SJohn Marino adjusted by modifying this register prior to the loop.
100*e4b17023SJohn Marino TODO: Rely on cfgloop analysis instead. */
101*e4b17023SJohn Marino
102*e4b17023SJohn Marino /* This page defines partial-schedule structures and functions for
103*e4b17023SJohn Marino modulo scheduling. */
104*e4b17023SJohn Marino
105*e4b17023SJohn Marino typedef struct partial_schedule *partial_schedule_ptr;
106*e4b17023SJohn Marino typedef struct ps_insn *ps_insn_ptr;
107*e4b17023SJohn Marino
108*e4b17023SJohn Marino /* The minimum (absolute) cycle that a node of ps was scheduled in. */
109*e4b17023SJohn Marino #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
110*e4b17023SJohn Marino
111*e4b17023SJohn Marino /* The maximum (absolute) cycle that a node of ps was scheduled in. */
112*e4b17023SJohn Marino #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
113*e4b17023SJohn Marino
114*e4b17023SJohn Marino /* Perform signed modulo, always returning a non-negative value. */
115*e4b17023SJohn Marino #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
116*e4b17023SJohn Marino
117*e4b17023SJohn Marino /* The number of different iterations the nodes in ps span, assuming
118*e4b17023SJohn Marino the stage boundaries are placed efficiently. */
119*e4b17023SJohn Marino #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
120*e4b17023SJohn Marino + 1 + ii - 1) / ii)
121*e4b17023SJohn Marino /* The stage count of ps. */
122*e4b17023SJohn Marino #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
123*e4b17023SJohn Marino
124*e4b17023SJohn Marino /* A single instruction in the partial schedule. */
125*e4b17023SJohn Marino struct ps_insn
126*e4b17023SJohn Marino {
127*e4b17023SJohn Marino /* Identifies the instruction to be scheduled. Values smaller than
128*e4b17023SJohn Marino the ddg's num_nodes refer directly to ddg nodes. A value of
129*e4b17023SJohn Marino X - num_nodes refers to register move X. */
130*e4b17023SJohn Marino int id;
131*e4b17023SJohn Marino
132*e4b17023SJohn Marino /* The (absolute) cycle in which the PS instruction is scheduled.
133*e4b17023SJohn Marino Same as SCHED_TIME (node). */
134*e4b17023SJohn Marino int cycle;
135*e4b17023SJohn Marino
136*e4b17023SJohn Marino /* The next/prev PS_INSN in the same row. */
137*e4b17023SJohn Marino ps_insn_ptr next_in_row,
138*e4b17023SJohn Marino prev_in_row;
139*e4b17023SJohn Marino
140*e4b17023SJohn Marino };
141*e4b17023SJohn Marino
142*e4b17023SJohn Marino /* Information about a register move that has been added to a partial
143*e4b17023SJohn Marino schedule. */
144*e4b17023SJohn Marino struct ps_reg_move_info
145*e4b17023SJohn Marino {
146*e4b17023SJohn Marino /* The source of the move is defined by the ps_insn with id DEF.
147*e4b17023SJohn Marino The destination is used by the ps_insns with the ids in USES. */
148*e4b17023SJohn Marino int def;
149*e4b17023SJohn Marino sbitmap uses;
150*e4b17023SJohn Marino
151*e4b17023SJohn Marino /* The original form of USES' instructions used OLD_REG, but they
152*e4b17023SJohn Marino should now use NEW_REG. */
153*e4b17023SJohn Marino rtx old_reg;
154*e4b17023SJohn Marino rtx new_reg;
155*e4b17023SJohn Marino
156*e4b17023SJohn Marino /* The number of consecutive stages that the move occupies. */
157*e4b17023SJohn Marino int num_consecutive_stages;
158*e4b17023SJohn Marino
159*e4b17023SJohn Marino /* An instruction that sets NEW_REG to the correct value. The first
160*e4b17023SJohn Marino move associated with DEF will have an rhs of OLD_REG; later moves
161*e4b17023SJohn Marino use the result of the previous move. */
162*e4b17023SJohn Marino rtx insn;
163*e4b17023SJohn Marino };
164*e4b17023SJohn Marino
165*e4b17023SJohn Marino typedef struct ps_reg_move_info ps_reg_move_info;
166*e4b17023SJohn Marino DEF_VEC_O (ps_reg_move_info);
167*e4b17023SJohn Marino DEF_VEC_ALLOC_O (ps_reg_move_info, heap);
168*e4b17023SJohn Marino
169*e4b17023SJohn Marino /* Holds the partial schedule as an array of II rows. Each entry of the
170*e4b17023SJohn Marino array points to a linked list of PS_INSNs, which represents the
171*e4b17023SJohn Marino instructions that are scheduled for that row. */
172*e4b17023SJohn Marino struct partial_schedule
173*e4b17023SJohn Marino {
174*e4b17023SJohn Marino int ii; /* Number of rows in the partial schedule. */
175*e4b17023SJohn Marino int history; /* Threshold for conflict checking using DFA. */
176*e4b17023SJohn Marino
177*e4b17023SJohn Marino /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
178*e4b17023SJohn Marino ps_insn_ptr *rows;
179*e4b17023SJohn Marino
180*e4b17023SJohn Marino /* All the moves added for this partial schedule. Index X has
181*e4b17023SJohn Marino a ps_insn id of X + g->num_nodes. */
182*e4b17023SJohn Marino VEC (ps_reg_move_info, heap) *reg_moves;
183*e4b17023SJohn Marino
184*e4b17023SJohn Marino /* rows_length[i] holds the number of instructions in the row.
185*e4b17023SJohn Marino It is used only (as an optimization) to back off quickly from
186*e4b17023SJohn Marino trying to schedule a node in a full row; that is, to avoid running
187*e4b17023SJohn Marino through futile DFA state transitions. */
188*e4b17023SJohn Marino int *rows_length;
189*e4b17023SJohn Marino
190*e4b17023SJohn Marino /* The earliest absolute cycle of an insn in the partial schedule. */
191*e4b17023SJohn Marino int min_cycle;
192*e4b17023SJohn Marino
193*e4b17023SJohn Marino /* The latest absolute cycle of an insn in the partial schedule. */
194*e4b17023SJohn Marino int max_cycle;
195*e4b17023SJohn Marino
196*e4b17023SJohn Marino ddg_ptr g; /* The DDG of the insns in the partial schedule. */
197*e4b17023SJohn Marino
198*e4b17023SJohn Marino int stage_count; /* The stage count of the partial schedule. */
199*e4b17023SJohn Marino };
200*e4b17023SJohn Marino
201*e4b17023SJohn Marino
202*e4b17023SJohn Marino static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
203*e4b17023SJohn Marino static void free_partial_schedule (partial_schedule_ptr);
204*e4b17023SJohn Marino static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
205*e4b17023SJohn Marino void print_partial_schedule (partial_schedule_ptr, FILE *);
206*e4b17023SJohn Marino static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
207*e4b17023SJohn Marino static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
208*e4b17023SJohn Marino int, int, sbitmap, sbitmap);
209*e4b17023SJohn Marino static void rotate_partial_schedule (partial_schedule_ptr, int);
210*e4b17023SJohn Marino void set_row_column_for_ps (partial_schedule_ptr);
211*e4b17023SJohn Marino static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
212*e4b17023SJohn Marino static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
213*e4b17023SJohn Marino
214*e4b17023SJohn Marino
215*e4b17023SJohn Marino /* This page defines constants and structures for the modulo scheduling
216*e4b17023SJohn Marino driver. */
217*e4b17023SJohn Marino
218*e4b17023SJohn Marino static int sms_order_nodes (ddg_ptr, int, int *, int *);
219*e4b17023SJohn Marino static void set_node_sched_params (ddg_ptr);
220*e4b17023SJohn Marino static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
221*e4b17023SJohn Marino static void permute_partial_schedule (partial_schedule_ptr, rtx);
222*e4b17023SJohn Marino static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
223*e4b17023SJohn Marino rtx, rtx);
224*e4b17023SJohn Marino static int calculate_stage_count (partial_schedule_ptr, int);
225*e4b17023SJohn Marino static void calculate_must_precede_follow (ddg_node_ptr, int, int,
226*e4b17023SJohn Marino int, int, sbitmap, sbitmap, sbitmap);
227*e4b17023SJohn Marino static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
228*e4b17023SJohn Marino sbitmap, int, int *, int *, int *);
229*e4b17023SJohn Marino static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
230*e4b17023SJohn Marino sbitmap, int *, sbitmap, sbitmap);
231*e4b17023SJohn Marino static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
232*e4b17023SJohn Marino
233*e4b17023SJohn Marino #define NODE_ASAP(node) ((node)->aux.count)
234*e4b17023SJohn Marino
235*e4b17023SJohn Marino #define SCHED_PARAMS(x) VEC_index (node_sched_params, node_sched_param_vec, x)
236*e4b17023SJohn Marino #define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
237*e4b17023SJohn Marino #define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
238*e4b17023SJohn Marino #define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
239*e4b17023SJohn Marino #define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
240*e4b17023SJohn Marino
241*e4b17023SJohn Marino /* The scheduling parameters held for each node. */
242*e4b17023SJohn Marino typedef struct node_sched_params
243*e4b17023SJohn Marino {
244*e4b17023SJohn Marino int time; /* The absolute scheduling cycle. */
245*e4b17023SJohn Marino
246*e4b17023SJohn Marino int row; /* Holds time % ii. */
247*e4b17023SJohn Marino int stage; /* Holds time / ii. */
248*e4b17023SJohn Marino
249*e4b17023SJohn Marino /* The column of a node inside the ps. If nodes u, v are on the same row,
250*e4b17023SJohn Marino u will precede v if column (u) < column (v). */
251*e4b17023SJohn Marino int column;
252*e4b17023SJohn Marino } *node_sched_params_ptr;
253*e4b17023SJohn Marino
254*e4b17023SJohn Marino typedef struct node_sched_params node_sched_params;
255*e4b17023SJohn Marino DEF_VEC_O (node_sched_params);
256*e4b17023SJohn Marino DEF_VEC_ALLOC_O (node_sched_params, heap);
257*e4b17023SJohn Marino
258*e4b17023SJohn Marino /* The following three functions are copied from the current scheduler
259*e4b17023SJohn Marino code in order to use sched_analyze() for computing the dependencies.
260*e4b17023SJohn Marino They are used when initializing the sched_info structure. */
261*e4b17023SJohn Marino static const char *
sms_print_insn(const_rtx insn,int aligned ATTRIBUTE_UNUSED)262*e4b17023SJohn Marino sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED)
263*e4b17023SJohn Marino {
264*e4b17023SJohn Marino static char tmp[80];
265*e4b17023SJohn Marino
266*e4b17023SJohn Marino sprintf (tmp, "i%4d", INSN_UID (insn));
267*e4b17023SJohn Marino return tmp;
268*e4b17023SJohn Marino }
269*e4b17023SJohn Marino
270*e4b17023SJohn Marino static void
compute_jump_reg_dependencies(rtx insn ATTRIBUTE_UNUSED,regset used ATTRIBUTE_UNUSED)271*e4b17023SJohn Marino compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
272*e4b17023SJohn Marino regset used ATTRIBUTE_UNUSED)
273*e4b17023SJohn Marino {
274*e4b17023SJohn Marino }
275*e4b17023SJohn Marino
276*e4b17023SJohn Marino static struct common_sched_info_def sms_common_sched_info;
277*e4b17023SJohn Marino
278*e4b17023SJohn Marino static struct sched_deps_info_def sms_sched_deps_info =
279*e4b17023SJohn Marino {
280*e4b17023SJohn Marino compute_jump_reg_dependencies,
281*e4b17023SJohn Marino NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
282*e4b17023SJohn Marino NULL,
283*e4b17023SJohn Marino 0, 0, 0
284*e4b17023SJohn Marino };
285*e4b17023SJohn Marino
286*e4b17023SJohn Marino static struct haifa_sched_info sms_sched_info =
287*e4b17023SJohn Marino {
288*e4b17023SJohn Marino NULL,
289*e4b17023SJohn Marino NULL,
290*e4b17023SJohn Marino NULL,
291*e4b17023SJohn Marino NULL,
292*e4b17023SJohn Marino NULL,
293*e4b17023SJohn Marino sms_print_insn,
294*e4b17023SJohn Marino NULL,
295*e4b17023SJohn Marino NULL, /* insn_finishes_block_p */
296*e4b17023SJohn Marino NULL, NULL,
297*e4b17023SJohn Marino NULL, NULL,
298*e4b17023SJohn Marino 0, 0,
299*e4b17023SJohn Marino
300*e4b17023SJohn Marino NULL, NULL, NULL, NULL,
301*e4b17023SJohn Marino NULL, NULL,
302*e4b17023SJohn Marino 0
303*e4b17023SJohn Marino };
304*e4b17023SJohn Marino
305*e4b17023SJohn Marino /* Partial schedule instruction ID in PS is a register move. Return
306*e4b17023SJohn Marino information about it. */
307*e4b17023SJohn Marino static struct ps_reg_move_info *
ps_reg_move(partial_schedule_ptr ps,int id)308*e4b17023SJohn Marino ps_reg_move (partial_schedule_ptr ps, int id)
309*e4b17023SJohn Marino {
310*e4b17023SJohn Marino gcc_checking_assert (id >= ps->g->num_nodes);
311*e4b17023SJohn Marino return VEC_index (ps_reg_move_info, ps->reg_moves, id - ps->g->num_nodes);
312*e4b17023SJohn Marino }
313*e4b17023SJohn Marino
314*e4b17023SJohn Marino /* Return the rtl instruction that is being scheduled by partial schedule
315*e4b17023SJohn Marino instruction ID, which belongs to schedule PS. */
316*e4b17023SJohn Marino static rtx
ps_rtl_insn(partial_schedule_ptr ps,int id)317*e4b17023SJohn Marino ps_rtl_insn (partial_schedule_ptr ps, int id)
318*e4b17023SJohn Marino {
319*e4b17023SJohn Marino if (id < ps->g->num_nodes)
320*e4b17023SJohn Marino return ps->g->nodes[id].insn;
321*e4b17023SJohn Marino else
322*e4b17023SJohn Marino return ps_reg_move (ps, id)->insn;
323*e4b17023SJohn Marino }
324*e4b17023SJohn Marino
325*e4b17023SJohn Marino /* Partial schedule instruction ID, which belongs to PS, occured in
326*e4b17023SJohn Marino the original (unscheduled) loop. Return the first instruction
327*e4b17023SJohn Marino in the loop that was associated with ps_rtl_insn (PS, ID).
328*e4b17023SJohn Marino If the instruction had some notes before it, this is the first
329*e4b17023SJohn Marino of those notes. */
330*e4b17023SJohn Marino static rtx
ps_first_note(partial_schedule_ptr ps,int id)331*e4b17023SJohn Marino ps_first_note (partial_schedule_ptr ps, int id)
332*e4b17023SJohn Marino {
333*e4b17023SJohn Marino gcc_assert (id < ps->g->num_nodes);
334*e4b17023SJohn Marino return ps->g->nodes[id].first_note;
335*e4b17023SJohn Marino }
336*e4b17023SJohn Marino
337*e4b17023SJohn Marino /* Return the number of consecutive stages that are occupied by
338*e4b17023SJohn Marino partial schedule instruction ID in PS. */
339*e4b17023SJohn Marino static int
ps_num_consecutive_stages(partial_schedule_ptr ps,int id)340*e4b17023SJohn Marino ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
341*e4b17023SJohn Marino {
342*e4b17023SJohn Marino if (id < ps->g->num_nodes)
343*e4b17023SJohn Marino return 1;
344*e4b17023SJohn Marino else
345*e4b17023SJohn Marino return ps_reg_move (ps, id)->num_consecutive_stages;
346*e4b17023SJohn Marino }
347*e4b17023SJohn Marino
348*e4b17023SJohn Marino /* Given HEAD and TAIL which are the first and last insns in a loop;
349*e4b17023SJohn Marino return the register which controls the loop. Return zero if it has
350*e4b17023SJohn Marino more than one occurrence in the loop besides the control part or the
351*e4b17023SJohn Marino do-loop pattern is not of the form we expect. */
352*e4b17023SJohn Marino static rtx
doloop_register_get(rtx head ATTRIBUTE_UNUSED,rtx tail ATTRIBUTE_UNUSED)353*e4b17023SJohn Marino doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
354*e4b17023SJohn Marino {
355*e4b17023SJohn Marino #ifdef HAVE_doloop_end
356*e4b17023SJohn Marino rtx reg, condition, insn, first_insn_not_to_check;
357*e4b17023SJohn Marino
358*e4b17023SJohn Marino if (!JUMP_P (tail))
359*e4b17023SJohn Marino return NULL_RTX;
360*e4b17023SJohn Marino
361*e4b17023SJohn Marino /* TODO: Free SMS's dependence on doloop_condition_get. */
362*e4b17023SJohn Marino condition = doloop_condition_get (tail);
363*e4b17023SJohn Marino if (! condition)
364*e4b17023SJohn Marino return NULL_RTX;
365*e4b17023SJohn Marino
366*e4b17023SJohn Marino if (REG_P (XEXP (condition, 0)))
367*e4b17023SJohn Marino reg = XEXP (condition, 0);
368*e4b17023SJohn Marino else if (GET_CODE (XEXP (condition, 0)) == PLUS
369*e4b17023SJohn Marino && REG_P (XEXP (XEXP (condition, 0), 0)))
370*e4b17023SJohn Marino reg = XEXP (XEXP (condition, 0), 0);
371*e4b17023SJohn Marino else
372*e4b17023SJohn Marino gcc_unreachable ();
373*e4b17023SJohn Marino
374*e4b17023SJohn Marino /* Check that the COUNT_REG has no other occurrences in the loop
375*e4b17023SJohn Marino until the decrement. We assume the control part consists of
376*e4b17023SJohn Marino either a single (parallel) branch-on-count or a (non-parallel)
377*e4b17023SJohn Marino branch immediately preceded by a single (decrement) insn. */
378*e4b17023SJohn Marino first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
379*e4b17023SJohn Marino : prev_nondebug_insn (tail));
380*e4b17023SJohn Marino
381*e4b17023SJohn Marino for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
382*e4b17023SJohn Marino if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
383*e4b17023SJohn Marino {
384*e4b17023SJohn Marino if (dump_file)
385*e4b17023SJohn Marino {
386*e4b17023SJohn Marino fprintf (dump_file, "SMS count_reg found ");
387*e4b17023SJohn Marino print_rtl_single (dump_file, reg);
388*e4b17023SJohn Marino fprintf (dump_file, " outside control in insn:\n");
389*e4b17023SJohn Marino print_rtl_single (dump_file, insn);
390*e4b17023SJohn Marino }
391*e4b17023SJohn Marino
392*e4b17023SJohn Marino return NULL_RTX;
393*e4b17023SJohn Marino }
394*e4b17023SJohn Marino
395*e4b17023SJohn Marino return reg;
396*e4b17023SJohn Marino #else
397*e4b17023SJohn Marino return NULL_RTX;
398*e4b17023SJohn Marino #endif
399*e4b17023SJohn Marino }
400*e4b17023SJohn Marino
401*e4b17023SJohn Marino /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
402*e4b17023SJohn Marino that the number of iterations is a compile-time constant. If so,
403*e4b17023SJohn Marino return the rtx that sets COUNT_REG to a constant, and set COUNT to
404*e4b17023SJohn Marino this constant. Otherwise return 0. */
405*e4b17023SJohn Marino static rtx
const_iteration_count(rtx count_reg,basic_block pre_header,HOST_WIDEST_INT * count)406*e4b17023SJohn Marino const_iteration_count (rtx count_reg, basic_block pre_header,
407*e4b17023SJohn Marino HOST_WIDEST_INT * count)
408*e4b17023SJohn Marino {
409*e4b17023SJohn Marino rtx insn;
410*e4b17023SJohn Marino rtx head, tail;
411*e4b17023SJohn Marino
412*e4b17023SJohn Marino if (! pre_header)
413*e4b17023SJohn Marino return NULL_RTX;
414*e4b17023SJohn Marino
415*e4b17023SJohn Marino get_ebb_head_tail (pre_header, pre_header, &head, &tail);
416*e4b17023SJohn Marino
417*e4b17023SJohn Marino for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
418*e4b17023SJohn Marino if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
419*e4b17023SJohn Marino rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
420*e4b17023SJohn Marino {
421*e4b17023SJohn Marino rtx pat = single_set (insn);
422*e4b17023SJohn Marino
423*e4b17023SJohn Marino if (CONST_INT_P (SET_SRC (pat)))
424*e4b17023SJohn Marino {
425*e4b17023SJohn Marino *count = INTVAL (SET_SRC (pat));
426*e4b17023SJohn Marino return insn;
427*e4b17023SJohn Marino }
428*e4b17023SJohn Marino
429*e4b17023SJohn Marino return NULL_RTX;
430*e4b17023SJohn Marino }
431*e4b17023SJohn Marino
432*e4b17023SJohn Marino return NULL_RTX;
433*e4b17023SJohn Marino }
434*e4b17023SJohn Marino
435*e4b17023SJohn Marino /* A very simple resource-based lower bound on the initiation interval.
436*e4b17023SJohn Marino ??? Improve the accuracy of this bound by considering the
437*e4b17023SJohn Marino utilization of various units. */
438*e4b17023SJohn Marino static int
res_MII(ddg_ptr g)439*e4b17023SJohn Marino res_MII (ddg_ptr g)
440*e4b17023SJohn Marino {
441*e4b17023SJohn Marino if (targetm.sched.sms_res_mii)
442*e4b17023SJohn Marino return targetm.sched.sms_res_mii (g);
443*e4b17023SJohn Marino
444*e4b17023SJohn Marino return ((g->num_nodes - g->num_debug) / issue_rate);
445*e4b17023SJohn Marino }
446*e4b17023SJohn Marino
447*e4b17023SJohn Marino
448*e4b17023SJohn Marino /* A vector that contains the sched data for each ps_insn. */
VEC(node_sched_params,heap)449*e4b17023SJohn Marino static VEC (node_sched_params, heap) *node_sched_param_vec;
450*e4b17023SJohn Marino
451*e4b17023SJohn Marino /* Allocate sched_params for each node and initialize it. */
452*e4b17023SJohn Marino static void
453*e4b17023SJohn Marino set_node_sched_params (ddg_ptr g)
454*e4b17023SJohn Marino {
455*e4b17023SJohn Marino VEC_truncate (node_sched_params, node_sched_param_vec, 0);
456*e4b17023SJohn Marino VEC_safe_grow_cleared (node_sched_params, heap,
457*e4b17023SJohn Marino node_sched_param_vec, g->num_nodes);
458*e4b17023SJohn Marino }
459*e4b17023SJohn Marino
460*e4b17023SJohn Marino /* Make sure that node_sched_param_vec has an entry for every move in PS. */
461*e4b17023SJohn Marino static void
extend_node_sched_params(partial_schedule_ptr ps)462*e4b17023SJohn Marino extend_node_sched_params (partial_schedule_ptr ps)
463*e4b17023SJohn Marino {
464*e4b17023SJohn Marino VEC_safe_grow_cleared (node_sched_params, heap, node_sched_param_vec,
465*e4b17023SJohn Marino ps->g->num_nodes + VEC_length (ps_reg_move_info,
466*e4b17023SJohn Marino ps->reg_moves));
467*e4b17023SJohn Marino }
468*e4b17023SJohn Marino
469*e4b17023SJohn Marino /* Update the sched_params (time, row and stage) for node U using the II,
470*e4b17023SJohn Marino the CYCLE of U and MIN_CYCLE.
471*e4b17023SJohn Marino We're not simply taking the following
472*e4b17023SJohn Marino SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
473*e4b17023SJohn Marino because the stages may not be aligned on cycle 0. */
474*e4b17023SJohn Marino static void
update_node_sched_params(int u,int ii,int cycle,int min_cycle)475*e4b17023SJohn Marino update_node_sched_params (int u, int ii, int cycle, int min_cycle)
476*e4b17023SJohn Marino {
477*e4b17023SJohn Marino int sc_until_cycle_zero;
478*e4b17023SJohn Marino int stage;
479*e4b17023SJohn Marino
480*e4b17023SJohn Marino SCHED_TIME (u) = cycle;
481*e4b17023SJohn Marino SCHED_ROW (u) = SMODULO (cycle, ii);
482*e4b17023SJohn Marino
483*e4b17023SJohn Marino /* The calculation of stage count is done adding the number
484*e4b17023SJohn Marino of stages before cycle zero and after cycle zero. */
485*e4b17023SJohn Marino sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
486*e4b17023SJohn Marino
487*e4b17023SJohn Marino if (SCHED_TIME (u) < 0)
488*e4b17023SJohn Marino {
489*e4b17023SJohn Marino stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
490*e4b17023SJohn Marino SCHED_STAGE (u) = sc_until_cycle_zero - stage;
491*e4b17023SJohn Marino }
492*e4b17023SJohn Marino else
493*e4b17023SJohn Marino {
494*e4b17023SJohn Marino stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
495*e4b17023SJohn Marino SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
496*e4b17023SJohn Marino }
497*e4b17023SJohn Marino }
498*e4b17023SJohn Marino
499*e4b17023SJohn Marino static void
print_node_sched_params(FILE * file,int num_nodes,partial_schedule_ptr ps)500*e4b17023SJohn Marino print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
501*e4b17023SJohn Marino {
502*e4b17023SJohn Marino int i;
503*e4b17023SJohn Marino
504*e4b17023SJohn Marino if (! file)
505*e4b17023SJohn Marino return;
506*e4b17023SJohn Marino for (i = 0; i < num_nodes; i++)
507*e4b17023SJohn Marino {
508*e4b17023SJohn Marino node_sched_params_ptr nsp = SCHED_PARAMS (i);
509*e4b17023SJohn Marino
510*e4b17023SJohn Marino fprintf (file, "Node = %d; INSN = %d\n", i,
511*e4b17023SJohn Marino INSN_UID (ps_rtl_insn (ps, i)));
512*e4b17023SJohn Marino fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
513*e4b17023SJohn Marino fprintf (file, " time = %d:\n", nsp->time);
514*e4b17023SJohn Marino fprintf (file, " stage = %d:\n", nsp->stage);
515*e4b17023SJohn Marino }
516*e4b17023SJohn Marino }
517*e4b17023SJohn Marino
518*e4b17023SJohn Marino /* Set SCHED_COLUMN for each instruction in row ROW of PS. */
519*e4b17023SJohn Marino static void
set_columns_for_row(partial_schedule_ptr ps,int row)520*e4b17023SJohn Marino set_columns_for_row (partial_schedule_ptr ps, int row)
521*e4b17023SJohn Marino {
522*e4b17023SJohn Marino ps_insn_ptr cur_insn;
523*e4b17023SJohn Marino int column;
524*e4b17023SJohn Marino
525*e4b17023SJohn Marino column = 0;
526*e4b17023SJohn Marino for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
527*e4b17023SJohn Marino SCHED_COLUMN (cur_insn->id) = column++;
528*e4b17023SJohn Marino }
529*e4b17023SJohn Marino
530*e4b17023SJohn Marino /* Set SCHED_COLUMN for each instruction in PS. */
531*e4b17023SJohn Marino static void
set_columns_for_ps(partial_schedule_ptr ps)532*e4b17023SJohn Marino set_columns_for_ps (partial_schedule_ptr ps)
533*e4b17023SJohn Marino {
534*e4b17023SJohn Marino int row;
535*e4b17023SJohn Marino
536*e4b17023SJohn Marino for (row = 0; row < ps->ii; row++)
537*e4b17023SJohn Marino set_columns_for_row (ps, row);
538*e4b17023SJohn Marino }
539*e4b17023SJohn Marino
540*e4b17023SJohn Marino /* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
541*e4b17023SJohn Marino Its single predecessor has already been scheduled, as has its
542*e4b17023SJohn Marino ddg node successors. (The move may have also another move as its
543*e4b17023SJohn Marino successor, in which case that successor will be scheduled later.)
544*e4b17023SJohn Marino
545*e4b17023SJohn Marino The move is part of a chain that satisfies register dependencies
546*e4b17023SJohn Marino between a producing ddg node and various consuming ddg nodes.
547*e4b17023SJohn Marino If some of these dependencies have a distance of 1 (meaning that
548*e4b17023SJohn Marino the use is upward-exposed) then DISTANCE1_USES is nonnull and
549*e4b17023SJohn Marino contains the set of uses with distance-1 dependencies.
550*e4b17023SJohn Marino DISTANCE1_USES is null otherwise.
551*e4b17023SJohn Marino
552*e4b17023SJohn Marino MUST_FOLLOW is a scratch bitmap that is big enough to hold
553*e4b17023SJohn Marino all current ps_insn ids.
554*e4b17023SJohn Marino
555*e4b17023SJohn Marino Return true on success. */
556*e4b17023SJohn Marino static bool
schedule_reg_move(partial_schedule_ptr ps,int i_reg_move,sbitmap distance1_uses,sbitmap must_follow)557*e4b17023SJohn Marino schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
558*e4b17023SJohn Marino sbitmap distance1_uses, sbitmap must_follow)
559*e4b17023SJohn Marino {
560*e4b17023SJohn Marino unsigned int u;
561*e4b17023SJohn Marino int this_time, this_distance, this_start, this_end, this_latency;
562*e4b17023SJohn Marino int start, end, c, ii;
563*e4b17023SJohn Marino sbitmap_iterator sbi;
564*e4b17023SJohn Marino ps_reg_move_info *move;
565*e4b17023SJohn Marino rtx this_insn;
566*e4b17023SJohn Marino ps_insn_ptr psi;
567*e4b17023SJohn Marino
568*e4b17023SJohn Marino move = ps_reg_move (ps, i_reg_move);
569*e4b17023SJohn Marino ii = ps->ii;
570*e4b17023SJohn Marino if (dump_file)
571*e4b17023SJohn Marino {
572*e4b17023SJohn Marino fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
573*e4b17023SJohn Marino ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
574*e4b17023SJohn Marino PS_MIN_CYCLE (ps));
575*e4b17023SJohn Marino print_rtl_single (dump_file, move->insn);
576*e4b17023SJohn Marino fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
577*e4b17023SJohn Marino fprintf (dump_file, "=========== =========== =====\n");
578*e4b17023SJohn Marino }
579*e4b17023SJohn Marino
580*e4b17023SJohn Marino start = INT_MIN;
581*e4b17023SJohn Marino end = INT_MAX;
582*e4b17023SJohn Marino
583*e4b17023SJohn Marino /* For dependencies of distance 1 between a producer ddg node A
584*e4b17023SJohn Marino and consumer ddg node B, we have a chain of dependencies:
585*e4b17023SJohn Marino
586*e4b17023SJohn Marino A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
587*e4b17023SJohn Marino
588*e4b17023SJohn Marino where Mi is the ith move. For dependencies of distance 0 between
589*e4b17023SJohn Marino a producer ddg node A and consumer ddg node C, we have a chain of
590*e4b17023SJohn Marino dependencies:
591*e4b17023SJohn Marino
592*e4b17023SJohn Marino A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
593*e4b17023SJohn Marino
594*e4b17023SJohn Marino where Mi' occupies the same position as Mi but occurs a stage later.
595*e4b17023SJohn Marino We can only schedule each move once, so if we have both types of
596*e4b17023SJohn Marino chain, we model the second as:
597*e4b17023SJohn Marino
598*e4b17023SJohn Marino A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
599*e4b17023SJohn Marino
600*e4b17023SJohn Marino First handle the dependencies between the previously-scheduled
601*e4b17023SJohn Marino predecessor and the move. */
602*e4b17023SJohn Marino this_insn = ps_rtl_insn (ps, move->def);
603*e4b17023SJohn Marino this_latency = insn_latency (this_insn, move->insn);
604*e4b17023SJohn Marino this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
605*e4b17023SJohn Marino this_time = SCHED_TIME (move->def) - this_distance * ii;
606*e4b17023SJohn Marino this_start = this_time + this_latency;
607*e4b17023SJohn Marino this_end = this_time + ii;
608*e4b17023SJohn Marino if (dump_file)
609*e4b17023SJohn Marino fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
610*e4b17023SJohn Marino this_start, this_end, SCHED_TIME (move->def),
611*e4b17023SJohn Marino INSN_UID (this_insn), this_latency, this_distance,
612*e4b17023SJohn Marino INSN_UID (move->insn));
613*e4b17023SJohn Marino
614*e4b17023SJohn Marino if (start < this_start)
615*e4b17023SJohn Marino start = this_start;
616*e4b17023SJohn Marino if (end > this_end)
617*e4b17023SJohn Marino end = this_end;
618*e4b17023SJohn Marino
619*e4b17023SJohn Marino /* Handle the dependencies between the move and previously-scheduled
620*e4b17023SJohn Marino successors. */
621*e4b17023SJohn Marino EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, u, sbi)
622*e4b17023SJohn Marino {
623*e4b17023SJohn Marino this_insn = ps_rtl_insn (ps, u);
624*e4b17023SJohn Marino this_latency = insn_latency (move->insn, this_insn);
625*e4b17023SJohn Marino if (distance1_uses && !TEST_BIT (distance1_uses, u))
626*e4b17023SJohn Marino this_distance = -1;
627*e4b17023SJohn Marino else
628*e4b17023SJohn Marino this_distance = 0;
629*e4b17023SJohn Marino this_time = SCHED_TIME (u) + this_distance * ii;
630*e4b17023SJohn Marino this_start = this_time - ii;
631*e4b17023SJohn Marino this_end = this_time - this_latency;
632*e4b17023SJohn Marino if (dump_file)
633*e4b17023SJohn Marino fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
634*e4b17023SJohn Marino this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
635*e4b17023SJohn Marino this_latency, this_distance, INSN_UID (this_insn));
636*e4b17023SJohn Marino
637*e4b17023SJohn Marino if (start < this_start)
638*e4b17023SJohn Marino start = this_start;
639*e4b17023SJohn Marino if (end > this_end)
640*e4b17023SJohn Marino end = this_end;
641*e4b17023SJohn Marino }
642*e4b17023SJohn Marino
643*e4b17023SJohn Marino if (dump_file)
644*e4b17023SJohn Marino {
645*e4b17023SJohn Marino fprintf (dump_file, "----------- ----------- -----\n");
646*e4b17023SJohn Marino fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
647*e4b17023SJohn Marino }
648*e4b17023SJohn Marino
649*e4b17023SJohn Marino sbitmap_zero (must_follow);
650*e4b17023SJohn Marino SET_BIT (must_follow, move->def);
651*e4b17023SJohn Marino
652*e4b17023SJohn Marino start = MAX (start, end - (ii - 1));
653*e4b17023SJohn Marino for (c = end; c >= start; c--)
654*e4b17023SJohn Marino {
655*e4b17023SJohn Marino psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
656*e4b17023SJohn Marino move->uses, must_follow);
657*e4b17023SJohn Marino if (psi)
658*e4b17023SJohn Marino {
659*e4b17023SJohn Marino update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
660*e4b17023SJohn Marino if (dump_file)
661*e4b17023SJohn Marino fprintf (dump_file, "\nScheduled register move INSN %d at"
662*e4b17023SJohn Marino " time %d, row %d\n\n", INSN_UID (move->insn), c,
663*e4b17023SJohn Marino SCHED_ROW (i_reg_move));
664*e4b17023SJohn Marino return true;
665*e4b17023SJohn Marino }
666*e4b17023SJohn Marino }
667*e4b17023SJohn Marino
668*e4b17023SJohn Marino if (dump_file)
669*e4b17023SJohn Marino fprintf (dump_file, "\nNo available slot\n\n");
670*e4b17023SJohn Marino
671*e4b17023SJohn Marino return false;
672*e4b17023SJohn Marino }
673*e4b17023SJohn Marino
674*e4b17023SJohn Marino /*
675*e4b17023SJohn Marino Breaking intra-loop register anti-dependences:
676*e4b17023SJohn Marino Each intra-loop register anti-dependence implies a cross-iteration true
677*e4b17023SJohn Marino dependence of distance 1. Therefore, we can remove such false dependencies
678*e4b17023SJohn Marino and figure out if the partial schedule broke them by checking if (for a
679*e4b17023SJohn Marino true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
680*e4b17023SJohn Marino if so generate a register move. The number of such moves is equal to:
681*e4b17023SJohn Marino SCHED_TIME (use) - SCHED_TIME (def) { 0 broken
682*e4b17023SJohn Marino nreg_moves = ----------------------------------- + 1 - { dependence.
683*e4b17023SJohn Marino ii { 1 if not.
684*e4b17023SJohn Marino */
685*e4b17023SJohn Marino static bool
schedule_reg_moves(partial_schedule_ptr ps)686*e4b17023SJohn Marino schedule_reg_moves (partial_schedule_ptr ps)
687*e4b17023SJohn Marino {
688*e4b17023SJohn Marino ddg_ptr g = ps->g;
689*e4b17023SJohn Marino int ii = ps->ii;
690*e4b17023SJohn Marino int i;
691*e4b17023SJohn Marino
692*e4b17023SJohn Marino for (i = 0; i < g->num_nodes; i++)
693*e4b17023SJohn Marino {
694*e4b17023SJohn Marino ddg_node_ptr u = &g->nodes[i];
695*e4b17023SJohn Marino ddg_edge_ptr e;
696*e4b17023SJohn Marino int nreg_moves = 0, i_reg_move;
697*e4b17023SJohn Marino rtx prev_reg, old_reg;
698*e4b17023SJohn Marino int first_move;
699*e4b17023SJohn Marino int distances[2];
700*e4b17023SJohn Marino sbitmap must_follow;
701*e4b17023SJohn Marino sbitmap distance1_uses;
702*e4b17023SJohn Marino rtx set = single_set (u->insn);
703*e4b17023SJohn Marino
704*e4b17023SJohn Marino /* Skip instructions that do not set a register. */
705*e4b17023SJohn Marino if ((set && !REG_P (SET_DEST (set))))
706*e4b17023SJohn Marino continue;
707*e4b17023SJohn Marino
708*e4b17023SJohn Marino /* Compute the number of reg_moves needed for u, by looking at life
709*e4b17023SJohn Marino ranges started at u (excluding self-loops). */
710*e4b17023SJohn Marino distances[0] = distances[1] = false;
711*e4b17023SJohn Marino for (e = u->out; e; e = e->next_out)
712*e4b17023SJohn Marino if (e->type == TRUE_DEP && e->dest != e->src)
713*e4b17023SJohn Marino {
714*e4b17023SJohn Marino int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
715*e4b17023SJohn Marino - SCHED_TIME (e->src->cuid)) / ii;
716*e4b17023SJohn Marino
717*e4b17023SJohn Marino if (e->distance == 1)
718*e4b17023SJohn Marino nreg_moves4e = (SCHED_TIME (e->dest->cuid)
719*e4b17023SJohn Marino - SCHED_TIME (e->src->cuid) + ii) / ii;
720*e4b17023SJohn Marino
721*e4b17023SJohn Marino /* If dest precedes src in the schedule of the kernel, then dest
722*e4b17023SJohn Marino will read before src writes and we can save one reg_copy. */
723*e4b17023SJohn Marino if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
724*e4b17023SJohn Marino && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
725*e4b17023SJohn Marino nreg_moves4e--;
726*e4b17023SJohn Marino
727*e4b17023SJohn Marino if (nreg_moves4e >= 1)
728*e4b17023SJohn Marino {
729*e4b17023SJohn Marino /* !single_set instructions are not supported yet and
730*e4b17023SJohn Marino thus we do not except to encounter them in the loop
731*e4b17023SJohn Marino except from the doloop part. For the latter case
732*e4b17023SJohn Marino we assume no regmoves are generated as the doloop
733*e4b17023SJohn Marino instructions are tied to the branch with an edge. */
734*e4b17023SJohn Marino gcc_assert (set);
735*e4b17023SJohn Marino /* If the instruction contains auto-inc register then
736*e4b17023SJohn Marino validate that the regmov is being generated for the
737*e4b17023SJohn Marino target regsiter rather then the inc'ed register. */
738*e4b17023SJohn Marino gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
739*e4b17023SJohn Marino }
740*e4b17023SJohn Marino
741*e4b17023SJohn Marino if (nreg_moves4e)
742*e4b17023SJohn Marino {
743*e4b17023SJohn Marino gcc_assert (e->distance < 2);
744*e4b17023SJohn Marino distances[e->distance] = true;
745*e4b17023SJohn Marino }
746*e4b17023SJohn Marino nreg_moves = MAX (nreg_moves, nreg_moves4e);
747*e4b17023SJohn Marino }
748*e4b17023SJohn Marino
749*e4b17023SJohn Marino if (nreg_moves == 0)
750*e4b17023SJohn Marino continue;
751*e4b17023SJohn Marino
752*e4b17023SJohn Marino /* Create NREG_MOVES register moves. */
753*e4b17023SJohn Marino first_move = VEC_length (ps_reg_move_info, ps->reg_moves);
754*e4b17023SJohn Marino VEC_safe_grow_cleared (ps_reg_move_info, heap, ps->reg_moves,
755*e4b17023SJohn Marino first_move + nreg_moves);
756*e4b17023SJohn Marino extend_node_sched_params (ps);
757*e4b17023SJohn Marino
758*e4b17023SJohn Marino /* Record the moves associated with this node. */
759*e4b17023SJohn Marino first_move += ps->g->num_nodes;
760*e4b17023SJohn Marino
761*e4b17023SJohn Marino /* Generate each move. */
762*e4b17023SJohn Marino old_reg = prev_reg = SET_DEST (single_set (u->insn));
763*e4b17023SJohn Marino for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
764*e4b17023SJohn Marino {
765*e4b17023SJohn Marino ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
766*e4b17023SJohn Marino
767*e4b17023SJohn Marino move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
768*e4b17023SJohn Marino move->uses = sbitmap_alloc (first_move + nreg_moves);
769*e4b17023SJohn Marino move->old_reg = old_reg;
770*e4b17023SJohn Marino move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
771*e4b17023SJohn Marino move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
772*e4b17023SJohn Marino move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg));
773*e4b17023SJohn Marino sbitmap_zero (move->uses);
774*e4b17023SJohn Marino
775*e4b17023SJohn Marino prev_reg = move->new_reg;
776*e4b17023SJohn Marino }
777*e4b17023SJohn Marino
778*e4b17023SJohn Marino distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
779*e4b17023SJohn Marino
780*e4b17023SJohn Marino /* Every use of the register defined by node may require a different
781*e4b17023SJohn Marino copy of this register, depending on the time the use is scheduled.
782*e4b17023SJohn Marino Record which uses require which move results. */
783*e4b17023SJohn Marino for (e = u->out; e; e = e->next_out)
784*e4b17023SJohn Marino if (e->type == TRUE_DEP && e->dest != e->src)
785*e4b17023SJohn Marino {
786*e4b17023SJohn Marino int dest_copy = (SCHED_TIME (e->dest->cuid)
787*e4b17023SJohn Marino - SCHED_TIME (e->src->cuid)) / ii;
788*e4b17023SJohn Marino
789*e4b17023SJohn Marino if (e->distance == 1)
790*e4b17023SJohn Marino dest_copy = (SCHED_TIME (e->dest->cuid)
791*e4b17023SJohn Marino - SCHED_TIME (e->src->cuid) + ii) / ii;
792*e4b17023SJohn Marino
793*e4b17023SJohn Marino if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
794*e4b17023SJohn Marino && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
795*e4b17023SJohn Marino dest_copy--;
796*e4b17023SJohn Marino
797*e4b17023SJohn Marino if (dest_copy)
798*e4b17023SJohn Marino {
799*e4b17023SJohn Marino ps_reg_move_info *move;
800*e4b17023SJohn Marino
801*e4b17023SJohn Marino move = ps_reg_move (ps, first_move + dest_copy - 1);
802*e4b17023SJohn Marino SET_BIT (move->uses, e->dest->cuid);
803*e4b17023SJohn Marino if (e->distance == 1)
804*e4b17023SJohn Marino SET_BIT (distance1_uses, e->dest->cuid);
805*e4b17023SJohn Marino }
806*e4b17023SJohn Marino }
807*e4b17023SJohn Marino
808*e4b17023SJohn Marino must_follow = sbitmap_alloc (first_move + nreg_moves);
809*e4b17023SJohn Marino for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
810*e4b17023SJohn Marino if (!schedule_reg_move (ps, first_move + i_reg_move,
811*e4b17023SJohn Marino distance1_uses, must_follow))
812*e4b17023SJohn Marino break;
813*e4b17023SJohn Marino sbitmap_free (must_follow);
814*e4b17023SJohn Marino if (distance1_uses)
815*e4b17023SJohn Marino sbitmap_free (distance1_uses);
816*e4b17023SJohn Marino if (i_reg_move < nreg_moves)
817*e4b17023SJohn Marino return false;
818*e4b17023SJohn Marino }
819*e4b17023SJohn Marino return true;
820*e4b17023SJohn Marino }
821*e4b17023SJohn Marino
822*e4b17023SJohn Marino /* Emit the moves associatied with PS. Apply the substitutions
823*e4b17023SJohn Marino associated with them. */
824*e4b17023SJohn Marino static void
apply_reg_moves(partial_schedule_ptr ps)825*e4b17023SJohn Marino apply_reg_moves (partial_schedule_ptr ps)
826*e4b17023SJohn Marino {
827*e4b17023SJohn Marino ps_reg_move_info *move;
828*e4b17023SJohn Marino int i;
829*e4b17023SJohn Marino
830*e4b17023SJohn Marino FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move)
831*e4b17023SJohn Marino {
832*e4b17023SJohn Marino unsigned int i_use;
833*e4b17023SJohn Marino sbitmap_iterator sbi;
834*e4b17023SJohn Marino
835*e4b17023SJohn Marino EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, i_use, sbi)
836*e4b17023SJohn Marino {
837*e4b17023SJohn Marino replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
838*e4b17023SJohn Marino df_insn_rescan (ps->g->nodes[i_use].insn);
839*e4b17023SJohn Marino }
840*e4b17023SJohn Marino }
841*e4b17023SJohn Marino }
842*e4b17023SJohn Marino
843*e4b17023SJohn Marino /* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
844*e4b17023SJohn Marino SCHED_ROW and SCHED_STAGE. Instruction scheduled on cycle AMOUNT
845*e4b17023SJohn Marino will move to cycle zero. */
846*e4b17023SJohn Marino static void
reset_sched_times(partial_schedule_ptr ps,int amount)847*e4b17023SJohn Marino reset_sched_times (partial_schedule_ptr ps, int amount)
848*e4b17023SJohn Marino {
849*e4b17023SJohn Marino int row;
850*e4b17023SJohn Marino int ii = ps->ii;
851*e4b17023SJohn Marino ps_insn_ptr crr_insn;
852*e4b17023SJohn Marino
853*e4b17023SJohn Marino for (row = 0; row < ii; row++)
854*e4b17023SJohn Marino for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
855*e4b17023SJohn Marino {
856*e4b17023SJohn Marino int u = crr_insn->id;
857*e4b17023SJohn Marino int normalized_time = SCHED_TIME (u) - amount;
858*e4b17023SJohn Marino int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
859*e4b17023SJohn Marino
860*e4b17023SJohn Marino if (dump_file)
861*e4b17023SJohn Marino {
862*e4b17023SJohn Marino /* Print the scheduling times after the rotation. */
863*e4b17023SJohn Marino rtx insn = ps_rtl_insn (ps, u);
864*e4b17023SJohn Marino
865*e4b17023SJohn Marino fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
866*e4b17023SJohn Marino "crr_insn->cycle=%d, min_cycle=%d", u,
867*e4b17023SJohn Marino INSN_UID (insn), normalized_time, new_min_cycle);
868*e4b17023SJohn Marino if (JUMP_P (insn))
869*e4b17023SJohn Marino fprintf (dump_file, " (branch)");
870*e4b17023SJohn Marino fprintf (dump_file, "\n");
871*e4b17023SJohn Marino }
872*e4b17023SJohn Marino
873*e4b17023SJohn Marino gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
874*e4b17023SJohn Marino gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
875*e4b17023SJohn Marino
876*e4b17023SJohn Marino crr_insn->cycle = normalized_time;
877*e4b17023SJohn Marino update_node_sched_params (u, ii, normalized_time, new_min_cycle);
878*e4b17023SJohn Marino }
879*e4b17023SJohn Marino }
880*e4b17023SJohn Marino
881*e4b17023SJohn Marino /* Permute the insns according to their order in PS, from row 0 to
882*e4b17023SJohn Marino row ii-1, and position them right before LAST. This schedules
883*e4b17023SJohn Marino the insns of the loop kernel. */
884*e4b17023SJohn Marino static void
permute_partial_schedule(partial_schedule_ptr ps,rtx last)885*e4b17023SJohn Marino permute_partial_schedule (partial_schedule_ptr ps, rtx last)
886*e4b17023SJohn Marino {
887*e4b17023SJohn Marino int ii = ps->ii;
888*e4b17023SJohn Marino int row;
889*e4b17023SJohn Marino ps_insn_ptr ps_ij;
890*e4b17023SJohn Marino
891*e4b17023SJohn Marino for (row = 0; row < ii ; row++)
892*e4b17023SJohn Marino for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
893*e4b17023SJohn Marino {
894*e4b17023SJohn Marino rtx insn = ps_rtl_insn (ps, ps_ij->id);
895*e4b17023SJohn Marino
896*e4b17023SJohn Marino if (PREV_INSN (last) != insn)
897*e4b17023SJohn Marino {
898*e4b17023SJohn Marino if (ps_ij->id < ps->g->num_nodes)
899*e4b17023SJohn Marino reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
900*e4b17023SJohn Marino PREV_INSN (last));
901*e4b17023SJohn Marino else
902*e4b17023SJohn Marino add_insn_before (insn, last, NULL);
903*e4b17023SJohn Marino }
904*e4b17023SJohn Marino }
905*e4b17023SJohn Marino }
906*e4b17023SJohn Marino
907*e4b17023SJohn Marino /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
908*e4b17023SJohn Marino respectively only if cycle C falls on the border of the scheduling
909*e4b17023SJohn Marino window boundaries marked by START and END cycles. STEP is the
910*e4b17023SJohn Marino direction of the window. */
911*e4b17023SJohn Marino static inline void
set_must_precede_follow(sbitmap * tmp_follow,sbitmap must_follow,sbitmap * tmp_precede,sbitmap must_precede,int c,int start,int end,int step)912*e4b17023SJohn Marino set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
913*e4b17023SJohn Marino sbitmap *tmp_precede, sbitmap must_precede, int c,
914*e4b17023SJohn Marino int start, int end, int step)
915*e4b17023SJohn Marino {
916*e4b17023SJohn Marino *tmp_precede = NULL;
917*e4b17023SJohn Marino *tmp_follow = NULL;
918*e4b17023SJohn Marino
919*e4b17023SJohn Marino if (c == start)
920*e4b17023SJohn Marino {
921*e4b17023SJohn Marino if (step == 1)
922*e4b17023SJohn Marino *tmp_precede = must_precede;
923*e4b17023SJohn Marino else /* step == -1. */
924*e4b17023SJohn Marino *tmp_follow = must_follow;
925*e4b17023SJohn Marino }
926*e4b17023SJohn Marino if (c == end - step)
927*e4b17023SJohn Marino {
928*e4b17023SJohn Marino if (step == 1)
929*e4b17023SJohn Marino *tmp_follow = must_follow;
930*e4b17023SJohn Marino else /* step == -1. */
931*e4b17023SJohn Marino *tmp_precede = must_precede;
932*e4b17023SJohn Marino }
933*e4b17023SJohn Marino
934*e4b17023SJohn Marino }
935*e4b17023SJohn Marino
936*e4b17023SJohn Marino /* Return True if the branch can be moved to row ii-1 while
937*e4b17023SJohn Marino normalizing the partial schedule PS to start from cycle zero and thus
938*e4b17023SJohn Marino optimize the SC. Otherwise return False. */
939*e4b17023SJohn Marino static bool
optimize_sc(partial_schedule_ptr ps,ddg_ptr g)940*e4b17023SJohn Marino optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
941*e4b17023SJohn Marino {
942*e4b17023SJohn Marino int amount = PS_MIN_CYCLE (ps);
943*e4b17023SJohn Marino sbitmap sched_nodes = sbitmap_alloc (g->num_nodes);
944*e4b17023SJohn Marino int start, end, step;
945*e4b17023SJohn Marino int ii = ps->ii;
946*e4b17023SJohn Marino bool ok = false;
947*e4b17023SJohn Marino int stage_count, stage_count_curr;
948*e4b17023SJohn Marino
949*e4b17023SJohn Marino /* Compare the SC after normalization and SC after bringing the branch
950*e4b17023SJohn Marino to row ii-1. If they are equal just bail out. */
951*e4b17023SJohn Marino stage_count = calculate_stage_count (ps, amount);
952*e4b17023SJohn Marino stage_count_curr =
953*e4b17023SJohn Marino calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
954*e4b17023SJohn Marino
955*e4b17023SJohn Marino if (stage_count == stage_count_curr)
956*e4b17023SJohn Marino {
957*e4b17023SJohn Marino if (dump_file)
958*e4b17023SJohn Marino fprintf (dump_file, "SMS SC already optimized.\n");
959*e4b17023SJohn Marino
960*e4b17023SJohn Marino ok = false;
961*e4b17023SJohn Marino goto clear;
962*e4b17023SJohn Marino }
963*e4b17023SJohn Marino
964*e4b17023SJohn Marino if (dump_file)
965*e4b17023SJohn Marino {
966*e4b17023SJohn Marino fprintf (dump_file, "SMS Trying to optimize branch location\n");
967*e4b17023SJohn Marino fprintf (dump_file, "SMS partial schedule before trial:\n");
968*e4b17023SJohn Marino print_partial_schedule (ps, dump_file);
969*e4b17023SJohn Marino }
970*e4b17023SJohn Marino
971*e4b17023SJohn Marino /* First, normalize the partial scheduling. */
972*e4b17023SJohn Marino reset_sched_times (ps, amount);
973*e4b17023SJohn Marino rotate_partial_schedule (ps, amount);
974*e4b17023SJohn Marino if (dump_file)
975*e4b17023SJohn Marino {
976*e4b17023SJohn Marino fprintf (dump_file,
977*e4b17023SJohn Marino "SMS partial schedule after normalization (ii, %d, SC %d):\n",
978*e4b17023SJohn Marino ii, stage_count);
979*e4b17023SJohn Marino print_partial_schedule (ps, dump_file);
980*e4b17023SJohn Marino }
981*e4b17023SJohn Marino
982*e4b17023SJohn Marino if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
983*e4b17023SJohn Marino {
984*e4b17023SJohn Marino ok = true;
985*e4b17023SJohn Marino goto clear;
986*e4b17023SJohn Marino }
987*e4b17023SJohn Marino
988*e4b17023SJohn Marino sbitmap_ones (sched_nodes);
989*e4b17023SJohn Marino
990*e4b17023SJohn Marino /* Calculate the new placement of the branch. It should be in row
991*e4b17023SJohn Marino ii-1 and fall into it's scheduling window. */
992*e4b17023SJohn Marino if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
993*e4b17023SJohn Marino &step, &end) == 0)
994*e4b17023SJohn Marino {
995*e4b17023SJohn Marino bool success;
996*e4b17023SJohn Marino ps_insn_ptr next_ps_i;
997*e4b17023SJohn Marino int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
998*e4b17023SJohn Marino int row = SMODULO (branch_cycle, ps->ii);
999*e4b17023SJohn Marino int num_splits = 0;
1000*e4b17023SJohn Marino sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
1001*e4b17023SJohn Marino int c;
1002*e4b17023SJohn Marino
1003*e4b17023SJohn Marino if (dump_file)
1004*e4b17023SJohn Marino fprintf (dump_file, "\nTrying to schedule node %d "
1005*e4b17023SJohn Marino "INSN = %d in (%d .. %d) step %d\n",
1006*e4b17023SJohn Marino g->closing_branch->cuid,
1007*e4b17023SJohn Marino (INSN_UID (g->closing_branch->insn)), start, end, step);
1008*e4b17023SJohn Marino
1009*e4b17023SJohn Marino gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
1010*e4b17023SJohn Marino if (step == 1)
1011*e4b17023SJohn Marino {
1012*e4b17023SJohn Marino c = start + ii - SMODULO (start, ii) - 1;
1013*e4b17023SJohn Marino gcc_assert (c >= start);
1014*e4b17023SJohn Marino if (c >= end)
1015*e4b17023SJohn Marino {
1016*e4b17023SJohn Marino ok = false;
1017*e4b17023SJohn Marino if (dump_file)
1018*e4b17023SJohn Marino fprintf (dump_file,
1019*e4b17023SJohn Marino "SMS failed to schedule branch at cycle: %d\n", c);
1020*e4b17023SJohn Marino goto clear;
1021*e4b17023SJohn Marino }
1022*e4b17023SJohn Marino }
1023*e4b17023SJohn Marino else
1024*e4b17023SJohn Marino {
1025*e4b17023SJohn Marino c = start - SMODULO (start, ii) - 1;
1026*e4b17023SJohn Marino gcc_assert (c <= start);
1027*e4b17023SJohn Marino
1028*e4b17023SJohn Marino if (c <= end)
1029*e4b17023SJohn Marino {
1030*e4b17023SJohn Marino if (dump_file)
1031*e4b17023SJohn Marino fprintf (dump_file,
1032*e4b17023SJohn Marino "SMS failed to schedule branch at cycle: %d\n", c);
1033*e4b17023SJohn Marino ok = false;
1034*e4b17023SJohn Marino goto clear;
1035*e4b17023SJohn Marino }
1036*e4b17023SJohn Marino }
1037*e4b17023SJohn Marino
1038*e4b17023SJohn Marino must_precede = sbitmap_alloc (g->num_nodes);
1039*e4b17023SJohn Marino must_follow = sbitmap_alloc (g->num_nodes);
1040*e4b17023SJohn Marino
1041*e4b17023SJohn Marino /* Try to schedule the branch is it's new cycle. */
1042*e4b17023SJohn Marino calculate_must_precede_follow (g->closing_branch, start, end,
1043*e4b17023SJohn Marino step, ii, sched_nodes,
1044*e4b17023SJohn Marino must_precede, must_follow);
1045*e4b17023SJohn Marino
1046*e4b17023SJohn Marino set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1047*e4b17023SJohn Marino must_precede, c, start, end, step);
1048*e4b17023SJohn Marino
1049*e4b17023SJohn Marino /* Find the element in the partial schedule related to the closing
1050*e4b17023SJohn Marino branch so we can remove it from it's current cycle. */
1051*e4b17023SJohn Marino for (next_ps_i = ps->rows[row];
1052*e4b17023SJohn Marino next_ps_i; next_ps_i = next_ps_i->next_in_row)
1053*e4b17023SJohn Marino if (next_ps_i->id == g->closing_branch->cuid)
1054*e4b17023SJohn Marino break;
1055*e4b17023SJohn Marino
1056*e4b17023SJohn Marino remove_node_from_ps (ps, next_ps_i);
1057*e4b17023SJohn Marino success =
1058*e4b17023SJohn Marino try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
1059*e4b17023SJohn Marino sched_nodes, &num_splits,
1060*e4b17023SJohn Marino tmp_precede, tmp_follow);
1061*e4b17023SJohn Marino gcc_assert (num_splits == 0);
1062*e4b17023SJohn Marino if (!success)
1063*e4b17023SJohn Marino {
1064*e4b17023SJohn Marino if (dump_file)
1065*e4b17023SJohn Marino fprintf (dump_file,
1066*e4b17023SJohn Marino "SMS failed to schedule branch at cycle: %d, "
1067*e4b17023SJohn Marino "bringing it back to cycle %d\n", c, branch_cycle);
1068*e4b17023SJohn Marino
1069*e4b17023SJohn Marino /* The branch was failed to be placed in row ii - 1.
1070*e4b17023SJohn Marino Put it back in it's original place in the partial
1071*e4b17023SJohn Marino schedualing. */
1072*e4b17023SJohn Marino set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1073*e4b17023SJohn Marino must_precede, branch_cycle, start, end,
1074*e4b17023SJohn Marino step);
1075*e4b17023SJohn Marino success =
1076*e4b17023SJohn Marino try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
1077*e4b17023SJohn Marino branch_cycle, sched_nodes,
1078*e4b17023SJohn Marino &num_splits, tmp_precede,
1079*e4b17023SJohn Marino tmp_follow);
1080*e4b17023SJohn Marino gcc_assert (success && (num_splits == 0));
1081*e4b17023SJohn Marino ok = false;
1082*e4b17023SJohn Marino }
1083*e4b17023SJohn Marino else
1084*e4b17023SJohn Marino {
1085*e4b17023SJohn Marino /* The branch is placed in row ii - 1. */
1086*e4b17023SJohn Marino if (dump_file)
1087*e4b17023SJohn Marino fprintf (dump_file,
1088*e4b17023SJohn Marino "SMS success in moving branch to cycle %d\n", c);
1089*e4b17023SJohn Marino
1090*e4b17023SJohn Marino update_node_sched_params (g->closing_branch->cuid, ii, c,
1091*e4b17023SJohn Marino PS_MIN_CYCLE (ps));
1092*e4b17023SJohn Marino ok = true;
1093*e4b17023SJohn Marino }
1094*e4b17023SJohn Marino
1095*e4b17023SJohn Marino free (must_precede);
1096*e4b17023SJohn Marino free (must_follow);
1097*e4b17023SJohn Marino }
1098*e4b17023SJohn Marino
1099*e4b17023SJohn Marino clear:
1100*e4b17023SJohn Marino free (sched_nodes);
1101*e4b17023SJohn Marino return ok;
1102*e4b17023SJohn Marino }
1103*e4b17023SJohn Marino
1104*e4b17023SJohn Marino static void
duplicate_insns_of_cycles(partial_schedule_ptr ps,int from_stage,int to_stage,rtx count_reg)1105*e4b17023SJohn Marino duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
1106*e4b17023SJohn Marino int to_stage, rtx count_reg)
1107*e4b17023SJohn Marino {
1108*e4b17023SJohn Marino int row;
1109*e4b17023SJohn Marino ps_insn_ptr ps_ij;
1110*e4b17023SJohn Marino
1111*e4b17023SJohn Marino for (row = 0; row < ps->ii; row++)
1112*e4b17023SJohn Marino for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
1113*e4b17023SJohn Marino {
1114*e4b17023SJohn Marino int u = ps_ij->id;
1115*e4b17023SJohn Marino int first_u, last_u;
1116*e4b17023SJohn Marino rtx u_insn;
1117*e4b17023SJohn Marino
1118*e4b17023SJohn Marino /* Do not duplicate any insn which refers to count_reg as it
1119*e4b17023SJohn Marino belongs to the control part.
1120*e4b17023SJohn Marino The closing branch is scheduled as well and thus should
1121*e4b17023SJohn Marino be ignored.
1122*e4b17023SJohn Marino TODO: This should be done by analyzing the control part of
1123*e4b17023SJohn Marino the loop. */
1124*e4b17023SJohn Marino u_insn = ps_rtl_insn (ps, u);
1125*e4b17023SJohn Marino if (reg_mentioned_p (count_reg, u_insn)
1126*e4b17023SJohn Marino || JUMP_P (u_insn))
1127*e4b17023SJohn Marino continue;
1128*e4b17023SJohn Marino
1129*e4b17023SJohn Marino first_u = SCHED_STAGE (u);
1130*e4b17023SJohn Marino last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
1131*e4b17023SJohn Marino if (from_stage <= last_u && to_stage >= first_u)
1132*e4b17023SJohn Marino {
1133*e4b17023SJohn Marino if (u < ps->g->num_nodes)
1134*e4b17023SJohn Marino duplicate_insn_chain (ps_first_note (ps, u), u_insn);
1135*e4b17023SJohn Marino else
1136*e4b17023SJohn Marino emit_insn (copy_rtx (PATTERN (u_insn)));
1137*e4b17023SJohn Marino }
1138*e4b17023SJohn Marino }
1139*e4b17023SJohn Marino }
1140*e4b17023SJohn Marino
1141*e4b17023SJohn Marino
1142*e4b17023SJohn Marino /* Generate the instructions (including reg_moves) for prolog & epilog. */
1143*e4b17023SJohn Marino static void
generate_prolog_epilog(partial_schedule_ptr ps,struct loop * loop,rtx count_reg,rtx count_init)1144*e4b17023SJohn Marino generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
1145*e4b17023SJohn Marino rtx count_reg, rtx count_init)
1146*e4b17023SJohn Marino {
1147*e4b17023SJohn Marino int i;
1148*e4b17023SJohn Marino int last_stage = PS_STAGE_COUNT (ps) - 1;
1149*e4b17023SJohn Marino edge e;
1150*e4b17023SJohn Marino
1151*e4b17023SJohn Marino /* Generate the prolog, inserting its insns on the loop-entry edge. */
1152*e4b17023SJohn Marino start_sequence ();
1153*e4b17023SJohn Marino
1154*e4b17023SJohn Marino if (!count_init)
1155*e4b17023SJohn Marino {
1156*e4b17023SJohn Marino /* Generate instructions at the beginning of the prolog to
1157*e4b17023SJohn Marino adjust the loop count by STAGE_COUNT. If loop count is constant
1158*e4b17023SJohn Marino (count_init), this constant is adjusted by STAGE_COUNT in
1159*e4b17023SJohn Marino generate_prolog_epilog function. */
1160*e4b17023SJohn Marino rtx sub_reg = NULL_RTX;
1161*e4b17023SJohn Marino
1162*e4b17023SJohn Marino sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
1163*e4b17023SJohn Marino count_reg, GEN_INT (last_stage),
1164*e4b17023SJohn Marino count_reg, 1, OPTAB_DIRECT);
1165*e4b17023SJohn Marino gcc_assert (REG_P (sub_reg));
1166*e4b17023SJohn Marino if (REGNO (sub_reg) != REGNO (count_reg))
1167*e4b17023SJohn Marino emit_move_insn (count_reg, sub_reg);
1168*e4b17023SJohn Marino }
1169*e4b17023SJohn Marino
1170*e4b17023SJohn Marino for (i = 0; i < last_stage; i++)
1171*e4b17023SJohn Marino duplicate_insns_of_cycles (ps, 0, i, count_reg);
1172*e4b17023SJohn Marino
1173*e4b17023SJohn Marino /* Put the prolog on the entry edge. */
1174*e4b17023SJohn Marino e = loop_preheader_edge (loop);
1175*e4b17023SJohn Marino split_edge_and_insert (e, get_insns ());
1176*e4b17023SJohn Marino if (!flag_resched_modulo_sched)
1177*e4b17023SJohn Marino e->dest->flags |= BB_DISABLE_SCHEDULE;
1178*e4b17023SJohn Marino
1179*e4b17023SJohn Marino end_sequence ();
1180*e4b17023SJohn Marino
1181*e4b17023SJohn Marino /* Generate the epilog, inserting its insns on the loop-exit edge. */
1182*e4b17023SJohn Marino start_sequence ();
1183*e4b17023SJohn Marino
1184*e4b17023SJohn Marino for (i = 0; i < last_stage; i++)
1185*e4b17023SJohn Marino duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
1186*e4b17023SJohn Marino
1187*e4b17023SJohn Marino /* Put the epilogue on the exit edge. */
1188*e4b17023SJohn Marino gcc_assert (single_exit (loop));
1189*e4b17023SJohn Marino e = single_exit (loop);
1190*e4b17023SJohn Marino split_edge_and_insert (e, get_insns ());
1191*e4b17023SJohn Marino if (!flag_resched_modulo_sched)
1192*e4b17023SJohn Marino e->dest->flags |= BB_DISABLE_SCHEDULE;
1193*e4b17023SJohn Marino
1194*e4b17023SJohn Marino end_sequence ();
1195*e4b17023SJohn Marino }
1196*e4b17023SJohn Marino
1197*e4b17023SJohn Marino /* Mark LOOP as software pipelined so the later
1198*e4b17023SJohn Marino scheduling passes don't touch it. */
1199*e4b17023SJohn Marino static void
mark_loop_unsched(struct loop * loop)1200*e4b17023SJohn Marino mark_loop_unsched (struct loop *loop)
1201*e4b17023SJohn Marino {
1202*e4b17023SJohn Marino unsigned i;
1203*e4b17023SJohn Marino basic_block *bbs = get_loop_body (loop);
1204*e4b17023SJohn Marino
1205*e4b17023SJohn Marino for (i = 0; i < loop->num_nodes; i++)
1206*e4b17023SJohn Marino bbs[i]->flags |= BB_DISABLE_SCHEDULE;
1207*e4b17023SJohn Marino
1208*e4b17023SJohn Marino free (bbs);
1209*e4b17023SJohn Marino }
1210*e4b17023SJohn Marino
1211*e4b17023SJohn Marino /* Return true if all the BBs of the loop are empty except the
1212*e4b17023SJohn Marino loop header. */
1213*e4b17023SJohn Marino static bool
loop_single_full_bb_p(struct loop * loop)1214*e4b17023SJohn Marino loop_single_full_bb_p (struct loop *loop)
1215*e4b17023SJohn Marino {
1216*e4b17023SJohn Marino unsigned i;
1217*e4b17023SJohn Marino basic_block *bbs = get_loop_body (loop);
1218*e4b17023SJohn Marino
1219*e4b17023SJohn Marino for (i = 0; i < loop->num_nodes ; i++)
1220*e4b17023SJohn Marino {
1221*e4b17023SJohn Marino rtx head, tail;
1222*e4b17023SJohn Marino bool empty_bb = true;
1223*e4b17023SJohn Marino
1224*e4b17023SJohn Marino if (bbs[i] == loop->header)
1225*e4b17023SJohn Marino continue;
1226*e4b17023SJohn Marino
1227*e4b17023SJohn Marino /* Make sure that basic blocks other than the header
1228*e4b17023SJohn Marino have only notes labels or jumps. */
1229*e4b17023SJohn Marino get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
1230*e4b17023SJohn Marino for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
1231*e4b17023SJohn Marino {
1232*e4b17023SJohn Marino if (NOTE_P (head) || LABEL_P (head)
1233*e4b17023SJohn Marino || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
1234*e4b17023SJohn Marino continue;
1235*e4b17023SJohn Marino empty_bb = false;
1236*e4b17023SJohn Marino break;
1237*e4b17023SJohn Marino }
1238*e4b17023SJohn Marino
1239*e4b17023SJohn Marino if (! empty_bb)
1240*e4b17023SJohn Marino {
1241*e4b17023SJohn Marino free (bbs);
1242*e4b17023SJohn Marino return false;
1243*e4b17023SJohn Marino }
1244*e4b17023SJohn Marino }
1245*e4b17023SJohn Marino free (bbs);
1246*e4b17023SJohn Marino return true;
1247*e4b17023SJohn Marino }
1248*e4b17023SJohn Marino
1249*e4b17023SJohn Marino /* Dump file:line from INSN's location info to dump_file. */
1250*e4b17023SJohn Marino
1251*e4b17023SJohn Marino static void
dump_insn_locator(rtx insn)1252*e4b17023SJohn Marino dump_insn_locator (rtx insn)
1253*e4b17023SJohn Marino {
1254*e4b17023SJohn Marino if (dump_file && INSN_LOCATOR (insn))
1255*e4b17023SJohn Marino {
1256*e4b17023SJohn Marino const char *file = insn_file (insn);
1257*e4b17023SJohn Marino if (file)
1258*e4b17023SJohn Marino fprintf (dump_file, " %s:%i", file, insn_line (insn));
1259*e4b17023SJohn Marino }
1260*e4b17023SJohn Marino }
1261*e4b17023SJohn Marino
1262*e4b17023SJohn Marino /* A simple loop from SMS point of view; it is a loop that is composed of
1263*e4b17023SJohn Marino either a single basic block or two BBs - a header and a latch. */
1264*e4b17023SJohn Marino #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
1265*e4b17023SJohn Marino && (EDGE_COUNT (loop->latch->preds) == 1) \
1266*e4b17023SJohn Marino && (EDGE_COUNT (loop->latch->succs) == 1))
1267*e4b17023SJohn Marino
1268*e4b17023SJohn Marino /* Return true if the loop is in its canonical form and false if not.
1269*e4b17023SJohn Marino i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
1270*e4b17023SJohn Marino static bool
loop_canon_p(struct loop * loop)1271*e4b17023SJohn Marino loop_canon_p (struct loop *loop)
1272*e4b17023SJohn Marino {
1273*e4b17023SJohn Marino
1274*e4b17023SJohn Marino if (loop->inner || !loop_outer (loop))
1275*e4b17023SJohn Marino {
1276*e4b17023SJohn Marino if (dump_file)
1277*e4b17023SJohn Marino fprintf (dump_file, "SMS loop inner or !loop_outer\n");
1278*e4b17023SJohn Marino return false;
1279*e4b17023SJohn Marino }
1280*e4b17023SJohn Marino
1281*e4b17023SJohn Marino if (!single_exit (loop))
1282*e4b17023SJohn Marino {
1283*e4b17023SJohn Marino if (dump_file)
1284*e4b17023SJohn Marino {
1285*e4b17023SJohn Marino rtx insn = BB_END (loop->header);
1286*e4b17023SJohn Marino
1287*e4b17023SJohn Marino fprintf (dump_file, "SMS loop many exits");
1288*e4b17023SJohn Marino dump_insn_locator (insn);
1289*e4b17023SJohn Marino fprintf (dump_file, "\n");
1290*e4b17023SJohn Marino }
1291*e4b17023SJohn Marino return false;
1292*e4b17023SJohn Marino }
1293*e4b17023SJohn Marino
1294*e4b17023SJohn Marino if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
1295*e4b17023SJohn Marino {
1296*e4b17023SJohn Marino if (dump_file)
1297*e4b17023SJohn Marino {
1298*e4b17023SJohn Marino rtx insn = BB_END (loop->header);
1299*e4b17023SJohn Marino
1300*e4b17023SJohn Marino fprintf (dump_file, "SMS loop many BBs.");
1301*e4b17023SJohn Marino dump_insn_locator (insn);
1302*e4b17023SJohn Marino fprintf (dump_file, "\n");
1303*e4b17023SJohn Marino }
1304*e4b17023SJohn Marino return false;
1305*e4b17023SJohn Marino }
1306*e4b17023SJohn Marino
1307*e4b17023SJohn Marino return true;
1308*e4b17023SJohn Marino }
1309*e4b17023SJohn Marino
1310*e4b17023SJohn Marino /* If there are more than one entry for the loop,
1311*e4b17023SJohn Marino make it one by splitting the first entry edge and
1312*e4b17023SJohn Marino redirecting the others to the new BB. */
1313*e4b17023SJohn Marino static void
canon_loop(struct loop * loop)1314*e4b17023SJohn Marino canon_loop (struct loop *loop)
1315*e4b17023SJohn Marino {
1316*e4b17023SJohn Marino edge e;
1317*e4b17023SJohn Marino edge_iterator i;
1318*e4b17023SJohn Marino
1319*e4b17023SJohn Marino /* Avoid annoying special cases of edges going to exit
1320*e4b17023SJohn Marino block. */
1321*e4b17023SJohn Marino FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
1322*e4b17023SJohn Marino if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
1323*e4b17023SJohn Marino split_edge (e);
1324*e4b17023SJohn Marino
1325*e4b17023SJohn Marino if (loop->latch == loop->header
1326*e4b17023SJohn Marino || EDGE_COUNT (loop->latch->succs) > 1)
1327*e4b17023SJohn Marino {
1328*e4b17023SJohn Marino FOR_EACH_EDGE (e, i, loop->header->preds)
1329*e4b17023SJohn Marino if (e->src == loop->latch)
1330*e4b17023SJohn Marino break;
1331*e4b17023SJohn Marino split_edge (e);
1332*e4b17023SJohn Marino }
1333*e4b17023SJohn Marino }
1334*e4b17023SJohn Marino
1335*e4b17023SJohn Marino /* Setup infos. */
1336*e4b17023SJohn Marino static void
setup_sched_infos(void)1337*e4b17023SJohn Marino setup_sched_infos (void)
1338*e4b17023SJohn Marino {
1339*e4b17023SJohn Marino memcpy (&sms_common_sched_info, &haifa_common_sched_info,
1340*e4b17023SJohn Marino sizeof (sms_common_sched_info));
1341*e4b17023SJohn Marino sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
1342*e4b17023SJohn Marino common_sched_info = &sms_common_sched_info;
1343*e4b17023SJohn Marino
1344*e4b17023SJohn Marino sched_deps_info = &sms_sched_deps_info;
1345*e4b17023SJohn Marino current_sched_info = &sms_sched_info;
1346*e4b17023SJohn Marino }
1347*e4b17023SJohn Marino
1348*e4b17023SJohn Marino /* Probability in % that the sms-ed loop rolls enough so that optimized
1349*e4b17023SJohn Marino version may be entered. Just a guess. */
1350*e4b17023SJohn Marino #define PROB_SMS_ENOUGH_ITERATIONS 80
1351*e4b17023SJohn Marino
1352*e4b17023SJohn Marino /* Used to calculate the upper bound of ii. */
1353*e4b17023SJohn Marino #define MAXII_FACTOR 2
1354*e4b17023SJohn Marino
1355*e4b17023SJohn Marino /* Main entry point, perform SMS scheduling on the loops of the function
1356*e4b17023SJohn Marino that consist of single basic blocks. */
1357*e4b17023SJohn Marino static void
sms_schedule(void)1358*e4b17023SJohn Marino sms_schedule (void)
1359*e4b17023SJohn Marino {
1360*e4b17023SJohn Marino rtx insn;
1361*e4b17023SJohn Marino ddg_ptr *g_arr, g;
1362*e4b17023SJohn Marino int * node_order;
1363*e4b17023SJohn Marino int maxii, max_asap;
1364*e4b17023SJohn Marino loop_iterator li;
1365*e4b17023SJohn Marino partial_schedule_ptr ps;
1366*e4b17023SJohn Marino basic_block bb = NULL;
1367*e4b17023SJohn Marino struct loop *loop;
1368*e4b17023SJohn Marino basic_block condition_bb = NULL;
1369*e4b17023SJohn Marino edge latch_edge;
1370*e4b17023SJohn Marino gcov_type trip_count = 0;
1371*e4b17023SJohn Marino
1372*e4b17023SJohn Marino loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1373*e4b17023SJohn Marino | LOOPS_HAVE_RECORDED_EXITS);
1374*e4b17023SJohn Marino if (number_of_loops () <= 1)
1375*e4b17023SJohn Marino {
1376*e4b17023SJohn Marino loop_optimizer_finalize ();
1377*e4b17023SJohn Marino return; /* There are no loops to schedule. */
1378*e4b17023SJohn Marino }
1379*e4b17023SJohn Marino
1380*e4b17023SJohn Marino /* Initialize issue_rate. */
1381*e4b17023SJohn Marino if (targetm.sched.issue_rate)
1382*e4b17023SJohn Marino {
1383*e4b17023SJohn Marino int temp = reload_completed;
1384*e4b17023SJohn Marino
1385*e4b17023SJohn Marino reload_completed = 1;
1386*e4b17023SJohn Marino issue_rate = targetm.sched.issue_rate ();
1387*e4b17023SJohn Marino reload_completed = temp;
1388*e4b17023SJohn Marino }
1389*e4b17023SJohn Marino else
1390*e4b17023SJohn Marino issue_rate = 1;
1391*e4b17023SJohn Marino
1392*e4b17023SJohn Marino /* Initialize the scheduler. */
1393*e4b17023SJohn Marino setup_sched_infos ();
1394*e4b17023SJohn Marino haifa_sched_init ();
1395*e4b17023SJohn Marino
1396*e4b17023SJohn Marino /* Allocate memory to hold the DDG array one entry for each loop.
1397*e4b17023SJohn Marino We use loop->num as index into this array. */
1398*e4b17023SJohn Marino g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
1399*e4b17023SJohn Marino
1400*e4b17023SJohn Marino if (dump_file)
1401*e4b17023SJohn Marino {
1402*e4b17023SJohn Marino fprintf (dump_file, "\n\nSMS analysis phase\n");
1403*e4b17023SJohn Marino fprintf (dump_file, "===================\n\n");
1404*e4b17023SJohn Marino }
1405*e4b17023SJohn Marino
1406*e4b17023SJohn Marino /* Build DDGs for all the relevant loops and hold them in G_ARR
1407*e4b17023SJohn Marino indexed by the loop index. */
1408*e4b17023SJohn Marino FOR_EACH_LOOP (li, loop, 0)
1409*e4b17023SJohn Marino {
1410*e4b17023SJohn Marino rtx head, tail;
1411*e4b17023SJohn Marino rtx count_reg;
1412*e4b17023SJohn Marino
1413*e4b17023SJohn Marino /* For debugging. */
1414*e4b17023SJohn Marino if (dbg_cnt (sms_sched_loop) == false)
1415*e4b17023SJohn Marino {
1416*e4b17023SJohn Marino if (dump_file)
1417*e4b17023SJohn Marino fprintf (dump_file, "SMS reached max limit... \n");
1418*e4b17023SJohn Marino
1419*e4b17023SJohn Marino FOR_EACH_LOOP_BREAK (li);
1420*e4b17023SJohn Marino }
1421*e4b17023SJohn Marino
1422*e4b17023SJohn Marino if (dump_file)
1423*e4b17023SJohn Marino {
1424*e4b17023SJohn Marino rtx insn = BB_END (loop->header);
1425*e4b17023SJohn Marino
1426*e4b17023SJohn Marino fprintf (dump_file, "SMS loop num: %d", loop->num);
1427*e4b17023SJohn Marino dump_insn_locator (insn);
1428*e4b17023SJohn Marino fprintf (dump_file, "\n");
1429*e4b17023SJohn Marino }
1430*e4b17023SJohn Marino
1431*e4b17023SJohn Marino if (! loop_canon_p (loop))
1432*e4b17023SJohn Marino continue;
1433*e4b17023SJohn Marino
1434*e4b17023SJohn Marino if (! loop_single_full_bb_p (loop))
1435*e4b17023SJohn Marino {
1436*e4b17023SJohn Marino if (dump_file)
1437*e4b17023SJohn Marino fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
1438*e4b17023SJohn Marino continue;
1439*e4b17023SJohn Marino }
1440*e4b17023SJohn Marino
1441*e4b17023SJohn Marino bb = loop->header;
1442*e4b17023SJohn Marino
1443*e4b17023SJohn Marino get_ebb_head_tail (bb, bb, &head, &tail);
1444*e4b17023SJohn Marino latch_edge = loop_latch_edge (loop);
1445*e4b17023SJohn Marino gcc_assert (single_exit (loop));
1446*e4b17023SJohn Marino if (single_exit (loop)->count)
1447*e4b17023SJohn Marino trip_count = latch_edge->count / single_exit (loop)->count;
1448*e4b17023SJohn Marino
1449*e4b17023SJohn Marino /* Perform SMS only on loops that their average count is above threshold. */
1450*e4b17023SJohn Marino
1451*e4b17023SJohn Marino if ( latch_edge->count
1452*e4b17023SJohn Marino && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
1453*e4b17023SJohn Marino {
1454*e4b17023SJohn Marino if (dump_file)
1455*e4b17023SJohn Marino {
1456*e4b17023SJohn Marino dump_insn_locator (tail);
1457*e4b17023SJohn Marino fprintf (dump_file, "\nSMS single-bb-loop\n");
1458*e4b17023SJohn Marino if (profile_info && flag_branch_probabilities)
1459*e4b17023SJohn Marino {
1460*e4b17023SJohn Marino fprintf (dump_file, "SMS loop-count ");
1461*e4b17023SJohn Marino fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1462*e4b17023SJohn Marino (HOST_WIDEST_INT) bb->count);
1463*e4b17023SJohn Marino fprintf (dump_file, "\n");
1464*e4b17023SJohn Marino fprintf (dump_file, "SMS trip-count ");
1465*e4b17023SJohn Marino fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1466*e4b17023SJohn Marino (HOST_WIDEST_INT) trip_count);
1467*e4b17023SJohn Marino fprintf (dump_file, "\n");
1468*e4b17023SJohn Marino fprintf (dump_file, "SMS profile-sum-max ");
1469*e4b17023SJohn Marino fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1470*e4b17023SJohn Marino (HOST_WIDEST_INT) profile_info->sum_max);
1471*e4b17023SJohn Marino fprintf (dump_file, "\n");
1472*e4b17023SJohn Marino }
1473*e4b17023SJohn Marino }
1474*e4b17023SJohn Marino continue;
1475*e4b17023SJohn Marino }
1476*e4b17023SJohn Marino
1477*e4b17023SJohn Marino /* Make sure this is a doloop. */
1478*e4b17023SJohn Marino if ( !(count_reg = doloop_register_get (head, tail)))
1479*e4b17023SJohn Marino {
1480*e4b17023SJohn Marino if (dump_file)
1481*e4b17023SJohn Marino fprintf (dump_file, "SMS doloop_register_get failed\n");
1482*e4b17023SJohn Marino continue;
1483*e4b17023SJohn Marino }
1484*e4b17023SJohn Marino
1485*e4b17023SJohn Marino /* Don't handle BBs with calls or barriers
1486*e4b17023SJohn Marino or !single_set with the exception of instructions that include
1487*e4b17023SJohn Marino count_reg---these instructions are part of the control part
1488*e4b17023SJohn Marino that do-loop recognizes.
1489*e4b17023SJohn Marino ??? Should handle insns defining subregs. */
1490*e4b17023SJohn Marino for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1491*e4b17023SJohn Marino {
1492*e4b17023SJohn Marino rtx set;
1493*e4b17023SJohn Marino
1494*e4b17023SJohn Marino if (CALL_P (insn)
1495*e4b17023SJohn Marino || BARRIER_P (insn)
1496*e4b17023SJohn Marino || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1497*e4b17023SJohn Marino && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1498*e4b17023SJohn Marino && !reg_mentioned_p (count_reg, insn))
1499*e4b17023SJohn Marino || (INSN_P (insn) && (set = single_set (insn))
1500*e4b17023SJohn Marino && GET_CODE (SET_DEST (set)) == SUBREG))
1501*e4b17023SJohn Marino break;
1502*e4b17023SJohn Marino }
1503*e4b17023SJohn Marino
1504*e4b17023SJohn Marino if (insn != NEXT_INSN (tail))
1505*e4b17023SJohn Marino {
1506*e4b17023SJohn Marino if (dump_file)
1507*e4b17023SJohn Marino {
1508*e4b17023SJohn Marino if (CALL_P (insn))
1509*e4b17023SJohn Marino fprintf (dump_file, "SMS loop-with-call\n");
1510*e4b17023SJohn Marino else if (BARRIER_P (insn))
1511*e4b17023SJohn Marino fprintf (dump_file, "SMS loop-with-barrier\n");
1512*e4b17023SJohn Marino else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1513*e4b17023SJohn Marino && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1514*e4b17023SJohn Marino fprintf (dump_file, "SMS loop-with-not-single-set\n");
1515*e4b17023SJohn Marino else
1516*e4b17023SJohn Marino fprintf (dump_file, "SMS loop with subreg in lhs\n");
1517*e4b17023SJohn Marino print_rtl_single (dump_file, insn);
1518*e4b17023SJohn Marino }
1519*e4b17023SJohn Marino
1520*e4b17023SJohn Marino continue;
1521*e4b17023SJohn Marino }
1522*e4b17023SJohn Marino
1523*e4b17023SJohn Marino /* Always schedule the closing branch with the rest of the
1524*e4b17023SJohn Marino instructions. The branch is rotated to be in row ii-1 at the
1525*e4b17023SJohn Marino end of the scheduling procedure to make sure it's the last
1526*e4b17023SJohn Marino instruction in the iteration. */
1527*e4b17023SJohn Marino if (! (g = create_ddg (bb, 1)))
1528*e4b17023SJohn Marino {
1529*e4b17023SJohn Marino if (dump_file)
1530*e4b17023SJohn Marino fprintf (dump_file, "SMS create_ddg failed\n");
1531*e4b17023SJohn Marino continue;
1532*e4b17023SJohn Marino }
1533*e4b17023SJohn Marino
1534*e4b17023SJohn Marino g_arr[loop->num] = g;
1535*e4b17023SJohn Marino if (dump_file)
1536*e4b17023SJohn Marino fprintf (dump_file, "...OK\n");
1537*e4b17023SJohn Marino
1538*e4b17023SJohn Marino }
1539*e4b17023SJohn Marino if (dump_file)
1540*e4b17023SJohn Marino {
1541*e4b17023SJohn Marino fprintf (dump_file, "\nSMS transformation phase\n");
1542*e4b17023SJohn Marino fprintf (dump_file, "=========================\n\n");
1543*e4b17023SJohn Marino }
1544*e4b17023SJohn Marino
1545*e4b17023SJohn Marino /* We don't want to perform SMS on new loops - created by versioning. */
1546*e4b17023SJohn Marino FOR_EACH_LOOP (li, loop, 0)
1547*e4b17023SJohn Marino {
1548*e4b17023SJohn Marino rtx head, tail;
1549*e4b17023SJohn Marino rtx count_reg, count_init;
1550*e4b17023SJohn Marino int mii, rec_mii, stage_count, min_cycle;
1551*e4b17023SJohn Marino HOST_WIDEST_INT loop_count = 0;
1552*e4b17023SJohn Marino bool opt_sc_p;
1553*e4b17023SJohn Marino
1554*e4b17023SJohn Marino if (! (g = g_arr[loop->num]))
1555*e4b17023SJohn Marino continue;
1556*e4b17023SJohn Marino
1557*e4b17023SJohn Marino if (dump_file)
1558*e4b17023SJohn Marino {
1559*e4b17023SJohn Marino rtx insn = BB_END (loop->header);
1560*e4b17023SJohn Marino
1561*e4b17023SJohn Marino fprintf (dump_file, "SMS loop num: %d", loop->num);
1562*e4b17023SJohn Marino dump_insn_locator (insn);
1563*e4b17023SJohn Marino fprintf (dump_file, "\n");
1564*e4b17023SJohn Marino
1565*e4b17023SJohn Marino print_ddg (dump_file, g);
1566*e4b17023SJohn Marino }
1567*e4b17023SJohn Marino
1568*e4b17023SJohn Marino get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1569*e4b17023SJohn Marino
1570*e4b17023SJohn Marino latch_edge = loop_latch_edge (loop);
1571*e4b17023SJohn Marino gcc_assert (single_exit (loop));
1572*e4b17023SJohn Marino if (single_exit (loop)->count)
1573*e4b17023SJohn Marino trip_count = latch_edge->count / single_exit (loop)->count;
1574*e4b17023SJohn Marino
1575*e4b17023SJohn Marino if (dump_file)
1576*e4b17023SJohn Marino {
1577*e4b17023SJohn Marino dump_insn_locator (tail);
1578*e4b17023SJohn Marino fprintf (dump_file, "\nSMS single-bb-loop\n");
1579*e4b17023SJohn Marino if (profile_info && flag_branch_probabilities)
1580*e4b17023SJohn Marino {
1581*e4b17023SJohn Marino fprintf (dump_file, "SMS loop-count ");
1582*e4b17023SJohn Marino fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1583*e4b17023SJohn Marino (HOST_WIDEST_INT) bb->count);
1584*e4b17023SJohn Marino fprintf (dump_file, "\n");
1585*e4b17023SJohn Marino fprintf (dump_file, "SMS profile-sum-max ");
1586*e4b17023SJohn Marino fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1587*e4b17023SJohn Marino (HOST_WIDEST_INT) profile_info->sum_max);
1588*e4b17023SJohn Marino fprintf (dump_file, "\n");
1589*e4b17023SJohn Marino }
1590*e4b17023SJohn Marino fprintf (dump_file, "SMS doloop\n");
1591*e4b17023SJohn Marino fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1592*e4b17023SJohn Marino fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1593*e4b17023SJohn Marino fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1594*e4b17023SJohn Marino }
1595*e4b17023SJohn Marino
1596*e4b17023SJohn Marino
1597*e4b17023SJohn Marino /* In case of th loop have doloop register it gets special
1598*e4b17023SJohn Marino handling. */
1599*e4b17023SJohn Marino count_init = NULL_RTX;
1600*e4b17023SJohn Marino if ((count_reg = doloop_register_get (head, tail)))
1601*e4b17023SJohn Marino {
1602*e4b17023SJohn Marino basic_block pre_header;
1603*e4b17023SJohn Marino
1604*e4b17023SJohn Marino pre_header = loop_preheader_edge (loop)->src;
1605*e4b17023SJohn Marino count_init = const_iteration_count (count_reg, pre_header,
1606*e4b17023SJohn Marino &loop_count);
1607*e4b17023SJohn Marino }
1608*e4b17023SJohn Marino gcc_assert (count_reg);
1609*e4b17023SJohn Marino
1610*e4b17023SJohn Marino if (dump_file && count_init)
1611*e4b17023SJohn Marino {
1612*e4b17023SJohn Marino fprintf (dump_file, "SMS const-doloop ");
1613*e4b17023SJohn Marino fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1614*e4b17023SJohn Marino loop_count);
1615*e4b17023SJohn Marino fprintf (dump_file, "\n");
1616*e4b17023SJohn Marino }
1617*e4b17023SJohn Marino
1618*e4b17023SJohn Marino node_order = XNEWVEC (int, g->num_nodes);
1619*e4b17023SJohn Marino
1620*e4b17023SJohn Marino mii = 1; /* Need to pass some estimate of mii. */
1621*e4b17023SJohn Marino rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1622*e4b17023SJohn Marino mii = MAX (res_MII (g), rec_mii);
1623*e4b17023SJohn Marino maxii = MAX (max_asap, MAXII_FACTOR * mii);
1624*e4b17023SJohn Marino
1625*e4b17023SJohn Marino if (dump_file)
1626*e4b17023SJohn Marino fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1627*e4b17023SJohn Marino rec_mii, mii, maxii);
1628*e4b17023SJohn Marino
1629*e4b17023SJohn Marino for (;;)
1630*e4b17023SJohn Marino {
1631*e4b17023SJohn Marino set_node_sched_params (g);
1632*e4b17023SJohn Marino
1633*e4b17023SJohn Marino stage_count = 0;
1634*e4b17023SJohn Marino opt_sc_p = false;
1635*e4b17023SJohn Marino ps = sms_schedule_by_order (g, mii, maxii, node_order);
1636*e4b17023SJohn Marino
1637*e4b17023SJohn Marino if (ps)
1638*e4b17023SJohn Marino {
1639*e4b17023SJohn Marino /* Try to achieve optimized SC by normalizing the partial
1640*e4b17023SJohn Marino schedule (having the cycles start from cycle zero).
1641*e4b17023SJohn Marino The branch location must be placed in row ii-1 in the
1642*e4b17023SJohn Marino final scheduling. If failed, shift all instructions to
1643*e4b17023SJohn Marino position the branch in row ii-1. */
1644*e4b17023SJohn Marino opt_sc_p = optimize_sc (ps, g);
1645*e4b17023SJohn Marino if (opt_sc_p)
1646*e4b17023SJohn Marino stage_count = calculate_stage_count (ps, 0);
1647*e4b17023SJohn Marino else
1648*e4b17023SJohn Marino {
1649*e4b17023SJohn Marino /* Bring the branch to cycle ii-1. */
1650*e4b17023SJohn Marino int amount = (SCHED_TIME (g->closing_branch->cuid)
1651*e4b17023SJohn Marino - (ps->ii - 1));
1652*e4b17023SJohn Marino
1653*e4b17023SJohn Marino if (dump_file)
1654*e4b17023SJohn Marino fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1655*e4b17023SJohn Marino
1656*e4b17023SJohn Marino stage_count = calculate_stage_count (ps, amount);
1657*e4b17023SJohn Marino }
1658*e4b17023SJohn Marino
1659*e4b17023SJohn Marino gcc_assert (stage_count >= 1);
1660*e4b17023SJohn Marino }
1661*e4b17023SJohn Marino
1662*e4b17023SJohn Marino /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1663*e4b17023SJohn Marino 1 means that there is no interleaving between iterations thus
1664*e4b17023SJohn Marino we let the scheduling passes do the job in this case. */
1665*e4b17023SJohn Marino if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC)
1666*e4b17023SJohn Marino || (count_init && (loop_count <= stage_count))
1667*e4b17023SJohn Marino || (flag_branch_probabilities && (trip_count <= stage_count)))
1668*e4b17023SJohn Marino {
1669*e4b17023SJohn Marino if (dump_file)
1670*e4b17023SJohn Marino {
1671*e4b17023SJohn Marino fprintf (dump_file, "SMS failed... \n");
1672*e4b17023SJohn Marino fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
1673*e4b17023SJohn Marino " loop-count=", stage_count);
1674*e4b17023SJohn Marino fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1675*e4b17023SJohn Marino fprintf (dump_file, ", trip-count=");
1676*e4b17023SJohn Marino fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1677*e4b17023SJohn Marino fprintf (dump_file, ")\n");
1678*e4b17023SJohn Marino }
1679*e4b17023SJohn Marino break;
1680*e4b17023SJohn Marino }
1681*e4b17023SJohn Marino
1682*e4b17023SJohn Marino if (!opt_sc_p)
1683*e4b17023SJohn Marino {
1684*e4b17023SJohn Marino /* Rotate the partial schedule to have the branch in row ii-1. */
1685*e4b17023SJohn Marino int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
1686*e4b17023SJohn Marino
1687*e4b17023SJohn Marino reset_sched_times (ps, amount);
1688*e4b17023SJohn Marino rotate_partial_schedule (ps, amount);
1689*e4b17023SJohn Marino }
1690*e4b17023SJohn Marino
1691*e4b17023SJohn Marino set_columns_for_ps (ps);
1692*e4b17023SJohn Marino
1693*e4b17023SJohn Marino min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1694*e4b17023SJohn Marino if (!schedule_reg_moves (ps))
1695*e4b17023SJohn Marino {
1696*e4b17023SJohn Marino mii = ps->ii + 1;
1697*e4b17023SJohn Marino free_partial_schedule (ps);
1698*e4b17023SJohn Marino continue;
1699*e4b17023SJohn Marino }
1700*e4b17023SJohn Marino
1701*e4b17023SJohn Marino /* Moves that handle incoming values might have been added
1702*e4b17023SJohn Marino to a new first stage. Bump the stage count if so.
1703*e4b17023SJohn Marino
1704*e4b17023SJohn Marino ??? Perhaps we could consider rotating the schedule here
1705*e4b17023SJohn Marino instead? */
1706*e4b17023SJohn Marino if (PS_MIN_CYCLE (ps) < min_cycle)
1707*e4b17023SJohn Marino {
1708*e4b17023SJohn Marino reset_sched_times (ps, 0);
1709*e4b17023SJohn Marino stage_count++;
1710*e4b17023SJohn Marino }
1711*e4b17023SJohn Marino
1712*e4b17023SJohn Marino /* The stage count should now be correct without rotation. */
1713*e4b17023SJohn Marino gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
1714*e4b17023SJohn Marino PS_STAGE_COUNT (ps) = stage_count;
1715*e4b17023SJohn Marino
1716*e4b17023SJohn Marino canon_loop (loop);
1717*e4b17023SJohn Marino
1718*e4b17023SJohn Marino if (dump_file)
1719*e4b17023SJohn Marino {
1720*e4b17023SJohn Marino dump_insn_locator (tail);
1721*e4b17023SJohn Marino fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n",
1722*e4b17023SJohn Marino ps->ii, stage_count);
1723*e4b17023SJohn Marino print_partial_schedule (ps, dump_file);
1724*e4b17023SJohn Marino }
1725*e4b17023SJohn Marino
1726*e4b17023SJohn Marino /* case the BCT count is not known , Do loop-versioning */
1727*e4b17023SJohn Marino if (count_reg && ! count_init)
1728*e4b17023SJohn Marino {
1729*e4b17023SJohn Marino rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1730*e4b17023SJohn Marino GEN_INT(stage_count));
1731*e4b17023SJohn Marino unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1732*e4b17023SJohn Marino * REG_BR_PROB_BASE) / 100;
1733*e4b17023SJohn Marino
1734*e4b17023SJohn Marino loop_version (loop, comp_rtx, &condition_bb,
1735*e4b17023SJohn Marino prob, prob, REG_BR_PROB_BASE - prob,
1736*e4b17023SJohn Marino true);
1737*e4b17023SJohn Marino }
1738*e4b17023SJohn Marino
1739*e4b17023SJohn Marino /* Set new iteration count of loop kernel. */
1740*e4b17023SJohn Marino if (count_reg && count_init)
1741*e4b17023SJohn Marino SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1742*e4b17023SJohn Marino - stage_count + 1);
1743*e4b17023SJohn Marino
1744*e4b17023SJohn Marino /* Now apply the scheduled kernel to the RTL of the loop. */
1745*e4b17023SJohn Marino permute_partial_schedule (ps, g->closing_branch->first_note);
1746*e4b17023SJohn Marino
1747*e4b17023SJohn Marino /* Mark this loop as software pipelined so the later
1748*e4b17023SJohn Marino scheduling passes don't touch it. */
1749*e4b17023SJohn Marino if (! flag_resched_modulo_sched)
1750*e4b17023SJohn Marino mark_loop_unsched (loop);
1751*e4b17023SJohn Marino
1752*e4b17023SJohn Marino /* The life-info is not valid any more. */
1753*e4b17023SJohn Marino df_set_bb_dirty (g->bb);
1754*e4b17023SJohn Marino
1755*e4b17023SJohn Marino apply_reg_moves (ps);
1756*e4b17023SJohn Marino if (dump_file)
1757*e4b17023SJohn Marino print_node_sched_params (dump_file, g->num_nodes, ps);
1758*e4b17023SJohn Marino /* Generate prolog and epilog. */
1759*e4b17023SJohn Marino generate_prolog_epilog (ps, loop, count_reg, count_init);
1760*e4b17023SJohn Marino break;
1761*e4b17023SJohn Marino }
1762*e4b17023SJohn Marino
1763*e4b17023SJohn Marino free_partial_schedule (ps);
1764*e4b17023SJohn Marino VEC_free (node_sched_params, heap, node_sched_param_vec);
1765*e4b17023SJohn Marino free (node_order);
1766*e4b17023SJohn Marino free_ddg (g);
1767*e4b17023SJohn Marino }
1768*e4b17023SJohn Marino
1769*e4b17023SJohn Marino free (g_arr);
1770*e4b17023SJohn Marino
1771*e4b17023SJohn Marino /* Release scheduler data, needed until now because of DFA. */
1772*e4b17023SJohn Marino haifa_sched_finish ();
1773*e4b17023SJohn Marino loop_optimizer_finalize ();
1774*e4b17023SJohn Marino }
1775*e4b17023SJohn Marino
1776*e4b17023SJohn Marino /* The SMS scheduling algorithm itself
1777*e4b17023SJohn Marino -----------------------------------
1778*e4b17023SJohn Marino Input: 'O' an ordered list of insns of a loop.
1779*e4b17023SJohn Marino Output: A scheduling of the loop - kernel, prolog, and epilogue.
1780*e4b17023SJohn Marino
1781*e4b17023SJohn Marino 'Q' is the empty Set
1782*e4b17023SJohn Marino 'PS' is the partial schedule; it holds the currently scheduled nodes with
1783*e4b17023SJohn Marino their cycle/slot.
1784*e4b17023SJohn Marino 'PSP' previously scheduled predecessors.
1785*e4b17023SJohn Marino 'PSS' previously scheduled successors.
1786*e4b17023SJohn Marino 't(u)' the cycle where u is scheduled.
1787*e4b17023SJohn Marino 'l(u)' is the latency of u.
1788*e4b17023SJohn Marino 'd(v,u)' is the dependence distance from v to u.
1789*e4b17023SJohn Marino 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1790*e4b17023SJohn Marino the node ordering phase.
1791*e4b17023SJohn Marino 'check_hardware_resources_conflicts(u, PS, c)'
1792*e4b17023SJohn Marino run a trace around cycle/slot through DFA model
1793*e4b17023SJohn Marino to check resource conflicts involving instruction u
1794*e4b17023SJohn Marino at cycle c given the partial schedule PS.
1795*e4b17023SJohn Marino 'add_to_partial_schedule_at_time(u, PS, c)'
1796*e4b17023SJohn Marino Add the node/instruction u to the partial schedule
1797*e4b17023SJohn Marino PS at time c.
1798*e4b17023SJohn Marino 'calculate_register_pressure(PS)'
1799*e4b17023SJohn Marino Given a schedule of instructions, calculate the register
1800*e4b17023SJohn Marino pressure it implies. One implementation could be the
1801*e4b17023SJohn Marino maximum number of overlapping live ranges.
1802*e4b17023SJohn Marino 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1803*e4b17023SJohn Marino registers available in the hardware.
1804*e4b17023SJohn Marino
1805*e4b17023SJohn Marino 1. II = MII.
1806*e4b17023SJohn Marino 2. PS = empty list
1807*e4b17023SJohn Marino 3. for each node u in O in pre-computed order
1808*e4b17023SJohn Marino 4. if (PSP(u) != Q && PSS(u) == Q) then
1809*e4b17023SJohn Marino 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1810*e4b17023SJohn Marino 6. start = Early_start; end = Early_start + II - 1; step = 1
1811*e4b17023SJohn Marino 11. else if (PSP(u) == Q && PSS(u) != Q) then
1812*e4b17023SJohn Marino 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1813*e4b17023SJohn Marino 13. start = Late_start; end = Late_start - II + 1; step = -1
1814*e4b17023SJohn Marino 14. else if (PSP(u) != Q && PSS(u) != Q) then
1815*e4b17023SJohn Marino 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1816*e4b17023SJohn Marino 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1817*e4b17023SJohn Marino 17. start = Early_start;
1818*e4b17023SJohn Marino 18. end = min(Early_start + II - 1 , Late_start);
1819*e4b17023SJohn Marino 19. step = 1
1820*e4b17023SJohn Marino 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1821*e4b17023SJohn Marino 21. start = ASAP(u); end = start + II - 1; step = 1
1822*e4b17023SJohn Marino 22. endif
1823*e4b17023SJohn Marino
1824*e4b17023SJohn Marino 23. success = false
1825*e4b17023SJohn Marino 24. for (c = start ; c != end ; c += step)
1826*e4b17023SJohn Marino 25. if check_hardware_resources_conflicts(u, PS, c) then
1827*e4b17023SJohn Marino 26. add_to_partial_schedule_at_time(u, PS, c)
1828*e4b17023SJohn Marino 27. success = true
1829*e4b17023SJohn Marino 28. break
1830*e4b17023SJohn Marino 29. endif
1831*e4b17023SJohn Marino 30. endfor
1832*e4b17023SJohn Marino 31. if (success == false) then
1833*e4b17023SJohn Marino 32. II = II + 1
1834*e4b17023SJohn Marino 33. if (II > maxII) then
1835*e4b17023SJohn Marino 34. finish - failed to schedule
1836*e4b17023SJohn Marino 35. endif
1837*e4b17023SJohn Marino 36. goto 2.
1838*e4b17023SJohn Marino 37. endif
1839*e4b17023SJohn Marino 38. endfor
1840*e4b17023SJohn Marino 39. if (calculate_register_pressure(PS) > maxRP) then
1841*e4b17023SJohn Marino 40. goto 32.
1842*e4b17023SJohn Marino 41. endif
1843*e4b17023SJohn Marino 42. compute epilogue & prologue
1844*e4b17023SJohn Marino 43. finish - succeeded to schedule
1845*e4b17023SJohn Marino
1846*e4b17023SJohn Marino ??? The algorithm restricts the scheduling window to II cycles.
1847*e4b17023SJohn Marino In rare cases, it may be better to allow windows of II+1 cycles.
1848*e4b17023SJohn Marino The window would then start and end on the same row, but with
1849*e4b17023SJohn Marino different "must precede" and "must follow" requirements. */
1850*e4b17023SJohn Marino
1851*e4b17023SJohn Marino /* A limit on the number of cycles that resource conflicts can span. ??? Should
1852*e4b17023SJohn Marino be provided by DFA, and be dependent on the type of insn scheduled. Currently
1853*e4b17023SJohn Marino set to 0 to save compile time. */
1854*e4b17023SJohn Marino #define DFA_HISTORY SMS_DFA_HISTORY
1855*e4b17023SJohn Marino
1856*e4b17023SJohn Marino /* A threshold for the number of repeated unsuccessful attempts to insert
1857*e4b17023SJohn Marino an empty row, before we flush the partial schedule and start over. */
1858*e4b17023SJohn Marino #define MAX_SPLIT_NUM 10
1859*e4b17023SJohn Marino /* Given the partial schedule PS, this function calculates and returns the
1860*e4b17023SJohn Marino cycles in which we can schedule the node with the given index I.
1861*e4b17023SJohn Marino NOTE: Here we do the backtracking in SMS, in some special cases. We have
1862*e4b17023SJohn Marino noticed that there are several cases in which we fail to SMS the loop
1863*e4b17023SJohn Marino because the sched window of a node is empty due to tight data-deps. In
1864*e4b17023SJohn Marino such cases we want to unschedule some of the predecessors/successors
1865*e4b17023SJohn Marino until we get non-empty scheduling window. It returns -1 if the
1866*e4b17023SJohn Marino scheduling window is empty and zero otherwise. */
1867*e4b17023SJohn Marino
1868*e4b17023SJohn Marino static int
get_sched_window(partial_schedule_ptr ps,ddg_node_ptr u_node,sbitmap sched_nodes,int ii,int * start_p,int * step_p,int * end_p)1869*e4b17023SJohn Marino get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
1870*e4b17023SJohn Marino sbitmap sched_nodes, int ii, int *start_p, int *step_p,
1871*e4b17023SJohn Marino int *end_p)
1872*e4b17023SJohn Marino {
1873*e4b17023SJohn Marino int start, step, end;
1874*e4b17023SJohn Marino int early_start, late_start;
1875*e4b17023SJohn Marino ddg_edge_ptr e;
1876*e4b17023SJohn Marino sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1877*e4b17023SJohn Marino sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1878*e4b17023SJohn Marino sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1879*e4b17023SJohn Marino sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1880*e4b17023SJohn Marino int psp_not_empty;
1881*e4b17023SJohn Marino int pss_not_empty;
1882*e4b17023SJohn Marino int count_preds;
1883*e4b17023SJohn Marino int count_succs;
1884*e4b17023SJohn Marino
1885*e4b17023SJohn Marino /* 1. compute sched window for u (start, end, step). */
1886*e4b17023SJohn Marino sbitmap_zero (psp);
1887*e4b17023SJohn Marino sbitmap_zero (pss);
1888*e4b17023SJohn Marino psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1889*e4b17023SJohn Marino pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1890*e4b17023SJohn Marino
1891*e4b17023SJohn Marino /* We first compute a forward range (start <= end), then decide whether
1892*e4b17023SJohn Marino to reverse it. */
1893*e4b17023SJohn Marino early_start = INT_MIN;
1894*e4b17023SJohn Marino late_start = INT_MAX;
1895*e4b17023SJohn Marino start = INT_MIN;
1896*e4b17023SJohn Marino end = INT_MAX;
1897*e4b17023SJohn Marino step = 1;
1898*e4b17023SJohn Marino
1899*e4b17023SJohn Marino count_preds = 0;
1900*e4b17023SJohn Marino count_succs = 0;
1901*e4b17023SJohn Marino
1902*e4b17023SJohn Marino if (dump_file && (psp_not_empty || pss_not_empty))
1903*e4b17023SJohn Marino {
1904*e4b17023SJohn Marino fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
1905*e4b17023SJohn Marino "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
1906*e4b17023SJohn Marino fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
1907*e4b17023SJohn Marino "start", "early start", "late start", "end", "time");
1908*e4b17023SJohn Marino fprintf (dump_file, "=========== =========== =========== ==========="
1909*e4b17023SJohn Marino " =====\n");
1910*e4b17023SJohn Marino }
1911*e4b17023SJohn Marino /* Calculate early_start and limit end. Both bounds are inclusive. */
1912*e4b17023SJohn Marino if (psp_not_empty)
1913*e4b17023SJohn Marino for (e = u_node->in; e != 0; e = e->next_in)
1914*e4b17023SJohn Marino {
1915*e4b17023SJohn Marino int v = e->src->cuid;
1916*e4b17023SJohn Marino
1917*e4b17023SJohn Marino if (TEST_BIT (sched_nodes, v))
1918*e4b17023SJohn Marino {
1919*e4b17023SJohn Marino int p_st = SCHED_TIME (v);
1920*e4b17023SJohn Marino int earliest = p_st + e->latency - (e->distance * ii);
1921*e4b17023SJohn Marino int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
1922*e4b17023SJohn Marino
1923*e4b17023SJohn Marino if (dump_file)
1924*e4b17023SJohn Marino {
1925*e4b17023SJohn Marino fprintf (dump_file, "%11s %11d %11s %11d %5d",
1926*e4b17023SJohn Marino "", earliest, "", latest, p_st);
1927*e4b17023SJohn Marino print_ddg_edge (dump_file, e);
1928*e4b17023SJohn Marino fprintf (dump_file, "\n");
1929*e4b17023SJohn Marino }
1930*e4b17023SJohn Marino
1931*e4b17023SJohn Marino early_start = MAX (early_start, earliest);
1932*e4b17023SJohn Marino end = MIN (end, latest);
1933*e4b17023SJohn Marino
1934*e4b17023SJohn Marino if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1935*e4b17023SJohn Marino count_preds++;
1936*e4b17023SJohn Marino }
1937*e4b17023SJohn Marino }
1938*e4b17023SJohn Marino
1939*e4b17023SJohn Marino /* Calculate late_start and limit start. Both bounds are inclusive. */
1940*e4b17023SJohn Marino if (pss_not_empty)
1941*e4b17023SJohn Marino for (e = u_node->out; e != 0; e = e->next_out)
1942*e4b17023SJohn Marino {
1943*e4b17023SJohn Marino int v = e->dest->cuid;
1944*e4b17023SJohn Marino
1945*e4b17023SJohn Marino if (TEST_BIT (sched_nodes, v))
1946*e4b17023SJohn Marino {
1947*e4b17023SJohn Marino int s_st = SCHED_TIME (v);
1948*e4b17023SJohn Marino int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
1949*e4b17023SJohn Marino int latest = s_st - e->latency + (e->distance * ii);
1950*e4b17023SJohn Marino
1951*e4b17023SJohn Marino if (dump_file)
1952*e4b17023SJohn Marino {
1953*e4b17023SJohn Marino fprintf (dump_file, "%11d %11s %11d %11s %5d",
1954*e4b17023SJohn Marino earliest, "", latest, "", s_st);
1955*e4b17023SJohn Marino print_ddg_edge (dump_file, e);
1956*e4b17023SJohn Marino fprintf (dump_file, "\n");
1957*e4b17023SJohn Marino }
1958*e4b17023SJohn Marino
1959*e4b17023SJohn Marino start = MAX (start, earliest);
1960*e4b17023SJohn Marino late_start = MIN (late_start, latest);
1961*e4b17023SJohn Marino
1962*e4b17023SJohn Marino if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1963*e4b17023SJohn Marino count_succs++;
1964*e4b17023SJohn Marino }
1965*e4b17023SJohn Marino }
1966*e4b17023SJohn Marino
1967*e4b17023SJohn Marino if (dump_file && (psp_not_empty || pss_not_empty))
1968*e4b17023SJohn Marino {
1969*e4b17023SJohn Marino fprintf (dump_file, "----------- ----------- ----------- -----------"
1970*e4b17023SJohn Marino " -----\n");
1971*e4b17023SJohn Marino fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
1972*e4b17023SJohn Marino start, early_start, late_start, end, "",
1973*e4b17023SJohn Marino "(max, max, min, min)");
1974*e4b17023SJohn Marino }
1975*e4b17023SJohn Marino
1976*e4b17023SJohn Marino /* Get a target scheduling window no bigger than ii. */
1977*e4b17023SJohn Marino if (early_start == INT_MIN && late_start == INT_MAX)
1978*e4b17023SJohn Marino early_start = NODE_ASAP (u_node);
1979*e4b17023SJohn Marino else if (early_start == INT_MIN)
1980*e4b17023SJohn Marino early_start = late_start - (ii - 1);
1981*e4b17023SJohn Marino late_start = MIN (late_start, early_start + (ii - 1));
1982*e4b17023SJohn Marino
1983*e4b17023SJohn Marino /* Apply memory dependence limits. */
1984*e4b17023SJohn Marino start = MAX (start, early_start);
1985*e4b17023SJohn Marino end = MIN (end, late_start);
1986*e4b17023SJohn Marino
1987*e4b17023SJohn Marino if (dump_file && (psp_not_empty || pss_not_empty))
1988*e4b17023SJohn Marino fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
1989*e4b17023SJohn Marino "", start, end, "", "");
1990*e4b17023SJohn Marino
1991*e4b17023SJohn Marino /* If there are at least as many successors as predecessors, schedule the
1992*e4b17023SJohn Marino node close to its successors. */
1993*e4b17023SJohn Marino if (pss_not_empty && count_succs >= count_preds)
1994*e4b17023SJohn Marino {
1995*e4b17023SJohn Marino int tmp = end;
1996*e4b17023SJohn Marino end = start;
1997*e4b17023SJohn Marino start = tmp;
1998*e4b17023SJohn Marino step = -1;
1999*e4b17023SJohn Marino }
2000*e4b17023SJohn Marino
2001*e4b17023SJohn Marino /* Now that we've finalized the window, make END an exclusive rather
2002*e4b17023SJohn Marino than an inclusive bound. */
2003*e4b17023SJohn Marino end += step;
2004*e4b17023SJohn Marino
2005*e4b17023SJohn Marino *start_p = start;
2006*e4b17023SJohn Marino *step_p = step;
2007*e4b17023SJohn Marino *end_p = end;
2008*e4b17023SJohn Marino sbitmap_free (psp);
2009*e4b17023SJohn Marino sbitmap_free (pss);
2010*e4b17023SJohn Marino
2011*e4b17023SJohn Marino if ((start >= end && step == 1) || (start <= end && step == -1))
2012*e4b17023SJohn Marino {
2013*e4b17023SJohn Marino if (dump_file)
2014*e4b17023SJohn Marino fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
2015*e4b17023SJohn Marino start, end, step);
2016*e4b17023SJohn Marino return -1;
2017*e4b17023SJohn Marino }
2018*e4b17023SJohn Marino
2019*e4b17023SJohn Marino return 0;
2020*e4b17023SJohn Marino }
2021*e4b17023SJohn Marino
2022*e4b17023SJohn Marino /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
2023*e4b17023SJohn Marino node currently been scheduled. At the end of the calculation
2024*e4b17023SJohn Marino MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
2025*e4b17023SJohn Marino U_NODE which are (1) already scheduled in the first/last row of
2026*e4b17023SJohn Marino U_NODE's scheduling window, (2) whose dependence inequality with U
2027*e4b17023SJohn Marino becomes an equality when U is scheduled in this same row, and (3)
2028*e4b17023SJohn Marino whose dependence latency is zero.
2029*e4b17023SJohn Marino
2030*e4b17023SJohn Marino The first and last rows are calculated using the following parameters:
2031*e4b17023SJohn Marino START/END rows - The cycles that begins/ends the traversal on the window;
2032*e4b17023SJohn Marino searching for an empty cycle to schedule U_NODE.
2033*e4b17023SJohn Marino STEP - The direction in which we traverse the window.
2034*e4b17023SJohn Marino II - The initiation interval. */
2035*e4b17023SJohn Marino
2036*e4b17023SJohn Marino static void
calculate_must_precede_follow(ddg_node_ptr u_node,int start,int end,int step,int ii,sbitmap sched_nodes,sbitmap must_precede,sbitmap must_follow)2037*e4b17023SJohn Marino calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
2038*e4b17023SJohn Marino int step, int ii, sbitmap sched_nodes,
2039*e4b17023SJohn Marino sbitmap must_precede, sbitmap must_follow)
2040*e4b17023SJohn Marino {
2041*e4b17023SJohn Marino ddg_edge_ptr e;
2042*e4b17023SJohn Marino int first_cycle_in_window, last_cycle_in_window;
2043*e4b17023SJohn Marino
2044*e4b17023SJohn Marino gcc_assert (must_precede && must_follow);
2045*e4b17023SJohn Marino
2046*e4b17023SJohn Marino /* Consider the following scheduling window:
2047*e4b17023SJohn Marino {first_cycle_in_window, first_cycle_in_window+1, ...,
2048*e4b17023SJohn Marino last_cycle_in_window}. If step is 1 then the following will be
2049*e4b17023SJohn Marino the order we traverse the window: {start=first_cycle_in_window,
2050*e4b17023SJohn Marino first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
2051*e4b17023SJohn Marino or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
2052*e4b17023SJohn Marino end=first_cycle_in_window-1} if step is -1. */
2053*e4b17023SJohn Marino first_cycle_in_window = (step == 1) ? start : end - step;
2054*e4b17023SJohn Marino last_cycle_in_window = (step == 1) ? end - step : start;
2055*e4b17023SJohn Marino
2056*e4b17023SJohn Marino sbitmap_zero (must_precede);
2057*e4b17023SJohn Marino sbitmap_zero (must_follow);
2058*e4b17023SJohn Marino
2059*e4b17023SJohn Marino if (dump_file)
2060*e4b17023SJohn Marino fprintf (dump_file, "\nmust_precede: ");
2061*e4b17023SJohn Marino
2062*e4b17023SJohn Marino /* Instead of checking if:
2063*e4b17023SJohn Marino (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
2064*e4b17023SJohn Marino && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
2065*e4b17023SJohn Marino first_cycle_in_window)
2066*e4b17023SJohn Marino && e->latency == 0
2067*e4b17023SJohn Marino we use the fact that latency is non-negative:
2068*e4b17023SJohn Marino SCHED_TIME (e->src) - (e->distance * ii) <=
2069*e4b17023SJohn Marino SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
2070*e4b17023SJohn Marino first_cycle_in_window
2071*e4b17023SJohn Marino and check only if
2072*e4b17023SJohn Marino SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
2073*e4b17023SJohn Marino for (e = u_node->in; e != 0; e = e->next_in)
2074*e4b17023SJohn Marino if (TEST_BIT (sched_nodes, e->src->cuid)
2075*e4b17023SJohn Marino && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
2076*e4b17023SJohn Marino first_cycle_in_window))
2077*e4b17023SJohn Marino {
2078*e4b17023SJohn Marino if (dump_file)
2079*e4b17023SJohn Marino fprintf (dump_file, "%d ", e->src->cuid);
2080*e4b17023SJohn Marino
2081*e4b17023SJohn Marino SET_BIT (must_precede, e->src->cuid);
2082*e4b17023SJohn Marino }
2083*e4b17023SJohn Marino
2084*e4b17023SJohn Marino if (dump_file)
2085*e4b17023SJohn Marino fprintf (dump_file, "\nmust_follow: ");
2086*e4b17023SJohn Marino
2087*e4b17023SJohn Marino /* Instead of checking if:
2088*e4b17023SJohn Marino (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
2089*e4b17023SJohn Marino && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
2090*e4b17023SJohn Marino last_cycle_in_window)
2091*e4b17023SJohn Marino && e->latency == 0
2092*e4b17023SJohn Marino we use the fact that latency is non-negative:
2093*e4b17023SJohn Marino SCHED_TIME (e->dest) + (e->distance * ii) >=
2094*e4b17023SJohn Marino SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
2095*e4b17023SJohn Marino last_cycle_in_window
2096*e4b17023SJohn Marino and check only if
2097*e4b17023SJohn Marino SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
2098*e4b17023SJohn Marino for (e = u_node->out; e != 0; e = e->next_out)
2099*e4b17023SJohn Marino if (TEST_BIT (sched_nodes, e->dest->cuid)
2100*e4b17023SJohn Marino && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
2101*e4b17023SJohn Marino last_cycle_in_window))
2102*e4b17023SJohn Marino {
2103*e4b17023SJohn Marino if (dump_file)
2104*e4b17023SJohn Marino fprintf (dump_file, "%d ", e->dest->cuid);
2105*e4b17023SJohn Marino
2106*e4b17023SJohn Marino SET_BIT (must_follow, e->dest->cuid);
2107*e4b17023SJohn Marino }
2108*e4b17023SJohn Marino
2109*e4b17023SJohn Marino if (dump_file)
2110*e4b17023SJohn Marino fprintf (dump_file, "\n");
2111*e4b17023SJohn Marino }
2112*e4b17023SJohn Marino
2113*e4b17023SJohn Marino /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
2114*e4b17023SJohn Marino parameters to decide if that's possible:
2115*e4b17023SJohn Marino PS - The partial schedule.
2116*e4b17023SJohn Marino U - The serial number of U_NODE.
2117*e4b17023SJohn Marino NUM_SPLITS - The number of row splits made so far.
2118*e4b17023SJohn Marino MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
2119*e4b17023SJohn Marino the first row of the scheduling window)
2120*e4b17023SJohn Marino MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
2121*e4b17023SJohn Marino last row of the scheduling window) */
2122*e4b17023SJohn Marino
2123*e4b17023SJohn Marino static bool
try_scheduling_node_in_cycle(partial_schedule_ptr ps,int u,int cycle,sbitmap sched_nodes,int * num_splits,sbitmap must_precede,sbitmap must_follow)2124*e4b17023SJohn Marino try_scheduling_node_in_cycle (partial_schedule_ptr ps,
2125*e4b17023SJohn Marino int u, int cycle, sbitmap sched_nodes,
2126*e4b17023SJohn Marino int *num_splits, sbitmap must_precede,
2127*e4b17023SJohn Marino sbitmap must_follow)
2128*e4b17023SJohn Marino {
2129*e4b17023SJohn Marino ps_insn_ptr psi;
2130*e4b17023SJohn Marino bool success = 0;
2131*e4b17023SJohn Marino
2132*e4b17023SJohn Marino verify_partial_schedule (ps, sched_nodes);
2133*e4b17023SJohn Marino psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
2134*e4b17023SJohn Marino if (psi)
2135*e4b17023SJohn Marino {
2136*e4b17023SJohn Marino SCHED_TIME (u) = cycle;
2137*e4b17023SJohn Marino SET_BIT (sched_nodes, u);
2138*e4b17023SJohn Marino success = 1;
2139*e4b17023SJohn Marino *num_splits = 0;
2140*e4b17023SJohn Marino if (dump_file)
2141*e4b17023SJohn Marino fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
2142*e4b17023SJohn Marino
2143*e4b17023SJohn Marino }
2144*e4b17023SJohn Marino
2145*e4b17023SJohn Marino return success;
2146*e4b17023SJohn Marino }
2147*e4b17023SJohn Marino
2148*e4b17023SJohn Marino /* This function implements the scheduling algorithm for SMS according to the
2149*e4b17023SJohn Marino above algorithm. */
2150*e4b17023SJohn Marino static partial_schedule_ptr
sms_schedule_by_order(ddg_ptr g,int mii,int maxii,int * nodes_order)2151*e4b17023SJohn Marino sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
2152*e4b17023SJohn Marino {
2153*e4b17023SJohn Marino int ii = mii;
2154*e4b17023SJohn Marino int i, c, success, num_splits = 0;
2155*e4b17023SJohn Marino int flush_and_start_over = true;
2156*e4b17023SJohn Marino int num_nodes = g->num_nodes;
2157*e4b17023SJohn Marino int start, end, step; /* Place together into one struct? */
2158*e4b17023SJohn Marino sbitmap sched_nodes = sbitmap_alloc (num_nodes);
2159*e4b17023SJohn Marino sbitmap must_precede = sbitmap_alloc (num_nodes);
2160*e4b17023SJohn Marino sbitmap must_follow = sbitmap_alloc (num_nodes);
2161*e4b17023SJohn Marino sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
2162*e4b17023SJohn Marino
2163*e4b17023SJohn Marino partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
2164*e4b17023SJohn Marino
2165*e4b17023SJohn Marino sbitmap_ones (tobe_scheduled);
2166*e4b17023SJohn Marino sbitmap_zero (sched_nodes);
2167*e4b17023SJohn Marino
2168*e4b17023SJohn Marino while (flush_and_start_over && (ii < maxii))
2169*e4b17023SJohn Marino {
2170*e4b17023SJohn Marino
2171*e4b17023SJohn Marino if (dump_file)
2172*e4b17023SJohn Marino fprintf (dump_file, "Starting with ii=%d\n", ii);
2173*e4b17023SJohn Marino flush_and_start_over = false;
2174*e4b17023SJohn Marino sbitmap_zero (sched_nodes);
2175*e4b17023SJohn Marino
2176*e4b17023SJohn Marino for (i = 0; i < num_nodes; i++)
2177*e4b17023SJohn Marino {
2178*e4b17023SJohn Marino int u = nodes_order[i];
2179*e4b17023SJohn Marino ddg_node_ptr u_node = &ps->g->nodes[u];
2180*e4b17023SJohn Marino rtx insn = u_node->insn;
2181*e4b17023SJohn Marino
2182*e4b17023SJohn Marino if (!NONDEBUG_INSN_P (insn))
2183*e4b17023SJohn Marino {
2184*e4b17023SJohn Marino RESET_BIT (tobe_scheduled, u);
2185*e4b17023SJohn Marino continue;
2186*e4b17023SJohn Marino }
2187*e4b17023SJohn Marino
2188*e4b17023SJohn Marino if (TEST_BIT (sched_nodes, u))
2189*e4b17023SJohn Marino continue;
2190*e4b17023SJohn Marino
2191*e4b17023SJohn Marino /* Try to get non-empty scheduling window. */
2192*e4b17023SJohn Marino success = 0;
2193*e4b17023SJohn Marino if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
2194*e4b17023SJohn Marino &step, &end) == 0)
2195*e4b17023SJohn Marino {
2196*e4b17023SJohn Marino if (dump_file)
2197*e4b17023SJohn Marino fprintf (dump_file, "\nTrying to schedule node %d "
2198*e4b17023SJohn Marino "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
2199*e4b17023SJohn Marino (g->nodes[u].insn)), start, end, step);
2200*e4b17023SJohn Marino
2201*e4b17023SJohn Marino gcc_assert ((step > 0 && start < end)
2202*e4b17023SJohn Marino || (step < 0 && start > end));
2203*e4b17023SJohn Marino
2204*e4b17023SJohn Marino calculate_must_precede_follow (u_node, start, end, step, ii,
2205*e4b17023SJohn Marino sched_nodes, must_precede,
2206*e4b17023SJohn Marino must_follow);
2207*e4b17023SJohn Marino
2208*e4b17023SJohn Marino for (c = start; c != end; c += step)
2209*e4b17023SJohn Marino {
2210*e4b17023SJohn Marino sbitmap tmp_precede, tmp_follow;
2211*e4b17023SJohn Marino
2212*e4b17023SJohn Marino set_must_precede_follow (&tmp_follow, must_follow,
2213*e4b17023SJohn Marino &tmp_precede, must_precede,
2214*e4b17023SJohn Marino c, start, end, step);
2215*e4b17023SJohn Marino success =
2216*e4b17023SJohn Marino try_scheduling_node_in_cycle (ps, u, c,
2217*e4b17023SJohn Marino sched_nodes,
2218*e4b17023SJohn Marino &num_splits, tmp_precede,
2219*e4b17023SJohn Marino tmp_follow);
2220*e4b17023SJohn Marino if (success)
2221*e4b17023SJohn Marino break;
2222*e4b17023SJohn Marino }
2223*e4b17023SJohn Marino
2224*e4b17023SJohn Marino verify_partial_schedule (ps, sched_nodes);
2225*e4b17023SJohn Marino }
2226*e4b17023SJohn Marino if (!success)
2227*e4b17023SJohn Marino {
2228*e4b17023SJohn Marino int split_row;
2229*e4b17023SJohn Marino
2230*e4b17023SJohn Marino if (ii++ == maxii)
2231*e4b17023SJohn Marino break;
2232*e4b17023SJohn Marino
2233*e4b17023SJohn Marino if (num_splits >= MAX_SPLIT_NUM)
2234*e4b17023SJohn Marino {
2235*e4b17023SJohn Marino num_splits = 0;
2236*e4b17023SJohn Marino flush_and_start_over = true;
2237*e4b17023SJohn Marino verify_partial_schedule (ps, sched_nodes);
2238*e4b17023SJohn Marino reset_partial_schedule (ps, ii);
2239*e4b17023SJohn Marino verify_partial_schedule (ps, sched_nodes);
2240*e4b17023SJohn Marino break;
2241*e4b17023SJohn Marino }
2242*e4b17023SJohn Marino
2243*e4b17023SJohn Marino num_splits++;
2244*e4b17023SJohn Marino /* The scheduling window is exclusive of 'end'
2245*e4b17023SJohn Marino whereas compute_split_window() expects an inclusive,
2246*e4b17023SJohn Marino ordered range. */
2247*e4b17023SJohn Marino if (step == 1)
2248*e4b17023SJohn Marino split_row = compute_split_row (sched_nodes, start, end - 1,
2249*e4b17023SJohn Marino ps->ii, u_node);
2250*e4b17023SJohn Marino else
2251*e4b17023SJohn Marino split_row = compute_split_row (sched_nodes, end + 1, start,
2252*e4b17023SJohn Marino ps->ii, u_node);
2253*e4b17023SJohn Marino
2254*e4b17023SJohn Marino ps_insert_empty_row (ps, split_row, sched_nodes);
2255*e4b17023SJohn Marino i--; /* Go back and retry node i. */
2256*e4b17023SJohn Marino
2257*e4b17023SJohn Marino if (dump_file)
2258*e4b17023SJohn Marino fprintf (dump_file, "num_splits=%d\n", num_splits);
2259*e4b17023SJohn Marino }
2260*e4b17023SJohn Marino
2261*e4b17023SJohn Marino /* ??? If (success), check register pressure estimates. */
2262*e4b17023SJohn Marino } /* Continue with next node. */
2263*e4b17023SJohn Marino } /* While flush_and_start_over. */
2264*e4b17023SJohn Marino if (ii >= maxii)
2265*e4b17023SJohn Marino {
2266*e4b17023SJohn Marino free_partial_schedule (ps);
2267*e4b17023SJohn Marino ps = NULL;
2268*e4b17023SJohn Marino }
2269*e4b17023SJohn Marino else
2270*e4b17023SJohn Marino gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));
2271*e4b17023SJohn Marino
2272*e4b17023SJohn Marino sbitmap_free (sched_nodes);
2273*e4b17023SJohn Marino sbitmap_free (must_precede);
2274*e4b17023SJohn Marino sbitmap_free (must_follow);
2275*e4b17023SJohn Marino sbitmap_free (tobe_scheduled);
2276*e4b17023SJohn Marino
2277*e4b17023SJohn Marino return ps;
2278*e4b17023SJohn Marino }
2279*e4b17023SJohn Marino
2280*e4b17023SJohn Marino /* This function inserts a new empty row into PS at the position
2281*e4b17023SJohn Marino according to SPLITROW, keeping all already scheduled instructions
2282*e4b17023SJohn Marino intact and updating their SCHED_TIME and cycle accordingly. */
2283*e4b17023SJohn Marino static void
ps_insert_empty_row(partial_schedule_ptr ps,int split_row,sbitmap sched_nodes)2284*e4b17023SJohn Marino ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
2285*e4b17023SJohn Marino sbitmap sched_nodes)
2286*e4b17023SJohn Marino {
2287*e4b17023SJohn Marino ps_insn_ptr crr_insn;
2288*e4b17023SJohn Marino ps_insn_ptr *rows_new;
2289*e4b17023SJohn Marino int ii = ps->ii;
2290*e4b17023SJohn Marino int new_ii = ii + 1;
2291*e4b17023SJohn Marino int row;
2292*e4b17023SJohn Marino int *rows_length_new;
2293*e4b17023SJohn Marino
2294*e4b17023SJohn Marino verify_partial_schedule (ps, sched_nodes);
2295*e4b17023SJohn Marino
2296*e4b17023SJohn Marino /* We normalize sched_time and rotate ps to have only non-negative sched
2297*e4b17023SJohn Marino times, for simplicity of updating cycles after inserting new row. */
2298*e4b17023SJohn Marino split_row -= ps->min_cycle;
2299*e4b17023SJohn Marino split_row = SMODULO (split_row, ii);
2300*e4b17023SJohn Marino if (dump_file)
2301*e4b17023SJohn Marino fprintf (dump_file, "split_row=%d\n", split_row);
2302*e4b17023SJohn Marino
2303*e4b17023SJohn Marino reset_sched_times (ps, PS_MIN_CYCLE (ps));
2304*e4b17023SJohn Marino rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
2305*e4b17023SJohn Marino
2306*e4b17023SJohn Marino rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
2307*e4b17023SJohn Marino rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
2308*e4b17023SJohn Marino for (row = 0; row < split_row; row++)
2309*e4b17023SJohn Marino {
2310*e4b17023SJohn Marino rows_new[row] = ps->rows[row];
2311*e4b17023SJohn Marino rows_length_new[row] = ps->rows_length[row];
2312*e4b17023SJohn Marino ps->rows[row] = NULL;
2313*e4b17023SJohn Marino for (crr_insn = rows_new[row];
2314*e4b17023SJohn Marino crr_insn; crr_insn = crr_insn->next_in_row)
2315*e4b17023SJohn Marino {
2316*e4b17023SJohn Marino int u = crr_insn->id;
2317*e4b17023SJohn Marino int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
2318*e4b17023SJohn Marino
2319*e4b17023SJohn Marino SCHED_TIME (u) = new_time;
2320*e4b17023SJohn Marino crr_insn->cycle = new_time;
2321*e4b17023SJohn Marino SCHED_ROW (u) = new_time % new_ii;
2322*e4b17023SJohn Marino SCHED_STAGE (u) = new_time / new_ii;
2323*e4b17023SJohn Marino }
2324*e4b17023SJohn Marino
2325*e4b17023SJohn Marino }
2326*e4b17023SJohn Marino
2327*e4b17023SJohn Marino rows_new[split_row] = NULL;
2328*e4b17023SJohn Marino
2329*e4b17023SJohn Marino for (row = split_row; row < ii; row++)
2330*e4b17023SJohn Marino {
2331*e4b17023SJohn Marino rows_new[row + 1] = ps->rows[row];
2332*e4b17023SJohn Marino rows_length_new[row + 1] = ps->rows_length[row];
2333*e4b17023SJohn Marino ps->rows[row] = NULL;
2334*e4b17023SJohn Marino for (crr_insn = rows_new[row + 1];
2335*e4b17023SJohn Marino crr_insn; crr_insn = crr_insn->next_in_row)
2336*e4b17023SJohn Marino {
2337*e4b17023SJohn Marino int u = crr_insn->id;
2338*e4b17023SJohn Marino int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
2339*e4b17023SJohn Marino
2340*e4b17023SJohn Marino SCHED_TIME (u) = new_time;
2341*e4b17023SJohn Marino crr_insn->cycle = new_time;
2342*e4b17023SJohn Marino SCHED_ROW (u) = new_time % new_ii;
2343*e4b17023SJohn Marino SCHED_STAGE (u) = new_time / new_ii;
2344*e4b17023SJohn Marino }
2345*e4b17023SJohn Marino }
2346*e4b17023SJohn Marino
2347*e4b17023SJohn Marino /* Updating ps. */
2348*e4b17023SJohn Marino ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
2349*e4b17023SJohn Marino + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
2350*e4b17023SJohn Marino ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
2351*e4b17023SJohn Marino + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
2352*e4b17023SJohn Marino free (ps->rows);
2353*e4b17023SJohn Marino ps->rows = rows_new;
2354*e4b17023SJohn Marino free (ps->rows_length);
2355*e4b17023SJohn Marino ps->rows_length = rows_length_new;
2356*e4b17023SJohn Marino ps->ii = new_ii;
2357*e4b17023SJohn Marino gcc_assert (ps->min_cycle >= 0);
2358*e4b17023SJohn Marino
2359*e4b17023SJohn Marino verify_partial_schedule (ps, sched_nodes);
2360*e4b17023SJohn Marino
2361*e4b17023SJohn Marino if (dump_file)
2362*e4b17023SJohn Marino fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
2363*e4b17023SJohn Marino ps->max_cycle);
2364*e4b17023SJohn Marino }
2365*e4b17023SJohn Marino
2366*e4b17023SJohn Marino /* Given U_NODE which is the node that failed to be scheduled; LOW and
2367*e4b17023SJohn Marino UP which are the boundaries of it's scheduling window; compute using
2368*e4b17023SJohn Marino SCHED_NODES and II a row in the partial schedule that can be split
2369*e4b17023SJohn Marino which will separate a critical predecessor from a critical successor
2370*e4b17023SJohn Marino thereby expanding the window, and return it. */
2371*e4b17023SJohn Marino static int
compute_split_row(sbitmap sched_nodes,int low,int up,int ii,ddg_node_ptr u_node)2372*e4b17023SJohn Marino compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
2373*e4b17023SJohn Marino ddg_node_ptr u_node)
2374*e4b17023SJohn Marino {
2375*e4b17023SJohn Marino ddg_edge_ptr e;
2376*e4b17023SJohn Marino int lower = INT_MIN, upper = INT_MAX;
2377*e4b17023SJohn Marino int crit_pred = -1;
2378*e4b17023SJohn Marino int crit_succ = -1;
2379*e4b17023SJohn Marino int crit_cycle;
2380*e4b17023SJohn Marino
2381*e4b17023SJohn Marino for (e = u_node->in; e != 0; e = e->next_in)
2382*e4b17023SJohn Marino {
2383*e4b17023SJohn Marino int v = e->src->cuid;
2384*e4b17023SJohn Marino
2385*e4b17023SJohn Marino if (TEST_BIT (sched_nodes, v)
2386*e4b17023SJohn Marino && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
2387*e4b17023SJohn Marino if (SCHED_TIME (v) > lower)
2388*e4b17023SJohn Marino {
2389*e4b17023SJohn Marino crit_pred = v;
2390*e4b17023SJohn Marino lower = SCHED_TIME (v);
2391*e4b17023SJohn Marino }
2392*e4b17023SJohn Marino }
2393*e4b17023SJohn Marino
2394*e4b17023SJohn Marino if (crit_pred >= 0)
2395*e4b17023SJohn Marino {
2396*e4b17023SJohn Marino crit_cycle = SCHED_TIME (crit_pred) + 1;
2397*e4b17023SJohn Marino return SMODULO (crit_cycle, ii);
2398*e4b17023SJohn Marino }
2399*e4b17023SJohn Marino
2400*e4b17023SJohn Marino for (e = u_node->out; e != 0; e = e->next_out)
2401*e4b17023SJohn Marino {
2402*e4b17023SJohn Marino int v = e->dest->cuid;
2403*e4b17023SJohn Marino
2404*e4b17023SJohn Marino if (TEST_BIT (sched_nodes, v)
2405*e4b17023SJohn Marino && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
2406*e4b17023SJohn Marino if (SCHED_TIME (v) < upper)
2407*e4b17023SJohn Marino {
2408*e4b17023SJohn Marino crit_succ = v;
2409*e4b17023SJohn Marino upper = SCHED_TIME (v);
2410*e4b17023SJohn Marino }
2411*e4b17023SJohn Marino }
2412*e4b17023SJohn Marino
2413*e4b17023SJohn Marino if (crit_succ >= 0)
2414*e4b17023SJohn Marino {
2415*e4b17023SJohn Marino crit_cycle = SCHED_TIME (crit_succ);
2416*e4b17023SJohn Marino return SMODULO (crit_cycle, ii);
2417*e4b17023SJohn Marino }
2418*e4b17023SJohn Marino
2419*e4b17023SJohn Marino if (dump_file)
2420*e4b17023SJohn Marino fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2421*e4b17023SJohn Marino
2422*e4b17023SJohn Marino return SMODULO ((low + up + 1) / 2, ii);
2423*e4b17023SJohn Marino }
2424*e4b17023SJohn Marino
2425*e4b17023SJohn Marino static void
verify_partial_schedule(partial_schedule_ptr ps,sbitmap sched_nodes)2426*e4b17023SJohn Marino verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2427*e4b17023SJohn Marino {
2428*e4b17023SJohn Marino int row;
2429*e4b17023SJohn Marino ps_insn_ptr crr_insn;
2430*e4b17023SJohn Marino
2431*e4b17023SJohn Marino for (row = 0; row < ps->ii; row++)
2432*e4b17023SJohn Marino {
2433*e4b17023SJohn Marino int length = 0;
2434*e4b17023SJohn Marino
2435*e4b17023SJohn Marino for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2436*e4b17023SJohn Marino {
2437*e4b17023SJohn Marino int u = crr_insn->id;
2438*e4b17023SJohn Marino
2439*e4b17023SJohn Marino length++;
2440*e4b17023SJohn Marino gcc_assert (TEST_BIT (sched_nodes, u));
2441*e4b17023SJohn Marino /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2442*e4b17023SJohn Marino popcount (sched_nodes) == number of insns in ps. */
2443*e4b17023SJohn Marino gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2444*e4b17023SJohn Marino gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2445*e4b17023SJohn Marino }
2446*e4b17023SJohn Marino
2447*e4b17023SJohn Marino gcc_assert (ps->rows_length[row] == length);
2448*e4b17023SJohn Marino }
2449*e4b17023SJohn Marino }
2450*e4b17023SJohn Marino
2451*e4b17023SJohn Marino
2452*e4b17023SJohn Marino /* This page implements the algorithm for ordering the nodes of a DDG
2453*e4b17023SJohn Marino for modulo scheduling, activated through the
2454*e4b17023SJohn Marino "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
2455*e4b17023SJohn Marino
2456*e4b17023SJohn Marino #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2457*e4b17023SJohn Marino #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2458*e4b17023SJohn Marino #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2459*e4b17023SJohn Marino #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2460*e4b17023SJohn Marino #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2461*e4b17023SJohn Marino #define DEPTH(x) (ASAP ((x)))
2462*e4b17023SJohn Marino
2463*e4b17023SJohn Marino typedef struct node_order_params * nopa;
2464*e4b17023SJohn Marino
2465*e4b17023SJohn Marino static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2466*e4b17023SJohn Marino static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2467*e4b17023SJohn Marino static nopa calculate_order_params (ddg_ptr, int, int *);
2468*e4b17023SJohn Marino static int find_max_asap (ddg_ptr, sbitmap);
2469*e4b17023SJohn Marino static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2470*e4b17023SJohn Marino static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2471*e4b17023SJohn Marino
2472*e4b17023SJohn Marino enum sms_direction {BOTTOMUP, TOPDOWN};
2473*e4b17023SJohn Marino
2474*e4b17023SJohn Marino struct node_order_params
2475*e4b17023SJohn Marino {
2476*e4b17023SJohn Marino int asap;
2477*e4b17023SJohn Marino int alap;
2478*e4b17023SJohn Marino int height;
2479*e4b17023SJohn Marino };
2480*e4b17023SJohn Marino
2481*e4b17023SJohn Marino /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2482*e4b17023SJohn Marino static void
check_nodes_order(int * node_order,int num_nodes)2483*e4b17023SJohn Marino check_nodes_order (int *node_order, int num_nodes)
2484*e4b17023SJohn Marino {
2485*e4b17023SJohn Marino int i;
2486*e4b17023SJohn Marino sbitmap tmp = sbitmap_alloc (num_nodes);
2487*e4b17023SJohn Marino
2488*e4b17023SJohn Marino sbitmap_zero (tmp);
2489*e4b17023SJohn Marino
2490*e4b17023SJohn Marino if (dump_file)
2491*e4b17023SJohn Marino fprintf (dump_file, "SMS final nodes order: \n");
2492*e4b17023SJohn Marino
2493*e4b17023SJohn Marino for (i = 0; i < num_nodes; i++)
2494*e4b17023SJohn Marino {
2495*e4b17023SJohn Marino int u = node_order[i];
2496*e4b17023SJohn Marino
2497*e4b17023SJohn Marino if (dump_file)
2498*e4b17023SJohn Marino fprintf (dump_file, "%d ", u);
2499*e4b17023SJohn Marino gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
2500*e4b17023SJohn Marino
2501*e4b17023SJohn Marino SET_BIT (tmp, u);
2502*e4b17023SJohn Marino }
2503*e4b17023SJohn Marino
2504*e4b17023SJohn Marino if (dump_file)
2505*e4b17023SJohn Marino fprintf (dump_file, "\n");
2506*e4b17023SJohn Marino
2507*e4b17023SJohn Marino sbitmap_free (tmp);
2508*e4b17023SJohn Marino }
2509*e4b17023SJohn Marino
2510*e4b17023SJohn Marino /* Order the nodes of G for scheduling and pass the result in
2511*e4b17023SJohn Marino NODE_ORDER. Also set aux.count of each node to ASAP.
2512*e4b17023SJohn Marino Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */
2513*e4b17023SJohn Marino static int
sms_order_nodes(ddg_ptr g,int mii,int * node_order,int * pmax_asap)2514*e4b17023SJohn Marino sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2515*e4b17023SJohn Marino {
2516*e4b17023SJohn Marino int i;
2517*e4b17023SJohn Marino int rec_mii = 0;
2518*e4b17023SJohn Marino ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2519*e4b17023SJohn Marino
2520*e4b17023SJohn Marino nopa nops = calculate_order_params (g, mii, pmax_asap);
2521*e4b17023SJohn Marino
2522*e4b17023SJohn Marino if (dump_file)
2523*e4b17023SJohn Marino print_sccs (dump_file, sccs, g);
2524*e4b17023SJohn Marino
2525*e4b17023SJohn Marino order_nodes_of_sccs (sccs, node_order);
2526*e4b17023SJohn Marino
2527*e4b17023SJohn Marino if (sccs->num_sccs > 0)
2528*e4b17023SJohn Marino /* First SCC has the largest recurrence_length. */
2529*e4b17023SJohn Marino rec_mii = sccs->sccs[0]->recurrence_length;
2530*e4b17023SJohn Marino
2531*e4b17023SJohn Marino /* Save ASAP before destroying node_order_params. */
2532*e4b17023SJohn Marino for (i = 0; i < g->num_nodes; i++)
2533*e4b17023SJohn Marino {
2534*e4b17023SJohn Marino ddg_node_ptr v = &g->nodes[i];
2535*e4b17023SJohn Marino v->aux.count = ASAP (v);
2536*e4b17023SJohn Marino }
2537*e4b17023SJohn Marino
2538*e4b17023SJohn Marino free (nops);
2539*e4b17023SJohn Marino free_ddg_all_sccs (sccs);
2540*e4b17023SJohn Marino check_nodes_order (node_order, g->num_nodes);
2541*e4b17023SJohn Marino
2542*e4b17023SJohn Marino return rec_mii;
2543*e4b17023SJohn Marino }
2544*e4b17023SJohn Marino
2545*e4b17023SJohn Marino static void
order_nodes_of_sccs(ddg_all_sccs_ptr all_sccs,int * node_order)2546*e4b17023SJohn Marino order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2547*e4b17023SJohn Marino {
2548*e4b17023SJohn Marino int i, pos = 0;
2549*e4b17023SJohn Marino ddg_ptr g = all_sccs->ddg;
2550*e4b17023SJohn Marino int num_nodes = g->num_nodes;
2551*e4b17023SJohn Marino sbitmap prev_sccs = sbitmap_alloc (num_nodes);
2552*e4b17023SJohn Marino sbitmap on_path = sbitmap_alloc (num_nodes);
2553*e4b17023SJohn Marino sbitmap tmp = sbitmap_alloc (num_nodes);
2554*e4b17023SJohn Marino sbitmap ones = sbitmap_alloc (num_nodes);
2555*e4b17023SJohn Marino
2556*e4b17023SJohn Marino sbitmap_zero (prev_sccs);
2557*e4b17023SJohn Marino sbitmap_ones (ones);
2558*e4b17023SJohn Marino
2559*e4b17023SJohn Marino /* Perform the node ordering starting from the SCC with the highest recMII.
2560*e4b17023SJohn Marino For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2561*e4b17023SJohn Marino for (i = 0; i < all_sccs->num_sccs; i++)
2562*e4b17023SJohn Marino {
2563*e4b17023SJohn Marino ddg_scc_ptr scc = all_sccs->sccs[i];
2564*e4b17023SJohn Marino
2565*e4b17023SJohn Marino /* Add nodes on paths from previous SCCs to the current SCC. */
2566*e4b17023SJohn Marino find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2567*e4b17023SJohn Marino sbitmap_a_or_b (tmp, scc->nodes, on_path);
2568*e4b17023SJohn Marino
2569*e4b17023SJohn Marino /* Add nodes on paths from the current SCC to previous SCCs. */
2570*e4b17023SJohn Marino find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2571*e4b17023SJohn Marino sbitmap_a_or_b (tmp, tmp, on_path);
2572*e4b17023SJohn Marino
2573*e4b17023SJohn Marino /* Remove nodes of previous SCCs from current extended SCC. */
2574*e4b17023SJohn Marino sbitmap_difference (tmp, tmp, prev_sccs);
2575*e4b17023SJohn Marino
2576*e4b17023SJohn Marino pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2577*e4b17023SJohn Marino /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2578*e4b17023SJohn Marino }
2579*e4b17023SJohn Marino
2580*e4b17023SJohn Marino /* Handle the remaining nodes that do not belong to any scc. Each call
2581*e4b17023SJohn Marino to order_nodes_in_scc handles a single connected component. */
2582*e4b17023SJohn Marino while (pos < g->num_nodes)
2583*e4b17023SJohn Marino {
2584*e4b17023SJohn Marino sbitmap_difference (tmp, ones, prev_sccs);
2585*e4b17023SJohn Marino pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2586*e4b17023SJohn Marino }
2587*e4b17023SJohn Marino sbitmap_free (prev_sccs);
2588*e4b17023SJohn Marino sbitmap_free (on_path);
2589*e4b17023SJohn Marino sbitmap_free (tmp);
2590*e4b17023SJohn Marino sbitmap_free (ones);
2591*e4b17023SJohn Marino }
2592*e4b17023SJohn Marino
2593*e4b17023SJohn Marino /* MII is needed if we consider backarcs (that do not close recursive cycles). */
2594*e4b17023SJohn Marino static struct node_order_params *
calculate_order_params(ddg_ptr g,int mii ATTRIBUTE_UNUSED,int * pmax_asap)2595*e4b17023SJohn Marino calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2596*e4b17023SJohn Marino {
2597*e4b17023SJohn Marino int u;
2598*e4b17023SJohn Marino int max_asap;
2599*e4b17023SJohn Marino int num_nodes = g->num_nodes;
2600*e4b17023SJohn Marino ddg_edge_ptr e;
2601*e4b17023SJohn Marino /* Allocate a place to hold ordering params for each node in the DDG. */
2602*e4b17023SJohn Marino nopa node_order_params_arr;
2603*e4b17023SJohn Marino
2604*e4b17023SJohn Marino /* Initialize of ASAP/ALAP/HEIGHT to zero. */
2605*e4b17023SJohn Marino node_order_params_arr = (nopa) xcalloc (num_nodes,
2606*e4b17023SJohn Marino sizeof (struct node_order_params));
2607*e4b17023SJohn Marino
2608*e4b17023SJohn Marino /* Set the aux pointer of each node to point to its order_params structure. */
2609*e4b17023SJohn Marino for (u = 0; u < num_nodes; u++)
2610*e4b17023SJohn Marino g->nodes[u].aux.info = &node_order_params_arr[u];
2611*e4b17023SJohn Marino
2612*e4b17023SJohn Marino /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2613*e4b17023SJohn Marino calculate ASAP, ALAP, mobility, distance, and height for each node
2614*e4b17023SJohn Marino in the dependence (direct acyclic) graph. */
2615*e4b17023SJohn Marino
2616*e4b17023SJohn Marino /* We assume that the nodes in the array are in topological order. */
2617*e4b17023SJohn Marino
2618*e4b17023SJohn Marino max_asap = 0;
2619*e4b17023SJohn Marino for (u = 0; u < num_nodes; u++)
2620*e4b17023SJohn Marino {
2621*e4b17023SJohn Marino ddg_node_ptr u_node = &g->nodes[u];
2622*e4b17023SJohn Marino
2623*e4b17023SJohn Marino ASAP (u_node) = 0;
2624*e4b17023SJohn Marino for (e = u_node->in; e; e = e->next_in)
2625*e4b17023SJohn Marino if (e->distance == 0)
2626*e4b17023SJohn Marino ASAP (u_node) = MAX (ASAP (u_node),
2627*e4b17023SJohn Marino ASAP (e->src) + e->latency);
2628*e4b17023SJohn Marino max_asap = MAX (max_asap, ASAP (u_node));
2629*e4b17023SJohn Marino }
2630*e4b17023SJohn Marino
2631*e4b17023SJohn Marino for (u = num_nodes - 1; u > -1; u--)
2632*e4b17023SJohn Marino {
2633*e4b17023SJohn Marino ddg_node_ptr u_node = &g->nodes[u];
2634*e4b17023SJohn Marino
2635*e4b17023SJohn Marino ALAP (u_node) = max_asap;
2636*e4b17023SJohn Marino HEIGHT (u_node) = 0;
2637*e4b17023SJohn Marino for (e = u_node->out; e; e = e->next_out)
2638*e4b17023SJohn Marino if (e->distance == 0)
2639*e4b17023SJohn Marino {
2640*e4b17023SJohn Marino ALAP (u_node) = MIN (ALAP (u_node),
2641*e4b17023SJohn Marino ALAP (e->dest) - e->latency);
2642*e4b17023SJohn Marino HEIGHT (u_node) = MAX (HEIGHT (u_node),
2643*e4b17023SJohn Marino HEIGHT (e->dest) + e->latency);
2644*e4b17023SJohn Marino }
2645*e4b17023SJohn Marino }
2646*e4b17023SJohn Marino if (dump_file)
2647*e4b17023SJohn Marino {
2648*e4b17023SJohn Marino fprintf (dump_file, "\nOrder params\n");
2649*e4b17023SJohn Marino for (u = 0; u < num_nodes; u++)
2650*e4b17023SJohn Marino {
2651*e4b17023SJohn Marino ddg_node_ptr u_node = &g->nodes[u];
2652*e4b17023SJohn Marino
2653*e4b17023SJohn Marino fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2654*e4b17023SJohn Marino ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2655*e4b17023SJohn Marino }
2656*e4b17023SJohn Marino }
2657*e4b17023SJohn Marino
2658*e4b17023SJohn Marino *pmax_asap = max_asap;
2659*e4b17023SJohn Marino return node_order_params_arr;
2660*e4b17023SJohn Marino }
2661*e4b17023SJohn Marino
2662*e4b17023SJohn Marino static int
find_max_asap(ddg_ptr g,sbitmap nodes)2663*e4b17023SJohn Marino find_max_asap (ddg_ptr g, sbitmap nodes)
2664*e4b17023SJohn Marino {
2665*e4b17023SJohn Marino unsigned int u = 0;
2666*e4b17023SJohn Marino int max_asap = -1;
2667*e4b17023SJohn Marino int result = -1;
2668*e4b17023SJohn Marino sbitmap_iterator sbi;
2669*e4b17023SJohn Marino
2670*e4b17023SJohn Marino EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2671*e4b17023SJohn Marino {
2672*e4b17023SJohn Marino ddg_node_ptr u_node = &g->nodes[u];
2673*e4b17023SJohn Marino
2674*e4b17023SJohn Marino if (max_asap < ASAP (u_node))
2675*e4b17023SJohn Marino {
2676*e4b17023SJohn Marino max_asap = ASAP (u_node);
2677*e4b17023SJohn Marino result = u;
2678*e4b17023SJohn Marino }
2679*e4b17023SJohn Marino }
2680*e4b17023SJohn Marino return result;
2681*e4b17023SJohn Marino }
2682*e4b17023SJohn Marino
2683*e4b17023SJohn Marino static int
find_max_hv_min_mob(ddg_ptr g,sbitmap nodes)2684*e4b17023SJohn Marino find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2685*e4b17023SJohn Marino {
2686*e4b17023SJohn Marino unsigned int u = 0;
2687*e4b17023SJohn Marino int max_hv = -1;
2688*e4b17023SJohn Marino int min_mob = INT_MAX;
2689*e4b17023SJohn Marino int result = -1;
2690*e4b17023SJohn Marino sbitmap_iterator sbi;
2691*e4b17023SJohn Marino
2692*e4b17023SJohn Marino EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2693*e4b17023SJohn Marino {
2694*e4b17023SJohn Marino ddg_node_ptr u_node = &g->nodes[u];
2695*e4b17023SJohn Marino
2696*e4b17023SJohn Marino if (max_hv < HEIGHT (u_node))
2697*e4b17023SJohn Marino {
2698*e4b17023SJohn Marino max_hv = HEIGHT (u_node);
2699*e4b17023SJohn Marino min_mob = MOB (u_node);
2700*e4b17023SJohn Marino result = u;
2701*e4b17023SJohn Marino }
2702*e4b17023SJohn Marino else if ((max_hv == HEIGHT (u_node))
2703*e4b17023SJohn Marino && (min_mob > MOB (u_node)))
2704*e4b17023SJohn Marino {
2705*e4b17023SJohn Marino min_mob = MOB (u_node);
2706*e4b17023SJohn Marino result = u;
2707*e4b17023SJohn Marino }
2708*e4b17023SJohn Marino }
2709*e4b17023SJohn Marino return result;
2710*e4b17023SJohn Marino }
2711*e4b17023SJohn Marino
2712*e4b17023SJohn Marino static int
find_max_dv_min_mob(ddg_ptr g,sbitmap nodes)2713*e4b17023SJohn Marino find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2714*e4b17023SJohn Marino {
2715*e4b17023SJohn Marino unsigned int u = 0;
2716*e4b17023SJohn Marino int max_dv = -1;
2717*e4b17023SJohn Marino int min_mob = INT_MAX;
2718*e4b17023SJohn Marino int result = -1;
2719*e4b17023SJohn Marino sbitmap_iterator sbi;
2720*e4b17023SJohn Marino
2721*e4b17023SJohn Marino EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2722*e4b17023SJohn Marino {
2723*e4b17023SJohn Marino ddg_node_ptr u_node = &g->nodes[u];
2724*e4b17023SJohn Marino
2725*e4b17023SJohn Marino if (max_dv < DEPTH (u_node))
2726*e4b17023SJohn Marino {
2727*e4b17023SJohn Marino max_dv = DEPTH (u_node);
2728*e4b17023SJohn Marino min_mob = MOB (u_node);
2729*e4b17023SJohn Marino result = u;
2730*e4b17023SJohn Marino }
2731*e4b17023SJohn Marino else if ((max_dv == DEPTH (u_node))
2732*e4b17023SJohn Marino && (min_mob > MOB (u_node)))
2733*e4b17023SJohn Marino {
2734*e4b17023SJohn Marino min_mob = MOB (u_node);
2735*e4b17023SJohn Marino result = u;
2736*e4b17023SJohn Marino }
2737*e4b17023SJohn Marino }
2738*e4b17023SJohn Marino return result;
2739*e4b17023SJohn Marino }
2740*e4b17023SJohn Marino
2741*e4b17023SJohn Marino /* Places the nodes of SCC into the NODE_ORDER array starting
2742*e4b17023SJohn Marino at position POS, according to the SMS ordering algorithm.
2743*e4b17023SJohn Marino NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2744*e4b17023SJohn Marino the NODE_ORDER array, starting from position zero. */
2745*e4b17023SJohn Marino static int
order_nodes_in_scc(ddg_ptr g,sbitmap nodes_ordered,sbitmap scc,int * node_order,int pos)2746*e4b17023SJohn Marino order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2747*e4b17023SJohn Marino int * node_order, int pos)
2748*e4b17023SJohn Marino {
2749*e4b17023SJohn Marino enum sms_direction dir;
2750*e4b17023SJohn Marino int num_nodes = g->num_nodes;
2751*e4b17023SJohn Marino sbitmap workset = sbitmap_alloc (num_nodes);
2752*e4b17023SJohn Marino sbitmap tmp = sbitmap_alloc (num_nodes);
2753*e4b17023SJohn Marino sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2754*e4b17023SJohn Marino sbitmap predecessors = sbitmap_alloc (num_nodes);
2755*e4b17023SJohn Marino sbitmap successors = sbitmap_alloc (num_nodes);
2756*e4b17023SJohn Marino
2757*e4b17023SJohn Marino sbitmap_zero (predecessors);
2758*e4b17023SJohn Marino find_predecessors (predecessors, g, nodes_ordered);
2759*e4b17023SJohn Marino
2760*e4b17023SJohn Marino sbitmap_zero (successors);
2761*e4b17023SJohn Marino find_successors (successors, g, nodes_ordered);
2762*e4b17023SJohn Marino
2763*e4b17023SJohn Marino sbitmap_zero (tmp);
2764*e4b17023SJohn Marino if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
2765*e4b17023SJohn Marino {
2766*e4b17023SJohn Marino sbitmap_copy (workset, tmp);
2767*e4b17023SJohn Marino dir = BOTTOMUP;
2768*e4b17023SJohn Marino }
2769*e4b17023SJohn Marino else if (sbitmap_a_and_b_cg (tmp, successors, scc))
2770*e4b17023SJohn Marino {
2771*e4b17023SJohn Marino sbitmap_copy (workset, tmp);
2772*e4b17023SJohn Marino dir = TOPDOWN;
2773*e4b17023SJohn Marino }
2774*e4b17023SJohn Marino else
2775*e4b17023SJohn Marino {
2776*e4b17023SJohn Marino int u;
2777*e4b17023SJohn Marino
2778*e4b17023SJohn Marino sbitmap_zero (workset);
2779*e4b17023SJohn Marino if ((u = find_max_asap (g, scc)) >= 0)
2780*e4b17023SJohn Marino SET_BIT (workset, u);
2781*e4b17023SJohn Marino dir = BOTTOMUP;
2782*e4b17023SJohn Marino }
2783*e4b17023SJohn Marino
2784*e4b17023SJohn Marino sbitmap_zero (zero_bitmap);
2785*e4b17023SJohn Marino while (!sbitmap_equal (workset, zero_bitmap))
2786*e4b17023SJohn Marino {
2787*e4b17023SJohn Marino int v;
2788*e4b17023SJohn Marino ddg_node_ptr v_node;
2789*e4b17023SJohn Marino sbitmap v_node_preds;
2790*e4b17023SJohn Marino sbitmap v_node_succs;
2791*e4b17023SJohn Marino
2792*e4b17023SJohn Marino if (dir == TOPDOWN)
2793*e4b17023SJohn Marino {
2794*e4b17023SJohn Marino while (!sbitmap_equal (workset, zero_bitmap))
2795*e4b17023SJohn Marino {
2796*e4b17023SJohn Marino v = find_max_hv_min_mob (g, workset);
2797*e4b17023SJohn Marino v_node = &g->nodes[v];
2798*e4b17023SJohn Marino node_order[pos++] = v;
2799*e4b17023SJohn Marino v_node_succs = NODE_SUCCESSORS (v_node);
2800*e4b17023SJohn Marino sbitmap_a_and_b (tmp, v_node_succs, scc);
2801*e4b17023SJohn Marino
2802*e4b17023SJohn Marino /* Don't consider the already ordered successors again. */
2803*e4b17023SJohn Marino sbitmap_difference (tmp, tmp, nodes_ordered);
2804*e4b17023SJohn Marino sbitmap_a_or_b (workset, workset, tmp);
2805*e4b17023SJohn Marino RESET_BIT (workset, v);
2806*e4b17023SJohn Marino SET_BIT (nodes_ordered, v);
2807*e4b17023SJohn Marino }
2808*e4b17023SJohn Marino dir = BOTTOMUP;
2809*e4b17023SJohn Marino sbitmap_zero (predecessors);
2810*e4b17023SJohn Marino find_predecessors (predecessors, g, nodes_ordered);
2811*e4b17023SJohn Marino sbitmap_a_and_b (workset, predecessors, scc);
2812*e4b17023SJohn Marino }
2813*e4b17023SJohn Marino else
2814*e4b17023SJohn Marino {
2815*e4b17023SJohn Marino while (!sbitmap_equal (workset, zero_bitmap))
2816*e4b17023SJohn Marino {
2817*e4b17023SJohn Marino v = find_max_dv_min_mob (g, workset);
2818*e4b17023SJohn Marino v_node = &g->nodes[v];
2819*e4b17023SJohn Marino node_order[pos++] = v;
2820*e4b17023SJohn Marino v_node_preds = NODE_PREDECESSORS (v_node);
2821*e4b17023SJohn Marino sbitmap_a_and_b (tmp, v_node_preds, scc);
2822*e4b17023SJohn Marino
2823*e4b17023SJohn Marino /* Don't consider the already ordered predecessors again. */
2824*e4b17023SJohn Marino sbitmap_difference (tmp, tmp, nodes_ordered);
2825*e4b17023SJohn Marino sbitmap_a_or_b (workset, workset, tmp);
2826*e4b17023SJohn Marino RESET_BIT (workset, v);
2827*e4b17023SJohn Marino SET_BIT (nodes_ordered, v);
2828*e4b17023SJohn Marino }
2829*e4b17023SJohn Marino dir = TOPDOWN;
2830*e4b17023SJohn Marino sbitmap_zero (successors);
2831*e4b17023SJohn Marino find_successors (successors, g, nodes_ordered);
2832*e4b17023SJohn Marino sbitmap_a_and_b (workset, successors, scc);
2833*e4b17023SJohn Marino }
2834*e4b17023SJohn Marino }
2835*e4b17023SJohn Marino sbitmap_free (tmp);
2836*e4b17023SJohn Marino sbitmap_free (workset);
2837*e4b17023SJohn Marino sbitmap_free (zero_bitmap);
2838*e4b17023SJohn Marino sbitmap_free (predecessors);
2839*e4b17023SJohn Marino sbitmap_free (successors);
2840*e4b17023SJohn Marino return pos;
2841*e4b17023SJohn Marino }
2842*e4b17023SJohn Marino
2843*e4b17023SJohn Marino
2844*e4b17023SJohn Marino /* This page contains functions for manipulating partial-schedules during
2845*e4b17023SJohn Marino modulo scheduling. */
2846*e4b17023SJohn Marino
2847*e4b17023SJohn Marino /* Create a partial schedule and allocate a memory to hold II rows. */
2848*e4b17023SJohn Marino
2849*e4b17023SJohn Marino static partial_schedule_ptr
create_partial_schedule(int ii,ddg_ptr g,int history)2850*e4b17023SJohn Marino create_partial_schedule (int ii, ddg_ptr g, int history)
2851*e4b17023SJohn Marino {
2852*e4b17023SJohn Marino partial_schedule_ptr ps = XNEW (struct partial_schedule);
2853*e4b17023SJohn Marino ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2854*e4b17023SJohn Marino ps->rows_length = (int *) xcalloc (ii, sizeof (int));
2855*e4b17023SJohn Marino ps->reg_moves = NULL;
2856*e4b17023SJohn Marino ps->ii = ii;
2857*e4b17023SJohn Marino ps->history = history;
2858*e4b17023SJohn Marino ps->min_cycle = INT_MAX;
2859*e4b17023SJohn Marino ps->max_cycle = INT_MIN;
2860*e4b17023SJohn Marino ps->g = g;
2861*e4b17023SJohn Marino
2862*e4b17023SJohn Marino return ps;
2863*e4b17023SJohn Marino }
2864*e4b17023SJohn Marino
2865*e4b17023SJohn Marino /* Free the PS_INSNs in rows array of the given partial schedule.
2866*e4b17023SJohn Marino ??? Consider caching the PS_INSN's. */
2867*e4b17023SJohn Marino static void
free_ps_insns(partial_schedule_ptr ps)2868*e4b17023SJohn Marino free_ps_insns (partial_schedule_ptr ps)
2869*e4b17023SJohn Marino {
2870*e4b17023SJohn Marino int i;
2871*e4b17023SJohn Marino
2872*e4b17023SJohn Marino for (i = 0; i < ps->ii; i++)
2873*e4b17023SJohn Marino {
2874*e4b17023SJohn Marino while (ps->rows[i])
2875*e4b17023SJohn Marino {
2876*e4b17023SJohn Marino ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2877*e4b17023SJohn Marino
2878*e4b17023SJohn Marino free (ps->rows[i]);
2879*e4b17023SJohn Marino ps->rows[i] = ps_insn;
2880*e4b17023SJohn Marino }
2881*e4b17023SJohn Marino ps->rows[i] = NULL;
2882*e4b17023SJohn Marino }
2883*e4b17023SJohn Marino }
2884*e4b17023SJohn Marino
2885*e4b17023SJohn Marino /* Free all the memory allocated to the partial schedule. */
2886*e4b17023SJohn Marino
2887*e4b17023SJohn Marino static void
free_partial_schedule(partial_schedule_ptr ps)2888*e4b17023SJohn Marino free_partial_schedule (partial_schedule_ptr ps)
2889*e4b17023SJohn Marino {
2890*e4b17023SJohn Marino ps_reg_move_info *move;
2891*e4b17023SJohn Marino unsigned int i;
2892*e4b17023SJohn Marino
2893*e4b17023SJohn Marino if (!ps)
2894*e4b17023SJohn Marino return;
2895*e4b17023SJohn Marino
2896*e4b17023SJohn Marino FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move)
2897*e4b17023SJohn Marino sbitmap_free (move->uses);
2898*e4b17023SJohn Marino VEC_free (ps_reg_move_info, heap, ps->reg_moves);
2899*e4b17023SJohn Marino
2900*e4b17023SJohn Marino free_ps_insns (ps);
2901*e4b17023SJohn Marino free (ps->rows);
2902*e4b17023SJohn Marino free (ps->rows_length);
2903*e4b17023SJohn Marino free (ps);
2904*e4b17023SJohn Marino }
2905*e4b17023SJohn Marino
2906*e4b17023SJohn Marino /* Clear the rows array with its PS_INSNs, and create a new one with
2907*e4b17023SJohn Marino NEW_II rows. */
2908*e4b17023SJohn Marino
2909*e4b17023SJohn Marino static void
reset_partial_schedule(partial_schedule_ptr ps,int new_ii)2910*e4b17023SJohn Marino reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2911*e4b17023SJohn Marino {
2912*e4b17023SJohn Marino if (!ps)
2913*e4b17023SJohn Marino return;
2914*e4b17023SJohn Marino free_ps_insns (ps);
2915*e4b17023SJohn Marino if (new_ii == ps->ii)
2916*e4b17023SJohn Marino return;
2917*e4b17023SJohn Marino ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2918*e4b17023SJohn Marino * sizeof (ps_insn_ptr));
2919*e4b17023SJohn Marino memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2920*e4b17023SJohn Marino ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2921*e4b17023SJohn Marino memset (ps->rows_length, 0, new_ii * sizeof (int));
2922*e4b17023SJohn Marino ps->ii = new_ii;
2923*e4b17023SJohn Marino ps->min_cycle = INT_MAX;
2924*e4b17023SJohn Marino ps->max_cycle = INT_MIN;
2925*e4b17023SJohn Marino }
2926*e4b17023SJohn Marino
2927*e4b17023SJohn Marino /* Prints the partial schedule as an ii rows array, for each rows
2928*e4b17023SJohn Marino print the ids of the insns in it. */
2929*e4b17023SJohn Marino void
print_partial_schedule(partial_schedule_ptr ps,FILE * dump)2930*e4b17023SJohn Marino print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2931*e4b17023SJohn Marino {
2932*e4b17023SJohn Marino int i;
2933*e4b17023SJohn Marino
2934*e4b17023SJohn Marino for (i = 0; i < ps->ii; i++)
2935*e4b17023SJohn Marino {
2936*e4b17023SJohn Marino ps_insn_ptr ps_i = ps->rows[i];
2937*e4b17023SJohn Marino
2938*e4b17023SJohn Marino fprintf (dump, "\n[ROW %d ]: ", i);
2939*e4b17023SJohn Marino while (ps_i)
2940*e4b17023SJohn Marino {
2941*e4b17023SJohn Marino rtx insn = ps_rtl_insn (ps, ps_i->id);
2942*e4b17023SJohn Marino
2943*e4b17023SJohn Marino if (JUMP_P (insn))
2944*e4b17023SJohn Marino fprintf (dump, "%d (branch), ", INSN_UID (insn));
2945*e4b17023SJohn Marino else
2946*e4b17023SJohn Marino fprintf (dump, "%d, ", INSN_UID (insn));
2947*e4b17023SJohn Marino
2948*e4b17023SJohn Marino ps_i = ps_i->next_in_row;
2949*e4b17023SJohn Marino }
2950*e4b17023SJohn Marino }
2951*e4b17023SJohn Marino }
2952*e4b17023SJohn Marino
2953*e4b17023SJohn Marino /* Creates an object of PS_INSN and initializes it to the given parameters. */
2954*e4b17023SJohn Marino static ps_insn_ptr
create_ps_insn(int id,int cycle)2955*e4b17023SJohn Marino create_ps_insn (int id, int cycle)
2956*e4b17023SJohn Marino {
2957*e4b17023SJohn Marino ps_insn_ptr ps_i = XNEW (struct ps_insn);
2958*e4b17023SJohn Marino
2959*e4b17023SJohn Marino ps_i->id = id;
2960*e4b17023SJohn Marino ps_i->next_in_row = NULL;
2961*e4b17023SJohn Marino ps_i->prev_in_row = NULL;
2962*e4b17023SJohn Marino ps_i->cycle = cycle;
2963*e4b17023SJohn Marino
2964*e4b17023SJohn Marino return ps_i;
2965*e4b17023SJohn Marino }
2966*e4b17023SJohn Marino
2967*e4b17023SJohn Marino
2968*e4b17023SJohn Marino /* Removes the given PS_INSN from the partial schedule. */
2969*e4b17023SJohn Marino static void
remove_node_from_ps(partial_schedule_ptr ps,ps_insn_ptr ps_i)2970*e4b17023SJohn Marino remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2971*e4b17023SJohn Marino {
2972*e4b17023SJohn Marino int row;
2973*e4b17023SJohn Marino
2974*e4b17023SJohn Marino gcc_assert (ps && ps_i);
2975*e4b17023SJohn Marino
2976*e4b17023SJohn Marino row = SMODULO (ps_i->cycle, ps->ii);
2977*e4b17023SJohn Marino if (! ps_i->prev_in_row)
2978*e4b17023SJohn Marino {
2979*e4b17023SJohn Marino gcc_assert (ps_i == ps->rows[row]);
2980*e4b17023SJohn Marino ps->rows[row] = ps_i->next_in_row;
2981*e4b17023SJohn Marino if (ps->rows[row])
2982*e4b17023SJohn Marino ps->rows[row]->prev_in_row = NULL;
2983*e4b17023SJohn Marino }
2984*e4b17023SJohn Marino else
2985*e4b17023SJohn Marino {
2986*e4b17023SJohn Marino ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2987*e4b17023SJohn Marino if (ps_i->next_in_row)
2988*e4b17023SJohn Marino ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2989*e4b17023SJohn Marino }
2990*e4b17023SJohn Marino
2991*e4b17023SJohn Marino ps->rows_length[row] -= 1;
2992*e4b17023SJohn Marino free (ps_i);
2993*e4b17023SJohn Marino return;
2994*e4b17023SJohn Marino }
2995*e4b17023SJohn Marino
2996*e4b17023SJohn Marino /* Unlike what literature describes for modulo scheduling (which focuses
2997*e4b17023SJohn Marino on VLIW machines) the order of the instructions inside a cycle is
2998*e4b17023SJohn Marino important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2999*e4b17023SJohn Marino where the current instruction should go relative to the already
3000*e4b17023SJohn Marino scheduled instructions in the given cycle. Go over these
3001*e4b17023SJohn Marino instructions and find the first possible column to put it in. */
3002*e4b17023SJohn Marino static bool
ps_insn_find_column(partial_schedule_ptr ps,ps_insn_ptr ps_i,sbitmap must_precede,sbitmap must_follow)3003*e4b17023SJohn Marino ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3004*e4b17023SJohn Marino sbitmap must_precede, sbitmap must_follow)
3005*e4b17023SJohn Marino {
3006*e4b17023SJohn Marino ps_insn_ptr next_ps_i;
3007*e4b17023SJohn Marino ps_insn_ptr first_must_follow = NULL;
3008*e4b17023SJohn Marino ps_insn_ptr last_must_precede = NULL;
3009*e4b17023SJohn Marino ps_insn_ptr last_in_row = NULL;
3010*e4b17023SJohn Marino int row;
3011*e4b17023SJohn Marino
3012*e4b17023SJohn Marino if (! ps_i)
3013*e4b17023SJohn Marino return false;
3014*e4b17023SJohn Marino
3015*e4b17023SJohn Marino row = SMODULO (ps_i->cycle, ps->ii);
3016*e4b17023SJohn Marino
3017*e4b17023SJohn Marino /* Find the first must follow and the last must precede
3018*e4b17023SJohn Marino and insert the node immediately after the must precede
3019*e4b17023SJohn Marino but make sure that it there is no must follow after it. */
3020*e4b17023SJohn Marino for (next_ps_i = ps->rows[row];
3021*e4b17023SJohn Marino next_ps_i;
3022*e4b17023SJohn Marino next_ps_i = next_ps_i->next_in_row)
3023*e4b17023SJohn Marino {
3024*e4b17023SJohn Marino if (must_follow
3025*e4b17023SJohn Marino && TEST_BIT (must_follow, next_ps_i->id)
3026*e4b17023SJohn Marino && ! first_must_follow)
3027*e4b17023SJohn Marino first_must_follow = next_ps_i;
3028*e4b17023SJohn Marino if (must_precede && TEST_BIT (must_precede, next_ps_i->id))
3029*e4b17023SJohn Marino {
3030*e4b17023SJohn Marino /* If we have already met a node that must follow, then
3031*e4b17023SJohn Marino there is no possible column. */
3032*e4b17023SJohn Marino if (first_must_follow)
3033*e4b17023SJohn Marino return false;
3034*e4b17023SJohn Marino else
3035*e4b17023SJohn Marino last_must_precede = next_ps_i;
3036*e4b17023SJohn Marino }
3037*e4b17023SJohn Marino /* The closing branch must be the last in the row. */
3038*e4b17023SJohn Marino if (must_precede
3039*e4b17023SJohn Marino && TEST_BIT (must_precede, next_ps_i->id)
3040*e4b17023SJohn Marino && JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
3041*e4b17023SJohn Marino return false;
3042*e4b17023SJohn Marino
3043*e4b17023SJohn Marino last_in_row = next_ps_i;
3044*e4b17023SJohn Marino }
3045*e4b17023SJohn Marino
3046*e4b17023SJohn Marino /* The closing branch is scheduled as well. Make sure there is no
3047*e4b17023SJohn Marino dependent instruction after it as the branch should be the last
3048*e4b17023SJohn Marino instruction in the row. */
3049*e4b17023SJohn Marino if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
3050*e4b17023SJohn Marino {
3051*e4b17023SJohn Marino if (first_must_follow)
3052*e4b17023SJohn Marino return false;
3053*e4b17023SJohn Marino if (last_in_row)
3054*e4b17023SJohn Marino {
3055*e4b17023SJohn Marino /* Make the branch the last in the row. New instructions
3056*e4b17023SJohn Marino will be inserted at the beginning of the row or after the
3057*e4b17023SJohn Marino last must_precede instruction thus the branch is guaranteed
3058*e4b17023SJohn Marino to remain the last instruction in the row. */
3059*e4b17023SJohn Marino last_in_row->next_in_row = ps_i;
3060*e4b17023SJohn Marino ps_i->prev_in_row = last_in_row;
3061*e4b17023SJohn Marino ps_i->next_in_row = NULL;
3062*e4b17023SJohn Marino }
3063*e4b17023SJohn Marino else
3064*e4b17023SJohn Marino ps->rows[row] = ps_i;
3065*e4b17023SJohn Marino return true;
3066*e4b17023SJohn Marino }
3067*e4b17023SJohn Marino
3068*e4b17023SJohn Marino /* Now insert the node after INSERT_AFTER_PSI. */
3069*e4b17023SJohn Marino
3070*e4b17023SJohn Marino if (! last_must_precede)
3071*e4b17023SJohn Marino {
3072*e4b17023SJohn Marino ps_i->next_in_row = ps->rows[row];
3073*e4b17023SJohn Marino ps_i->prev_in_row = NULL;
3074*e4b17023SJohn Marino if (ps_i->next_in_row)
3075*e4b17023SJohn Marino ps_i->next_in_row->prev_in_row = ps_i;
3076*e4b17023SJohn Marino ps->rows[row] = ps_i;
3077*e4b17023SJohn Marino }
3078*e4b17023SJohn Marino else
3079*e4b17023SJohn Marino {
3080*e4b17023SJohn Marino ps_i->next_in_row = last_must_precede->next_in_row;
3081*e4b17023SJohn Marino last_must_precede->next_in_row = ps_i;
3082*e4b17023SJohn Marino ps_i->prev_in_row = last_must_precede;
3083*e4b17023SJohn Marino if (ps_i->next_in_row)
3084*e4b17023SJohn Marino ps_i->next_in_row->prev_in_row = ps_i;
3085*e4b17023SJohn Marino }
3086*e4b17023SJohn Marino
3087*e4b17023SJohn Marino return true;
3088*e4b17023SJohn Marino }
3089*e4b17023SJohn Marino
3090*e4b17023SJohn Marino /* Advances the PS_INSN one column in its current row; returns false
3091*e4b17023SJohn Marino in failure and true in success. Bit N is set in MUST_FOLLOW if
3092*e4b17023SJohn Marino the node with cuid N must be come after the node pointed to by
3093*e4b17023SJohn Marino PS_I when scheduled in the same cycle. */
3094*e4b17023SJohn Marino static int
ps_insn_advance_column(partial_schedule_ptr ps,ps_insn_ptr ps_i,sbitmap must_follow)3095*e4b17023SJohn Marino ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3096*e4b17023SJohn Marino sbitmap must_follow)
3097*e4b17023SJohn Marino {
3098*e4b17023SJohn Marino ps_insn_ptr prev, next;
3099*e4b17023SJohn Marino int row;
3100*e4b17023SJohn Marino
3101*e4b17023SJohn Marino if (!ps || !ps_i)
3102*e4b17023SJohn Marino return false;
3103*e4b17023SJohn Marino
3104*e4b17023SJohn Marino row = SMODULO (ps_i->cycle, ps->ii);
3105*e4b17023SJohn Marino
3106*e4b17023SJohn Marino if (! ps_i->next_in_row)
3107*e4b17023SJohn Marino return false;
3108*e4b17023SJohn Marino
3109*e4b17023SJohn Marino /* Check if next_in_row is dependent on ps_i, both having same sched
3110*e4b17023SJohn Marino times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
3111*e4b17023SJohn Marino if (must_follow && TEST_BIT (must_follow, ps_i->next_in_row->id))
3112*e4b17023SJohn Marino return false;
3113*e4b17023SJohn Marino
3114*e4b17023SJohn Marino /* Advance PS_I over its next_in_row in the doubly linked list. */
3115*e4b17023SJohn Marino prev = ps_i->prev_in_row;
3116*e4b17023SJohn Marino next = ps_i->next_in_row;
3117*e4b17023SJohn Marino
3118*e4b17023SJohn Marino if (ps_i == ps->rows[row])
3119*e4b17023SJohn Marino ps->rows[row] = next;
3120*e4b17023SJohn Marino
3121*e4b17023SJohn Marino ps_i->next_in_row = next->next_in_row;
3122*e4b17023SJohn Marino
3123*e4b17023SJohn Marino if (next->next_in_row)
3124*e4b17023SJohn Marino next->next_in_row->prev_in_row = ps_i;
3125*e4b17023SJohn Marino
3126*e4b17023SJohn Marino next->next_in_row = ps_i;
3127*e4b17023SJohn Marino ps_i->prev_in_row = next;
3128*e4b17023SJohn Marino
3129*e4b17023SJohn Marino next->prev_in_row = prev;
3130*e4b17023SJohn Marino if (prev)
3131*e4b17023SJohn Marino prev->next_in_row = next;
3132*e4b17023SJohn Marino
3133*e4b17023SJohn Marino return true;
3134*e4b17023SJohn Marino }
3135*e4b17023SJohn Marino
3136*e4b17023SJohn Marino /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
3137*e4b17023SJohn Marino Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
3138*e4b17023SJohn Marino set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
3139*e4b17023SJohn Marino before/after (respectively) the node pointed to by PS_I when scheduled
3140*e4b17023SJohn Marino in the same cycle. */
3141*e4b17023SJohn Marino static ps_insn_ptr
add_node_to_ps(partial_schedule_ptr ps,int id,int cycle,sbitmap must_precede,sbitmap must_follow)3142*e4b17023SJohn Marino add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
3143*e4b17023SJohn Marino sbitmap must_precede, sbitmap must_follow)
3144*e4b17023SJohn Marino {
3145*e4b17023SJohn Marino ps_insn_ptr ps_i;
3146*e4b17023SJohn Marino int row = SMODULO (cycle, ps->ii);
3147*e4b17023SJohn Marino
3148*e4b17023SJohn Marino if (ps->rows_length[row] >= issue_rate)
3149*e4b17023SJohn Marino return NULL;
3150*e4b17023SJohn Marino
3151*e4b17023SJohn Marino ps_i = create_ps_insn (id, cycle);
3152*e4b17023SJohn Marino
3153*e4b17023SJohn Marino /* Finds and inserts PS_I according to MUST_FOLLOW and
3154*e4b17023SJohn Marino MUST_PRECEDE. */
3155*e4b17023SJohn Marino if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
3156*e4b17023SJohn Marino {
3157*e4b17023SJohn Marino free (ps_i);
3158*e4b17023SJohn Marino return NULL;
3159*e4b17023SJohn Marino }
3160*e4b17023SJohn Marino
3161*e4b17023SJohn Marino ps->rows_length[row] += 1;
3162*e4b17023SJohn Marino return ps_i;
3163*e4b17023SJohn Marino }
3164*e4b17023SJohn Marino
3165*e4b17023SJohn Marino /* Advance time one cycle. Assumes DFA is being used. */
3166*e4b17023SJohn Marino static void
advance_one_cycle(void)3167*e4b17023SJohn Marino advance_one_cycle (void)
3168*e4b17023SJohn Marino {
3169*e4b17023SJohn Marino if (targetm.sched.dfa_pre_cycle_insn)
3170*e4b17023SJohn Marino state_transition (curr_state,
3171*e4b17023SJohn Marino targetm.sched.dfa_pre_cycle_insn ());
3172*e4b17023SJohn Marino
3173*e4b17023SJohn Marino state_transition (curr_state, NULL);
3174*e4b17023SJohn Marino
3175*e4b17023SJohn Marino if (targetm.sched.dfa_post_cycle_insn)
3176*e4b17023SJohn Marino state_transition (curr_state,
3177*e4b17023SJohn Marino targetm.sched.dfa_post_cycle_insn ());
3178*e4b17023SJohn Marino }
3179*e4b17023SJohn Marino
3180*e4b17023SJohn Marino
3181*e4b17023SJohn Marino
3182*e4b17023SJohn Marino /* Checks if PS has resource conflicts according to DFA, starting from
3183*e4b17023SJohn Marino FROM cycle to TO cycle; returns true if there are conflicts and false
3184*e4b17023SJohn Marino if there are no conflicts. Assumes DFA is being used. */
3185*e4b17023SJohn Marino static int
ps_has_conflicts(partial_schedule_ptr ps,int from,int to)3186*e4b17023SJohn Marino ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
3187*e4b17023SJohn Marino {
3188*e4b17023SJohn Marino int cycle;
3189*e4b17023SJohn Marino
3190*e4b17023SJohn Marino state_reset (curr_state);
3191*e4b17023SJohn Marino
3192*e4b17023SJohn Marino for (cycle = from; cycle <= to; cycle++)
3193*e4b17023SJohn Marino {
3194*e4b17023SJohn Marino ps_insn_ptr crr_insn;
3195*e4b17023SJohn Marino /* Holds the remaining issue slots in the current row. */
3196*e4b17023SJohn Marino int can_issue_more = issue_rate;
3197*e4b17023SJohn Marino
3198*e4b17023SJohn Marino /* Walk through the DFA for the current row. */
3199*e4b17023SJohn Marino for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
3200*e4b17023SJohn Marino crr_insn;
3201*e4b17023SJohn Marino crr_insn = crr_insn->next_in_row)
3202*e4b17023SJohn Marino {
3203*e4b17023SJohn Marino rtx insn = ps_rtl_insn (ps, crr_insn->id);
3204*e4b17023SJohn Marino
3205*e4b17023SJohn Marino if (!NONDEBUG_INSN_P (insn))
3206*e4b17023SJohn Marino continue;
3207*e4b17023SJohn Marino
3208*e4b17023SJohn Marino /* Check if there is room for the current insn. */
3209*e4b17023SJohn Marino if (!can_issue_more || state_dead_lock_p (curr_state))
3210*e4b17023SJohn Marino return true;
3211*e4b17023SJohn Marino
3212*e4b17023SJohn Marino /* Update the DFA state and return with failure if the DFA found
3213*e4b17023SJohn Marino resource conflicts. */
3214*e4b17023SJohn Marino if (state_transition (curr_state, insn) >= 0)
3215*e4b17023SJohn Marino return true;
3216*e4b17023SJohn Marino
3217*e4b17023SJohn Marino if (targetm.sched.variable_issue)
3218*e4b17023SJohn Marino can_issue_more =
3219*e4b17023SJohn Marino targetm.sched.variable_issue (sched_dump, sched_verbose,
3220*e4b17023SJohn Marino insn, can_issue_more);
3221*e4b17023SJohn Marino /* A naked CLOBBER or USE generates no instruction, so don't
3222*e4b17023SJohn Marino let them consume issue slots. */
3223*e4b17023SJohn Marino else if (GET_CODE (PATTERN (insn)) != USE
3224*e4b17023SJohn Marino && GET_CODE (PATTERN (insn)) != CLOBBER)
3225*e4b17023SJohn Marino can_issue_more--;
3226*e4b17023SJohn Marino }
3227*e4b17023SJohn Marino
3228*e4b17023SJohn Marino /* Advance the DFA to the next cycle. */
3229*e4b17023SJohn Marino advance_one_cycle ();
3230*e4b17023SJohn Marino }
3231*e4b17023SJohn Marino return false;
3232*e4b17023SJohn Marino }
3233*e4b17023SJohn Marino
3234*e4b17023SJohn Marino /* Checks if the given node causes resource conflicts when added to PS at
3235*e4b17023SJohn Marino cycle C. If not the node is added to PS and returned; otherwise zero
3236*e4b17023SJohn Marino is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
3237*e4b17023SJohn Marino cuid N must be come before/after (respectively) the node pointed to by
3238*e4b17023SJohn Marino PS_I when scheduled in the same cycle. */
3239*e4b17023SJohn Marino ps_insn_ptr
ps_add_node_check_conflicts(partial_schedule_ptr ps,int n,int c,sbitmap must_precede,sbitmap must_follow)3240*e4b17023SJohn Marino ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
3241*e4b17023SJohn Marino int c, sbitmap must_precede,
3242*e4b17023SJohn Marino sbitmap must_follow)
3243*e4b17023SJohn Marino {
3244*e4b17023SJohn Marino int has_conflicts = 0;
3245*e4b17023SJohn Marino ps_insn_ptr ps_i;
3246*e4b17023SJohn Marino
3247*e4b17023SJohn Marino /* First add the node to the PS, if this succeeds check for
3248*e4b17023SJohn Marino conflicts, trying different issue slots in the same row. */
3249*e4b17023SJohn Marino if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
3250*e4b17023SJohn Marino return NULL; /* Failed to insert the node at the given cycle. */
3251*e4b17023SJohn Marino
3252*e4b17023SJohn Marino has_conflicts = ps_has_conflicts (ps, c, c)
3253*e4b17023SJohn Marino || (ps->history > 0
3254*e4b17023SJohn Marino && ps_has_conflicts (ps,
3255*e4b17023SJohn Marino c - ps->history,
3256*e4b17023SJohn Marino c + ps->history));
3257*e4b17023SJohn Marino
3258*e4b17023SJohn Marino /* Try different issue slots to find one that the given node can be
3259*e4b17023SJohn Marino scheduled in without conflicts. */
3260*e4b17023SJohn Marino while (has_conflicts)
3261*e4b17023SJohn Marino {
3262*e4b17023SJohn Marino if (! ps_insn_advance_column (ps, ps_i, must_follow))
3263*e4b17023SJohn Marino break;
3264*e4b17023SJohn Marino has_conflicts = ps_has_conflicts (ps, c, c)
3265*e4b17023SJohn Marino || (ps->history > 0
3266*e4b17023SJohn Marino && ps_has_conflicts (ps,
3267*e4b17023SJohn Marino c - ps->history,
3268*e4b17023SJohn Marino c + ps->history));
3269*e4b17023SJohn Marino }
3270*e4b17023SJohn Marino
3271*e4b17023SJohn Marino if (has_conflicts)
3272*e4b17023SJohn Marino {
3273*e4b17023SJohn Marino remove_node_from_ps (ps, ps_i);
3274*e4b17023SJohn Marino return NULL;
3275*e4b17023SJohn Marino }
3276*e4b17023SJohn Marino
3277*e4b17023SJohn Marino ps->min_cycle = MIN (ps->min_cycle, c);
3278*e4b17023SJohn Marino ps->max_cycle = MAX (ps->max_cycle, c);
3279*e4b17023SJohn Marino return ps_i;
3280*e4b17023SJohn Marino }
3281*e4b17023SJohn Marino
3282*e4b17023SJohn Marino /* Calculate the stage count of the partial schedule PS. The calculation
3283*e4b17023SJohn Marino takes into account the rotation amount passed in ROTATION_AMOUNT. */
3284*e4b17023SJohn Marino int
calculate_stage_count(partial_schedule_ptr ps,int rotation_amount)3285*e4b17023SJohn Marino calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
3286*e4b17023SJohn Marino {
3287*e4b17023SJohn Marino int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
3288*e4b17023SJohn Marino int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
3289*e4b17023SJohn Marino int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
3290*e4b17023SJohn Marino
3291*e4b17023SJohn Marino /* The calculation of stage count is done adding the number of stages
3292*e4b17023SJohn Marino before cycle zero and after cycle zero. */
3293*e4b17023SJohn Marino stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
3294*e4b17023SJohn Marino
3295*e4b17023SJohn Marino return stage_count;
3296*e4b17023SJohn Marino }
3297*e4b17023SJohn Marino
3298*e4b17023SJohn Marino /* Rotate the rows of PS such that insns scheduled at time
3299*e4b17023SJohn Marino START_CYCLE will appear in row 0. Updates max/min_cycles. */
3300*e4b17023SJohn Marino void
rotate_partial_schedule(partial_schedule_ptr ps,int start_cycle)3301*e4b17023SJohn Marino rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
3302*e4b17023SJohn Marino {
3303*e4b17023SJohn Marino int i, row, backward_rotates;
3304*e4b17023SJohn Marino int last_row = ps->ii - 1;
3305*e4b17023SJohn Marino
3306*e4b17023SJohn Marino if (start_cycle == 0)
3307*e4b17023SJohn Marino return;
3308*e4b17023SJohn Marino
3309*e4b17023SJohn Marino backward_rotates = SMODULO (start_cycle, ps->ii);
3310*e4b17023SJohn Marino
3311*e4b17023SJohn Marino /* Revisit later and optimize this into a single loop. */
3312*e4b17023SJohn Marino for (i = 0; i < backward_rotates; i++)
3313*e4b17023SJohn Marino {
3314*e4b17023SJohn Marino ps_insn_ptr first_row = ps->rows[0];
3315*e4b17023SJohn Marino int first_row_length = ps->rows_length[0];
3316*e4b17023SJohn Marino
3317*e4b17023SJohn Marino for (row = 0; row < last_row; row++)
3318*e4b17023SJohn Marino {
3319*e4b17023SJohn Marino ps->rows[row] = ps->rows[row + 1];
3320*e4b17023SJohn Marino ps->rows_length[row] = ps->rows_length[row + 1];
3321*e4b17023SJohn Marino }
3322*e4b17023SJohn Marino
3323*e4b17023SJohn Marino ps->rows[last_row] = first_row;
3324*e4b17023SJohn Marino ps->rows_length[last_row] = first_row_length;
3325*e4b17023SJohn Marino }
3326*e4b17023SJohn Marino
3327*e4b17023SJohn Marino ps->max_cycle -= start_cycle;
3328*e4b17023SJohn Marino ps->min_cycle -= start_cycle;
3329*e4b17023SJohn Marino }
3330*e4b17023SJohn Marino
3331*e4b17023SJohn Marino #endif /* INSN_SCHEDULING */
3332*e4b17023SJohn Marino
3333*e4b17023SJohn Marino static bool
gate_handle_sms(void)3334*e4b17023SJohn Marino gate_handle_sms (void)
3335*e4b17023SJohn Marino {
3336*e4b17023SJohn Marino return (optimize > 0 && flag_modulo_sched);
3337*e4b17023SJohn Marino }
3338*e4b17023SJohn Marino
3339*e4b17023SJohn Marino
3340*e4b17023SJohn Marino /* Run instruction scheduler. */
3341*e4b17023SJohn Marino /* Perform SMS module scheduling. */
3342*e4b17023SJohn Marino static unsigned int
rest_of_handle_sms(void)3343*e4b17023SJohn Marino rest_of_handle_sms (void)
3344*e4b17023SJohn Marino {
3345*e4b17023SJohn Marino #ifdef INSN_SCHEDULING
3346*e4b17023SJohn Marino basic_block bb;
3347*e4b17023SJohn Marino
3348*e4b17023SJohn Marino /* Collect loop information to be used in SMS. */
3349*e4b17023SJohn Marino cfg_layout_initialize (0);
3350*e4b17023SJohn Marino sms_schedule ();
3351*e4b17023SJohn Marino
3352*e4b17023SJohn Marino /* Update the life information, because we add pseudos. */
3353*e4b17023SJohn Marino max_regno = max_reg_num ();
3354*e4b17023SJohn Marino
3355*e4b17023SJohn Marino /* Finalize layout changes. */
3356*e4b17023SJohn Marino FOR_EACH_BB (bb)
3357*e4b17023SJohn Marino if (bb->next_bb != EXIT_BLOCK_PTR)
3358*e4b17023SJohn Marino bb->aux = bb->next_bb;
3359*e4b17023SJohn Marino free_dominance_info (CDI_DOMINATORS);
3360*e4b17023SJohn Marino cfg_layout_finalize ();
3361*e4b17023SJohn Marino #endif /* INSN_SCHEDULING */
3362*e4b17023SJohn Marino return 0;
3363*e4b17023SJohn Marino }
3364*e4b17023SJohn Marino
3365*e4b17023SJohn Marino struct rtl_opt_pass pass_sms =
3366*e4b17023SJohn Marino {
3367*e4b17023SJohn Marino {
3368*e4b17023SJohn Marino RTL_PASS,
3369*e4b17023SJohn Marino "sms", /* name */
3370*e4b17023SJohn Marino gate_handle_sms, /* gate */
3371*e4b17023SJohn Marino rest_of_handle_sms, /* execute */
3372*e4b17023SJohn Marino NULL, /* sub */
3373*e4b17023SJohn Marino NULL, /* next */
3374*e4b17023SJohn Marino 0, /* static_pass_number */
3375*e4b17023SJohn Marino TV_SMS, /* tv_id */
3376*e4b17023SJohn Marino 0, /* properties_required */
3377*e4b17023SJohn Marino 0, /* properties_provided */
3378*e4b17023SJohn Marino 0, /* properties_destroyed */
3379*e4b17023SJohn Marino 0, /* todo_flags_start */
3380*e4b17023SJohn Marino TODO_df_finish
3381*e4b17023SJohn Marino | TODO_verify_flow
3382*e4b17023SJohn Marino | TODO_verify_rtl_sharing
3383*e4b17023SJohn Marino | TODO_ggc_collect /* todo_flags_finish */
3384*e4b17023SJohn Marino }
3385*e4b17023SJohn Marino };
3386