1*38fd1498Szrj /* RTL-level loop invariant motion.
2*38fd1498Szrj Copyright (C) 2004-2018 Free Software Foundation, Inc.
3*38fd1498Szrj
4*38fd1498Szrj This file is part of GCC.
5*38fd1498Szrj
6*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it
7*38fd1498Szrj under the terms of the GNU General Public License as published by the
8*38fd1498Szrj Free Software Foundation; either version 3, or (at your option) any
9*38fd1498Szrj later version.
10*38fd1498Szrj
11*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT
12*38fd1498Szrj ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14*38fd1498Szrj for more details.
15*38fd1498Szrj
16*38fd1498Szrj You should have received a copy of the GNU General Public License
17*38fd1498Szrj along with GCC; see the file COPYING3. If not see
18*38fd1498Szrj <http://www.gnu.org/licenses/>. */
19*38fd1498Szrj
20*38fd1498Szrj /* This implements the loop invariant motion pass. It is very simple
21*38fd1498Szrj (no calls, no loads/stores, etc.). This should be sufficient to cleanup
22*38fd1498Szrj things like address arithmetics -- other more complicated invariants should
23*38fd1498Szrj be eliminated on GIMPLE either in tree-ssa-loop-im.c or in tree-ssa-pre.c.
24*38fd1498Szrj
25*38fd1498Szrj We proceed loop by loop -- it is simpler than trying to handle things
26*38fd1498Szrj globally and should not lose much. First we inspect all sets inside loop
27*38fd1498Szrj and create a dependency graph on insns (saying "to move this insn, you must
28*38fd1498Szrj also move the following insns").
29*38fd1498Szrj
30*38fd1498Szrj We then need to determine what to move. We estimate the number of registers
31*38fd1498Szrj used and move as many invariants as possible while we still have enough free
32*38fd1498Szrj registers. We prefer the expensive invariants.
33*38fd1498Szrj
   Then we move the selected invariants out of the loop, creating new
   temporaries for them if necessary.  */
36*38fd1498Szrj
37*38fd1498Szrj #include "config.h"
38*38fd1498Szrj #include "system.h"
39*38fd1498Szrj #include "coretypes.h"
40*38fd1498Szrj #include "backend.h"
41*38fd1498Szrj #include "target.h"
42*38fd1498Szrj #include "rtl.h"
43*38fd1498Szrj #include "tree.h"
44*38fd1498Szrj #include "cfghooks.h"
45*38fd1498Szrj #include "df.h"
46*38fd1498Szrj #include "memmodel.h"
47*38fd1498Szrj #include "tm_p.h"
48*38fd1498Szrj #include "insn-config.h"
49*38fd1498Szrj #include "regs.h"
50*38fd1498Szrj #include "ira.h"
51*38fd1498Szrj #include "recog.h"
52*38fd1498Szrj #include "cfgrtl.h"
53*38fd1498Szrj #include "cfgloop.h"
54*38fd1498Szrj #include "expr.h"
55*38fd1498Szrj #include "params.h"
56*38fd1498Szrj #include "rtl-iter.h"
57*38fd1498Szrj #include "dumpfile.h"
58*38fd1498Szrj
59*38fd1498Szrj /* The data stored for the loop. */
60*38fd1498Szrj
struct loop_data
{
  struct loop *outermost_exit;	/* The outermost exit of the loop.  */
  bool has_call;		/* True if the loop contains a call.  */
  /* Maximal register pressure inside loop for given register class
     (defined only for the pressure classes).  */
  int max_reg_pressure[N_REG_CLASSES];
  /* Loop regs referenced and live pseudo-registers.  */
  bitmap_head regs_ref;
  bitmap_head regs_live;
};
72*38fd1498Szrj
/* Accessor for the loop_data instance this pass hangs off LOOP->aux.  */
#define LOOP_DATA(LOOP) ((struct loop_data *) (LOOP)->aux)
74*38fd1498Szrj
/* The description of a use.  */
76*38fd1498Szrj
struct use
{
  rtx *pos;			/* Position of the use (pointer into the insn
				   pattern where the used rtx is stored).  */
  rtx_insn *insn;		/* The insn in that the use occurs.  */
  unsigned addr_use_p;		/* Whether the use occurs in an address.  */
  struct use *next;		/* Next use in the list.  */
};
84*38fd1498Szrj
85*38fd1498Szrj /* The description of a def. */
86*38fd1498Szrj
struct def
{
  struct use *uses;		/* The list of uses that are uniquely reached
				   by it.  */
  unsigned n_uses;		/* Number of such uses.  */
  unsigned n_addr_uses;		/* Number of uses in addresses.  */
  unsigned invno;		/* The corresponding invariant (index into
				   the INVARIANTS vector).  */
  bool can_prop_to_addr_uses;	/* True if the corresponding inv can be
				   propagated into its address uses.  */
};
97*38fd1498Szrj
98*38fd1498Szrj /* The data stored for each invariant. */
99*38fd1498Szrj
struct invariant
{
  /* The number of the invariant.  */
  unsigned invno;

  /* The number of the invariant with the same value (its representative;
     initialized to ~0u and filled in by find_identical_invariants).  */
  unsigned eqto;

  /* The number of invariants which eqto this.  */
  unsigned eqno;

  /* If we moved the invariant out of the loop, the original regno
     that contained its value.  */
  int orig_regno;

  /* If we moved the invariant out of the loop, the register that contains its
     value.  */
  rtx reg;

  /* The definition of the invariant.  */
  struct def *def;

  /* The insn in that it is defined.  */
  rtx_insn *insn;

  /* Whether it is always executed.  */
  bool always_executed;

  /* Whether to move the invariant.  */
  bool move;

  /* Whether the invariant is cheap when used as an address.  */
  bool cheap_address;

  /* Cost of the invariant.  */
  unsigned cost;

  /* Used for detecting already visited invariants during determining
     costs of movements.  */
  unsigned stamp;

  /* The invariants it depends on.  */
  bitmap depends_on;
};
144*38fd1498Szrj
/* Currently processed loop.  */
static struct loop *curr_loop;

/* Table of invariants indexed by the df_ref uid field; grown lazily by
   check_invariant_table_size as the DF defs table grows.  */

static unsigned int invariant_table_size = 0;
static struct invariant ** invariant_table;
152*38fd1498Szrj
153*38fd1498Szrj /* Entry for hash table of invariant expressions. */
154*38fd1498Szrj
struct invariant_expr_entry
{
  /* The invariant.  */
  struct invariant *inv;

  /* Its value.  */
  rtx expr;

  /* Its mode.  */
  machine_mode mode;

  /* Its hash, precomputed so lookups need not rehash the expression.  */
  hashval_t hash;
};
169*38fd1498Szrj
/* The actual stamp for marking already visited invariants during determining
   costs of movements.  */

static unsigned actual_stamp;

typedef struct invariant *invariant_p;


/* The invariants found in the currently processed loop.  */

static vec<invariant_p> invariants;
181*38fd1498Szrj
182*38fd1498Szrj /* Check the size of the invariant table and realloc if necessary. */
183*38fd1498Szrj
184*38fd1498Szrj static void
check_invariant_table_size(void)185*38fd1498Szrj check_invariant_table_size (void)
186*38fd1498Szrj {
187*38fd1498Szrj if (invariant_table_size < DF_DEFS_TABLE_SIZE ())
188*38fd1498Szrj {
189*38fd1498Szrj unsigned int new_size = DF_DEFS_TABLE_SIZE () + (DF_DEFS_TABLE_SIZE () / 4);
190*38fd1498Szrj invariant_table = XRESIZEVEC (struct invariant *, invariant_table, new_size);
191*38fd1498Szrj memset (&invariant_table[invariant_table_size], 0,
192*38fd1498Szrj (new_size - invariant_table_size) * sizeof (struct invariant *));
193*38fd1498Szrj invariant_table_size = new_size;
194*38fd1498Szrj }
195*38fd1498Szrj }
196*38fd1498Szrj
197*38fd1498Szrj /* Test for possibility of invariantness of X. */
198*38fd1498Szrj
199*38fd1498Szrj static bool
check_maybe_invariant(rtx x)200*38fd1498Szrj check_maybe_invariant (rtx x)
201*38fd1498Szrj {
202*38fd1498Szrj enum rtx_code code = GET_CODE (x);
203*38fd1498Szrj int i, j;
204*38fd1498Szrj const char *fmt;
205*38fd1498Szrj
206*38fd1498Szrj switch (code)
207*38fd1498Szrj {
208*38fd1498Szrj CASE_CONST_ANY:
209*38fd1498Szrj case SYMBOL_REF:
210*38fd1498Szrj case CONST:
211*38fd1498Szrj case LABEL_REF:
212*38fd1498Szrj return true;
213*38fd1498Szrj
214*38fd1498Szrj case PC:
215*38fd1498Szrj case CC0:
216*38fd1498Szrj case UNSPEC_VOLATILE:
217*38fd1498Szrj case CALL:
218*38fd1498Szrj return false;
219*38fd1498Szrj
220*38fd1498Szrj case REG:
221*38fd1498Szrj return true;
222*38fd1498Szrj
223*38fd1498Szrj case MEM:
224*38fd1498Szrj /* Load/store motion is done elsewhere. ??? Perhaps also add it here?
225*38fd1498Szrj It should not be hard, and might be faster than "elsewhere". */
226*38fd1498Szrj
227*38fd1498Szrj /* Just handle the most trivial case where we load from an unchanging
228*38fd1498Szrj location (most importantly, pic tables). */
229*38fd1498Szrj if (MEM_READONLY_P (x) && !MEM_VOLATILE_P (x))
230*38fd1498Szrj break;
231*38fd1498Szrj
232*38fd1498Szrj return false;
233*38fd1498Szrj
234*38fd1498Szrj case ASM_OPERANDS:
235*38fd1498Szrj /* Don't mess with insns declared volatile. */
236*38fd1498Szrj if (MEM_VOLATILE_P (x))
237*38fd1498Szrj return false;
238*38fd1498Szrj break;
239*38fd1498Szrj
240*38fd1498Szrj default:
241*38fd1498Szrj break;
242*38fd1498Szrj }
243*38fd1498Szrj
244*38fd1498Szrj fmt = GET_RTX_FORMAT (code);
245*38fd1498Szrj for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
246*38fd1498Szrj {
247*38fd1498Szrj if (fmt[i] == 'e')
248*38fd1498Szrj {
249*38fd1498Szrj if (!check_maybe_invariant (XEXP (x, i)))
250*38fd1498Szrj return false;
251*38fd1498Szrj }
252*38fd1498Szrj else if (fmt[i] == 'E')
253*38fd1498Szrj {
254*38fd1498Szrj for (j = 0; j < XVECLEN (x, i); j++)
255*38fd1498Szrj if (!check_maybe_invariant (XVECEXP (x, i, j)))
256*38fd1498Szrj return false;
257*38fd1498Szrj }
258*38fd1498Szrj }
259*38fd1498Szrj
260*38fd1498Szrj return true;
261*38fd1498Szrj }
262*38fd1498Szrj
263*38fd1498Szrj /* Returns the invariant definition for USE, or NULL if USE is not
264*38fd1498Szrj invariant. */
265*38fd1498Szrj
static struct invariant *
invariant_for_use (df_ref use)
{
  struct df_link *defs;
  df_ref def;
  basic_block bb = DF_REF_BB (use), def_bb;

  /* Uses flagged read-write are never treated as invariant uses.  */
  if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
    return NULL;

  /* The use must be reached by exactly one definition, and that
     definition must itself be recorded as an invariant.  */
  defs = DF_REF_CHAIN (use);
  if (!defs || defs->next)
    return NULL;
  def = defs->ref;
  check_invariant_table_size ();
  if (!invariant_table[DF_REF_ID (def)])
    return NULL;

  /* The single definition must dominate the use.  */
  def_bb = DF_REF_BB (def);
  if (!dominated_by_p (CDI_DOMINATORS, bb, def_bb))
    return NULL;
  return invariant_table[DF_REF_ID (def)];
}
289*38fd1498Szrj
290*38fd1498Szrj /* Computes hash value for invariant expression X in INSN. */
291*38fd1498Szrj
static hashval_t
hash_invariant_expr_1 (rtx_insn *insn, rtx x)
{
  enum rtx_code code = GET_CODE (x);
  int i, j;
  const char *fmt;
  hashval_t val = code;
  int do_not_record_p;
  df_ref use;
  struct invariant *inv;

  switch (code)
    {
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      /* Constants hash by their RTL structure.  */
      return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false);

    case REG:
      /* A register defined by an invariant hashes as that invariant's
	 equivalence class, so equal invariants hash equal even when they
	 use different registers.  Otherwise hash the register itself.  */
      use = df_find_use (insn, x);
      if (!use)
	return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false);
      inv = invariant_for_use (use);
      if (!inv)
	return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false);

      /* Dependencies are processed first (find_identical_invariants), so
	 eqto must already be computed here.  */
      gcc_assert (inv->eqto != ~0u);
      return inv->eqto;

    default:
      break;
    }

  /* Combine the hashes of all subexpressions and scalar operands.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	val ^= hash_invariant_expr_1 (insn, XEXP (x, i));
      else if (fmt[i] == 'E')
	{
	  for (j = 0; j < XVECLEN (x, i); j++)
	    val ^= hash_invariant_expr_1 (insn, XVECEXP (x, i, j));
	}
      else if (fmt[i] == 'i' || fmt[i] == 'n')
	val ^= XINT (x, i);
      else if (fmt[i] == 'p')
	val ^= constant_lower_bound (SUBREG_BYTE (x));
    }

  return val;
}
344*38fd1498Szrj
345*38fd1498Szrj /* Returns true if the invariant expressions E1 and E2 used in insns INSN1
346*38fd1498Szrj and INSN2 have always the same value. */
347*38fd1498Szrj
static bool
invariant_expr_equal_p (rtx_insn *insn1, rtx e1, rtx_insn *insn2, rtx e2)
{
  enum rtx_code code = GET_CODE (e1);
  int i, j;
  const char *fmt;
  df_ref use1, use2;
  struct invariant *inv1 = NULL, *inv2 = NULL;
  rtx sub1, sub2;

  /* If mode of only one of the operands is VOIDmode, it is not equivalent to
     the other one.  If both are VOIDmode, we rely on the caller of this
     function to verify that their modes are the same.  */
  if (code != GET_CODE (e2) || GET_MODE (e1) != GET_MODE (e2))
    return false;

  switch (code)
    {
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      return rtx_equal_p (e1, e2);

    case REG:
      /* Registers compare equal iff both are defined by invariants in the
	 same equivalence class (mirrors hash_invariant_expr_1).  */
      use1 = df_find_use (insn1, e1);
      use2 = df_find_use (insn2, e2);
      if (use1)
	inv1 = invariant_for_use (use1);
      if (use2)
	inv2 = invariant_for_use (use2);

      if (!inv1 && !inv2)
	return rtx_equal_p (e1, e2);

      if (!inv1 || !inv2)
	return false;

      /* Dependencies were processed first, so eqto is valid for both.  */
      gcc_assert (inv1->eqto != ~0u);
      gcc_assert (inv2->eqto != ~0u);
      return inv1->eqto == inv2->eqto;

    default:
      break;
    }

  /* Structural comparison of all subexpressions and scalar operands.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  sub1 = XEXP (e1, i);
	  sub2 = XEXP (e2, i);

	  if (!invariant_expr_equal_p (insn1, sub1, insn2, sub2))
	    return false;
	}

      else if (fmt[i] == 'E')
	{
	  if (XVECLEN (e1, i) != XVECLEN (e2, i))
	    return false;

	  for (j = 0; j < XVECLEN (e1, i); j++)
	    {
	      sub1 = XVECEXP (e1, i, j);
	      sub2 = XVECEXP (e2, i, j);

	      if (!invariant_expr_equal_p (insn1, sub1, insn2, sub2))
		return false;
	    }
	}
      else if (fmt[i] == 'i' || fmt[i] == 'n')
	{
	  if (XINT (e1, i) != XINT (e2, i))
	    return false;
	}
      else if (fmt[i] == 'p')
	{
	  if (maybe_ne (SUBREG_BYTE (e1), SUBREG_BYTE (e2)))
	    return false;
	}
      /* Unhandled type of subexpression, we fail conservatively.  */
      else
	return false;
    }

  return true;
}
437*38fd1498Szrj
/* Hash-table traits for invariant_expr_entry; entries are released with
   free () when the table is destroyed (free_ptr_hash).  */

struct invariant_expr_hasher : free_ptr_hash <invariant_expr_entry>
{
  static inline hashval_t hash (const invariant_expr_entry *);
  static inline bool equal (const invariant_expr_entry *,
			    const invariant_expr_entry *);
};
444*38fd1498Szrj
445*38fd1498Szrj /* Returns hash value for invariant expression entry ENTRY. */
446*38fd1498Szrj
447*38fd1498Szrj inline hashval_t
hash(const invariant_expr_entry * entry)448*38fd1498Szrj invariant_expr_hasher::hash (const invariant_expr_entry *entry)
449*38fd1498Szrj {
450*38fd1498Szrj return entry->hash;
451*38fd1498Szrj }
452*38fd1498Szrj
453*38fd1498Szrj /* Compares invariant expression entries ENTRY1 and ENTRY2. */
454*38fd1498Szrj
455*38fd1498Szrj inline bool
equal(const invariant_expr_entry * entry1,const invariant_expr_entry * entry2)456*38fd1498Szrj invariant_expr_hasher::equal (const invariant_expr_entry *entry1,
457*38fd1498Szrj const invariant_expr_entry *entry2)
458*38fd1498Szrj {
459*38fd1498Szrj if (entry1->mode != entry2->mode)
460*38fd1498Szrj return 0;
461*38fd1498Szrj
462*38fd1498Szrj return invariant_expr_equal_p (entry1->inv->insn, entry1->expr,
463*38fd1498Szrj entry2->inv->insn, entry2->expr);
464*38fd1498Szrj }
465*38fd1498Szrj
/* Hash table mapping invariant expressions to their representative
   invariant, used to merge invariants with the same value.  */
typedef hash_table<invariant_expr_hasher> invariant_htab_type;
467*38fd1498Szrj
468*38fd1498Szrj /* Checks whether invariant with value EXPR in machine mode MODE is
469*38fd1498Szrj recorded in EQ. If this is the case, return the invariant. Otherwise
470*38fd1498Szrj insert INV to the table for this expression and return INV. */
471*38fd1498Szrj
472*38fd1498Szrj static struct invariant *
find_or_insert_inv(invariant_htab_type * eq,rtx expr,machine_mode mode,struct invariant * inv)473*38fd1498Szrj find_or_insert_inv (invariant_htab_type *eq, rtx expr, machine_mode mode,
474*38fd1498Szrj struct invariant *inv)
475*38fd1498Szrj {
476*38fd1498Szrj hashval_t hash = hash_invariant_expr_1 (inv->insn, expr);
477*38fd1498Szrj struct invariant_expr_entry *entry;
478*38fd1498Szrj struct invariant_expr_entry pentry;
479*38fd1498Szrj invariant_expr_entry **slot;
480*38fd1498Szrj
481*38fd1498Szrj pentry.expr = expr;
482*38fd1498Szrj pentry.inv = inv;
483*38fd1498Szrj pentry.mode = mode;
484*38fd1498Szrj slot = eq->find_slot_with_hash (&pentry, hash, INSERT);
485*38fd1498Szrj entry = *slot;
486*38fd1498Szrj
487*38fd1498Szrj if (entry)
488*38fd1498Szrj return entry->inv;
489*38fd1498Szrj
490*38fd1498Szrj entry = XNEW (struct invariant_expr_entry);
491*38fd1498Szrj entry->inv = inv;
492*38fd1498Szrj entry->expr = expr;
493*38fd1498Szrj entry->mode = mode;
494*38fd1498Szrj entry->hash = hash;
495*38fd1498Szrj *slot = entry;
496*38fd1498Szrj
497*38fd1498Szrj return inv;
498*38fd1498Szrj }
499*38fd1498Szrj
500*38fd1498Szrj /* Finds invariants identical to INV and records the equivalence. EQ is the
501*38fd1498Szrj hash table of the invariants. */
502*38fd1498Szrj
static void
find_identical_invariants (invariant_htab_type *eq, struct invariant *inv)
{
  unsigned depno;
  bitmap_iterator bi;
  struct invariant *dep;
  rtx expr, set;
  machine_mode mode;
  struct invariant *tmp;

  /* Already processed; eqto starts out as ~0u (see create_new_invariant).  */
  if (inv->eqto != ~0u)
    return;

  /* Process dependencies first, so their eqto fields are valid when this
     invariant's expression is hashed and compared.  */
  EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, depno, bi)
    {
      dep = invariants[depno];
      find_identical_invariants (eq, dep);
    }

  /* NOTE(review): assumes INV->insn has a single set -- presumably
     invariants are only created for single-set insns; confirm at the
     creation site.  */
  set = single_set (inv->insn);
  expr = SET_SRC (set);
  mode = GET_MODE (expr);
  if (mode == VOIDmode)
    mode = GET_MODE (SET_DEST (set));

  tmp = find_or_insert_inv (eq, expr, mode, inv);
  inv->eqto = tmp->invno;

  /* Count this invariant towards its representative's class size, but
     only when it is always executed.  */
  if (tmp->invno != inv->invno && inv->always_executed)
    tmp->eqno++;

  if (dump_file && inv->eqto != inv->invno)
    fprintf (dump_file,
	     "Invariant %d is equivalent to invariant %d.\n",
	     inv->invno, inv->eqto);
}
539*38fd1498Szrj
540*38fd1498Szrj /* Find invariants with the same value and record the equivalences. */
541*38fd1498Szrj
542*38fd1498Szrj static void
merge_identical_invariants(void)543*38fd1498Szrj merge_identical_invariants (void)
544*38fd1498Szrj {
545*38fd1498Szrj unsigned i;
546*38fd1498Szrj struct invariant *inv;
547*38fd1498Szrj invariant_htab_type eq (invariants.length ());
548*38fd1498Szrj
549*38fd1498Szrj FOR_EACH_VEC_ELT (invariants, i, inv)
550*38fd1498Szrj find_identical_invariants (&eq, inv);
551*38fd1498Szrj }
552*38fd1498Szrj
553*38fd1498Szrj /* Determines the basic blocks inside LOOP that are always executed and
554*38fd1498Szrj stores their bitmap to ALWAYS_REACHED. MAY_EXIT is a bitmap of
555*38fd1498Szrj basic blocks that may either exit the loop, or contain the call that
556*38fd1498Szrj does not have to return. BODY is body of the loop obtained by
557*38fd1498Szrj get_loop_body_in_dom_order. */
558*38fd1498Szrj
559*38fd1498Szrj static void
compute_always_reached(struct loop * loop,basic_block * body,bitmap may_exit,bitmap always_reached)560*38fd1498Szrj compute_always_reached (struct loop *loop, basic_block *body,
561*38fd1498Szrj bitmap may_exit, bitmap always_reached)
562*38fd1498Szrj {
563*38fd1498Szrj unsigned i;
564*38fd1498Szrj
565*38fd1498Szrj for (i = 0; i < loop->num_nodes; i++)
566*38fd1498Szrj {
567*38fd1498Szrj if (dominated_by_p (CDI_DOMINATORS, loop->latch, body[i]))
568*38fd1498Szrj bitmap_set_bit (always_reached, i);
569*38fd1498Szrj
570*38fd1498Szrj if (bitmap_bit_p (may_exit, i))
571*38fd1498Szrj return;
572*38fd1498Szrj }
573*38fd1498Szrj }
574*38fd1498Szrj
575*38fd1498Szrj /* Finds exits out of the LOOP with body BODY. Marks blocks in that we may
576*38fd1498Szrj exit the loop by cfg edge to HAS_EXIT and MAY_EXIT. In MAY_EXIT
577*38fd1498Szrj additionally mark blocks that may exit due to a call. */
578*38fd1498Szrj
static void
find_exits (struct loop *loop, basic_block *body,
	    bitmap may_exit, bitmap has_exit)
{
  unsigned i;
  edge_iterator ei;
  edge e;
  struct loop *outermost_exit = loop, *aexit;
  bool has_call = false;
  rtx_insn *insn;

  for (i = 0; i < loop->num_nodes; i++)
    {
      if (body[i]->loop_father == loop)
	{
	  /* A call that is not const/pure (or is a looping const/pure
	     call) may never return, so treat its block as a possible
	     exit.  */
	  FOR_BB_INSNS (body[i], insn)
	    {
	      if (CALL_P (insn)
		  && (RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
		      || !RTL_CONST_OR_PURE_CALL_P (insn)))
		{
		  has_call = true;
		  bitmap_set_bit (may_exit, i);
		  break;
		}
	    }

	  FOR_EACH_EDGE (e, ei, body[i]->succs)
	    {
	      /* An edge leaving the loop is a real exit; track the
		 outermost loop any exit edge can reach.  */
	      if (! flow_bb_inside_loop_p (loop, e->dest))
		{
		  bitmap_set_bit (may_exit, i);
		  bitmap_set_bit (has_exit, i);
		  outermost_exit = find_common_loop (outermost_exit,
						     e->dest->loop_father);
		}
	      /* If we enter a subloop that might never terminate treat
		 it like a possible exit.  */
	      if (flow_loop_nested_p (loop, e->dest->loop_father))
		bitmap_set_bit (may_exit, i);
	    }
	  continue;
	}

      /* Use the data stored for the subloop to decide whether we may exit
	 through it.  It is sufficient to do this for header of the loop,
	 as other basic blocks inside it must be dominated by it.  */
      if (body[i]->loop_father->header != body[i])
	continue;

      if (LOOP_DATA (body[i]->loop_father)->has_call)
	{
	  has_call = true;
	  bitmap_set_bit (may_exit, i);
	}
      aexit = LOOP_DATA (body[i]->loop_father)->outermost_exit;
      if (aexit != loop)
	{
	  bitmap_set_bit (may_exit, i);
	  bitmap_set_bit (has_exit, i);

	  if (flow_loop_nested_p (aexit, outermost_exit))
	    outermost_exit = aexit;
	}
    }

  /* Allocate the per-loop data on first visit, then record results.  */
  if (loop->aux == NULL)
    {
      loop->aux = xcalloc (1, sizeof (struct loop_data));
      bitmap_initialize (&LOOP_DATA (loop)->regs_ref, &reg_obstack);
      bitmap_initialize (&LOOP_DATA (loop)->regs_live, &reg_obstack);
    }
  LOOP_DATA (loop)->outermost_exit = outermost_exit;
  LOOP_DATA (loop)->has_call = has_call;
}
654*38fd1498Szrj
655*38fd1498Szrj /* Check whether we may assign a value to X from a register. */
656*38fd1498Szrj
657*38fd1498Szrj static bool
may_assign_reg_p(rtx x)658*38fd1498Szrj may_assign_reg_p (rtx x)
659*38fd1498Szrj {
660*38fd1498Szrj return (GET_MODE (x) != VOIDmode
661*38fd1498Szrj && GET_MODE (x) != BLKmode
662*38fd1498Szrj && can_copy_p (GET_MODE (x))
663*38fd1498Szrj && (!REG_P (x)
664*38fd1498Szrj || !HARD_REGISTER_P (x)
665*38fd1498Szrj || REGNO_REG_CLASS (REGNO (x)) != NO_REGS));
666*38fd1498Szrj }
667*38fd1498Szrj
668*38fd1498Szrj /* Finds definitions that may correspond to invariants in LOOP with body
669*38fd1498Szrj BODY. */
670*38fd1498Szrj
static void
find_defs (struct loop *loop)
{
  if (dump_file)
    {
      fprintf (dump_file,
	       "*****starting processing of loop %d ******\n",
	       loop->num);
    }

  /* Rebuild the UD-chain and live problems from scratch and analyze just
     this loop's region; the ordering of these calls is significant.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
  df_chain_add_problem (DF_UD_CHAIN);
  df_live_add_problem ();
  df_live_set_all_dirty ();
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_analyze_loop (loop);
  /* The defs table may have grown; keep the invariant table in step.  */
  check_invariant_table_size ();

  if (dump_file)
    {
      df_dump_region (dump_file);
      fprintf (dump_file,
	       "*****ending processing of loop %d ******\n",
	       loop->num);
    }
}
698*38fd1498Szrj
699*38fd1498Szrj /* Creates a new invariant for definition DEF in INSN, depending on invariants
700*38fd1498Szrj in DEPENDS_ON. ALWAYS_EXECUTED is true if the insn is always executed,
701*38fd1498Szrj unless the program ends due to a function call. The newly created invariant
702*38fd1498Szrj is returned. */
703*38fd1498Szrj
static struct invariant *
create_new_invariant (struct def *def, rtx_insn *insn, bitmap depends_on,
		      bool always_executed)
{
  struct invariant *inv = XNEW (struct invariant);
  /* NOTE(review): SET is dereferenced below, so INSN is assumed to be a
     single-set insn -- confirm this holds at all callers.  */
  rtx set = single_set (insn);
  bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));

  inv->def = def;
  inv->always_executed = always_executed;
  inv->depends_on = depends_on;

  /* If the set is simple, usually by moving it we move the whole store out of
     the loop.  Otherwise we save only cost of the computation.  */
  if (def)
    {
      inv->cost = set_rtx_cost (set, speed);
      /* ??? Try to determine cheapness of address computation.  Unfortunately
	 the address cost is only a relative measure, we can't really compare
	 it with any absolute number, but only with other address costs.
	 But here we don't have any other addresses, so compare with a magic
	 number anyway.  It has to be large enough to not regress PR33928
	 (by avoiding to move reg+8,reg+16,reg+24 invariants), but small
	 enough to not regress 410.bwaves either (by still moving reg+reg
	 invariants).
	 See http://gcc.gnu.org/ml/gcc-patches/2009-10/msg01210.html .  */
      if (SCALAR_INT_MODE_P (GET_MODE (SET_DEST (set))))
	inv->cheap_address = address_cost (SET_SRC (set), word_mode,
					   ADDR_SPACE_GENERIC, speed) < 3;
      else
	inv->cheap_address = false;
    }
  else
    {
      inv->cost = set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)),
				speed);
      inv->cheap_address = false;
    }

  inv->move = false;
  inv->reg = NULL_RTX;
  inv->orig_regno = -1;
  inv->stamp = 0;
  inv->insn = insn;

  inv->invno = invariants.length ();
  /* ~0u marks the equivalence class as not yet computed
     (see find_identical_invariants).  */
  inv->eqto = ~0u;

  /* Itself.  */
  inv->eqno = 1;

  if (def)
    def->invno = inv->invno;
  invariants.safe_push (inv);

  if (dump_file)
    {
      fprintf (dump_file,
	       "Set in insn %d is invariant (%d), cost %d, depends on ",
	       INSN_UID (insn), inv->invno, inv->cost);
      dump_bitmap (dump_file, inv->depends_on);
    }

  return inv;
}
769*38fd1498Szrj
770*38fd1498Szrj /* Return a canonical version of X for the address, from the point of view,
771*38fd1498Szrj that all multiplications are represented as MULT instead of the multiply
772*38fd1498Szrj by a power of 2 being represented as ASHIFT.
773*38fd1498Szrj
774*38fd1498Szrj Callers should prepare a copy of X because this function may modify it
775*38fd1498Szrj in place. */
776*38fd1498Szrj
static void
canonicalize_address_mult (rtx x)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
    {
      rtx sub = *iter;
      scalar_int_mode sub_mode;
      /* Only rewrite an ASHIFT whose count is a non-negative constant
	 smaller than the mode width; anything else is left alone (an
	 out-of-range count could not be represented as a multiplier).  */
      if (is_a <scalar_int_mode> (GET_MODE (sub), &sub_mode)
	  && GET_CODE (sub) == ASHIFT
	  && CONST_INT_P (XEXP (sub, 1))
	  && INTVAL (XEXP (sub, 1)) < GET_MODE_BITSIZE (sub_mode)
	  && INTVAL (XEXP (sub, 1)) >= 0)
	{
	  HOST_WIDE_INT shift = INTVAL (XEXP (sub, 1));
	  /* Turn (ashift Y N) into (mult Y (1 << N)), modifying SUB
	     in place as the function comment advertises.  */
	  PUT_CODE (sub, MULT);
	  XEXP (sub, 1) = gen_int_mode (HOST_WIDE_INT_1 << shift, sub_mode);
	  /* Do not walk into the operands of the rewritten MULT.  */
	  iter.skip_subrtxes ();
	}
    }
}
798*38fd1498Szrj
799*38fd1498Szrj /* Maximum number of sub expressions in address. We set it to
800*38fd1498Szrj a small integer since it's unlikely to have a complicated
801*38fd1498Szrj address expression. */
802*38fd1498Szrj
803*38fd1498Szrj #define MAX_CANON_ADDR_PARTS (5)
804*38fd1498Szrj
/* Collect sub expressions in address X with PLUS as the separator.
   Sub expressions are stored in vector ADDR_PARTS.  */
807*38fd1498Szrj
808*38fd1498Szrj static void
collect_address_parts(rtx x,vec<rtx> * addr_parts)809*38fd1498Szrj collect_address_parts (rtx x, vec<rtx> *addr_parts)
810*38fd1498Szrj {
811*38fd1498Szrj subrtx_var_iterator::array_type array;
812*38fd1498Szrj FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
813*38fd1498Szrj {
814*38fd1498Szrj rtx sub = *iter;
815*38fd1498Szrj
816*38fd1498Szrj if (GET_CODE (sub) != PLUS)
817*38fd1498Szrj {
818*38fd1498Szrj addr_parts->safe_push (sub);
819*38fd1498Szrj iter.skip_subrtxes ();
820*38fd1498Szrj }
821*38fd1498Szrj }
822*38fd1498Szrj }
823*38fd1498Szrj
/* Compare function for sorting sub expressions X and Y based on
   precedence defined for commutative operations.  */
826*38fd1498Szrj
827*38fd1498Szrj static int
compare_address_parts(const void * x,const void * y)828*38fd1498Szrj compare_address_parts (const void *x, const void *y)
829*38fd1498Szrj {
830*38fd1498Szrj const rtx *rx = (const rtx *)x;
831*38fd1498Szrj const rtx *ry = (const rtx *)y;
832*38fd1498Szrj int px = commutative_operand_precedence (*rx);
833*38fd1498Szrj int py = commutative_operand_precedence (*ry);
834*38fd1498Szrj
835*38fd1498Szrj return (py - px);
836*38fd1498Szrj }
837*38fd1498Szrj
/* Return a canonical version address for X by following steps:
     1) Rewrite ASHIFT into MULT recursively.
     2) Divide address into sub expressions with PLUS as the
	separator.
     3) Sort sub expressions according to precedence defined
	for commutative operations.
     4) Simplify CONST_INT_P sub expressions.
     5) Create new canonicalized address and return.
   Callers should prepare a copy of X because this function may
   modify it in place.  */
848*38fd1498Szrj
static rtx
canonicalize_address (rtx x)
{
  rtx res;
  unsigned int i, j;
  machine_mode mode = GET_MODE (x);
  auto_vec<rtx, MAX_CANON_ADDR_PARTS> addr_parts;

  /* Rewrite ASHIFT into MULT.  */
  canonicalize_address_mult (x);
  /* Divide address into sub expressions.  */
  collect_address_parts (x, &addr_parts);
  /* Unlikely to have very complicated address; with fewer than two
     parts there is nothing to reorder, and past MAX_CANON_ADDR_PARTS
     we give up and return X unchanged.  */
  if (addr_parts.length () < 2
      || addr_parts.length () > MAX_CANON_ADDR_PARTS)
    return x;

  /* Sort sub expressions according to canonicalization precedence.
     The gcc_assert below relies on constants having the lowest
     precedence, so that they end up grouped at the tail.  */
  addr_parts.qsort (compare_address_parts);

  /* Simplify all constant int summary if possible.  First locate the
     index I of the first CONST_INT part (I == length if none)...  */
  for (i = 0; i < addr_parts.length (); i++)
    if (CONST_INT_P (addr_parts[i]))
      break;

  /* ...then fold every following constant into ADDR_PARTS[I].  */
  for (j = i + 1; j < addr_parts.length (); j++)
    {
      gcc_assert (CONST_INT_P (addr_parts[j]));
      addr_parts[i] = simplify_gen_binary (PLUS, mode,
					   addr_parts[i],
					   addr_parts[j]);
    }

  /* Chain PLUS operators to the left for !CONST_INT_P sub expressions.  */
  res = addr_parts[0];
  for (j = 1; j < i; j++)
    res = simplify_gen_binary (PLUS, mode, res, addr_parts[j]);

  /* Pickup the last CONST_INT_P sub expression.  */
  if (i < addr_parts.length ())
    res = simplify_gen_binary (PLUS, mode, res, addr_parts[i]);

  return res;
}
893*38fd1498Szrj
894*38fd1498Szrj /* Given invariant DEF and its address USE, check if the corresponding
895*38fd1498Szrj invariant expr can be propagated into the use or not. */
896*38fd1498Szrj
static bool
inv_can_prop_to_addr_use (struct def *def, df_ref use)
{
  struct invariant *inv;
  rtx *pos = DF_REF_REAL_LOC (use), def_set, use_set;
  rtx_insn *use_insn = DF_REF_INSN (use);
  rtx_insn *def_insn;
  bool ok;

  inv = invariants[def->invno];
  /* No need to check if address expression is expensive.  */
  if (!inv->cheap_address)
    return false;

  /* Propagation needs the invariant to be a single set so its source
     can be substituted for the use.  */
  def_insn = inv->insn;
  def_set = single_set (def_insn);
  if (!def_set)
    return false;

  /* Tentatively substitute the invariant's RHS at the use position and
     check whether the changed insns are still valid.  The change group
     is cancelled below, so USE_INSN is never actually modified here.  */
  validate_unshare_change (use_insn, pos, SET_SRC (def_set), true);
  ok = verify_changes (0);
  /* Try harder with canonicalization in address expression.  */
  if (!ok && (use_set = single_set (use_insn)) != NULL_RTX)
    {
      rtx src, dest, mem = NULL_RTX;

      /* The memory reference may be on either side of the set.  */
      src = SET_SRC (use_set);
      dest = SET_DEST (use_set);
      if (MEM_P (src))
	mem = src;
      else if (MEM_P (dest))
	mem = dest;

      /* If the substituted address is not directly valid, accept the
	 propagation anyway when its canonical form is a valid address
	 for MEM's address space.  */
      if (mem != NULL_RTX
	  && !memory_address_addr_space_p (GET_MODE (mem),
					   XEXP (mem, 0),
					   MEM_ADDR_SPACE (mem)))
	{
	  rtx addr = canonicalize_address (copy_rtx (XEXP (mem, 0)));
	  if (memory_address_addr_space_p (GET_MODE (mem),
					   addr, MEM_ADDR_SPACE (mem)))
	    ok = true;
	}
    }
  /* Undo the tentative substitution in all cases.  */
  cancel_changes (0);
  return ok;
}
944*38fd1498Szrj
945*38fd1498Szrj /* Record USE at DEF. */
946*38fd1498Szrj
947*38fd1498Szrj static void
record_use(struct def * def,df_ref use)948*38fd1498Szrj record_use (struct def *def, df_ref use)
949*38fd1498Szrj {
950*38fd1498Szrj struct use *u = XNEW (struct use);
951*38fd1498Szrj
952*38fd1498Szrj u->pos = DF_REF_REAL_LOC (use);
953*38fd1498Szrj u->insn = DF_REF_INSN (use);
954*38fd1498Szrj u->addr_use_p = (DF_REF_TYPE (use) == DF_REF_REG_MEM_LOAD
955*38fd1498Szrj || DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE);
956*38fd1498Szrj u->next = def->uses;
957*38fd1498Szrj def->uses = u;
958*38fd1498Szrj def->n_uses++;
959*38fd1498Szrj if (u->addr_use_p)
960*38fd1498Szrj {
961*38fd1498Szrj /* Initialize propagation information if this is the first addr
962*38fd1498Szrj use of the inv def. */
963*38fd1498Szrj if (def->n_addr_uses == 0)
964*38fd1498Szrj def->can_prop_to_addr_uses = true;
965*38fd1498Szrj
966*38fd1498Szrj def->n_addr_uses++;
967*38fd1498Szrj if (def->can_prop_to_addr_uses && !inv_can_prop_to_addr_use (def, use))
968*38fd1498Szrj def->can_prop_to_addr_uses = false;
969*38fd1498Szrj }
970*38fd1498Szrj }
971*38fd1498Szrj
972*38fd1498Szrj /* Finds the invariants USE depends on and store them to the DEPENDS_ON
973*38fd1498Szrj bitmap. Returns true if all dependencies of USE are known to be
974*38fd1498Szrj loop invariants, false otherwise. */
975*38fd1498Szrj
static bool
check_dependency (basic_block bb, df_ref use, bitmap depends_on)
{
  df_ref def;
  basic_block def_bb;
  struct df_link *defs;
  struct def *def_data;
  struct invariant *inv;

  /* A use that is also written by the insn cannot be satisfied by a
     hoisted definition.  */
  if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
    return false;

  /* No reaching definition inside the region: the value is defined
     outside the loop, hence invariant...  */
  defs = DF_REF_CHAIN (use);
  if (!defs)
    {
      unsigned int regno = DF_REF_REGNO (use);

      /* If this is the use of an uninitialized argument register that is
	 likely to be spilled, do not move it lest this might extend its
	 lifetime and cause reload to die.  This can occur for a call to
	 a function taking complex number arguments and moving the insns
	 preparing the arguments without moving the call itself wouldn't
	 gain much in practice.  */
      if ((DF_REF_FLAGS (use) & DF_HARD_REG_LIVE)
	  && FUNCTION_ARG_REGNO_P (regno)
	  && targetm.class_likely_spilled_p (REGNO_REG_CLASS (regno)))
	return false;

      return true;
    }

  /* More than one reaching definition: not a candidate.  */
  if (defs->next)
    return false;

  /* The single definition must itself be a known invariant...  */
  def = defs->ref;
  check_invariant_table_size ();
  inv = invariant_table[DF_REF_ID (def)];
  if (!inv)
    return false;

  def_data = inv->def;
  gcc_assert (def_data != NULL);

  def_bb = DF_REF_BB (def);
  /* ...and it must dominate the use.  Note that in case bb == def_bb,
     we know that the definition dominates insn, because def has
     invariant_table[DF_REF_ID(def)] defined and we process the insns
     in the basic block bb sequentially.  */
  if (!dominated_by_p (CDI_DOMINATORS, bb, def_bb))
    return false;

  /* Record the dependency: to move the user, INV must move too.  */
  bitmap_set_bit (depends_on, def_data->invno);
  return true;
}
1030*38fd1498Szrj
1031*38fd1498Szrj
1032*38fd1498Szrj /* Finds the invariants INSN depends on and store them to the DEPENDS_ON
1033*38fd1498Szrj bitmap. Returns true if all dependencies of INSN are known to be
1034*38fd1498Szrj loop invariants, false otherwise. */
1035*38fd1498Szrj
1036*38fd1498Szrj static bool
check_dependencies(rtx_insn * insn,bitmap depends_on)1037*38fd1498Szrj check_dependencies (rtx_insn *insn, bitmap depends_on)
1038*38fd1498Szrj {
1039*38fd1498Szrj struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1040*38fd1498Szrj df_ref use;
1041*38fd1498Szrj basic_block bb = BLOCK_FOR_INSN (insn);
1042*38fd1498Szrj
1043*38fd1498Szrj FOR_EACH_INSN_INFO_USE (use, insn_info)
1044*38fd1498Szrj if (!check_dependency (bb, use, depends_on))
1045*38fd1498Szrj return false;
1046*38fd1498Szrj FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
1047*38fd1498Szrj if (!check_dependency (bb, use, depends_on))
1048*38fd1498Szrj return false;
1049*38fd1498Szrj
1050*38fd1498Szrj return true;
1051*38fd1498Szrj }
1052*38fd1498Szrj
1053*38fd1498Szrj /* Pre-check candidate DEST to skip the one which can not make a valid insn
1054*38fd1498Szrj during move_invariant_reg. SIMPLE is to skip HARD_REGISTER. */
1055*38fd1498Szrj static bool
pre_check_invariant_p(bool simple,rtx dest)1056*38fd1498Szrj pre_check_invariant_p (bool simple, rtx dest)
1057*38fd1498Szrj {
1058*38fd1498Szrj if (simple && REG_P (dest) && DF_REG_DEF_COUNT (REGNO (dest)) > 1)
1059*38fd1498Szrj {
1060*38fd1498Szrj df_ref use;
1061*38fd1498Szrj unsigned int i = REGNO (dest);
1062*38fd1498Szrj struct df_insn_info *insn_info;
1063*38fd1498Szrj df_ref def_rec;
1064*38fd1498Szrj
1065*38fd1498Szrj for (use = DF_REG_USE_CHAIN (i); use; use = DF_REF_NEXT_REG (use))
1066*38fd1498Szrj {
1067*38fd1498Szrj rtx_insn *ref = DF_REF_INSN (use);
1068*38fd1498Szrj insn_info = DF_INSN_INFO_GET (ref);
1069*38fd1498Szrj
1070*38fd1498Szrj FOR_EACH_INSN_INFO_DEF (def_rec, insn_info)
1071*38fd1498Szrj if (DF_REF_REGNO (def_rec) == i)
1072*38fd1498Szrj {
1073*38fd1498Szrj /* Multi definitions at this stage, most likely are due to
1074*38fd1498Szrj instruction constraints, which requires both read and write
1075*38fd1498Szrj on the same register. Since move_invariant_reg is not
1076*38fd1498Szrj powerful enough to handle such cases, just ignore the INV
1077*38fd1498Szrj and leave the chance to others. */
1078*38fd1498Szrj return false;
1079*38fd1498Szrj }
1080*38fd1498Szrj }
1081*38fd1498Szrj }
1082*38fd1498Szrj return true;
1083*38fd1498Szrj }
1084*38fd1498Szrj
1085*38fd1498Szrj /* Finds invariant in INSN. ALWAYS_REACHED is true if the insn is always
1086*38fd1498Szrj executed. ALWAYS_EXECUTED is true if the insn is always executed,
1087*38fd1498Szrj unless the program ends due to a function call. */
1088*38fd1498Szrj
static void
find_invariant_insn (rtx_insn *insn, bool always_reached, bool always_executed)
{
  df_ref ref;
  struct def *def;
  bitmap depends_on;
  rtx set, dest;
  bool simple = true;
  struct invariant *inv;

  /* We can't move a CC0 setter without the user.  */
  if (HAVE_cc0 && sets_cc0_p (insn))
    return;

  /* Only single-set insns are candidates.  */
  set = single_set (insn);
  if (!set)
    return;
  dest = SET_DEST (set);

  /* "Simple" means the destination is a pseudo register; only simple
     invariants get a struct def and an invariant_table entry below.  */
  if (!REG_P (dest)
      || HARD_REGISTER_P (dest))
    simple = false;

  if (!may_assign_reg_p (dest)
      || !pre_check_invariant_p (simple, dest)
      || !check_maybe_invariant (SET_SRC (set)))
    return;

  /* If the insn can throw exception, we cannot move it at all without changing
     cfg.  */
  if (can_throw_internal (insn))
    return;

  /* We cannot make trapping insn executed, unless it was executed before.  */
  if (may_trap_or_fault_p (PATTERN (insn)) && !always_reached)
    return;

  /* All inputs must themselves be invariant; collect them while
     checking.  */
  depends_on = BITMAP_ALLOC (NULL);
  if (!check_dependencies (insn, depends_on))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (simple)
    def = XCNEW (struct def);
  else
    def = NULL;

  /* DEPENDS_ON is handed over to the new invariant.  */
  inv = create_new_invariant (def, insn, depends_on, always_executed);

  if (simple)
    {
      /* Remember which invariant defines DEST so that later insns'
	 uses can be resolved via invariant_table.  */
      ref = df_find_def (insn, dest);
      check_invariant_table_size ();
      invariant_table[DF_REF_ID (ref)] = inv;
    }
}
1147*38fd1498Szrj
1148*38fd1498Szrj /* Record registers used in INSN that have a unique invariant definition. */
1149*38fd1498Szrj
1150*38fd1498Szrj static void
record_uses(rtx_insn * insn)1151*38fd1498Szrj record_uses (rtx_insn *insn)
1152*38fd1498Szrj {
1153*38fd1498Szrj struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1154*38fd1498Szrj df_ref use;
1155*38fd1498Szrj struct invariant *inv;
1156*38fd1498Szrj
1157*38fd1498Szrj FOR_EACH_INSN_INFO_USE (use, insn_info)
1158*38fd1498Szrj {
1159*38fd1498Szrj inv = invariant_for_use (use);
1160*38fd1498Szrj if (inv)
1161*38fd1498Szrj record_use (inv->def, use);
1162*38fd1498Szrj }
1163*38fd1498Szrj FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
1164*38fd1498Szrj {
1165*38fd1498Szrj inv = invariant_for_use (use);
1166*38fd1498Szrj if (inv)
1167*38fd1498Szrj record_use (inv->def, use);
1168*38fd1498Szrj }
1169*38fd1498Szrj }
1170*38fd1498Szrj
1171*38fd1498Szrj /* Finds invariants in INSN. ALWAYS_REACHED is true if the insn is always
1172*38fd1498Szrj executed. ALWAYS_EXECUTED is true if the insn is always executed,
1173*38fd1498Szrj unless the program ends due to a function call. */
1174*38fd1498Szrj
static void
find_invariants_insn (rtx_insn *insn, bool always_reached, bool always_executed)
{
  /* First consider INSN itself as a new invariant, then record any
     uses it makes of invariants found so far.  */
  find_invariant_insn (insn, always_reached, always_executed);
  record_uses (insn);
}
1181*38fd1498Szrj
1182*38fd1498Szrj /* Finds invariants in basic block BB. ALWAYS_REACHED is true if the
1183*38fd1498Szrj basic block is always executed. ALWAYS_EXECUTED is true if the basic
1184*38fd1498Szrj block is always executed, unless the program ends due to a function
1185*38fd1498Szrj call. */
1186*38fd1498Szrj
1187*38fd1498Szrj static void
find_invariants_bb(basic_block bb,bool always_reached,bool always_executed)1188*38fd1498Szrj find_invariants_bb (basic_block bb, bool always_reached, bool always_executed)
1189*38fd1498Szrj {
1190*38fd1498Szrj rtx_insn *insn;
1191*38fd1498Szrj
1192*38fd1498Szrj FOR_BB_INSNS (bb, insn)
1193*38fd1498Szrj {
1194*38fd1498Szrj if (!NONDEBUG_INSN_P (insn))
1195*38fd1498Szrj continue;
1196*38fd1498Szrj
1197*38fd1498Szrj find_invariants_insn (insn, always_reached, always_executed);
1198*38fd1498Szrj
1199*38fd1498Szrj if (always_reached
1200*38fd1498Szrj && CALL_P (insn)
1201*38fd1498Szrj && (RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
1202*38fd1498Szrj || ! RTL_CONST_OR_PURE_CALL_P (insn)))
1203*38fd1498Szrj always_reached = false;
1204*38fd1498Szrj }
1205*38fd1498Szrj }
1206*38fd1498Szrj
1207*38fd1498Szrj /* Finds invariants in LOOP with body BODY. ALWAYS_REACHED is the bitmap of
1208*38fd1498Szrj basic blocks in BODY that are always executed. ALWAYS_EXECUTED is the
1209*38fd1498Szrj bitmap of basic blocks in BODY that are always executed unless the program
1210*38fd1498Szrj ends due to a function call. */
1211*38fd1498Szrj
1212*38fd1498Szrj static void
find_invariants_body(struct loop * loop,basic_block * body,bitmap always_reached,bitmap always_executed)1213*38fd1498Szrj find_invariants_body (struct loop *loop, basic_block *body,
1214*38fd1498Szrj bitmap always_reached, bitmap always_executed)
1215*38fd1498Szrj {
1216*38fd1498Szrj unsigned i;
1217*38fd1498Szrj
1218*38fd1498Szrj for (i = 0; i < loop->num_nodes; i++)
1219*38fd1498Szrj find_invariants_bb (body[i],
1220*38fd1498Szrj bitmap_bit_p (always_reached, i),
1221*38fd1498Szrj bitmap_bit_p (always_executed, i));
1222*38fd1498Szrj }
1223*38fd1498Szrj
1224*38fd1498Szrj /* Finds invariants in LOOP. */
1225*38fd1498Szrj
static void
find_invariants (struct loop *loop)
{
  auto_bitmap may_exit;
  auto_bitmap always_reached;
  auto_bitmap has_exit;
  auto_bitmap always_executed;
  basic_block *body = get_loop_body_in_dom_order (loop);

  /* Classify the loop blocks, then derive which BODY positions are
     always reached / always executed on every iteration.  */
  find_exits (loop, body, may_exit, has_exit);
  compute_always_reached (loop, body, may_exit, always_reached);
  compute_always_reached (loop, body, has_exit, always_executed);

  /* Build def information, scan the body for invariant sets, and merge
     invariants that compute the same value.  */
  find_defs (loop);
  find_invariants_body (loop, body, always_reached, always_executed);
  merge_identical_invariants ();

  free (body);
}
1245*38fd1498Szrj
1246*38fd1498Szrj /* Frees a list of uses USE. */
1247*38fd1498Szrj
1248*38fd1498Szrj static void
free_use_list(struct use * use)1249*38fd1498Szrj free_use_list (struct use *use)
1250*38fd1498Szrj {
1251*38fd1498Szrj struct use *next;
1252*38fd1498Szrj
1253*38fd1498Szrj for (; use; use = next)
1254*38fd1498Szrj {
1255*38fd1498Szrj next = use->next;
1256*38fd1498Szrj free (use);
1257*38fd1498Szrj }
1258*38fd1498Szrj }
1259*38fd1498Szrj
1260*38fd1498Szrj /* Return pressure class and number of hard registers (through *NREGS)
1261*38fd1498Szrj for destination of INSN. */
static enum reg_class
get_pressure_class_and_nregs (rtx_insn *insn, int *nregs)
{
  rtx reg;
  enum reg_class pressure_class;
  rtx set = single_set (insn);

  /* Considered invariant insns have only one set.  */
  gcc_assert (set != NULL_RTX);
  reg = SET_DEST (set);
  /* Look through a SUBREG to the underlying register.  */
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  if (MEM_P (reg))
    {
      /* A store to memory needs no register for its result.  */
      *nregs = 0;
      pressure_class = NO_REGS;
    }
  else
    {
      if (! REG_P (reg))
	reg = NULL_RTX;
      if (reg == NULL_RTX)
	/* Destination is neither MEM nor REG; fall back to
	   GENERAL_REGS -- presumably a conservative default, confirm
	   against callers if it matters.  */
	pressure_class = GENERAL_REGS;
      else
	{
	  /* Map the allocno class to its pressure class.  */
	  pressure_class = reg_allocno_class (REGNO (reg));
	  pressure_class = ira_pressure_class_translate[pressure_class];
	}
      *nregs
	= ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))];
    }
  return pressure_class;
}
1295*38fd1498Szrj
/* Calculates cost and number of registers needed for moving invariant INV
   out of the loop and stores them to *COMP_COST and *REGS_NEEDED.  *CL will
   be the REG_CLASS of INV.  Return
   -1: if INV is invalid.
    0: if INV and its depends_on have same reg_class
    1: if INV and its depends_on have different reg_classes.  */
1302*38fd1498Szrj
static int
get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed,
	      enum reg_class *cl)
{
  int i, acomp_cost;
  unsigned aregs_needed[N_REG_CLASSES];
  unsigned depno;
  struct invariant *dep;
  bitmap_iterator bi;
  int ret = 1;

  /* Find the representative of the class of the equivalent invariants.  */
  inv = invariants[inv->eqto];

  /* Initialize the outputs; only the pressure classes in use are
     cleared when -fira-loop-pressure is in effect.  */
  *comp_cost = 0;
  if (! flag_ira_loop_pressure)
    regs_needed[0] = 0;
  else
    {
      for (i = 0; i < ira_pressure_classes_num; i++)
	regs_needed[ira_pressure_classes[i]] = 0;
    }

  /* Already selected for motion, or already visited during this cost
     query (the stamp prevents counting a shared dependency twice).  */
  if (inv->move
      || inv->stamp == actual_stamp)
    return -1;
  inv->stamp = actual_stamp;

  /* Account for the register that will hold INV's value.  */
  if (! flag_ira_loop_pressure)
    regs_needed[0]++;
  else
    {
      int nregs;
      enum reg_class pressure_class;

      pressure_class = get_pressure_class_and_nregs (inv->insn, &nregs);
      regs_needed[pressure_class] += nregs;
      *cl = pressure_class;
      ret = 0;
    }

  /* Count the computation cost unless every use is a cheap address use
     into which the invariant can simply be propagated.  */
  if (!inv->cheap_address
      || inv->def->n_uses == 0
      || inv->def->n_addr_uses < inv->def->n_uses
      /* Count cost if the inv can't be propagated into address uses.  */
      || !inv->def->can_prop_to_addr_uses)
    (*comp_cost) += inv->cost * inv->eqno;

#ifdef STACK_REGS
  {
    /* Hoisting constant pool constants into stack regs may cost more than
       just single register.  On x87, the balance is affected both by the
       small number of FP registers, and by its register stack organization,
       that forces us to add compensation code in and around the loop to
       shuffle the operands to the top of stack before use, and pop them
       from the stack after the loop finishes.

       To model this effect, we increase the number of registers needed for
       stack registers by two: one register push, and one register pop.
       This usually has the effect that FP constant loads from the constant
       pool are not moved out of the loop.

       Note that this also means that dependent invariants can not be moved.
       However, the primary purpose of this pass is to move loop invariant
       address arithmetic out of loops, and address arithmetic that depends
       on floating point constants is unlikely to ever occur.  */
    rtx set = single_set (inv->insn);
    if (set
	&& IS_STACK_MODE (GET_MODE (SET_SRC (set)))
	&& constant_pool_constant_p (SET_SRC (set)))
      {
	if (flag_ira_loop_pressure)
	  regs_needed[ira_stack_reg_pressure_class] += 2;
	else
	  regs_needed[0] += 2;
      }
  }
#endif

  /* Recursively add the costs and register needs of every dependency
     that has not itself been moved yet.  */
  EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, depno, bi)
    {
      bool check_p;
      enum reg_class dep_cl = ALL_REGS;
      int dep_ret;

      dep = invariants[depno];

      /* If DEP is moved out of the loop, it is not a depends_on any more.  */
      if (dep->move)
	continue;

      dep_ret = get_inv_cost (dep, &acomp_cost, aregs_needed, &dep_cl);

      /* CHECK_P: whether DEP contributed any register need at all.  */
      if (! flag_ira_loop_pressure)
	check_p = aregs_needed[0] != 0;
      else
	{
	  for (i = 0; i < ira_pressure_classes_num; i++)
	    if (aregs_needed[ira_pressure_classes[i]] != 0)
	      break;
	  check_p = i < ira_pressure_classes_num;

	  /* Mixed register classes among INV and its dependencies force
	     the conservative ALL_REGS answer for the caller.  */
	  if ((dep_ret == 1) || ((dep_ret == 0) && (*cl != dep_cl)))
	    {
	      *cl = ALL_REGS;
	      ret = 1;
	    }
	}
      if (check_p
	  /* We need to check always_executed, since if the original value of
	     the invariant may be preserved, we may need to keep it in a
	     separate register.  TODO check whether the register has an
	     use outside of the loop.  */
	  && dep->always_executed
	  && !dep->def->uses->next)
	{
	  /* If this is a single use, after moving the dependency we will not
	     need a new register.  */
	  if (! flag_ira_loop_pressure)
	    aregs_needed[0]--;
	  else
	    {
	      int nregs;
	      enum reg_class pressure_class;

	      /* NOTE(review): this queries INV's insn, not DEP's, so the
		 adjustment is made in INV's pressure class even though the
		 saved register is DEP's -- confirm whether dep->insn was
		 intended here.  */
	      pressure_class = get_pressure_class_and_nregs (inv->insn, &nregs);
	      aregs_needed[pressure_class] -= nregs;
	    }
	}

      /* Fold DEP's totals into INV's.  */
      if (! flag_ira_loop_pressure)
	regs_needed[0] += aregs_needed[0];
      else
	{
	  for (i = 0; i < ira_pressure_classes_num; i++)
	    regs_needed[ira_pressure_classes[i]]
	      += aregs_needed[ira_pressure_classes[i]];
	}
      (*comp_cost) += acomp_cost;
    }
  return ret;
}
1445*38fd1498Szrj
1446*38fd1498Szrj /* Calculates gain for eliminating invariant INV. REGS_USED is the number
1447*38fd1498Szrj of registers used in the loop, NEW_REGS is the number of new variables
1448*38fd1498Szrj already added due to the invariant motion. The number of registers needed
1449*38fd1498Szrj for it is stored in *REGS_NEEDED. SPEED and CALL_P are flags passed
1450*38fd1498Szrj through to estimate_reg_pressure_cost. */
1451*38fd1498Szrj
static int
gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
		    unsigned *new_regs, unsigned regs_used,
		    bool speed, bool call_p)
{
  int comp_cost, size_cost;
  /* Workaround -Wmaybe-uninitialized false positive during
     profiledbootstrap by initializing it.  */
  enum reg_class cl = NO_REGS;
  int ret;

  /* New stamp, so visited marks left by earlier get_inv_cost walks do
     not leak into this query.  */
  actual_stamp++;

  ret = get_inv_cost (inv, &comp_cost, regs_needed, &cl);

  if (! flag_ira_loop_pressure)
    {
      /* Estimate the extra pressure cost of going from NEW_REGS to
	 NEW_REGS + REGS_NEEDED additional registers.  */
      size_cost = (estimate_reg_pressure_cost (new_regs[0] + regs_needed[0],
					       regs_used, speed, call_p)
		   - estimate_reg_pressure_cost (new_regs[0],
						 regs_used, speed, call_p));
    }
  else if (ret < 0)
    return -1;
  else if ((ret == 0) && (cl == NO_REGS))
    /* Hoist it anyway since it does not impact register pressure.  */
    return 1;
  else
    {
      int i;
      enum reg_class pressure_class;

      /* Reject the motion if any pressure class intersecting CL would
	 be pushed past its number of available hard registers.  */
      for (i = 0; i < ira_pressure_classes_num; i++)
	{
	  pressure_class = ira_pressure_classes[i];

	  if (!reg_classes_intersect_p (pressure_class, cl))
	    continue;

	  if ((int) new_regs[pressure_class]
	      + (int) regs_needed[pressure_class]
	      + LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
	      + IRA_LOOP_RESERVED_REGS
	      > ira_class_hard_regs_num[pressure_class])
	    break;
	}
      if (i < ira_pressure_classes_num)
	/* There will be register pressure excess and we want not to
	   make this loop invariant motion.  All loop invariants with
	   non-positive gains will be rejected in function
	   find_invariants_to_move.  Therefore we return the negative
	   number here.

	   One could think that this rejects also expensive loop
	   invariant motions and this will hurt code performance.
	   However numerous experiments with different heuristics
	   taking invariant cost into account did not confirm this
	   assumption.  There are possible explanations for this
	   result:
	   o probably all expensive invariants were already moved out
	     of the loop by PRE and gimple invariant motion pass.
	   o expensive invariant execution will be hidden by insn
	     scheduling or OOO processor hardware because usually such
	     invariants have a lot of freedom to be executed
	     out-of-order.
	   Another reason for ignoring invariant cost vs spilling cost
	   heuristics is also in difficulties to evaluate accurately
	   spill cost at this stage.  */
	return -1;
      else
	size_cost = 0;
    }

  /* Net gain: computation saved by hoisting minus the register
     pressure cost of holding the value across the loop.  */
  return comp_cost - size_cost;
}
1527*38fd1498Szrj
1528*38fd1498Szrj /* Finds invariant with best gain for moving. Returns the gain, stores
1529*38fd1498Szrj the invariant in *BEST and number of registers needed for it to
1530*38fd1498Szrj *REGS_NEEDED. REGS_USED is the number of registers used in the loop.
1531*38fd1498Szrj NEW_REGS is the number of new variables already added due to invariant
1532*38fd1498Szrj motion. */
1533*38fd1498Szrj
1534*38fd1498Szrj static int
best_gain_for_invariant(struct invariant ** best,unsigned * regs_needed,unsigned * new_regs,unsigned regs_used,bool speed,bool call_p)1535*38fd1498Szrj best_gain_for_invariant (struct invariant **best, unsigned *regs_needed,
1536*38fd1498Szrj unsigned *new_regs, unsigned regs_used,
1537*38fd1498Szrj bool speed, bool call_p)
1538*38fd1498Szrj {
1539*38fd1498Szrj struct invariant *inv;
1540*38fd1498Szrj int i, gain = 0, again;
1541*38fd1498Szrj unsigned aregs_needed[N_REG_CLASSES], invno;
1542*38fd1498Szrj
1543*38fd1498Szrj FOR_EACH_VEC_ELT (invariants, invno, inv)
1544*38fd1498Szrj {
1545*38fd1498Szrj if (inv->move)
1546*38fd1498Szrj continue;
1547*38fd1498Szrj
1548*38fd1498Szrj /* Only consider the "representatives" of equivalent invariants. */
1549*38fd1498Szrj if (inv->eqto != inv->invno)
1550*38fd1498Szrj continue;
1551*38fd1498Szrj
1552*38fd1498Szrj again = gain_for_invariant (inv, aregs_needed, new_regs, regs_used,
1553*38fd1498Szrj speed, call_p);
1554*38fd1498Szrj if (again > gain)
1555*38fd1498Szrj {
1556*38fd1498Szrj gain = again;
1557*38fd1498Szrj *best = inv;
1558*38fd1498Szrj if (! flag_ira_loop_pressure)
1559*38fd1498Szrj regs_needed[0] = aregs_needed[0];
1560*38fd1498Szrj else
1561*38fd1498Szrj {
1562*38fd1498Szrj for (i = 0; i < ira_pressure_classes_num; i++)
1563*38fd1498Szrj regs_needed[ira_pressure_classes[i]]
1564*38fd1498Szrj = aregs_needed[ira_pressure_classes[i]];
1565*38fd1498Szrj }
1566*38fd1498Szrj }
1567*38fd1498Szrj }
1568*38fd1498Szrj
1569*38fd1498Szrj return gain;
1570*38fd1498Szrj }
1571*38fd1498Szrj
1572*38fd1498Szrj /* Marks invariant INVNO and all its dependencies for moving. */
1573*38fd1498Szrj
1574*38fd1498Szrj static void
set_move_mark(unsigned invno,int gain)1575*38fd1498Szrj set_move_mark (unsigned invno, int gain)
1576*38fd1498Szrj {
1577*38fd1498Szrj struct invariant *inv = invariants[invno];
1578*38fd1498Szrj bitmap_iterator bi;
1579*38fd1498Szrj
1580*38fd1498Szrj /* Find the representative of the class of the equivalent invariants. */
1581*38fd1498Szrj inv = invariants[inv->eqto];
1582*38fd1498Szrj
1583*38fd1498Szrj if (inv->move)
1584*38fd1498Szrj return;
1585*38fd1498Szrj inv->move = true;
1586*38fd1498Szrj
1587*38fd1498Szrj if (dump_file)
1588*38fd1498Szrj {
1589*38fd1498Szrj if (gain >= 0)
1590*38fd1498Szrj fprintf (dump_file, "Decided to move invariant %d -- gain %d\n",
1591*38fd1498Szrj invno, gain);
1592*38fd1498Szrj else
1593*38fd1498Szrj fprintf (dump_file, "Decided to move dependent invariant %d\n",
1594*38fd1498Szrj invno);
1595*38fd1498Szrj };
1596*38fd1498Szrj
1597*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, invno, bi)
1598*38fd1498Szrj {
1599*38fd1498Szrj set_move_mark (invno, -1);
1600*38fd1498Szrj }
1601*38fd1498Szrj }
1602*38fd1498Szrj
1603*38fd1498Szrj /* Determines which invariants to move.  SPEED is true when optimizing
   the loop for speed and CALL_P is true when the loop contains a call
   (see the call in move_single_loop_invariants); both are forwarded to
   the cost estimates. */
1604*38fd1498Szrj
1605*38fd1498Szrj static void
find_invariants_to_move(bool speed,bool call_p)1606*38fd1498Szrj find_invariants_to_move (bool speed, bool call_p)
1607*38fd1498Szrj {
1608*38fd1498Szrj int gain;
1609*38fd1498Szrj unsigned i, regs_used, regs_needed[N_REG_CLASSES], new_regs[N_REG_CLASSES];
1610*38fd1498Szrj struct invariant *inv = NULL;
1611*38fd1498Szrj
/* Nothing to do if no invariants were found in the loop.  */
1612*38fd1498Szrj if (!invariants.length ())
1613*38fd1498Szrj return;
1614*38fd1498Szrj
1615*38fd1498Szrj if (flag_ira_loop_pressure)
1616*38fd1498Szrj /* REGS_USED is actually never used when the flag is on. */
1617*38fd1498Szrj regs_used = 0;
1618*38fd1498Szrj else
1619*38fd1498Szrj /* We do not really do a good job in estimating number of
1620*38fd1498Szrj registers used; we put some initial bound here to stand for
1621*38fd1498Szrj induction variables etc. that we do not detect. */
1622*38fd1498Szrj {
1623*38fd1498Szrj unsigned int n_regs = DF_REG_SIZE (df);
1624*38fd1498Szrj
1625*38fd1498Szrj regs_used = 2;
1626*38fd1498Szrj
1627*38fd1498Szrj for (i = 0; i < n_regs; i++)
1628*38fd1498Szrj {
1629*38fd1498Szrj if (!DF_REGNO_FIRST_DEF (i) && DF_REGNO_LAST_USE (i))
1630*38fd1498Szrj {
1631*38fd1498Szrj /* This is a value that is used but not changed inside loop. */
1632*38fd1498Szrj regs_used++;
1633*38fd1498Szrj }
1634*38fd1498Szrj }
1635*38fd1498Szrj }
1636*38fd1498Szrj
/* Reset the accounting of registers added by invariant motion so far:
   one counter in slot 0 without IRA pressure data, one per pressure
   class with it.  */
1637*38fd1498Szrj if (! flag_ira_loop_pressure)
1638*38fd1498Szrj new_regs[0] = regs_needed[0] = 0;
1639*38fd1498Szrj else
1640*38fd1498Szrj {
1641*38fd1498Szrj for (i = 0; (int) i < ira_pressure_classes_num; i++)
1642*38fd1498Szrj new_regs[ira_pressure_classes[i]] = 0;
1643*38fd1498Szrj }
/* Greedy selection: repeatedly pick the invariant with the largest
   positive gain, mark it (and its dependencies) for moving, and charge
   the registers it will occupy against NEW_REGS.  */
1644*38fd1498Szrj while ((gain = best_gain_for_invariant (&inv, regs_needed,
1645*38fd1498Szrj new_regs, regs_used,
1646*38fd1498Szrj speed, call_p)) > 0)
1647*38fd1498Szrj {
1648*38fd1498Szrj set_move_mark (inv->invno, gain);
1649*38fd1498Szrj if (! flag_ira_loop_pressure)
1650*38fd1498Szrj new_regs[0] += regs_needed[0];
1651*38fd1498Szrj else
1652*38fd1498Szrj {
1653*38fd1498Szrj for (i = 0; (int) i < ira_pressure_classes_num; i++)
1654*38fd1498Szrj new_regs[ira_pressure_classes[i]]
1655*38fd1498Szrj += regs_needed[ira_pressure_classes[i]];
1656*38fd1498Szrj }
1657*38fd1498Szrj }
1658*38fd1498Szrj }
1659*38fd1498Szrj
1660*38fd1498Szrj /* Replace the uses, reached by the definition of invariant INV, by REG.
1661*38fd1498Szrj
1662*38fd1498Szrj IN_GROUP is nonzero if this is part of a group of changes that must be
1663*38fd1498Szrj performed as a group. In that case, the changes will be stored. The
1664*38fd1498Szrj function `apply_change_group' will validate and apply the changes. */
1665*38fd1498Szrj
1666*38fd1498Szrj static int
replace_uses(struct invariant * inv,rtx reg,bool in_group)1667*38fd1498Szrj replace_uses (struct invariant *inv, rtx reg, bool in_group)
1668*38fd1498Szrj {
1669*38fd1498Szrj /* Replace the uses we know to be dominated. It saves work for copy
1670*38fd1498Szrj propagation, and also it is necessary so that dependent invariants
1671*38fd1498Szrj are computed right. */
1672*38fd1498Szrj if (inv->def)
1673*38fd1498Szrj {
1674*38fd1498Szrj struct use *use;
1675*38fd1498Szrj for (use = inv->def->uses; use; use = use->next)
1676*38fd1498Szrj validate_change (use->insn, use->pos, reg, true);
1677*38fd1498Szrj
1678*38fd1498Szrj /* If we aren't part of a larger group, apply the changes now. */
1679*38fd1498Szrj if (!in_group)
1680*38fd1498Szrj return apply_change_group ();
1681*38fd1498Szrj }
1682*38fd1498Szrj
1683*38fd1498Szrj return 1;
1684*38fd1498Szrj }
1685*38fd1498Szrj
1686*38fd1498Szrj /* Whether invariant INV setting REG can be moved out of LOOP, at the end of
1687*38fd1498Szrj the block preceding its header. */
1688*38fd1498Szrj
1689*38fd1498Szrj static bool
can_move_invariant_reg(struct loop * loop,struct invariant * inv,rtx reg)1690*38fd1498Szrj can_move_invariant_reg (struct loop *loop, struct invariant *inv, rtx reg)
1691*38fd1498Szrj {
1692*38fd1498Szrj df_ref def, use;
1693*38fd1498Szrj unsigned int dest_regno, defs_in_loop_count = 0;
1694*38fd1498Szrj rtx_insn *insn = inv->insn;
1695*38fd1498Szrj basic_block bb = BLOCK_FOR_INSN (inv->insn);
1696*38fd1498Szrj
1697*38fd1498Szrj /* We ignore hard register and memory access for cost and complexity reasons.
1698*38fd1498Szrj Hard register are few at this stage and expensive to consider as they
1699*38fd1498Szrj require building a separate data flow. Memory access would require using
1700*38fd1498Szrj df_simulate_* and can_move_insns_across functions and is more complex. */
1701*38fd1498Szrj if (!REG_P (reg) || HARD_REGISTER_P (reg))
1702*38fd1498Szrj return false;
1703*38fd1498Szrj
1704*38fd1498Szrj /* Check whether the set is always executed. We could omit this condition if
1705*38fd1498Szrj we know that the register is unused outside of the loop, but it does not
1706*38fd1498Szrj seem worth finding out. */
1707*38fd1498Szrj if (!inv->always_executed)
1708*38fd1498Szrj return false;
1709*38fd1498Szrj
1710*38fd1498Szrj /* Check that all uses that would be dominated by def are already dominated
1711*38fd1498Szrj by it. */
1712*38fd1498Szrj dest_regno = REGNO (reg);
/* Walk the df use chain of the destination register.  */
1713*38fd1498Szrj for (use = DF_REG_USE_CHAIN (dest_regno); use; use = DF_REF_NEXT_REG (use))
1714*38fd1498Szrj {
1715*38fd1498Szrj rtx_insn *use_insn;
1716*38fd1498Szrj basic_block use_bb;
1717*38fd1498Szrj
1718*38fd1498Szrj use_insn = DF_REF_INSN (use);
1719*38fd1498Szrj use_bb = BLOCK_FOR_INSN (use_insn);
1720*38fd1498Szrj
1721*38fd1498Szrj /* Ignore instruction considered for moving. */
1722*38fd1498Szrj if (use_insn == insn)
1723*38fd1498Szrj continue;
1724*38fd1498Szrj
1725*38fd1498Szrj /* Don't consider uses outside loop. */
1726*38fd1498Szrj if (!flow_bb_inside_loop_p (loop, use_bb))
1727*38fd1498Szrj continue;
1728*38fd1498Szrj
1729*38fd1498Szrj /* Don't move if a use is not dominated by def in insn. */
/* Within the same block, insn order is checked via the logical UIDs
   (LUIDs): a use at or before INSN is not dominated by the def.  */
1730*38fd1498Szrj if (use_bb == bb && DF_INSN_LUID (insn) >= DF_INSN_LUID (use_insn))
1731*38fd1498Szrj return false;
1732*38fd1498Szrj if (!dominated_by_p (CDI_DOMINATORS, use_bb, bb))
1733*38fd1498Szrj return false;
1734*38fd1498Szrj }
1735*38fd1498Szrj
1736*38fd1498Szrj /* Check for other defs. Any other def in the loop might reach a use
1737*38fd1498Szrj currently reached by the def in insn. */
1738*38fd1498Szrj for (def = DF_REG_DEF_CHAIN (dest_regno); def; def = DF_REF_NEXT_REG (def))
1739*38fd1498Szrj {
1740*38fd1498Szrj basic_block def_bb = DF_REF_BB (def);
1741*38fd1498Szrj
1742*38fd1498Szrj /* Defs in exit block cannot reach a use they weren't already. */
1743*38fd1498Szrj if (single_succ_p (def_bb))
1744*38fd1498Szrj {
1745*38fd1498Szrj basic_block def_bb_succ;
1746*38fd1498Szrj
1747*38fd1498Szrj def_bb_succ = single_succ (def_bb);
1748*38fd1498Szrj if (!flow_bb_inside_loop_p (loop, def_bb_succ))
1749*38fd1498Szrj continue;
1750*38fd1498Szrj }
1751*38fd1498Szrj
/* More than one remaining def of the register (INSN's own def counts
   as the first) makes the motion unsafe; be conservative.  */
1752*38fd1498Szrj if (++defs_in_loop_count > 1)
1753*38fd1498Szrj return false;
1754*38fd1498Szrj }
1755*38fd1498Szrj
1756*38fd1498Szrj return true;
1757*38fd1498Szrj }
1758*38fd1498Szrj
1759*38fd1498Szrj /* Move invariant INVNO out of the LOOP. Returns true if this succeeds, false
1760*38fd1498Szrj otherwise. */
1761*38fd1498Szrj
1762*38fd1498Szrj static bool
move_invariant_reg(struct loop * loop,unsigned invno)1763*38fd1498Szrj move_invariant_reg (struct loop *loop, unsigned invno)
1764*38fd1498Szrj {
1765*38fd1498Szrj struct invariant *inv = invariants[invno];
1766*38fd1498Szrj struct invariant *repr = invariants[inv->eqto];
1767*38fd1498Szrj unsigned i;
1768*38fd1498Szrj basic_block preheader = loop_preheader_edge (loop)->src;
1769*38fd1498Szrj rtx reg, set, dest, note;
1770*38fd1498Szrj bitmap_iterator bi;
1771*38fd1498Szrj int regno = -1;
1772*38fd1498Szrj
/* Already moved (inv->reg is set on success).  */
1773*38fd1498Szrj if (inv->reg)
1774*38fd1498Szrj return true;
/* Not selected for motion.  */
1775*38fd1498Szrj if (!repr->move)
1776*38fd1498Szrj return false;
1777*38fd1498Szrj
1778*38fd1498Szrj /* If this is a representative of the class of equivalent invariants,
1779*38fd1498Szrj really move the invariant. Otherwise just replace its use with
1780*38fd1498Szrj the register used for the representative. */
1781*38fd1498Szrj if (inv == repr)
1782*38fd1498Szrj {
/* All dependencies must be moved first; any failure aborts.  */
1783*38fd1498Szrj if (inv->depends_on)
1784*38fd1498Szrj {
1785*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, i, bi)
1786*38fd1498Szrj {
1787*38fd1498Szrj if (!move_invariant_reg (loop, i))
1788*38fd1498Szrj goto fail;
1789*38fd1498Szrj }
1790*38fd1498Szrj }
1791*38fd1498Szrj
1792*38fd1498Szrj /* If possible, just move the set out of the loop. Otherwise, we
1793*38fd1498Szrj need to create a temporary register. */
/* NOTE(review): single_set is assumed to succeed here -- presumably
   invariants are only recorded for single-set insns; confirm in the
   invariant-recording code.  */
1794*38fd1498Szrj set = single_set (inv->insn);
1795*38fd1498Szrj reg = dest = SET_DEST (set);
1796*38fd1498Szrj if (GET_CODE (reg) == SUBREG)
1797*38fd1498Szrj reg = SUBREG_REG (reg);
1798*38fd1498Szrj if (REG_P (reg))
1799*38fd1498Szrj regno = REGNO (reg);
1800*38fd1498Szrj
1801*38fd1498Szrj if (!can_move_invariant_reg (loop, inv, dest))
1802*38fd1498Szrj {
1803*38fd1498Szrj reg = gen_reg_rtx_and_attrs (dest);
1804*38fd1498Szrj
1805*38fd1498Szrj /* Try replacing the destination by a new pseudoregister. */
1806*38fd1498Szrj validate_change (inv->insn, &SET_DEST (set), reg, true);
1807*38fd1498Szrj
1808*38fd1498Szrj /* As well as all the dominated uses. */
1809*38fd1498Szrj replace_uses (inv, reg, true);
1810*38fd1498Szrj
1811*38fd1498Szrj /* And validate all the changes. */
1812*38fd1498Szrj if (!apply_change_group ())
1813*38fd1498Szrj goto fail;
1814*38fd1498Szrj
/* Compute into REG outside the loop; keep a copy back into the
   original destination inside the loop for the remaining uses.  */
1815*38fd1498Szrj emit_insn_after (gen_move_insn (dest, reg), inv->insn);
1816*38fd1498Szrj }
1817*38fd1498Szrj else if (dump_file)
1818*38fd1498Szrj fprintf (dump_file, "Invariant %d moved without introducing a new "
1819*38fd1498Szrj "temporary register\n", invno);
/* Physically move the (single) insn to the end of the preheader.  */
1820*38fd1498Szrj reorder_insns (inv->insn, inv->insn, BB_END (preheader));
1821*38fd1498Szrj df_recompute_luids (preheader);
1822*38fd1498Szrj
1823*38fd1498Szrj /* If there is a REG_EQUAL note on the insn we just moved, and the
1824*38fd1498Szrj insn is in a basic block that is not always executed or the note
1825*38fd1498Szrj contains something for which we don't know the invariant status,
1826*38fd1498Szrj the note may no longer be valid after we move the insn. Note that
1827*38fd1498Szrj uses in REG_EQUAL notes are taken into account in the computation
1828*38fd1498Szrj of invariants, so it is safe to retain the note even if it contains
1829*38fd1498Szrj register references for which we know the invariant status. */
1830*38fd1498Szrj if ((note = find_reg_note (inv->insn, REG_EQUAL, NULL_RTX))
1831*38fd1498Szrj && (!inv->always_executed
1832*38fd1498Szrj || !check_maybe_invariant (XEXP (note, 0))))
1833*38fd1498Szrj remove_note (inv->insn, note);
1834*38fd1498Szrj }
1835*38fd1498Szrj else
1836*38fd1498Szrj {
/* Non-representative: make sure the representative is moved, then
   replace this invariant's computation by a copy from the
   representative's register and delete the original insn.  */
1837*38fd1498Szrj if (!move_invariant_reg (loop, repr->invno))
1838*38fd1498Szrj goto fail;
1839*38fd1498Szrj reg = repr->reg;
1840*38fd1498Szrj regno = repr->orig_regno;
1841*38fd1498Szrj if (!replace_uses (inv, reg, false))
1842*38fd1498Szrj goto fail;
1843*38fd1498Szrj set = single_set (inv->insn);
1844*38fd1498Szrj emit_insn_after (gen_move_insn (SET_DEST (set), reg), inv->insn);
1845*38fd1498Szrj delete_insn (inv->insn);
1846*38fd1498Szrj }
1847*38fd1498Szrj
/* Record success: REG now holds the invariant value outside the loop.  */
1848*38fd1498Szrj inv->reg = reg;
1849*38fd1498Szrj inv->orig_regno = regno;
1850*38fd1498Szrj
1851*38fd1498Szrj return true;
1852*38fd1498Szrj
1853*38fd1498Szrj fail:
1854*38fd1498Szrj /* If we failed, clear move flag, so that we do not try to move inv
1855*38fd1498Szrj again. */
1856*38fd1498Szrj if (dump_file)
1857*38fd1498Szrj fprintf (dump_file, "Failed to move invariant %d\n", invno);
1858*38fd1498Szrj inv->move = false;
1859*38fd1498Szrj inv->reg = NULL_RTX;
1860*38fd1498Szrj inv->orig_regno = -1;
1861*38fd1498Szrj
1862*38fd1498Szrj return false;
1863*38fd1498Szrj }
1864*38fd1498Szrj
1865*38fd1498Szrj /* Move selected invariant out of the LOOP. Newly created regs are marked
1866*38fd1498Szrj in TEMPORARY_REGS. */
1867*38fd1498Szrj
1868*38fd1498Szrj static void
move_invariants(struct loop * loop)1869*38fd1498Szrj move_invariants (struct loop *loop)
1870*38fd1498Szrj {
1871*38fd1498Szrj struct invariant *inv;
1872*38fd1498Szrj unsigned i;
1873*38fd1498Szrj
1874*38fd1498Szrj FOR_EACH_VEC_ELT (invariants, i, inv)
1875*38fd1498Szrj move_invariant_reg (loop, i);
1876*38fd1498Szrj if (flag_ira_loop_pressure && resize_reg_info ())
1877*38fd1498Szrj {
1878*38fd1498Szrj FOR_EACH_VEC_ELT (invariants, i, inv)
1879*38fd1498Szrj if (inv->reg != NULL_RTX)
1880*38fd1498Szrj {
1881*38fd1498Szrj if (inv->orig_regno >= 0)
1882*38fd1498Szrj setup_reg_classes (REGNO (inv->reg),
1883*38fd1498Szrj reg_preferred_class (inv->orig_regno),
1884*38fd1498Szrj reg_alternate_class (inv->orig_regno),
1885*38fd1498Szrj reg_allocno_class (inv->orig_regno));
1886*38fd1498Szrj else
1887*38fd1498Szrj setup_reg_classes (REGNO (inv->reg),
1888*38fd1498Szrj GENERAL_REGS, NO_REGS, GENERAL_REGS);
1889*38fd1498Szrj }
1890*38fd1498Szrj }
1891*38fd1498Szrj }
1892*38fd1498Szrj
1893*38fd1498Szrj /* Initializes invariant motion data. */
1894*38fd1498Szrj
1895*38fd1498Szrj static void
init_inv_motion_data(void)1896*38fd1498Szrj init_inv_motion_data (void)
1897*38fd1498Szrj {
1898*38fd1498Szrj actual_stamp = 1;
1899*38fd1498Szrj
1900*38fd1498Szrj invariants.create (100);
1901*38fd1498Szrj }
1902*38fd1498Szrj
1903*38fd1498Szrj /* Frees the data allocated by invariant motion. */
1904*38fd1498Szrj
1905*38fd1498Szrj static void
free_inv_motion_data(void)1906*38fd1498Szrj free_inv_motion_data (void)
1907*38fd1498Szrj {
1908*38fd1498Szrj unsigned i;
1909*38fd1498Szrj struct def *def;
1910*38fd1498Szrj struct invariant *inv;
1911*38fd1498Szrj
1912*38fd1498Szrj check_invariant_table_size ();
1913*38fd1498Szrj for (i = 0; i < DF_DEFS_TABLE_SIZE (); i++)
1914*38fd1498Szrj {
1915*38fd1498Szrj inv = invariant_table[i];
1916*38fd1498Szrj if (inv)
1917*38fd1498Szrj {
1918*38fd1498Szrj def = inv->def;
1919*38fd1498Szrj gcc_assert (def != NULL);
1920*38fd1498Szrj
1921*38fd1498Szrj free_use_list (def->uses);
1922*38fd1498Szrj free (def);
1923*38fd1498Szrj invariant_table[i] = NULL;
1924*38fd1498Szrj }
1925*38fd1498Szrj }
1926*38fd1498Szrj
1927*38fd1498Szrj FOR_EACH_VEC_ELT (invariants, i, inv)
1928*38fd1498Szrj {
1929*38fd1498Szrj BITMAP_FREE (inv->depends_on);
1930*38fd1498Szrj free (inv);
1931*38fd1498Szrj }
1932*38fd1498Szrj invariants.release ();
1933*38fd1498Szrj }
1934*38fd1498Szrj
1935*38fd1498Szrj /* Move the invariants out of the LOOP. */
1936*38fd1498Szrj
1937*38fd1498Szrj static void
move_single_loop_invariants(struct loop * loop)1938*38fd1498Szrj move_single_loop_invariants (struct loop *loop)
1939*38fd1498Szrj {
1940*38fd1498Szrj init_inv_motion_data ();
1941*38fd1498Szrj
1942*38fd1498Szrj find_invariants (loop);
1943*38fd1498Szrj find_invariants_to_move (optimize_loop_for_speed_p (loop),
1944*38fd1498Szrj LOOP_DATA (loop)->has_call);
1945*38fd1498Szrj move_invariants (loop);
1946*38fd1498Szrj
1947*38fd1498Szrj free_inv_motion_data ();
1948*38fd1498Szrj }
1949*38fd1498Szrj
1950*38fd1498Szrj /* Releases the auxiliary data for LOOP. */
1951*38fd1498Szrj
1952*38fd1498Szrj static void
free_loop_data(struct loop * loop)1953*38fd1498Szrj free_loop_data (struct loop *loop)
1954*38fd1498Szrj {
1955*38fd1498Szrj struct loop_data *data = LOOP_DATA (loop);
1956*38fd1498Szrj if (!data)
1957*38fd1498Szrj return;
1958*38fd1498Szrj
1959*38fd1498Szrj bitmap_clear (&LOOP_DATA (loop)->regs_ref);
1960*38fd1498Szrj bitmap_clear (&LOOP_DATA (loop)->regs_live);
1961*38fd1498Szrj free (data);
1962*38fd1498Szrj loop->aux = NULL;
1963*38fd1498Szrj }
1964*38fd1498Szrj
1965*38fd1498Szrj
1966*38fd1498Szrj
1967*38fd1498Szrj /* Registers currently living. */
1968*38fd1498Szrj static bitmap_head curr_regs_live;
1969*38fd1498Szrj
1970*38fd1498Szrj /* Current reg pressure for each pressure class. */
1971*38fd1498Szrj static int curr_reg_pressure[N_REG_CLASSES];
1972*38fd1498Szrj
1973*38fd1498Szrj /* Record all regs that are set in any one insn. Communication from
1974*38fd1498Szrj mark_reg_{store,clobber} and global_conflicts. Asm can refer to
1975*38fd1498Szrj all hard-registers. */
/* NOTE(review): there is no global_conflicts in this file; the actual
   consumers here are mark_reg_{store,clobber} (producers) and
   calculate_loop_reg_pressure (REG_UNUSED processing) -- the comment
   above looks like a stale copy from an older allocator.  */
1976*38fd1498Szrj static rtx regs_set[(FIRST_PSEUDO_REGISTER > MAX_RECOG_OPERANDS
1977*38fd1498Szrj ? FIRST_PSEUDO_REGISTER : MAX_RECOG_OPERANDS) * 2];
1978*38fd1498Szrj /* Number of regs stored in the previous array. */
1979*38fd1498Szrj static int n_regs_set;
1980*38fd1498Szrj
1981*38fd1498Szrj /* Return pressure class and number of needed hard registers (through
1982*38fd1498Szrj *NREGS) of register REGNO. */
1983*38fd1498Szrj static enum reg_class
get_regno_pressure_class(int regno,int * nregs)1984*38fd1498Szrj get_regno_pressure_class (int regno, int *nregs)
1985*38fd1498Szrj {
1986*38fd1498Szrj if (regno >= FIRST_PSEUDO_REGISTER)
1987*38fd1498Szrj {
1988*38fd1498Szrj enum reg_class pressure_class;
1989*38fd1498Szrj
1990*38fd1498Szrj pressure_class = reg_allocno_class (regno);
1991*38fd1498Szrj pressure_class = ira_pressure_class_translate[pressure_class];
1992*38fd1498Szrj *nregs
1993*38fd1498Szrj = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
1994*38fd1498Szrj return pressure_class;
1995*38fd1498Szrj }
1996*38fd1498Szrj else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
1997*38fd1498Szrj && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
1998*38fd1498Szrj {
1999*38fd1498Szrj *nregs = 1;
2000*38fd1498Szrj return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
2001*38fd1498Szrj }
2002*38fd1498Szrj else
2003*38fd1498Szrj {
2004*38fd1498Szrj *nregs = 0;
2005*38fd1498Szrj return NO_REGS;
2006*38fd1498Szrj }
2007*38fd1498Szrj }
2008*38fd1498Szrj
2009*38fd1498Szrj /* Increase (if INCR_P) or decrease current register pressure for
2010*38fd1498Szrj register REGNO. */
2011*38fd1498Szrj static void
change_pressure(int regno,bool incr_p)2012*38fd1498Szrj change_pressure (int regno, bool incr_p)
2013*38fd1498Szrj {
2014*38fd1498Szrj int nregs;
2015*38fd1498Szrj enum reg_class pressure_class;
2016*38fd1498Szrj
2017*38fd1498Szrj pressure_class = get_regno_pressure_class (regno, &nregs);
2018*38fd1498Szrj if (! incr_p)
2019*38fd1498Szrj curr_reg_pressure[pressure_class] -= nregs;
2020*38fd1498Szrj else
2021*38fd1498Szrj {
2022*38fd1498Szrj curr_reg_pressure[pressure_class] += nregs;
2023*38fd1498Szrj if (LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
2024*38fd1498Szrj < curr_reg_pressure[pressure_class])
2025*38fd1498Szrj LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
2026*38fd1498Szrj = curr_reg_pressure[pressure_class];
2027*38fd1498Szrj }
2028*38fd1498Szrj }
2029*38fd1498Szrj
2030*38fd1498Szrj /* Mark REGNO birth. */
2031*38fd1498Szrj static void
mark_regno_live(int regno)2032*38fd1498Szrj mark_regno_live (int regno)
2033*38fd1498Szrj {
2034*38fd1498Szrj struct loop *loop;
2035*38fd1498Szrj
2036*38fd1498Szrj for (loop = curr_loop;
2037*38fd1498Szrj loop != current_loops->tree_root;
2038*38fd1498Szrj loop = loop_outer (loop))
2039*38fd1498Szrj bitmap_set_bit (&LOOP_DATA (loop)->regs_live, regno);
2040*38fd1498Szrj if (!bitmap_set_bit (&curr_regs_live, regno))
2041*38fd1498Szrj return;
2042*38fd1498Szrj change_pressure (regno, true);
2043*38fd1498Szrj }
2044*38fd1498Szrj
2045*38fd1498Szrj /* Mark REGNO death. */
2046*38fd1498Szrj static void
mark_regno_death(int regno)2047*38fd1498Szrj mark_regno_death (int regno)
2048*38fd1498Szrj {
2049*38fd1498Szrj if (! bitmap_clear_bit (&curr_regs_live, regno))
2050*38fd1498Szrj return;
2051*38fd1498Szrj change_pressure (regno, false);
2052*38fd1498Szrj }
2053*38fd1498Szrj
2054*38fd1498Szrj /* Mark setting register REG. */
2055*38fd1498Szrj static void
mark_reg_store(rtx reg,const_rtx setter ATTRIBUTE_UNUSED,void * data ATTRIBUTE_UNUSED)2056*38fd1498Szrj mark_reg_store (rtx reg, const_rtx setter ATTRIBUTE_UNUSED,
2057*38fd1498Szrj void *data ATTRIBUTE_UNUSED)
2058*38fd1498Szrj {
2059*38fd1498Szrj if (GET_CODE (reg) == SUBREG)
2060*38fd1498Szrj reg = SUBREG_REG (reg);
2061*38fd1498Szrj
2062*38fd1498Szrj if (! REG_P (reg))
2063*38fd1498Szrj return;
2064*38fd1498Szrj
2065*38fd1498Szrj regs_set[n_regs_set++] = reg;
2066*38fd1498Szrj
2067*38fd1498Szrj unsigned int end_regno = END_REGNO (reg);
2068*38fd1498Szrj for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
2069*38fd1498Szrj mark_regno_live (regno);
2070*38fd1498Szrj }
2071*38fd1498Szrj
2072*38fd1498Szrj /* Mark clobbering register REG. */
2073*38fd1498Szrj static void
mark_reg_clobber(rtx reg,const_rtx setter,void * data)2074*38fd1498Szrj mark_reg_clobber (rtx reg, const_rtx setter, void *data)
2075*38fd1498Szrj {
2076*38fd1498Szrj if (GET_CODE (setter) == CLOBBER)
2077*38fd1498Szrj mark_reg_store (reg, setter, data);
2078*38fd1498Szrj }
2079*38fd1498Szrj
2080*38fd1498Szrj /* Mark register REG death. */
2081*38fd1498Szrj static void
mark_reg_death(rtx reg)2082*38fd1498Szrj mark_reg_death (rtx reg)
2083*38fd1498Szrj {
2084*38fd1498Szrj unsigned int end_regno = END_REGNO (reg);
2085*38fd1498Szrj for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
2086*38fd1498Szrj mark_regno_death (regno);
2087*38fd1498Szrj }
2088*38fd1498Szrj
2089*38fd1498Szrj /* Mark occurrence of registers in X for the current loop. */
2090*38fd1498Szrj static void
mark_ref_regs(rtx x)2091*38fd1498Szrj mark_ref_regs (rtx x)
2092*38fd1498Szrj {
2093*38fd1498Szrj RTX_CODE code;
2094*38fd1498Szrj int i;
2095*38fd1498Szrj const char *fmt;
2096*38fd1498Szrj
2097*38fd1498Szrj if (!x)
2098*38fd1498Szrj return;
2099*38fd1498Szrj
2100*38fd1498Szrj code = GET_CODE (x);
2101*38fd1498Szrj if (code == REG)
2102*38fd1498Szrj {
2103*38fd1498Szrj struct loop *loop;
2104*38fd1498Szrj
2105*38fd1498Szrj for (loop = curr_loop;
2106*38fd1498Szrj loop != current_loops->tree_root;
2107*38fd1498Szrj loop = loop_outer (loop))
2108*38fd1498Szrj bitmap_set_bit (&LOOP_DATA (loop)->regs_ref, REGNO (x));
2109*38fd1498Szrj return;
2110*38fd1498Szrj }
2111*38fd1498Szrj
2112*38fd1498Szrj fmt = GET_RTX_FORMAT (code);
2113*38fd1498Szrj for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2114*38fd1498Szrj if (fmt[i] == 'e')
2115*38fd1498Szrj mark_ref_regs (XEXP (x, i));
2116*38fd1498Szrj else if (fmt[i] == 'E')
2117*38fd1498Szrj {
2118*38fd1498Szrj int j;
2119*38fd1498Szrj
2120*38fd1498Szrj for (j = 0; j < XVECLEN (x, i); j++)
2121*38fd1498Szrj mark_ref_regs (XVECEXP (x, i, j));
2122*38fd1498Szrj }
2123*38fd1498Szrj }
2124*38fd1498Szrj
2125*38fd1498Szrj /* Calculate register pressure in the loops. */
2126*38fd1498Szrj static void
calculate_loop_reg_pressure(void)2127*38fd1498Szrj calculate_loop_reg_pressure (void)
2128*38fd1498Szrj {
2129*38fd1498Szrj int i;
2130*38fd1498Szrj unsigned int j;
2131*38fd1498Szrj bitmap_iterator bi;
2132*38fd1498Szrj basic_block bb;
2133*38fd1498Szrj rtx_insn *insn;
2134*38fd1498Szrj rtx link;
2135*38fd1498Szrj struct loop *loop, *parent;
2136*38fd1498Szrj
2137*38fd1498Szrj FOR_EACH_LOOP (loop, 0)
2138*38fd1498Szrj if (loop->aux == NULL)
2139*38fd1498Szrj {
2140*38fd1498Szrj loop->aux = xcalloc (1, sizeof (struct loop_data));
2141*38fd1498Szrj bitmap_initialize (&LOOP_DATA (loop)->regs_ref, ®_obstack);
2142*38fd1498Szrj bitmap_initialize (&LOOP_DATA (loop)->regs_live, ®_obstack);
2143*38fd1498Szrj }
2144*38fd1498Szrj ira_setup_eliminable_regset ();
2145*38fd1498Szrj bitmap_initialize (&curr_regs_live, ®_obstack);
2146*38fd1498Szrj FOR_EACH_BB_FN (bb, cfun)
2147*38fd1498Szrj {
2148*38fd1498Szrj curr_loop = bb->loop_father;
2149*38fd1498Szrj if (curr_loop == current_loops->tree_root)
2150*38fd1498Szrj continue;
2151*38fd1498Szrj
2152*38fd1498Szrj for (loop = curr_loop;
2153*38fd1498Szrj loop != current_loops->tree_root;
2154*38fd1498Szrj loop = loop_outer (loop))
2155*38fd1498Szrj bitmap_ior_into (&LOOP_DATA (loop)->regs_live, DF_LR_IN (bb));
2156*38fd1498Szrj
2157*38fd1498Szrj bitmap_copy (&curr_regs_live, DF_LR_IN (bb));
2158*38fd1498Szrj for (i = 0; i < ira_pressure_classes_num; i++)
2159*38fd1498Szrj curr_reg_pressure[ira_pressure_classes[i]] = 0;
2160*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (&curr_regs_live, 0, j, bi)
2161*38fd1498Szrj change_pressure (j, true);
2162*38fd1498Szrj
2163*38fd1498Szrj FOR_BB_INSNS (bb, insn)
2164*38fd1498Szrj {
2165*38fd1498Szrj if (! NONDEBUG_INSN_P (insn))
2166*38fd1498Szrj continue;
2167*38fd1498Szrj
2168*38fd1498Szrj mark_ref_regs (PATTERN (insn));
2169*38fd1498Szrj n_regs_set = 0;
2170*38fd1498Szrj note_stores (PATTERN (insn), mark_reg_clobber, NULL);
2171*38fd1498Szrj
2172*38fd1498Szrj /* Mark any registers dead after INSN as dead now. */
2173*38fd1498Szrj
2174*38fd1498Szrj for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
2175*38fd1498Szrj if (REG_NOTE_KIND (link) == REG_DEAD)
2176*38fd1498Szrj mark_reg_death (XEXP (link, 0));
2177*38fd1498Szrj
2178*38fd1498Szrj /* Mark any registers set in INSN as live,
2179*38fd1498Szrj and mark them as conflicting with all other live regs.
2180*38fd1498Szrj Clobbers are processed again, so they conflict with
2181*38fd1498Szrj the registers that are set. */
2182*38fd1498Szrj
2183*38fd1498Szrj note_stores (PATTERN (insn), mark_reg_store, NULL);
2184*38fd1498Szrj
2185*38fd1498Szrj if (AUTO_INC_DEC)
2186*38fd1498Szrj for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
2187*38fd1498Szrj if (REG_NOTE_KIND (link) == REG_INC)
2188*38fd1498Szrj mark_reg_store (XEXP (link, 0), NULL_RTX, NULL);
2189*38fd1498Szrj
2190*38fd1498Szrj while (n_regs_set-- > 0)
2191*38fd1498Szrj {
2192*38fd1498Szrj rtx note = find_regno_note (insn, REG_UNUSED,
2193*38fd1498Szrj REGNO (regs_set[n_regs_set]));
2194*38fd1498Szrj if (! note)
2195*38fd1498Szrj continue;
2196*38fd1498Szrj
2197*38fd1498Szrj mark_reg_death (XEXP (note, 0));
2198*38fd1498Szrj }
2199*38fd1498Szrj }
2200*38fd1498Szrj }
2201*38fd1498Szrj bitmap_clear (&curr_regs_live);
2202*38fd1498Szrj if (flag_ira_region == IRA_REGION_MIXED
2203*38fd1498Szrj || flag_ira_region == IRA_REGION_ALL)
2204*38fd1498Szrj FOR_EACH_LOOP (loop, 0)
2205*38fd1498Szrj {
2206*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi)
2207*38fd1498Szrj if (! bitmap_bit_p (&LOOP_DATA (loop)->regs_ref, j))
2208*38fd1498Szrj {
2209*38fd1498Szrj enum reg_class pressure_class;
2210*38fd1498Szrj int nregs;
2211*38fd1498Szrj
2212*38fd1498Szrj pressure_class = get_regno_pressure_class (j, &nregs);
2213*38fd1498Szrj LOOP_DATA (loop)->max_reg_pressure[pressure_class] -= nregs;
2214*38fd1498Szrj }
2215*38fd1498Szrj }
2216*38fd1498Szrj if (dump_file == NULL)
2217*38fd1498Szrj return;
2218*38fd1498Szrj FOR_EACH_LOOP (loop, 0)
2219*38fd1498Szrj {
2220*38fd1498Szrj parent = loop_outer (loop);
2221*38fd1498Szrj fprintf (dump_file, "\n Loop %d (parent %d, header bb%d, depth %d)\n",
2222*38fd1498Szrj loop->num, (parent == NULL ? -1 : parent->num),
2223*38fd1498Szrj loop->header->index, loop_depth (loop));
2224*38fd1498Szrj fprintf (dump_file, "\n ref. regnos:");
2225*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_ref, 0, j, bi)
2226*38fd1498Szrj fprintf (dump_file, " %d", j);
2227*38fd1498Szrj fprintf (dump_file, "\n live regnos:");
2228*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi)
2229*38fd1498Szrj fprintf (dump_file, " %d", j);
2230*38fd1498Szrj fprintf (dump_file, "\n Pressure:");
2231*38fd1498Szrj for (i = 0; (int) i < ira_pressure_classes_num; i++)
2232*38fd1498Szrj {
2233*38fd1498Szrj enum reg_class pressure_class;
2234*38fd1498Szrj
2235*38fd1498Szrj pressure_class = ira_pressure_classes[i];
2236*38fd1498Szrj if (LOOP_DATA (loop)->max_reg_pressure[pressure_class] == 0)
2237*38fd1498Szrj continue;
2238*38fd1498Szrj fprintf (dump_file, " %s=%d", reg_class_names[pressure_class],
2239*38fd1498Szrj LOOP_DATA (loop)->max_reg_pressure[pressure_class]);
2240*38fd1498Szrj }
2241*38fd1498Szrj fprintf (dump_file, "\n");
2242*38fd1498Szrj }
2243*38fd1498Szrj }
2244*38fd1498Szrj
2245*38fd1498Szrj
2246*38fd1498Szrj
2247*38fd1498Szrj /* Move the invariants out of the loops. */
2248*38fd1498Szrj
2249*38fd1498Szrj void
move_loop_invariants(void)2250*38fd1498Szrj move_loop_invariants (void)
2251*38fd1498Szrj {
2252*38fd1498Szrj struct loop *loop;
2253*38fd1498Szrj
2254*38fd1498Szrj if (flag_ira_loop_pressure)
2255*38fd1498Szrj {
2256*38fd1498Szrj df_analyze ();
2257*38fd1498Szrj regstat_init_n_sets_and_refs ();
2258*38fd1498Szrj ira_set_pseudo_classes (true, dump_file);
2259*38fd1498Szrj calculate_loop_reg_pressure ();
2260*38fd1498Szrj regstat_free_n_sets_and_refs ();
2261*38fd1498Szrj }
2262*38fd1498Szrj df_set_flags (DF_EQ_NOTES + DF_DEFER_INSN_RESCAN);
2263*38fd1498Szrj /* Process the loops, innermost first. */
2264*38fd1498Szrj FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2265*38fd1498Szrj {
2266*38fd1498Szrj curr_loop = loop;
2267*38fd1498Szrj /* move_single_loop_invariants for very large loops
2268*38fd1498Szrj is time consuming and might need a lot of memory. */
2269*38fd1498Szrj if (loop->num_nodes <= (unsigned) LOOP_INVARIANT_MAX_BBS_IN_LOOP)
2270*38fd1498Szrj move_single_loop_invariants (loop);
2271*38fd1498Szrj }
2272*38fd1498Szrj
2273*38fd1498Szrj FOR_EACH_LOOP (loop, 0)
2274*38fd1498Szrj {
2275*38fd1498Szrj free_loop_data (loop);
2276*38fd1498Szrj }
2277*38fd1498Szrj
2278*38fd1498Szrj if (flag_ira_loop_pressure)
2279*38fd1498Szrj /* There is no sense to keep this info because it was most
2280*38fd1498Szrj probably outdated by subsequent passes. */
2281*38fd1498Szrj free_reg_info ();
2282*38fd1498Szrj free (invariant_table);
2283*38fd1498Szrj invariant_table = NULL;
2284*38fd1498Szrj invariant_table_size = 0;
2285*38fd1498Szrj
2286*38fd1498Szrj checking_verify_flow_info ();
2287*38fd1498Szrj }
2288