1*404b540aSrobert /* Loop invariant motion.
2*404b540aSrobert Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3*404b540aSrobert
4*404b540aSrobert This file is part of GCC.
5*404b540aSrobert
6*404b540aSrobert GCC is free software; you can redistribute it and/or modify it
7*404b540aSrobert under the terms of the GNU General Public License as published by the
8*404b540aSrobert Free Software Foundation; either version 2, or (at your option) any
9*404b540aSrobert later version.
10*404b540aSrobert
11*404b540aSrobert GCC is distributed in the hope that it will be useful, but WITHOUT
12*404b540aSrobert ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13*404b540aSrobert FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14*404b540aSrobert for more details.
15*404b540aSrobert
16*404b540aSrobert You should have received a copy of the GNU General Public License
17*404b540aSrobert along with GCC; see the file COPYING. If not, write to the Free
18*404b540aSrobert Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19*404b540aSrobert 02110-1301, USA. */
20*404b540aSrobert
21*404b540aSrobert #include "config.h"
22*404b540aSrobert #include "system.h"
23*404b540aSrobert #include "coretypes.h"
24*404b540aSrobert #include "tm.h"
25*404b540aSrobert #include "tree.h"
26*404b540aSrobert #include "rtl.h"
27*404b540aSrobert #include "tm_p.h"
28*404b540aSrobert #include "hard-reg-set.h"
29*404b540aSrobert #include "basic-block.h"
30*404b540aSrobert #include "output.h"
31*404b540aSrobert #include "diagnostic.h"
32*404b540aSrobert #include "tree-flow.h"
33*404b540aSrobert #include "tree-dump.h"
34*404b540aSrobert #include "timevar.h"
35*404b540aSrobert #include "cfgloop.h"
36*404b540aSrobert #include "domwalk.h"
37*404b540aSrobert #include "params.h"
38*404b540aSrobert #include "tree-pass.h"
39*404b540aSrobert #include "flags.h"
40*404b540aSrobert #include "real.h"
41*404b540aSrobert #include "hashtab.h"
42*404b540aSrobert
43*404b540aSrobert /* TODO: Support for predicated code motion. I.e.
44*404b540aSrobert
45*404b540aSrobert while (1)
46*404b540aSrobert {
47*404b540aSrobert if (cond)
48*404b540aSrobert {
49*404b540aSrobert a = inv;
50*404b540aSrobert something;
51*404b540aSrobert }
52*404b540aSrobert }
53*404b540aSrobert
54*404b540aSrobert Where COND and INV are is invariants, but evaluating INV may trap or be
55*404b540aSrobert invalid from some other reason if !COND. This may be transformed to
56*404b540aSrobert
57*404b540aSrobert if (cond)
58*404b540aSrobert a = inv;
59*404b540aSrobert while (1)
60*404b540aSrobert {
61*404b540aSrobert if (cond)
62*404b540aSrobert something;
63*404b540aSrobert } */
64*404b540aSrobert
65*404b540aSrobert /* A type for the list of statements that have to be moved in order to be able
66*404b540aSrobert to hoist an invariant computation. */
67*404b540aSrobert
68*404b540aSrobert struct depend
69*404b540aSrobert {
70*404b540aSrobert tree stmt;
71*404b540aSrobert struct depend *next;
72*404b540aSrobert };
73*404b540aSrobert
74*404b540aSrobert /* The auxiliary data kept for each statement. */
75*404b540aSrobert
76*404b540aSrobert struct lim_aux_data
77*404b540aSrobert {
78*404b540aSrobert struct loop *max_loop; /* The outermost loop in that the statement
79*404b540aSrobert is invariant. */
80*404b540aSrobert
81*404b540aSrobert struct loop *tgt_loop; /* The loop out of that we want to move the
82*404b540aSrobert invariant. */
83*404b540aSrobert
84*404b540aSrobert struct loop *always_executed_in;
85*404b540aSrobert /* The outermost loop for that we are sure
86*404b540aSrobert the statement is executed if the loop
87*404b540aSrobert is entered. */
88*404b540aSrobert
89*404b540aSrobert bool sm_done; /* True iff the store motion for a memory
90*404b540aSrobert reference in the statement has already
91*404b540aSrobert been executed. */
92*404b540aSrobert
93*404b540aSrobert unsigned cost; /* Cost of the computation performed by the
94*404b540aSrobert statement. */
95*404b540aSrobert
96*404b540aSrobert struct depend *depends; /* List of statements that must be also hoisted
97*404b540aSrobert out of the loop when this statement is
98*404b540aSrobert hoisted; i.e. those that define the operands
99*404b540aSrobert of the statement and are inside of the
100*404b540aSrobert MAX_LOOP loop. */
101*404b540aSrobert };
102*404b540aSrobert
103*404b540aSrobert #define LIM_DATA(STMT) (TREE_CODE (STMT) == PHI_NODE \
104*404b540aSrobert ? NULL \
105*404b540aSrobert : (struct lim_aux_data *) (stmt_ann (STMT)->common.aux))
106*404b540aSrobert
107*404b540aSrobert /* Description of a memory reference location for store motion. */
108*404b540aSrobert
109*404b540aSrobert struct mem_ref_loc
110*404b540aSrobert {
111*404b540aSrobert tree *ref; /* The reference itself. */
112*404b540aSrobert tree stmt; /* The statement in that it occurs. */
113*404b540aSrobert struct mem_ref_loc *next; /* Next use in the chain. */
114*404b540aSrobert };
115*404b540aSrobert
116*404b540aSrobert /* Description of a memory reference for store motion. */
117*404b540aSrobert
118*404b540aSrobert struct mem_ref
119*404b540aSrobert {
120*404b540aSrobert tree mem; /* The memory itself. */
121*404b540aSrobert hashval_t hash; /* Its hash value. */
122*404b540aSrobert bool is_stored; /* True if there is a store to the location
123*404b540aSrobert in the loop. */
124*404b540aSrobert struct mem_ref_loc *locs; /* The locations where it is found. */
125*404b540aSrobert bitmap vops; /* Vops corresponding to this memory
126*404b540aSrobert location. */
127*404b540aSrobert struct mem_ref *next; /* Next memory reference in the list.
128*404b540aSrobert Memory references are stored in a hash
129*404b540aSrobert table, but the hash function depends
130*404b540aSrobert on values of pointers. Thus we cannot use
131*404b540aSrobert htab_traverse, since then we would get
132*404b540aSrobert miscompares during bootstrap (although the
133*404b540aSrobert produced code would be correct). */
134*404b540aSrobert };
135*404b540aSrobert
136*404b540aSrobert /* Minimum cost of an expensive expression. */
137*404b540aSrobert #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
138*404b540aSrobert
139*404b540aSrobert /* The outermost loop for that execution of the header guarantees that the
140*404b540aSrobert block will be executed. */
141*404b540aSrobert #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
142*404b540aSrobert
143*404b540aSrobert /* Calls CBCK for each index in memory reference ADDR_P. There are two
144*404b540aSrobert kinds situations handled; in each of these cases, the memory reference
145*404b540aSrobert and DATA are passed to the callback:
146*404b540aSrobert
147*404b540aSrobert Access to an array: ARRAY_{RANGE_}REF (base, index). In this case we also
148*404b540aSrobert pass the pointer to the index to the callback.
149*404b540aSrobert
150*404b540aSrobert Pointer dereference: INDIRECT_REF (addr). In this case we also pass the
151*404b540aSrobert pointer to addr to the callback.
152*404b540aSrobert
153*404b540aSrobert If the callback returns false, the whole search stops and false is returned.
154*404b540aSrobert Otherwise the function returns true after traversing through the whole
155*404b540aSrobert reference *ADDR_P. */
156*404b540aSrobert
157*404b540aSrobert bool
for_each_index(tree * addr_p,bool (* cbck)(tree,tree *,void *),void * data)158*404b540aSrobert for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
159*404b540aSrobert {
160*404b540aSrobert tree *nxt, *idx;
161*404b540aSrobert
162*404b540aSrobert for (; ; addr_p = nxt)
163*404b540aSrobert {
164*404b540aSrobert switch (TREE_CODE (*addr_p))
165*404b540aSrobert {
166*404b540aSrobert case SSA_NAME:
167*404b540aSrobert return cbck (*addr_p, addr_p, data);
168*404b540aSrobert
169*404b540aSrobert case MISALIGNED_INDIRECT_REF:
170*404b540aSrobert case ALIGN_INDIRECT_REF:
171*404b540aSrobert case INDIRECT_REF:
172*404b540aSrobert nxt = &TREE_OPERAND (*addr_p, 0);
173*404b540aSrobert return cbck (*addr_p, nxt, data);
174*404b540aSrobert
175*404b540aSrobert case BIT_FIELD_REF:
176*404b540aSrobert case VIEW_CONVERT_EXPR:
177*404b540aSrobert case REALPART_EXPR:
178*404b540aSrobert case IMAGPART_EXPR:
179*404b540aSrobert nxt = &TREE_OPERAND (*addr_p, 0);
180*404b540aSrobert break;
181*404b540aSrobert
182*404b540aSrobert case COMPONENT_REF:
183*404b540aSrobert /* If the component has varying offset, it behaves like index
184*404b540aSrobert as well. */
185*404b540aSrobert idx = &TREE_OPERAND (*addr_p, 2);
186*404b540aSrobert if (*idx
187*404b540aSrobert && !cbck (*addr_p, idx, data))
188*404b540aSrobert return false;
189*404b540aSrobert
190*404b540aSrobert nxt = &TREE_OPERAND (*addr_p, 0);
191*404b540aSrobert break;
192*404b540aSrobert
193*404b540aSrobert case ARRAY_REF:
194*404b540aSrobert case ARRAY_RANGE_REF:
195*404b540aSrobert nxt = &TREE_OPERAND (*addr_p, 0);
196*404b540aSrobert if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
197*404b540aSrobert return false;
198*404b540aSrobert break;
199*404b540aSrobert
200*404b540aSrobert case VAR_DECL:
201*404b540aSrobert case PARM_DECL:
202*404b540aSrobert case STRING_CST:
203*404b540aSrobert case RESULT_DECL:
204*404b540aSrobert case VECTOR_CST:
205*404b540aSrobert case COMPLEX_CST:
206*404b540aSrobert case INTEGER_CST:
207*404b540aSrobert case REAL_CST:
208*404b540aSrobert return true;
209*404b540aSrobert
210*404b540aSrobert case TARGET_MEM_REF:
211*404b540aSrobert idx = &TMR_BASE (*addr_p);
212*404b540aSrobert if (*idx
213*404b540aSrobert && !cbck (*addr_p, idx, data))
214*404b540aSrobert return false;
215*404b540aSrobert idx = &TMR_INDEX (*addr_p);
216*404b540aSrobert if (*idx
217*404b540aSrobert && !cbck (*addr_p, idx, data))
218*404b540aSrobert return false;
219*404b540aSrobert return true;
220*404b540aSrobert
221*404b540aSrobert default:
222*404b540aSrobert gcc_unreachable ();
223*404b540aSrobert }
224*404b540aSrobert }
225*404b540aSrobert }
226*404b540aSrobert
227*404b540aSrobert /* If it is possible to hoist the statement STMT unconditionally,
228*404b540aSrobert returns MOVE_POSSIBLE.
229*404b540aSrobert If it is possible to hoist the statement STMT, but we must avoid making
230*404b540aSrobert it executed if it would not be executed in the original program (e.g.
231*404b540aSrobert because it may trap), return MOVE_PRESERVE_EXECUTION.
232*404b540aSrobert Otherwise return MOVE_IMPOSSIBLE. */
233*404b540aSrobert
234*404b540aSrobert enum move_pos
movement_possibility(tree stmt)235*404b540aSrobert movement_possibility (tree stmt)
236*404b540aSrobert {
237*404b540aSrobert tree lhs, rhs;
238*404b540aSrobert
239*404b540aSrobert if (flag_unswitch_loops
240*404b540aSrobert && TREE_CODE (stmt) == COND_EXPR)
241*404b540aSrobert {
242*404b540aSrobert /* If we perform unswitching, force the operands of the invariant
243*404b540aSrobert condition to be moved out of the loop. */
244*404b540aSrobert return MOVE_POSSIBLE;
245*404b540aSrobert }
246*404b540aSrobert
247*404b540aSrobert if (TREE_CODE (stmt) != MODIFY_EXPR)
248*404b540aSrobert return MOVE_IMPOSSIBLE;
249*404b540aSrobert
250*404b540aSrobert if (stmt_ends_bb_p (stmt))
251*404b540aSrobert return MOVE_IMPOSSIBLE;
252*404b540aSrobert
253*404b540aSrobert if (stmt_ann (stmt)->has_volatile_ops)
254*404b540aSrobert return MOVE_IMPOSSIBLE;
255*404b540aSrobert
256*404b540aSrobert lhs = TREE_OPERAND (stmt, 0);
257*404b540aSrobert if (TREE_CODE (lhs) == SSA_NAME
258*404b540aSrobert && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
259*404b540aSrobert return MOVE_IMPOSSIBLE;
260*404b540aSrobert
261*404b540aSrobert rhs = TREE_OPERAND (stmt, 1);
262*404b540aSrobert
263*404b540aSrobert if (TREE_SIDE_EFFECTS (rhs))
264*404b540aSrobert return MOVE_IMPOSSIBLE;
265*404b540aSrobert
266*404b540aSrobert if (TREE_CODE (lhs) != SSA_NAME
267*404b540aSrobert || tree_could_trap_p (rhs))
268*404b540aSrobert return MOVE_PRESERVE_EXECUTION;
269*404b540aSrobert
270*404b540aSrobert if (get_call_expr_in (stmt))
271*404b540aSrobert {
272*404b540aSrobert /* While pure or const call is guaranteed to have no side effects, we
273*404b540aSrobert cannot move it arbitrarily. Consider code like
274*404b540aSrobert
275*404b540aSrobert char *s = something ();
276*404b540aSrobert
277*404b540aSrobert while (1)
278*404b540aSrobert {
279*404b540aSrobert if (s)
280*404b540aSrobert t = strlen (s);
281*404b540aSrobert else
282*404b540aSrobert t = 0;
283*404b540aSrobert }
284*404b540aSrobert
285*404b540aSrobert Here the strlen call cannot be moved out of the loop, even though
286*404b540aSrobert s is invariant. In addition to possibly creating a call with
287*404b540aSrobert invalid arguments, moving out a function call that is not executed
288*404b540aSrobert may cause performance regressions in case the call is costly and
289*404b540aSrobert not executed at all. */
290*404b540aSrobert return MOVE_PRESERVE_EXECUTION;
291*404b540aSrobert }
292*404b540aSrobert return MOVE_POSSIBLE;
293*404b540aSrobert }
294*404b540aSrobert
295*404b540aSrobert /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
296*404b540aSrobert loop to that we could move the expression using DEF if it did not have
297*404b540aSrobert other operands, i.e. the outermost loop enclosing LOOP in that the value
298*404b540aSrobert of DEF is invariant. */
299*404b540aSrobert
300*404b540aSrobert static struct loop *
outermost_invariant_loop(tree def,struct loop * loop)301*404b540aSrobert outermost_invariant_loop (tree def, struct loop *loop)
302*404b540aSrobert {
303*404b540aSrobert tree def_stmt;
304*404b540aSrobert basic_block def_bb;
305*404b540aSrobert struct loop *max_loop;
306*404b540aSrobert
307*404b540aSrobert if (TREE_CODE (def) != SSA_NAME)
308*404b540aSrobert return superloop_at_depth (loop, 1);
309*404b540aSrobert
310*404b540aSrobert def_stmt = SSA_NAME_DEF_STMT (def);
311*404b540aSrobert def_bb = bb_for_stmt (def_stmt);
312*404b540aSrobert if (!def_bb)
313*404b540aSrobert return superloop_at_depth (loop, 1);
314*404b540aSrobert
315*404b540aSrobert max_loop = find_common_loop (loop, def_bb->loop_father);
316*404b540aSrobert
317*404b540aSrobert if (LIM_DATA (def_stmt) && LIM_DATA (def_stmt)->max_loop)
318*404b540aSrobert max_loop = find_common_loop (max_loop,
319*404b540aSrobert LIM_DATA (def_stmt)->max_loop->outer);
320*404b540aSrobert if (max_loop == loop)
321*404b540aSrobert return NULL;
322*404b540aSrobert max_loop = superloop_at_depth (loop, max_loop->depth + 1);
323*404b540aSrobert
324*404b540aSrobert return max_loop;
325*404b540aSrobert }
326*404b540aSrobert
327*404b540aSrobert /* Returns the outermost superloop of LOOP in that the expression EXPR is
328*404b540aSrobert invariant. */
329*404b540aSrobert
330*404b540aSrobert static struct loop *
outermost_invariant_loop_expr(tree expr,struct loop * loop)331*404b540aSrobert outermost_invariant_loop_expr (tree expr, struct loop *loop)
332*404b540aSrobert {
333*404b540aSrobert enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
334*404b540aSrobert unsigned i, nops;
335*404b540aSrobert struct loop *max_loop = superloop_at_depth (loop, 1), *aloop;
336*404b540aSrobert
337*404b540aSrobert if (TREE_CODE (expr) == SSA_NAME
338*404b540aSrobert || TREE_CODE (expr) == INTEGER_CST
339*404b540aSrobert || is_gimple_min_invariant (expr))
340*404b540aSrobert return outermost_invariant_loop (expr, loop);
341*404b540aSrobert
342*404b540aSrobert if (class != tcc_unary
343*404b540aSrobert && class != tcc_binary
344*404b540aSrobert && class != tcc_expression
345*404b540aSrobert && class != tcc_comparison)
346*404b540aSrobert return NULL;
347*404b540aSrobert
348*404b540aSrobert nops = TREE_CODE_LENGTH (TREE_CODE (expr));
349*404b540aSrobert for (i = 0; i < nops; i++)
350*404b540aSrobert {
351*404b540aSrobert aloop = outermost_invariant_loop_expr (TREE_OPERAND (expr, i), loop);
352*404b540aSrobert if (!aloop)
353*404b540aSrobert return NULL;
354*404b540aSrobert
355*404b540aSrobert if (flow_loop_nested_p (max_loop, aloop))
356*404b540aSrobert max_loop = aloop;
357*404b540aSrobert }
358*404b540aSrobert
359*404b540aSrobert return max_loop;
360*404b540aSrobert }
361*404b540aSrobert
362*404b540aSrobert /* DATA is a structure containing information associated with a statement
363*404b540aSrobert inside LOOP. DEF is one of the operands of this statement.
364*404b540aSrobert
365*404b540aSrobert Find the outermost loop enclosing LOOP in that value of DEF is invariant
366*404b540aSrobert and record this in DATA->max_loop field. If DEF itself is defined inside
367*404b540aSrobert this loop as well (i.e. we need to hoist it out of the loop if we want
368*404b540aSrobert to hoist the statement represented by DATA), record the statement in that
369*404b540aSrobert DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
370*404b540aSrobert add the cost of the computation of DEF to the DATA->cost.
371*404b540aSrobert
372*404b540aSrobert If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
373*404b540aSrobert
374*404b540aSrobert static bool
add_dependency(tree def,struct lim_aux_data * data,struct loop * loop,bool add_cost)375*404b540aSrobert add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
376*404b540aSrobert bool add_cost)
377*404b540aSrobert {
378*404b540aSrobert tree def_stmt = SSA_NAME_DEF_STMT (def);
379*404b540aSrobert basic_block def_bb = bb_for_stmt (def_stmt);
380*404b540aSrobert struct loop *max_loop;
381*404b540aSrobert struct depend *dep;
382*404b540aSrobert
383*404b540aSrobert if (!def_bb)
384*404b540aSrobert return true;
385*404b540aSrobert
386*404b540aSrobert max_loop = outermost_invariant_loop (def, loop);
387*404b540aSrobert if (!max_loop)
388*404b540aSrobert return false;
389*404b540aSrobert
390*404b540aSrobert if (flow_loop_nested_p (data->max_loop, max_loop))
391*404b540aSrobert data->max_loop = max_loop;
392*404b540aSrobert
393*404b540aSrobert if (!LIM_DATA (def_stmt))
394*404b540aSrobert return true;
395*404b540aSrobert
396*404b540aSrobert if (add_cost
397*404b540aSrobert /* Only add the cost if the statement defining DEF is inside LOOP,
398*404b540aSrobert i.e. if it is likely that by moving the invariants dependent
399*404b540aSrobert on it, we will be able to avoid creating a new register for
400*404b540aSrobert it (since it will be only used in these dependent invariants). */
401*404b540aSrobert && def_bb->loop_father == loop)
402*404b540aSrobert data->cost += LIM_DATA (def_stmt)->cost;
403*404b540aSrobert
404*404b540aSrobert dep = XNEW (struct depend);
405*404b540aSrobert dep->stmt = def_stmt;
406*404b540aSrobert dep->next = data->depends;
407*404b540aSrobert data->depends = dep;
408*404b540aSrobert
409*404b540aSrobert return true;
410*404b540aSrobert }
411*404b540aSrobert
412*404b540aSrobert /* Returns an estimate for a cost of statement STMT. TODO -- the values here
413*404b540aSrobert are just ad-hoc constants. The estimates should be based on target-specific
414*404b540aSrobert values. */
415*404b540aSrobert
416*404b540aSrobert static unsigned
stmt_cost(tree stmt)417*404b540aSrobert stmt_cost (tree stmt)
418*404b540aSrobert {
419*404b540aSrobert tree rhs;
420*404b540aSrobert unsigned cost = 1;
421*404b540aSrobert
422*404b540aSrobert /* Always try to create possibilities for unswitching. */
423*404b540aSrobert if (TREE_CODE (stmt) == COND_EXPR)
424*404b540aSrobert return LIM_EXPENSIVE;
425*404b540aSrobert
426*404b540aSrobert rhs = TREE_OPERAND (stmt, 1);
427*404b540aSrobert
428*404b540aSrobert /* Hoisting memory references out should almost surely be a win. */
429*404b540aSrobert if (stmt_references_memory_p (stmt))
430*404b540aSrobert cost += 20;
431*404b540aSrobert
432*404b540aSrobert switch (TREE_CODE (rhs))
433*404b540aSrobert {
434*404b540aSrobert case CALL_EXPR:
435*404b540aSrobert /* We should be hoisting calls if possible. */
436*404b540aSrobert
437*404b540aSrobert /* Unless the call is a builtin_constant_p; this always folds to a
438*404b540aSrobert constant, so moving it is useless. */
439*404b540aSrobert rhs = get_callee_fndecl (rhs);
440*404b540aSrobert if (DECL_BUILT_IN_CLASS (rhs) == BUILT_IN_NORMAL
441*404b540aSrobert && DECL_FUNCTION_CODE (rhs) == BUILT_IN_CONSTANT_P)
442*404b540aSrobert return 0;
443*404b540aSrobert
444*404b540aSrobert cost += 20;
445*404b540aSrobert break;
446*404b540aSrobert
447*404b540aSrobert case MULT_EXPR:
448*404b540aSrobert case TRUNC_DIV_EXPR:
449*404b540aSrobert case CEIL_DIV_EXPR:
450*404b540aSrobert case FLOOR_DIV_EXPR:
451*404b540aSrobert case ROUND_DIV_EXPR:
452*404b540aSrobert case EXACT_DIV_EXPR:
453*404b540aSrobert case CEIL_MOD_EXPR:
454*404b540aSrobert case FLOOR_MOD_EXPR:
455*404b540aSrobert case ROUND_MOD_EXPR:
456*404b540aSrobert case TRUNC_MOD_EXPR:
457*404b540aSrobert case RDIV_EXPR:
458*404b540aSrobert /* Division and multiplication are usually expensive. */
459*404b540aSrobert cost += 20;
460*404b540aSrobert break;
461*404b540aSrobert
462*404b540aSrobert default:
463*404b540aSrobert break;
464*404b540aSrobert }
465*404b540aSrobert
466*404b540aSrobert return cost;
467*404b540aSrobert }
468*404b540aSrobert
469*404b540aSrobert /* Determine the outermost loop to that it is possible to hoist a statement
470*404b540aSrobert STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
471*404b540aSrobert the outermost loop in that the value computed by STMT is invariant.
472*404b540aSrobert If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
473*404b540aSrobert we preserve the fact whether STMT is executed. It also fills other related
474*404b540aSrobert information to LIM_DATA (STMT).
475*404b540aSrobert
476*404b540aSrobert The function returns false if STMT cannot be hoisted outside of the loop it
477*404b540aSrobert is defined in, and true otherwise. */
478*404b540aSrobert
479*404b540aSrobert static bool
determine_max_movement(tree stmt,bool must_preserve_exec)480*404b540aSrobert determine_max_movement (tree stmt, bool must_preserve_exec)
481*404b540aSrobert {
482*404b540aSrobert basic_block bb = bb_for_stmt (stmt);
483*404b540aSrobert struct loop *loop = bb->loop_father;
484*404b540aSrobert struct loop *level;
485*404b540aSrobert struct lim_aux_data *lim_data = LIM_DATA (stmt);
486*404b540aSrobert tree val;
487*404b540aSrobert ssa_op_iter iter;
488*404b540aSrobert
489*404b540aSrobert if (must_preserve_exec)
490*404b540aSrobert level = ALWAYS_EXECUTED_IN (bb);
491*404b540aSrobert else
492*404b540aSrobert level = superloop_at_depth (loop, 1);
493*404b540aSrobert lim_data->max_loop = level;
494*404b540aSrobert
495*404b540aSrobert FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
496*404b540aSrobert if (!add_dependency (val, lim_data, loop, true))
497*404b540aSrobert return false;
498*404b540aSrobert
499*404b540aSrobert FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
500*404b540aSrobert if (!add_dependency (val, lim_data, loop, false))
501*404b540aSrobert return false;
502*404b540aSrobert
503*404b540aSrobert lim_data->cost += stmt_cost (stmt);
504*404b540aSrobert
505*404b540aSrobert return true;
506*404b540aSrobert }
507*404b540aSrobert
508*404b540aSrobert /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
509*404b540aSrobert and that one of the operands of this statement is computed by STMT.
510*404b540aSrobert Ensure that STMT (together with all the statements that define its
511*404b540aSrobert operands) is hoisted at least out of the loop LEVEL. */
512*404b540aSrobert
513*404b540aSrobert static void
set_level(tree stmt,struct loop * orig_loop,struct loop * level)514*404b540aSrobert set_level (tree stmt, struct loop *orig_loop, struct loop *level)
515*404b540aSrobert {
516*404b540aSrobert struct loop *stmt_loop = bb_for_stmt (stmt)->loop_father;
517*404b540aSrobert struct depend *dep;
518*404b540aSrobert
519*404b540aSrobert stmt_loop = find_common_loop (orig_loop, stmt_loop);
520*404b540aSrobert if (LIM_DATA (stmt) && LIM_DATA (stmt)->tgt_loop)
521*404b540aSrobert stmt_loop = find_common_loop (stmt_loop,
522*404b540aSrobert LIM_DATA (stmt)->tgt_loop->outer);
523*404b540aSrobert if (flow_loop_nested_p (stmt_loop, level))
524*404b540aSrobert return;
525*404b540aSrobert
526*404b540aSrobert gcc_assert (LIM_DATA (stmt));
527*404b540aSrobert gcc_assert (level == LIM_DATA (stmt)->max_loop
528*404b540aSrobert || flow_loop_nested_p (LIM_DATA (stmt)->max_loop, level));
529*404b540aSrobert
530*404b540aSrobert LIM_DATA (stmt)->tgt_loop = level;
531*404b540aSrobert for (dep = LIM_DATA (stmt)->depends; dep; dep = dep->next)
532*404b540aSrobert set_level (dep->stmt, orig_loop, level);
533*404b540aSrobert }
534*404b540aSrobert
535*404b540aSrobert /* Determines an outermost loop from that we want to hoist the statement STMT.
536*404b540aSrobert For now we chose the outermost possible loop. TODO -- use profiling
537*404b540aSrobert information to set it more sanely. */
538*404b540aSrobert
539*404b540aSrobert static void
set_profitable_level(tree stmt)540*404b540aSrobert set_profitable_level (tree stmt)
541*404b540aSrobert {
542*404b540aSrobert set_level (stmt, bb_for_stmt (stmt)->loop_father, LIM_DATA (stmt)->max_loop);
543*404b540aSrobert }
544*404b540aSrobert
545*404b540aSrobert /* Returns true if STMT is not a pure call. */
546*404b540aSrobert
547*404b540aSrobert static bool
nonpure_call_p(tree stmt)548*404b540aSrobert nonpure_call_p (tree stmt)
549*404b540aSrobert {
550*404b540aSrobert tree call = get_call_expr_in (stmt);
551*404b540aSrobert
552*404b540aSrobert if (!call)
553*404b540aSrobert return false;
554*404b540aSrobert
555*404b540aSrobert return TREE_SIDE_EFFECTS (call) != 0;
556*404b540aSrobert }
557*404b540aSrobert
558*404b540aSrobert /* Releases the memory occupied by DATA. */
559*404b540aSrobert
560*404b540aSrobert static void
free_lim_aux_data(struct lim_aux_data * data)561*404b540aSrobert free_lim_aux_data (struct lim_aux_data *data)
562*404b540aSrobert {
563*404b540aSrobert struct depend *dep, *next;
564*404b540aSrobert
565*404b540aSrobert for (dep = data->depends; dep; dep = next)
566*404b540aSrobert {
567*404b540aSrobert next = dep->next;
568*404b540aSrobert free (dep);
569*404b540aSrobert }
570*404b540aSrobert free (data);
571*404b540aSrobert }
572*404b540aSrobert
573*404b540aSrobert /* Determine the outermost loops in that statements in basic block BB are
574*404b540aSrobert invariant, and record them to the LIM_DATA associated with the statements.
575*404b540aSrobert Callback for walk_dominator_tree. */
576*404b540aSrobert
577*404b540aSrobert static void
determine_invariantness_stmt(struct dom_walk_data * dw_data ATTRIBUTE_UNUSED,basic_block bb)578*404b540aSrobert determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
579*404b540aSrobert basic_block bb)
580*404b540aSrobert {
581*404b540aSrobert enum move_pos pos;
582*404b540aSrobert block_stmt_iterator bsi;
583*404b540aSrobert tree stmt, rhs;
584*404b540aSrobert bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
585*404b540aSrobert struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
586*404b540aSrobert
587*404b540aSrobert if (!bb->loop_father->outer)
588*404b540aSrobert return;
589*404b540aSrobert
590*404b540aSrobert if (dump_file && (dump_flags & TDF_DETAILS))
591*404b540aSrobert fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
592*404b540aSrobert bb->index, bb->loop_father->num, bb->loop_father->depth);
593*404b540aSrobert
594*404b540aSrobert for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
595*404b540aSrobert {
596*404b540aSrobert stmt = bsi_stmt (bsi);
597*404b540aSrobert
598*404b540aSrobert pos = movement_possibility (stmt);
599*404b540aSrobert if (pos == MOVE_IMPOSSIBLE)
600*404b540aSrobert {
601*404b540aSrobert if (nonpure_call_p (stmt))
602*404b540aSrobert {
603*404b540aSrobert maybe_never = true;
604*404b540aSrobert outermost = NULL;
605*404b540aSrobert }
606*404b540aSrobert continue;
607*404b540aSrobert }
608*404b540aSrobert
609*404b540aSrobert /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
610*404b540aSrobert to be hoisted out of loop, saving expensive divide. */
611*404b540aSrobert if (pos == MOVE_POSSIBLE
612*404b540aSrobert && (rhs = TREE_OPERAND (stmt, 1)) != NULL
613*404b540aSrobert && TREE_CODE (rhs) == RDIV_EXPR
614*404b540aSrobert && flag_unsafe_math_optimizations
615*404b540aSrobert && !flag_trapping_math
616*404b540aSrobert && outermost_invariant_loop_expr (TREE_OPERAND (rhs, 1),
617*404b540aSrobert loop_containing_stmt (stmt)) != NULL
618*404b540aSrobert && outermost_invariant_loop_expr (rhs,
619*404b540aSrobert loop_containing_stmt (stmt)) == NULL)
620*404b540aSrobert {
621*404b540aSrobert tree lhs, stmt1, stmt2, var, name;
622*404b540aSrobert
623*404b540aSrobert lhs = TREE_OPERAND (stmt, 0);
624*404b540aSrobert
625*404b540aSrobert /* stmt must be MODIFY_EXPR. */
626*404b540aSrobert var = create_tmp_var (TREE_TYPE (rhs), "reciptmp");
627*404b540aSrobert add_referenced_var (var);
628*404b540aSrobert
629*404b540aSrobert stmt1 = build2 (MODIFY_EXPR, void_type_node, var,
630*404b540aSrobert build2 (RDIV_EXPR, TREE_TYPE (rhs),
631*404b540aSrobert build_real (TREE_TYPE (rhs), dconst1),
632*404b540aSrobert TREE_OPERAND (rhs, 1)));
633*404b540aSrobert name = make_ssa_name (var, stmt1);
634*404b540aSrobert TREE_OPERAND (stmt1, 0) = name;
635*404b540aSrobert stmt2 = build2 (MODIFY_EXPR, void_type_node, lhs,
636*404b540aSrobert build2 (MULT_EXPR, TREE_TYPE (rhs),
637*404b540aSrobert name, TREE_OPERAND (rhs, 0)));
638*404b540aSrobert
639*404b540aSrobert /* Replace division stmt with reciprocal and multiply stmts.
640*404b540aSrobert The multiply stmt is not invariant, so update iterator
641*404b540aSrobert and avoid rescanning. */
642*404b540aSrobert bsi_replace (&bsi, stmt1, true);
643*404b540aSrobert bsi_insert_after (&bsi, stmt2, BSI_NEW_STMT);
644*404b540aSrobert SSA_NAME_DEF_STMT (lhs) = stmt2;
645*404b540aSrobert
646*404b540aSrobert /* Continue processing with invariant reciprocal statement. */
647*404b540aSrobert stmt = stmt1;
648*404b540aSrobert }
649*404b540aSrobert
650*404b540aSrobert stmt_ann (stmt)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
651*404b540aSrobert LIM_DATA (stmt)->always_executed_in = outermost;
652*404b540aSrobert
653*404b540aSrobert if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
654*404b540aSrobert continue;
655*404b540aSrobert
656*404b540aSrobert if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
657*404b540aSrobert {
658*404b540aSrobert LIM_DATA (stmt)->max_loop = NULL;
659*404b540aSrobert continue;
660*404b540aSrobert }
661*404b540aSrobert
662*404b540aSrobert if (dump_file && (dump_flags & TDF_DETAILS))
663*404b540aSrobert {
664*404b540aSrobert print_generic_stmt_indented (dump_file, stmt, 0, 2);
665*404b540aSrobert fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
666*404b540aSrobert LIM_DATA (stmt)->max_loop->depth,
667*404b540aSrobert LIM_DATA (stmt)->cost);
668*404b540aSrobert }
669*404b540aSrobert
670*404b540aSrobert if (LIM_DATA (stmt)->cost >= LIM_EXPENSIVE)
671*404b540aSrobert set_profitable_level (stmt);
672*404b540aSrobert }
673*404b540aSrobert }
674*404b540aSrobert
675*404b540aSrobert /* For each statement determines the outermost loop in that it is invariant,
676*404b540aSrobert statements on whose motion it depends and the cost of the computation.
677*404b540aSrobert This information is stored to the LIM_DATA structure associated with
678*404b540aSrobert each statement. */
679*404b540aSrobert
680*404b540aSrobert static void
determine_invariantness(void)681*404b540aSrobert determine_invariantness (void)
682*404b540aSrobert {
683*404b540aSrobert struct dom_walk_data walk_data;
684*404b540aSrobert
685*404b540aSrobert memset (&walk_data, 0, sizeof (struct dom_walk_data));
686*404b540aSrobert walk_data.before_dom_children_before_stmts = determine_invariantness_stmt;
687*404b540aSrobert
688*404b540aSrobert init_walk_dominator_tree (&walk_data);
689*404b540aSrobert walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
690*404b540aSrobert fini_walk_dominator_tree (&walk_data);
691*404b540aSrobert }
692*404b540aSrobert
693*404b540aSrobert /* Commits edge insertions and updates loop structures. */
694*404b540aSrobert
695*404b540aSrobert void
loop_commit_inserts(void)696*404b540aSrobert loop_commit_inserts (void)
697*404b540aSrobert {
698*404b540aSrobert unsigned old_last_basic_block, i;
699*404b540aSrobert basic_block bb;
700*404b540aSrobert
701*404b540aSrobert old_last_basic_block = last_basic_block;
702*404b540aSrobert bsi_commit_edge_inserts ();
703*404b540aSrobert for (i = old_last_basic_block; i < (unsigned) last_basic_block; i++)
704*404b540aSrobert {
705*404b540aSrobert bb = BASIC_BLOCK (i);
706*404b540aSrobert add_bb_to_loop (bb,
707*404b540aSrobert find_common_loop (single_pred (bb)->loop_father,
708*404b540aSrobert single_succ (bb)->loop_father));
709*404b540aSrobert }
710*404b540aSrobert }
711*404b540aSrobert
712*404b540aSrobert /* Hoist the statements in basic block BB out of the loops prescribed by
713*404b540aSrobert data stored in LIM_DATA structures associated with each statement. Callback
714*404b540aSrobert for walk_dominator_tree. */
715*404b540aSrobert
716*404b540aSrobert static void
move_computations_stmt(struct dom_walk_data * dw_data ATTRIBUTE_UNUSED,basic_block bb)717*404b540aSrobert move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
718*404b540aSrobert basic_block bb)
719*404b540aSrobert {
720*404b540aSrobert struct loop *level;
721*404b540aSrobert block_stmt_iterator bsi;
722*404b540aSrobert tree stmt;
723*404b540aSrobert unsigned cost = 0;
724*404b540aSrobert
725*404b540aSrobert if (!bb->loop_father->outer)
726*404b540aSrobert return;
727*404b540aSrobert
728*404b540aSrobert for (bsi = bsi_start (bb); !bsi_end_p (bsi); )
729*404b540aSrobert {
730*404b540aSrobert stmt = bsi_stmt (bsi);
731*404b540aSrobert
732*404b540aSrobert if (!LIM_DATA (stmt))
733*404b540aSrobert {
734*404b540aSrobert bsi_next (&bsi);
735*404b540aSrobert continue;
736*404b540aSrobert }
737*404b540aSrobert
738*404b540aSrobert cost = LIM_DATA (stmt)->cost;
739*404b540aSrobert level = LIM_DATA (stmt)->tgt_loop;
740*404b540aSrobert free_lim_aux_data (LIM_DATA (stmt));
741*404b540aSrobert stmt_ann (stmt)->common.aux = NULL;
742*404b540aSrobert
743*404b540aSrobert if (!level)
744*404b540aSrobert {
745*404b540aSrobert bsi_next (&bsi);
746*404b540aSrobert continue;
747*404b540aSrobert }
748*404b540aSrobert
749*404b540aSrobert /* We do not really want to move conditionals out of the loop; we just
750*404b540aSrobert placed it here to force its operands to be moved if necessary. */
751*404b540aSrobert if (TREE_CODE (stmt) == COND_EXPR)
752*404b540aSrobert continue;
753*404b540aSrobert
754*404b540aSrobert if (dump_file && (dump_flags & TDF_DETAILS))
755*404b540aSrobert {
756*404b540aSrobert fprintf (dump_file, "Moving statement\n");
757*404b540aSrobert print_generic_stmt (dump_file, stmt, 0);
758*404b540aSrobert fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
759*404b540aSrobert cost, level->num);
760*404b540aSrobert }
761*404b540aSrobert bsi_insert_on_edge (loop_preheader_edge (level), stmt);
762*404b540aSrobert bsi_remove (&bsi, false);
763*404b540aSrobert }
764*404b540aSrobert }
765*404b540aSrobert
766*404b540aSrobert /* Hoist the statements out of the loops prescribed by data stored in
767*404b540aSrobert LIM_DATA structures associated with each statement.*/
768*404b540aSrobert
769*404b540aSrobert static void
move_computations(void)770*404b540aSrobert move_computations (void)
771*404b540aSrobert {
772*404b540aSrobert struct dom_walk_data walk_data;
773*404b540aSrobert
774*404b540aSrobert memset (&walk_data, 0, sizeof (struct dom_walk_data));
775*404b540aSrobert walk_data.before_dom_children_before_stmts = move_computations_stmt;
776*404b540aSrobert
777*404b540aSrobert init_walk_dominator_tree (&walk_data);
778*404b540aSrobert walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
779*404b540aSrobert fini_walk_dominator_tree (&walk_data);
780*404b540aSrobert
781*404b540aSrobert loop_commit_inserts ();
782*404b540aSrobert if (need_ssa_update_p ())
783*404b540aSrobert rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
784*404b540aSrobert }
785*404b540aSrobert
786*404b540aSrobert /* Checks whether the statement defining variable *INDEX can be hoisted
787*404b540aSrobert out of the loop passed in DATA. Callback for for_each_index. */
788*404b540aSrobert
789*404b540aSrobert static bool
may_move_till(tree ref,tree * index,void * data)790*404b540aSrobert may_move_till (tree ref, tree *index, void *data)
791*404b540aSrobert {
792*404b540aSrobert struct loop *loop = data, *max_loop;
793*404b540aSrobert
794*404b540aSrobert /* If REF is an array reference, check also that the step and the lower
795*404b540aSrobert bound is invariant in LOOP. */
796*404b540aSrobert if (TREE_CODE (ref) == ARRAY_REF)
797*404b540aSrobert {
798*404b540aSrobert tree step = array_ref_element_size (ref);
799*404b540aSrobert tree lbound = array_ref_low_bound (ref);
800*404b540aSrobert
801*404b540aSrobert max_loop = outermost_invariant_loop_expr (step, loop);
802*404b540aSrobert if (!max_loop)
803*404b540aSrobert return false;
804*404b540aSrobert
805*404b540aSrobert max_loop = outermost_invariant_loop_expr (lbound, loop);
806*404b540aSrobert if (!max_loop)
807*404b540aSrobert return false;
808*404b540aSrobert }
809*404b540aSrobert
810*404b540aSrobert max_loop = outermost_invariant_loop (*index, loop);
811*404b540aSrobert if (!max_loop)
812*404b540aSrobert return false;
813*404b540aSrobert
814*404b540aSrobert return true;
815*404b540aSrobert }
816*404b540aSrobert
817*404b540aSrobert /* Forces statements defining (invariant) SSA names in expression EXPR to be
818*404b540aSrobert moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
819*404b540aSrobert
820*404b540aSrobert static void
force_move_till_expr(tree expr,struct loop * orig_loop,struct loop * loop)821*404b540aSrobert force_move_till_expr (tree expr, struct loop *orig_loop, struct loop *loop)
822*404b540aSrobert {
823*404b540aSrobert enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
824*404b540aSrobert unsigned i, nops;
825*404b540aSrobert
826*404b540aSrobert if (TREE_CODE (expr) == SSA_NAME)
827*404b540aSrobert {
828*404b540aSrobert tree stmt = SSA_NAME_DEF_STMT (expr);
829*404b540aSrobert if (IS_EMPTY_STMT (stmt))
830*404b540aSrobert return;
831*404b540aSrobert
832*404b540aSrobert set_level (stmt, orig_loop, loop);
833*404b540aSrobert return;
834*404b540aSrobert }
835*404b540aSrobert
836*404b540aSrobert if (class != tcc_unary
837*404b540aSrobert && class != tcc_binary
838*404b540aSrobert && class != tcc_expression
839*404b540aSrobert && class != tcc_comparison)
840*404b540aSrobert return;
841*404b540aSrobert
842*404b540aSrobert nops = TREE_CODE_LENGTH (TREE_CODE (expr));
843*404b540aSrobert for (i = 0; i < nops; i++)
844*404b540aSrobert force_move_till_expr (TREE_OPERAND (expr, i), orig_loop, loop);
845*404b540aSrobert }
846*404b540aSrobert
847*404b540aSrobert /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
848*404b540aSrobert the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
849*404b540aSrobert for_each_index. */
850*404b540aSrobert
851*404b540aSrobert struct fmt_data
852*404b540aSrobert {
853*404b540aSrobert struct loop *loop;
854*404b540aSrobert struct loop *orig_loop;
855*404b540aSrobert };
856*404b540aSrobert
857*404b540aSrobert static bool
force_move_till(tree ref,tree * index,void * data)858*404b540aSrobert force_move_till (tree ref, tree *index, void *data)
859*404b540aSrobert {
860*404b540aSrobert tree stmt;
861*404b540aSrobert struct fmt_data *fmt_data = data;
862*404b540aSrobert
863*404b540aSrobert if (TREE_CODE (ref) == ARRAY_REF)
864*404b540aSrobert {
865*404b540aSrobert tree step = array_ref_element_size (ref);
866*404b540aSrobert tree lbound = array_ref_low_bound (ref);
867*404b540aSrobert
868*404b540aSrobert force_move_till_expr (step, fmt_data->orig_loop, fmt_data->loop);
869*404b540aSrobert force_move_till_expr (lbound, fmt_data->orig_loop, fmt_data->loop);
870*404b540aSrobert }
871*404b540aSrobert
872*404b540aSrobert if (TREE_CODE (*index) != SSA_NAME)
873*404b540aSrobert return true;
874*404b540aSrobert
875*404b540aSrobert stmt = SSA_NAME_DEF_STMT (*index);
876*404b540aSrobert if (IS_EMPTY_STMT (stmt))
877*404b540aSrobert return true;
878*404b540aSrobert
879*404b540aSrobert set_level (stmt, fmt_data->orig_loop, fmt_data->loop);
880*404b540aSrobert
881*404b540aSrobert return true;
882*404b540aSrobert }
883*404b540aSrobert
884*404b540aSrobert /* Records memory reference location *REF to the list MEM_REFS. The reference
885*404b540aSrobert occurs in statement STMT. */
886*404b540aSrobert
887*404b540aSrobert static void
record_mem_ref_loc(struct mem_ref_loc ** mem_refs,tree stmt,tree * ref)888*404b540aSrobert record_mem_ref_loc (struct mem_ref_loc **mem_refs, tree stmt, tree *ref)
889*404b540aSrobert {
890*404b540aSrobert struct mem_ref_loc *aref = XNEW (struct mem_ref_loc);
891*404b540aSrobert
892*404b540aSrobert aref->stmt = stmt;
893*404b540aSrobert aref->ref = ref;
894*404b540aSrobert
895*404b540aSrobert aref->next = *mem_refs;
896*404b540aSrobert *mem_refs = aref;
897*404b540aSrobert }
898*404b540aSrobert
899*404b540aSrobert /* Releases list of memory reference locations MEM_REFS. */
900*404b540aSrobert
901*404b540aSrobert static void
free_mem_ref_locs(struct mem_ref_loc * mem_refs)902*404b540aSrobert free_mem_ref_locs (struct mem_ref_loc *mem_refs)
903*404b540aSrobert {
904*404b540aSrobert struct mem_ref_loc *act;
905*404b540aSrobert
906*404b540aSrobert while (mem_refs)
907*404b540aSrobert {
908*404b540aSrobert act = mem_refs;
909*404b540aSrobert mem_refs = mem_refs->next;
910*404b540aSrobert free (act);
911*404b540aSrobert }
912*404b540aSrobert }
913*404b540aSrobert
914*404b540aSrobert /* Rewrites memory references in list MEM_REFS by variable TMP_VAR. */
915*404b540aSrobert
916*404b540aSrobert static void
rewrite_mem_refs(tree tmp_var,struct mem_ref_loc * mem_refs)917*404b540aSrobert rewrite_mem_refs (tree tmp_var, struct mem_ref_loc *mem_refs)
918*404b540aSrobert {
919*404b540aSrobert tree var;
920*404b540aSrobert ssa_op_iter iter;
921*404b540aSrobert
922*404b540aSrobert for (; mem_refs; mem_refs = mem_refs->next)
923*404b540aSrobert {
924*404b540aSrobert FOR_EACH_SSA_TREE_OPERAND (var, mem_refs->stmt, iter, SSA_OP_ALL_VIRTUALS)
925*404b540aSrobert mark_sym_for_renaming (SSA_NAME_VAR (var));
926*404b540aSrobert
927*404b540aSrobert *mem_refs->ref = tmp_var;
928*404b540aSrobert update_stmt (mem_refs->stmt);
929*404b540aSrobert }
930*404b540aSrobert }
931*404b540aSrobert
932*404b540aSrobert /* The name and the length of the currently generated variable
933*404b540aSrobert for lsm. */
934*404b540aSrobert #define MAX_LSM_NAME_LENGTH 40
935*404b540aSrobert static char lsm_tmp_name[MAX_LSM_NAME_LENGTH + 1];
936*404b540aSrobert static int lsm_tmp_name_length;
937*404b540aSrobert
938*404b540aSrobert /* Adds S to lsm_tmp_name. */
939*404b540aSrobert
940*404b540aSrobert static void
lsm_tmp_name_add(const char * s)941*404b540aSrobert lsm_tmp_name_add (const char *s)
942*404b540aSrobert {
943*404b540aSrobert int l = strlen (s) + lsm_tmp_name_length;
944*404b540aSrobert if (l > MAX_LSM_NAME_LENGTH)
945*404b540aSrobert return;
946*404b540aSrobert
947*404b540aSrobert strcpy (lsm_tmp_name + lsm_tmp_name_length, s);
948*404b540aSrobert lsm_tmp_name_length = l;
949*404b540aSrobert }
950*404b540aSrobert
951*404b540aSrobert /* Stores the name for temporary variable that replaces REF to
952*404b540aSrobert lsm_tmp_name. */
953*404b540aSrobert
954*404b540aSrobert static void
gen_lsm_tmp_name(tree ref)955*404b540aSrobert gen_lsm_tmp_name (tree ref)
956*404b540aSrobert {
957*404b540aSrobert const char *name;
958*404b540aSrobert
959*404b540aSrobert switch (TREE_CODE (ref))
960*404b540aSrobert {
961*404b540aSrobert case MISALIGNED_INDIRECT_REF:
962*404b540aSrobert case ALIGN_INDIRECT_REF:
963*404b540aSrobert case INDIRECT_REF:
964*404b540aSrobert gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
965*404b540aSrobert lsm_tmp_name_add ("_");
966*404b540aSrobert break;
967*404b540aSrobert
968*404b540aSrobert case BIT_FIELD_REF:
969*404b540aSrobert case VIEW_CONVERT_EXPR:
970*404b540aSrobert case ARRAY_RANGE_REF:
971*404b540aSrobert gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
972*404b540aSrobert break;
973*404b540aSrobert
974*404b540aSrobert case REALPART_EXPR:
975*404b540aSrobert gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
976*404b540aSrobert lsm_tmp_name_add ("_RE");
977*404b540aSrobert break;
978*404b540aSrobert
979*404b540aSrobert case IMAGPART_EXPR:
980*404b540aSrobert gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
981*404b540aSrobert lsm_tmp_name_add ("_IM");
982*404b540aSrobert break;
983*404b540aSrobert
984*404b540aSrobert case COMPONENT_REF:
985*404b540aSrobert gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
986*404b540aSrobert lsm_tmp_name_add ("_");
987*404b540aSrobert name = get_name (TREE_OPERAND (ref, 1));
988*404b540aSrobert if (!name)
989*404b540aSrobert name = "F";
990*404b540aSrobert lsm_tmp_name_add ("_");
991*404b540aSrobert lsm_tmp_name_add (name);
992*404b540aSrobert
993*404b540aSrobert case ARRAY_REF:
994*404b540aSrobert gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
995*404b540aSrobert lsm_tmp_name_add ("_I");
996*404b540aSrobert break;
997*404b540aSrobert
998*404b540aSrobert case SSA_NAME:
999*404b540aSrobert ref = SSA_NAME_VAR (ref);
1000*404b540aSrobert /* Fallthru. */
1001*404b540aSrobert
1002*404b540aSrobert case VAR_DECL:
1003*404b540aSrobert case PARM_DECL:
1004*404b540aSrobert name = get_name (ref);
1005*404b540aSrobert if (!name)
1006*404b540aSrobert name = "D";
1007*404b540aSrobert lsm_tmp_name_add (name);
1008*404b540aSrobert break;
1009*404b540aSrobert
1010*404b540aSrobert case STRING_CST:
1011*404b540aSrobert lsm_tmp_name_add ("S");
1012*404b540aSrobert break;
1013*404b540aSrobert
1014*404b540aSrobert case RESULT_DECL:
1015*404b540aSrobert lsm_tmp_name_add ("R");
1016*404b540aSrobert break;
1017*404b540aSrobert
1018*404b540aSrobert default:
1019*404b540aSrobert gcc_unreachable ();
1020*404b540aSrobert }
1021*404b540aSrobert }
1022*404b540aSrobert
1023*404b540aSrobert /* Determines name for temporary variable that replaces REF.
1024*404b540aSrobert The name is accumulated into the lsm_tmp_name variable. */
1025*404b540aSrobert
1026*404b540aSrobert static char *
get_lsm_tmp_name(tree ref)1027*404b540aSrobert get_lsm_tmp_name (tree ref)
1028*404b540aSrobert {
1029*404b540aSrobert lsm_tmp_name_length = 0;
1030*404b540aSrobert gen_lsm_tmp_name (ref);
1031*404b540aSrobert lsm_tmp_name_add ("_lsm");
1032*404b540aSrobert return lsm_tmp_name;
1033*404b540aSrobert }
1034*404b540aSrobert
1035*404b540aSrobert /* Records request for store motion of memory reference REF from LOOP.
1036*404b540aSrobert MEM_REFS is the list of occurrences of the reference REF inside LOOP;
1037*404b540aSrobert these references are rewritten by a new temporary variable.
1038*404b540aSrobert Exits from the LOOP are stored in EXITS, there are N_EXITS of them.
1039*404b540aSrobert The initialization of the temporary variable is put to the preheader
1040*404b540aSrobert of the loop, and assignments to the reference from the temporary variable
1041*404b540aSrobert are emitted to exits. */
1042*404b540aSrobert
1043*404b540aSrobert static void
schedule_sm(struct loop * loop,edge * exits,unsigned n_exits,tree ref,struct mem_ref_loc * mem_refs)1044*404b540aSrobert schedule_sm (struct loop *loop, edge *exits, unsigned n_exits, tree ref,
1045*404b540aSrobert struct mem_ref_loc *mem_refs)
1046*404b540aSrobert {
1047*404b540aSrobert struct mem_ref_loc *aref;
1048*404b540aSrobert tree tmp_var;
1049*404b540aSrobert unsigned i;
1050*404b540aSrobert tree load, store;
1051*404b540aSrobert struct fmt_data fmt_data;
1052*404b540aSrobert
1053*404b540aSrobert if (dump_file && (dump_flags & TDF_DETAILS))
1054*404b540aSrobert {
1055*404b540aSrobert fprintf (dump_file, "Executing store motion of ");
1056*404b540aSrobert print_generic_expr (dump_file, ref, 0);
1057*404b540aSrobert fprintf (dump_file, " from loop %d\n", loop->num);
1058*404b540aSrobert }
1059*404b540aSrobert
1060*404b540aSrobert tmp_var = make_rename_temp (TREE_TYPE (ref),
1061*404b540aSrobert get_lsm_tmp_name (ref));
1062*404b540aSrobert
1063*404b540aSrobert fmt_data.loop = loop;
1064*404b540aSrobert fmt_data.orig_loop = loop;
1065*404b540aSrobert for_each_index (&ref, force_move_till, &fmt_data);
1066*404b540aSrobert
1067*404b540aSrobert rewrite_mem_refs (tmp_var, mem_refs);
1068*404b540aSrobert for (aref = mem_refs; aref; aref = aref->next)
1069*404b540aSrobert if (LIM_DATA (aref->stmt))
1070*404b540aSrobert LIM_DATA (aref->stmt)->sm_done = true;
1071*404b540aSrobert
1072*404b540aSrobert /* Emit the load & stores. */
1073*404b540aSrobert load = build2 (MODIFY_EXPR, void_type_node, tmp_var, ref);
1074*404b540aSrobert get_stmt_ann (load)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
1075*404b540aSrobert LIM_DATA (load)->max_loop = loop;
1076*404b540aSrobert LIM_DATA (load)->tgt_loop = loop;
1077*404b540aSrobert
1078*404b540aSrobert /* Put this into the latch, so that we are sure it will be processed after
1079*404b540aSrobert all dependencies. */
1080*404b540aSrobert bsi_insert_on_edge (loop_latch_edge (loop), load);
1081*404b540aSrobert
1082*404b540aSrobert for (i = 0; i < n_exits; i++)
1083*404b540aSrobert {
1084*404b540aSrobert store = build2 (MODIFY_EXPR, void_type_node,
1085*404b540aSrobert unshare_expr (ref), tmp_var);
1086*404b540aSrobert bsi_insert_on_edge (exits[i], store);
1087*404b540aSrobert }
1088*404b540aSrobert }
1089*404b540aSrobert
1090*404b540aSrobert /* Check whether memory reference REF can be hoisted out of the LOOP. If this
1091*404b540aSrobert is true, prepare the statements that load the value of the memory reference
1092*404b540aSrobert to a temporary variable in the loop preheader, store it back on the loop
1093*404b540aSrobert exits, and replace all the references inside LOOP by this temporary variable.
1094*404b540aSrobert LOOP has N_EXITS stored in EXITS. CLOBBERED_VOPS is the bitmap of virtual
1095*404b540aSrobert operands that are clobbered by a call or accessed through multiple references
1096*404b540aSrobert in loop. */
1097*404b540aSrobert
1098*404b540aSrobert static void
determine_lsm_ref(struct loop * loop,edge * exits,unsigned n_exits,bitmap clobbered_vops,struct mem_ref * ref)1099*404b540aSrobert determine_lsm_ref (struct loop *loop, edge *exits, unsigned n_exits,
1100*404b540aSrobert bitmap clobbered_vops, struct mem_ref *ref)
1101*404b540aSrobert {
1102*404b540aSrobert struct mem_ref_loc *aref;
1103*404b540aSrobert struct loop *must_exec;
1104*404b540aSrobert
1105*404b540aSrobert /* In case the memory is not stored to, there is nothing for SM to do. */
1106*404b540aSrobert if (!ref->is_stored)
1107*404b540aSrobert return;
1108*404b540aSrobert
1109*404b540aSrobert /* If the reference is aliased with any different ref, or killed by call
1110*404b540aSrobert in function, then fail. */
1111*404b540aSrobert if (bitmap_intersect_p (ref->vops, clobbered_vops))
1112*404b540aSrobert return;
1113*404b540aSrobert
1114*404b540aSrobert if (tree_could_trap_p (ref->mem))
1115*404b540aSrobert {
1116*404b540aSrobert /* If the memory access is unsafe (i.e. it might trap), ensure that some
1117*404b540aSrobert of the statements in that it occurs is always executed when the loop
1118*404b540aSrobert is entered. This way we know that by moving the load from the
1119*404b540aSrobert reference out of the loop we will not cause the error that would not
1120*404b540aSrobert occur otherwise.
1121*404b540aSrobert
1122*404b540aSrobert TODO -- in fact we would like to check for anticipability of the
1123*404b540aSrobert reference, i.e. that on each path from loop entry to loop exit at
1124*404b540aSrobert least one of the statements containing the memory reference is
1125*404b540aSrobert executed. */
1126*404b540aSrobert
1127*404b540aSrobert for (aref = ref->locs; aref; aref = aref->next)
1128*404b540aSrobert {
1129*404b540aSrobert if (!LIM_DATA (aref->stmt))
1130*404b540aSrobert continue;
1131*404b540aSrobert
1132*404b540aSrobert must_exec = LIM_DATA (aref->stmt)->always_executed_in;
1133*404b540aSrobert if (!must_exec)
1134*404b540aSrobert continue;
1135*404b540aSrobert
1136*404b540aSrobert if (must_exec == loop
1137*404b540aSrobert || flow_loop_nested_p (must_exec, loop))
1138*404b540aSrobert break;
1139*404b540aSrobert }
1140*404b540aSrobert
1141*404b540aSrobert if (!aref)
1142*404b540aSrobert return;
1143*404b540aSrobert }
1144*404b540aSrobert
1145*404b540aSrobert schedule_sm (loop, exits, n_exits, ref->mem, ref->locs);
1146*404b540aSrobert }
1147*404b540aSrobert
1148*404b540aSrobert /* Hoists memory references MEM_REFS out of LOOP. CLOBBERED_VOPS is the list
1149*404b540aSrobert of vops clobbered by call in loop or accessed by multiple memory references.
1150*404b540aSrobert EXITS is the list of N_EXITS exit edges of the LOOP. */
1151*404b540aSrobert
1152*404b540aSrobert static void
hoist_memory_references(struct loop * loop,struct mem_ref * mem_refs,bitmap clobbered_vops,edge * exits,unsigned n_exits)1153*404b540aSrobert hoist_memory_references (struct loop *loop, struct mem_ref *mem_refs,
1154*404b540aSrobert bitmap clobbered_vops, edge *exits, unsigned n_exits)
1155*404b540aSrobert {
1156*404b540aSrobert struct mem_ref *ref;
1157*404b540aSrobert
1158*404b540aSrobert for (ref = mem_refs; ref; ref = ref->next)
1159*404b540aSrobert determine_lsm_ref (loop, exits, n_exits, clobbered_vops, ref);
1160*404b540aSrobert }
1161*404b540aSrobert
1162*404b540aSrobert /* Checks whether LOOP (with N_EXITS exits stored in EXITS array) is suitable
1163*404b540aSrobert for a store motion optimization (i.e. whether we can insert statement
1164*404b540aSrobert on its exits). */
1165*404b540aSrobert
1166*404b540aSrobert static bool
loop_suitable_for_sm(struct loop * loop ATTRIBUTE_UNUSED,edge * exits,unsigned n_exits)1167*404b540aSrobert loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED, edge *exits,
1168*404b540aSrobert unsigned n_exits)
1169*404b540aSrobert {
1170*404b540aSrobert unsigned i;
1171*404b540aSrobert
1172*404b540aSrobert for (i = 0; i < n_exits; i++)
1173*404b540aSrobert if (exits[i]->flags & EDGE_ABNORMAL)
1174*404b540aSrobert return false;
1175*404b540aSrobert
1176*404b540aSrobert return true;
1177*404b540aSrobert }
1178*404b540aSrobert
1179*404b540aSrobert /* A hash function for struct mem_ref object OBJ. */
1180*404b540aSrobert
1181*404b540aSrobert static hashval_t
memref_hash(const void * obj)1182*404b540aSrobert memref_hash (const void *obj)
1183*404b540aSrobert {
1184*404b540aSrobert const struct mem_ref *mem = obj;
1185*404b540aSrobert
1186*404b540aSrobert return mem->hash;
1187*404b540aSrobert }
1188*404b540aSrobert
1189*404b540aSrobert /* An equality function for struct mem_ref object OBJ1 with
1190*404b540aSrobert memory reference OBJ2. */
1191*404b540aSrobert
1192*404b540aSrobert static int
memref_eq(const void * obj1,const void * obj2)1193*404b540aSrobert memref_eq (const void *obj1, const void *obj2)
1194*404b540aSrobert {
1195*404b540aSrobert const struct mem_ref *mem1 = obj1;
1196*404b540aSrobert
1197*404b540aSrobert return operand_equal_p (mem1->mem, (tree) obj2, 0);
1198*404b540aSrobert }
1199*404b540aSrobert
1200*404b540aSrobert /* Gathers memory references in statement STMT in LOOP, storing the
1201*404b540aSrobert information about them in MEM_REFS hash table. Note vops accessed through
1202*404b540aSrobert unrecognized statements in CLOBBERED_VOPS. The newly created references
1203*404b540aSrobert are also stored to MEM_REF_LIST. */
1204*404b540aSrobert
1205*404b540aSrobert static void
gather_mem_refs_stmt(struct loop * loop,htab_t mem_refs,bitmap clobbered_vops,tree stmt,struct mem_ref ** mem_ref_list)1206*404b540aSrobert gather_mem_refs_stmt (struct loop *loop, htab_t mem_refs,
1207*404b540aSrobert bitmap clobbered_vops, tree stmt,
1208*404b540aSrobert struct mem_ref **mem_ref_list)
1209*404b540aSrobert {
1210*404b540aSrobert tree *lhs, *rhs, *mem = NULL;
1211*404b540aSrobert hashval_t hash;
1212*404b540aSrobert PTR *slot;
1213*404b540aSrobert struct mem_ref *ref = NULL;
1214*404b540aSrobert ssa_op_iter oi;
1215*404b540aSrobert tree vname;
1216*404b540aSrobert bool is_stored;
1217*404b540aSrobert
1218*404b540aSrobert if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
1219*404b540aSrobert return;
1220*404b540aSrobert
1221*404b540aSrobert /* Recognize MEM = (SSA_NAME | invariant) and SSA_NAME = MEM patterns. */
1222*404b540aSrobert if (TREE_CODE (stmt) != MODIFY_EXPR)
1223*404b540aSrobert goto fail;
1224*404b540aSrobert
1225*404b540aSrobert lhs = &TREE_OPERAND (stmt, 0);
1226*404b540aSrobert rhs = &TREE_OPERAND (stmt, 1);
1227*404b540aSrobert
1228*404b540aSrobert if (TREE_CODE (*lhs) == SSA_NAME)
1229*404b540aSrobert {
1230*404b540aSrobert if (!is_gimple_addressable (*rhs))
1231*404b540aSrobert goto fail;
1232*404b540aSrobert
1233*404b540aSrobert mem = rhs;
1234*404b540aSrobert is_stored = false;
1235*404b540aSrobert }
1236*404b540aSrobert else if (TREE_CODE (*rhs) == SSA_NAME
1237*404b540aSrobert || is_gimple_min_invariant (*rhs))
1238*404b540aSrobert {
1239*404b540aSrobert mem = lhs;
1240*404b540aSrobert is_stored = true;
1241*404b540aSrobert }
1242*404b540aSrobert else
1243*404b540aSrobert goto fail;
1244*404b540aSrobert
1245*404b540aSrobert /* If we cannot create an SSA name for the result, give up. */
1246*404b540aSrobert if (!is_gimple_reg_type (TREE_TYPE (*mem))
1247*404b540aSrobert || TREE_THIS_VOLATILE (*mem))
1248*404b540aSrobert goto fail;
1249*404b540aSrobert
1250*404b540aSrobert /* If we cannot move the reference out of the loop, fail. */
1251*404b540aSrobert if (!for_each_index (mem, may_move_till, loop))
1252*404b540aSrobert goto fail;
1253*404b540aSrobert
1254*404b540aSrobert hash = iterative_hash_expr (*mem, 0);
1255*404b540aSrobert slot = htab_find_slot_with_hash (mem_refs, *mem, hash, INSERT);
1256*404b540aSrobert
1257*404b540aSrobert if (*slot)
1258*404b540aSrobert ref = *slot;
1259*404b540aSrobert else
1260*404b540aSrobert {
1261*404b540aSrobert ref = XNEW (struct mem_ref);
1262*404b540aSrobert ref->mem = *mem;
1263*404b540aSrobert ref->hash = hash;
1264*404b540aSrobert ref->locs = NULL;
1265*404b540aSrobert ref->is_stored = false;
1266*404b540aSrobert ref->vops = BITMAP_ALLOC (NULL);
1267*404b540aSrobert ref->next = *mem_ref_list;
1268*404b540aSrobert *mem_ref_list = ref;
1269*404b540aSrobert *slot = ref;
1270*404b540aSrobert }
1271*404b540aSrobert ref->is_stored |= is_stored;
1272*404b540aSrobert
1273*404b540aSrobert FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi,
1274*404b540aSrobert SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
1275*404b540aSrobert bitmap_set_bit (ref->vops, DECL_UID (SSA_NAME_VAR (vname)));
1276*404b540aSrobert record_mem_ref_loc (&ref->locs, stmt, mem);
1277*404b540aSrobert return;
1278*404b540aSrobert
1279*404b540aSrobert fail:
1280*404b540aSrobert FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi,
1281*404b540aSrobert SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
1282*404b540aSrobert bitmap_set_bit (clobbered_vops, DECL_UID (SSA_NAME_VAR (vname)));
1283*404b540aSrobert }
1284*404b540aSrobert
1285*404b540aSrobert /* Gathers memory references in LOOP. Notes vops accessed through unrecognized
1286*404b540aSrobert statements in CLOBBERED_VOPS. The list of the references found by
1287*404b540aSrobert the function is returned. */
1288*404b540aSrobert
1289*404b540aSrobert static struct mem_ref *
gather_mem_refs(struct loop * loop,bitmap clobbered_vops)1290*404b540aSrobert gather_mem_refs (struct loop *loop, bitmap clobbered_vops)
1291*404b540aSrobert {
1292*404b540aSrobert basic_block *body = get_loop_body (loop);
1293*404b540aSrobert block_stmt_iterator bsi;
1294*404b540aSrobert unsigned i;
1295*404b540aSrobert struct mem_ref *mem_ref_list = NULL;
1296*404b540aSrobert htab_t mem_refs = htab_create (100, memref_hash, memref_eq, NULL);
1297*404b540aSrobert
1298*404b540aSrobert for (i = 0; i < loop->num_nodes; i++)
1299*404b540aSrobert {
1300*404b540aSrobert for (bsi = bsi_start (body[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1301*404b540aSrobert gather_mem_refs_stmt (loop, mem_refs, clobbered_vops, bsi_stmt (bsi),
1302*404b540aSrobert &mem_ref_list);
1303*404b540aSrobert }
1304*404b540aSrobert
1305*404b540aSrobert free (body);
1306*404b540aSrobert
1307*404b540aSrobert htab_delete (mem_refs);
1308*404b540aSrobert return mem_ref_list;
1309*404b540aSrobert }
1310*404b540aSrobert
1311*404b540aSrobert /* Finds the vops accessed by more than one of the memory references described
1312*404b540aSrobert in MEM_REFS and marks them in CLOBBERED_VOPS. */
1313*404b540aSrobert
1314*404b540aSrobert static void
find_more_ref_vops(struct mem_ref * mem_refs,bitmap clobbered_vops)1315*404b540aSrobert find_more_ref_vops (struct mem_ref *mem_refs, bitmap clobbered_vops)
1316*404b540aSrobert {
1317*404b540aSrobert bitmap_head tmp, all_vops;
1318*404b540aSrobert struct mem_ref *ref;
1319*404b540aSrobert
1320*404b540aSrobert bitmap_initialize (&tmp, &bitmap_default_obstack);
1321*404b540aSrobert bitmap_initialize (&all_vops, &bitmap_default_obstack);
1322*404b540aSrobert
1323*404b540aSrobert for (ref = mem_refs; ref; ref = ref->next)
1324*404b540aSrobert {
1325*404b540aSrobert /* The vops that are already in all_vops are accessed by more than
1326*404b540aSrobert one memory reference. */
1327*404b540aSrobert bitmap_and (&tmp, &all_vops, ref->vops);
1328*404b540aSrobert bitmap_ior_into (clobbered_vops, &tmp);
1329*404b540aSrobert bitmap_clear (&tmp);
1330*404b540aSrobert
1331*404b540aSrobert bitmap_ior_into (&all_vops, ref->vops);
1332*404b540aSrobert }
1333*404b540aSrobert
1334*404b540aSrobert bitmap_clear (&all_vops);
1335*404b540aSrobert }
1336*404b540aSrobert
1337*404b540aSrobert /* Releases the memory occupied by REF. */
1338*404b540aSrobert
1339*404b540aSrobert static void
free_mem_ref(struct mem_ref * ref)1340*404b540aSrobert free_mem_ref (struct mem_ref *ref)
1341*404b540aSrobert {
1342*404b540aSrobert free_mem_ref_locs (ref->locs);
1343*404b540aSrobert BITMAP_FREE (ref->vops);
1344*404b540aSrobert free (ref);
1345*404b540aSrobert }
1346*404b540aSrobert
1347*404b540aSrobert /* Releases the memory occupied by REFS. */
1348*404b540aSrobert
1349*404b540aSrobert static void
free_mem_refs(struct mem_ref * refs)1350*404b540aSrobert free_mem_refs (struct mem_ref *refs)
1351*404b540aSrobert {
1352*404b540aSrobert struct mem_ref *ref, *next;
1353*404b540aSrobert
1354*404b540aSrobert for (ref = refs; ref; ref = next)
1355*404b540aSrobert {
1356*404b540aSrobert next = ref->next;
1357*404b540aSrobert free_mem_ref (ref);
1358*404b540aSrobert }
1359*404b540aSrobert }
1360*404b540aSrobert
1361*404b540aSrobert /* Try to perform store motion for all memory references modified inside
1362*404b540aSrobert LOOP. */
1363*404b540aSrobert
1364*404b540aSrobert static void
determine_lsm_loop(struct loop * loop)1365*404b540aSrobert determine_lsm_loop (struct loop *loop)
1366*404b540aSrobert {
1367*404b540aSrobert unsigned n_exits;
1368*404b540aSrobert edge *exits = get_loop_exit_edges (loop, &n_exits);
1369*404b540aSrobert bitmap clobbered_vops;
1370*404b540aSrobert struct mem_ref *mem_refs;
1371*404b540aSrobert
1372*404b540aSrobert if (!loop_suitable_for_sm (loop, exits, n_exits))
1373*404b540aSrobert {
1374*404b540aSrobert free (exits);
1375*404b540aSrobert return;
1376*404b540aSrobert }
1377*404b540aSrobert
1378*404b540aSrobert /* Find the memory references in LOOP. */
1379*404b540aSrobert clobbered_vops = BITMAP_ALLOC (NULL);
1380*404b540aSrobert mem_refs = gather_mem_refs (loop, clobbered_vops);
1381*404b540aSrobert
1382*404b540aSrobert /* Find the vops that are used for more than one reference. */
1383*404b540aSrobert find_more_ref_vops (mem_refs, clobbered_vops);
1384*404b540aSrobert
1385*404b540aSrobert /* Hoist all suitable memory references. */
1386*404b540aSrobert hoist_memory_references (loop, mem_refs, clobbered_vops, exits, n_exits);
1387*404b540aSrobert
1388*404b540aSrobert free_mem_refs (mem_refs);
1389*404b540aSrobert free (exits);
1390*404b540aSrobert BITMAP_FREE (clobbered_vops);
1391*404b540aSrobert }
1392*404b540aSrobert
1393*404b540aSrobert /* Try to perform store motion for all memory references modified inside
1394*404b540aSrobert any of LOOPS. */
1395*404b540aSrobert
1396*404b540aSrobert static void
determine_lsm(struct loops * loops)1397*404b540aSrobert determine_lsm (struct loops *loops)
1398*404b540aSrobert {
1399*404b540aSrobert struct loop *loop;
1400*404b540aSrobert
1401*404b540aSrobert if (!loops->tree_root->inner)
1402*404b540aSrobert return;
1403*404b540aSrobert
1404*404b540aSrobert /* Pass the loops from the outermost and perform the store motion as
1405*404b540aSrobert suitable. */
1406*404b540aSrobert
1407*404b540aSrobert loop = loops->tree_root->inner;
1408*404b540aSrobert while (1)
1409*404b540aSrobert {
1410*404b540aSrobert determine_lsm_loop (loop);
1411*404b540aSrobert
1412*404b540aSrobert if (loop->inner)
1413*404b540aSrobert {
1414*404b540aSrobert loop = loop->inner;
1415*404b540aSrobert continue;
1416*404b540aSrobert }
1417*404b540aSrobert while (!loop->next)
1418*404b540aSrobert {
1419*404b540aSrobert loop = loop->outer;
1420*404b540aSrobert if (loop == loops->tree_root)
1421*404b540aSrobert {
1422*404b540aSrobert loop_commit_inserts ();
1423*404b540aSrobert return;
1424*404b540aSrobert }
1425*404b540aSrobert }
1426*404b540aSrobert loop = loop->next;
1427*404b540aSrobert }
1428*404b540aSrobert }
1429*404b540aSrobert
1430*404b540aSrobert /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
1431*404b540aSrobert for each such basic block bb records the outermost loop for that execution
1432*404b540aSrobert of its header implies execution of bb. CONTAINS_CALL is the bitmap of
1433*404b540aSrobert blocks that contain a nonpure call. */
1434*404b540aSrobert
1435*404b540aSrobert static void
fill_always_executed_in(struct loop * loop,sbitmap contains_call)1436*404b540aSrobert fill_always_executed_in (struct loop *loop, sbitmap contains_call)
1437*404b540aSrobert {
1438*404b540aSrobert basic_block bb = NULL, *bbs, last = NULL;
1439*404b540aSrobert unsigned i;
1440*404b540aSrobert edge e;
1441*404b540aSrobert struct loop *inn_loop = loop;
1442*404b540aSrobert
1443*404b540aSrobert if (!loop->header->aux)
1444*404b540aSrobert {
1445*404b540aSrobert bbs = get_loop_body_in_dom_order (loop);
1446*404b540aSrobert
1447*404b540aSrobert for (i = 0; i < loop->num_nodes; i++)
1448*404b540aSrobert {
1449*404b540aSrobert edge_iterator ei;
1450*404b540aSrobert bb = bbs[i];
1451*404b540aSrobert
1452*404b540aSrobert if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1453*404b540aSrobert last = bb;
1454*404b540aSrobert
1455*404b540aSrobert if (TEST_BIT (contains_call, bb->index))
1456*404b540aSrobert break;
1457*404b540aSrobert
1458*404b540aSrobert FOR_EACH_EDGE (e, ei, bb->succs)
1459*404b540aSrobert if (!flow_bb_inside_loop_p (loop, e->dest))
1460*404b540aSrobert break;
1461*404b540aSrobert if (e)
1462*404b540aSrobert break;
1463*404b540aSrobert
1464*404b540aSrobert /* A loop might be infinite (TODO use simple loop analysis
1465*404b540aSrobert to disprove this if possible). */
1466*404b540aSrobert if (bb->flags & BB_IRREDUCIBLE_LOOP)
1467*404b540aSrobert break;
1468*404b540aSrobert
1469*404b540aSrobert if (!flow_bb_inside_loop_p (inn_loop, bb))
1470*404b540aSrobert break;
1471*404b540aSrobert
1472*404b540aSrobert if (bb->loop_father->header == bb)
1473*404b540aSrobert {
1474*404b540aSrobert if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1475*404b540aSrobert break;
1476*404b540aSrobert
1477*404b540aSrobert /* In a loop that is always entered we may proceed anyway.
1478*404b540aSrobert But record that we entered it and stop once we leave it. */
1479*404b540aSrobert inn_loop = bb->loop_father;
1480*404b540aSrobert }
1481*404b540aSrobert }
1482*404b540aSrobert
1483*404b540aSrobert while (1)
1484*404b540aSrobert {
1485*404b540aSrobert last->aux = loop;
1486*404b540aSrobert if (last == loop->header)
1487*404b540aSrobert break;
1488*404b540aSrobert last = get_immediate_dominator (CDI_DOMINATORS, last);
1489*404b540aSrobert }
1490*404b540aSrobert
1491*404b540aSrobert free (bbs);
1492*404b540aSrobert }
1493*404b540aSrobert
1494*404b540aSrobert for (loop = loop->inner; loop; loop = loop->next)
1495*404b540aSrobert fill_always_executed_in (loop, contains_call);
1496*404b540aSrobert }
1497*404b540aSrobert
1498*404b540aSrobert /* Compute the global information needed by the loop invariant motion pass.
1499*404b540aSrobert LOOPS is the loop tree. */
1500*404b540aSrobert
1501*404b540aSrobert static void
tree_ssa_lim_initialize(struct loops * loops)1502*404b540aSrobert tree_ssa_lim_initialize (struct loops *loops)
1503*404b540aSrobert {
1504*404b540aSrobert sbitmap contains_call = sbitmap_alloc (last_basic_block);
1505*404b540aSrobert block_stmt_iterator bsi;
1506*404b540aSrobert struct loop *loop;
1507*404b540aSrobert basic_block bb;
1508*404b540aSrobert
1509*404b540aSrobert sbitmap_zero (contains_call);
1510*404b540aSrobert FOR_EACH_BB (bb)
1511*404b540aSrobert {
1512*404b540aSrobert for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1513*404b540aSrobert {
1514*404b540aSrobert if (nonpure_call_p (bsi_stmt (bsi)))
1515*404b540aSrobert break;
1516*404b540aSrobert }
1517*404b540aSrobert
1518*404b540aSrobert if (!bsi_end_p (bsi))
1519*404b540aSrobert SET_BIT (contains_call, bb->index);
1520*404b540aSrobert }
1521*404b540aSrobert
1522*404b540aSrobert for (loop = loops->tree_root->inner; loop; loop = loop->next)
1523*404b540aSrobert fill_always_executed_in (loop, contains_call);
1524*404b540aSrobert
1525*404b540aSrobert sbitmap_free (contains_call);
1526*404b540aSrobert }
1527*404b540aSrobert
1528*404b540aSrobert /* Cleans up after the invariant motion pass. */
1529*404b540aSrobert
1530*404b540aSrobert static void
tree_ssa_lim_finalize(void)1531*404b540aSrobert tree_ssa_lim_finalize (void)
1532*404b540aSrobert {
1533*404b540aSrobert basic_block bb;
1534*404b540aSrobert
1535*404b540aSrobert FOR_EACH_BB (bb)
1536*404b540aSrobert {
1537*404b540aSrobert bb->aux = NULL;
1538*404b540aSrobert }
1539*404b540aSrobert }
1540*404b540aSrobert
1541*404b540aSrobert /* Moves invariants from LOOPS. Only "expensive" invariants are moved out --
1542*404b540aSrobert i.e. those that are likely to be win regardless of the register pressure. */
1543*404b540aSrobert
1544*404b540aSrobert void
tree_ssa_lim(struct loops * loops)1545*404b540aSrobert tree_ssa_lim (struct loops *loops)
1546*404b540aSrobert {
1547*404b540aSrobert tree_ssa_lim_initialize (loops);
1548*404b540aSrobert
1549*404b540aSrobert /* For each statement determine the outermost loop in that it is
1550*404b540aSrobert invariant and cost for computing the invariant. */
1551*404b540aSrobert determine_invariantness ();
1552*404b540aSrobert
1553*404b540aSrobert /* For each memory reference determine whether it is possible to hoist it
1554*404b540aSrobert out of the loop. Force the necessary invariants to be moved out of the
1555*404b540aSrobert loops as well. */
1556*404b540aSrobert determine_lsm (loops);
1557*404b540aSrobert
1558*404b540aSrobert /* Move the expressions that are expensive enough. */
1559*404b540aSrobert move_computations ();
1560*404b540aSrobert
1561*404b540aSrobert tree_ssa_lim_finalize ();
1562*404b540aSrobert }
1563