xref: /openbsd-src/gnu/gcc/gcc/tree-ssa-loop-im.c (revision 404b540a9034ac75a6199ad1a32d1bbc7a0d4210)
1*404b540aSrobert /* Loop invariant motion.
2*404b540aSrobert    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3*404b540aSrobert 
4*404b540aSrobert This file is part of GCC.
5*404b540aSrobert 
6*404b540aSrobert GCC is free software; you can redistribute it and/or modify it
7*404b540aSrobert under the terms of the GNU General Public License as published by the
8*404b540aSrobert Free Software Foundation; either version 2, or (at your option) any
9*404b540aSrobert later version.
10*404b540aSrobert 
11*404b540aSrobert GCC is distributed in the hope that it will be useful, but WITHOUT
12*404b540aSrobert ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13*404b540aSrobert FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14*404b540aSrobert for more details.
15*404b540aSrobert 
16*404b540aSrobert You should have received a copy of the GNU General Public License
17*404b540aSrobert along with GCC; see the file COPYING.  If not, write to the Free
18*404b540aSrobert Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19*404b540aSrobert 02110-1301, USA.  */
20*404b540aSrobert 
21*404b540aSrobert #include "config.h"
22*404b540aSrobert #include "system.h"
23*404b540aSrobert #include "coretypes.h"
24*404b540aSrobert #include "tm.h"
25*404b540aSrobert #include "tree.h"
26*404b540aSrobert #include "rtl.h"
27*404b540aSrobert #include "tm_p.h"
28*404b540aSrobert #include "hard-reg-set.h"
29*404b540aSrobert #include "basic-block.h"
30*404b540aSrobert #include "output.h"
31*404b540aSrobert #include "diagnostic.h"
32*404b540aSrobert #include "tree-flow.h"
33*404b540aSrobert #include "tree-dump.h"
34*404b540aSrobert #include "timevar.h"
35*404b540aSrobert #include "cfgloop.h"
36*404b540aSrobert #include "domwalk.h"
37*404b540aSrobert #include "params.h"
38*404b540aSrobert #include "tree-pass.h"
39*404b540aSrobert #include "flags.h"
40*404b540aSrobert #include "real.h"
41*404b540aSrobert #include "hashtab.h"
42*404b540aSrobert 
43*404b540aSrobert /* TODO:  Support for predicated code motion.  I.e.
44*404b540aSrobert 
45*404b540aSrobert    while (1)
46*404b540aSrobert      {
47*404b540aSrobert        if (cond)
48*404b540aSrobert 	 {
49*404b540aSrobert 	   a = inv;
50*404b540aSrobert 	   something;
51*404b540aSrobert 	 }
52*404b540aSrobert      }
53*404b540aSrobert 
54*404b540aSrobert    Where COND and INV are is invariants, but evaluating INV may trap or be
55*404b540aSrobert    invalid from some other reason if !COND.  This may be transformed to
56*404b540aSrobert 
57*404b540aSrobert    if (cond)
58*404b540aSrobert      a = inv;
59*404b540aSrobert    while (1)
60*404b540aSrobert      {
61*404b540aSrobert        if (cond)
62*404b540aSrobert 	 something;
63*404b540aSrobert      }  */
64*404b540aSrobert 
65*404b540aSrobert /* A type for the list of statements that have to be moved in order to be able
66*404b540aSrobert    to hoist an invariant computation.  */
67*404b540aSrobert 
68*404b540aSrobert struct depend
69*404b540aSrobert {
70*404b540aSrobert   tree stmt;
71*404b540aSrobert   struct depend *next;
72*404b540aSrobert };
73*404b540aSrobert 
74*404b540aSrobert /* The auxiliary data kept for each statement.  */
75*404b540aSrobert 
76*404b540aSrobert struct lim_aux_data
77*404b540aSrobert {
78*404b540aSrobert   struct loop *max_loop;	/* The outermost loop in that the statement
79*404b540aSrobert 				   is invariant.  */
80*404b540aSrobert 
81*404b540aSrobert   struct loop *tgt_loop;	/* The loop out of that we want to move the
82*404b540aSrobert 				   invariant.  */
83*404b540aSrobert 
84*404b540aSrobert   struct loop *always_executed_in;
85*404b540aSrobert 				/* The outermost loop for that we are sure
86*404b540aSrobert 				   the statement is executed if the loop
87*404b540aSrobert 				   is entered.  */
88*404b540aSrobert 
89*404b540aSrobert   bool sm_done;			/* True iff the store motion for a memory
90*404b540aSrobert 				   reference in the statement has already
91*404b540aSrobert 				   been executed.  */
92*404b540aSrobert 
93*404b540aSrobert   unsigned cost;		/* Cost of the computation performed by the
94*404b540aSrobert 				   statement.  */
95*404b540aSrobert 
96*404b540aSrobert   struct depend *depends;	/* List of statements that must be also hoisted
97*404b540aSrobert 				   out of the loop when this statement is
98*404b540aSrobert 				   hoisted; i.e. those that define the operands
99*404b540aSrobert 				   of the statement and are inside of the
100*404b540aSrobert 				   MAX_LOOP loop.  */
101*404b540aSrobert };
102*404b540aSrobert 
103*404b540aSrobert #define LIM_DATA(STMT) (TREE_CODE (STMT) == PHI_NODE \
104*404b540aSrobert 			? NULL \
105*404b540aSrobert 			: (struct lim_aux_data *) (stmt_ann (STMT)->common.aux))
106*404b540aSrobert 
107*404b540aSrobert /* Description of a memory reference location for store motion.  */
108*404b540aSrobert 
109*404b540aSrobert struct mem_ref_loc
110*404b540aSrobert {
111*404b540aSrobert   tree *ref;			/* The reference itself.  */
112*404b540aSrobert   tree stmt;			/* The statement in that it occurs.  */
113*404b540aSrobert   struct mem_ref_loc *next;	/* Next use in the chain.  */
114*404b540aSrobert };
115*404b540aSrobert 
116*404b540aSrobert /* Description of a memory reference for store motion.  */
117*404b540aSrobert 
118*404b540aSrobert struct mem_ref
119*404b540aSrobert {
120*404b540aSrobert   tree mem;			/* The memory itself.  */
121*404b540aSrobert   hashval_t hash;		/* Its hash value.  */
122*404b540aSrobert   bool is_stored;		/* True if there is a store to the location
123*404b540aSrobert 				   in the loop.  */
124*404b540aSrobert   struct mem_ref_loc *locs;	/* The locations where it is found.  */
125*404b540aSrobert   bitmap vops;			/* Vops corresponding to this memory
126*404b540aSrobert 				   location.  */
127*404b540aSrobert   struct mem_ref *next;		/* Next memory reference in the list.
128*404b540aSrobert 				   Memory references are stored in a hash
129*404b540aSrobert 				   table, but the hash function depends
130*404b540aSrobert 				   on values of pointers. Thus we cannot use
131*404b540aSrobert 				   htab_traverse, since then we would get
132*404b540aSrobert 				   miscompares during bootstrap (although the
133*404b540aSrobert 				   produced code would be correct).  */
134*404b540aSrobert };
135*404b540aSrobert 
136*404b540aSrobert /* Minimum cost of an expensive expression.  */
137*404b540aSrobert #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
138*404b540aSrobert 
139*404b540aSrobert /* The outermost loop for that execution of the header guarantees that the
140*404b540aSrobert    block will be executed.  */
141*404b540aSrobert #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
142*404b540aSrobert 
143*404b540aSrobert /* Calls CBCK for each index in memory reference ADDR_P.  There are two
144*404b540aSrobert    kinds situations handled; in each of these cases, the memory reference
145*404b540aSrobert    and DATA are passed to the callback:
146*404b540aSrobert 
147*404b540aSrobert    Access to an array: ARRAY_{RANGE_}REF (base, index).  In this case we also
148*404b540aSrobert    pass the pointer to the index to the callback.
149*404b540aSrobert 
150*404b540aSrobert    Pointer dereference: INDIRECT_REF (addr).  In this case we also pass the
151*404b540aSrobert    pointer to addr to the callback.
152*404b540aSrobert 
153*404b540aSrobert    If the callback returns false, the whole search stops and false is returned.
154*404b540aSrobert    Otherwise the function returns true after traversing through the whole
155*404b540aSrobert    reference *ADDR_P.  */
156*404b540aSrobert 
157*404b540aSrobert bool
for_each_index(tree * addr_p,bool (* cbck)(tree,tree *,void *),void * data)158*404b540aSrobert for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
159*404b540aSrobert {
160*404b540aSrobert   tree *nxt, *idx;
161*404b540aSrobert 
162*404b540aSrobert   for (; ; addr_p = nxt)
163*404b540aSrobert     {
164*404b540aSrobert       switch (TREE_CODE (*addr_p))
165*404b540aSrobert 	{
166*404b540aSrobert 	case SSA_NAME:
167*404b540aSrobert 	  return cbck (*addr_p, addr_p, data);
168*404b540aSrobert 
169*404b540aSrobert 	case MISALIGNED_INDIRECT_REF:
170*404b540aSrobert 	case ALIGN_INDIRECT_REF:
171*404b540aSrobert 	case INDIRECT_REF:
172*404b540aSrobert 	  nxt = &TREE_OPERAND (*addr_p, 0);
173*404b540aSrobert 	  return cbck (*addr_p, nxt, data);
174*404b540aSrobert 
175*404b540aSrobert 	case BIT_FIELD_REF:
176*404b540aSrobert 	case VIEW_CONVERT_EXPR:
177*404b540aSrobert 	case REALPART_EXPR:
178*404b540aSrobert 	case IMAGPART_EXPR:
179*404b540aSrobert 	  nxt = &TREE_OPERAND (*addr_p, 0);
180*404b540aSrobert 	  break;
181*404b540aSrobert 
182*404b540aSrobert 	case COMPONENT_REF:
183*404b540aSrobert 	  /* If the component has varying offset, it behaves like index
184*404b540aSrobert 	     as well.  */
185*404b540aSrobert 	  idx = &TREE_OPERAND (*addr_p, 2);
186*404b540aSrobert 	  if (*idx
187*404b540aSrobert 	      && !cbck (*addr_p, idx, data))
188*404b540aSrobert 	    return false;
189*404b540aSrobert 
190*404b540aSrobert 	  nxt = &TREE_OPERAND (*addr_p, 0);
191*404b540aSrobert 	  break;
192*404b540aSrobert 
193*404b540aSrobert 	case ARRAY_REF:
194*404b540aSrobert 	case ARRAY_RANGE_REF:
195*404b540aSrobert 	  nxt = &TREE_OPERAND (*addr_p, 0);
196*404b540aSrobert 	  if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
197*404b540aSrobert 	    return false;
198*404b540aSrobert 	  break;
199*404b540aSrobert 
200*404b540aSrobert 	case VAR_DECL:
201*404b540aSrobert 	case PARM_DECL:
202*404b540aSrobert 	case STRING_CST:
203*404b540aSrobert 	case RESULT_DECL:
204*404b540aSrobert 	case VECTOR_CST:
205*404b540aSrobert 	case COMPLEX_CST:
206*404b540aSrobert 	case INTEGER_CST:
207*404b540aSrobert 	case REAL_CST:
208*404b540aSrobert 	  return true;
209*404b540aSrobert 
210*404b540aSrobert 	case TARGET_MEM_REF:
211*404b540aSrobert 	  idx = &TMR_BASE (*addr_p);
212*404b540aSrobert 	  if (*idx
213*404b540aSrobert 	      && !cbck (*addr_p, idx, data))
214*404b540aSrobert 	    return false;
215*404b540aSrobert 	  idx = &TMR_INDEX (*addr_p);
216*404b540aSrobert 	  if (*idx
217*404b540aSrobert 	      && !cbck (*addr_p, idx, data))
218*404b540aSrobert 	    return false;
219*404b540aSrobert 	  return true;
220*404b540aSrobert 
221*404b540aSrobert 	default:
222*404b540aSrobert     	  gcc_unreachable ();
223*404b540aSrobert 	}
224*404b540aSrobert     }
225*404b540aSrobert }
226*404b540aSrobert 
227*404b540aSrobert /* If it is possible to hoist the statement STMT unconditionally,
228*404b540aSrobert    returns MOVE_POSSIBLE.
229*404b540aSrobert    If it is possible to hoist the statement STMT, but we must avoid making
230*404b540aSrobert    it executed if it would not be executed in the original program (e.g.
231*404b540aSrobert    because it may trap), return MOVE_PRESERVE_EXECUTION.
232*404b540aSrobert    Otherwise return MOVE_IMPOSSIBLE.  */
233*404b540aSrobert 
234*404b540aSrobert enum move_pos
movement_possibility(tree stmt)235*404b540aSrobert movement_possibility (tree stmt)
236*404b540aSrobert {
237*404b540aSrobert   tree lhs, rhs;
238*404b540aSrobert 
239*404b540aSrobert   if (flag_unswitch_loops
240*404b540aSrobert       && TREE_CODE (stmt) == COND_EXPR)
241*404b540aSrobert     {
242*404b540aSrobert       /* If we perform unswitching, force the operands of the invariant
243*404b540aSrobert 	 condition to be moved out of the loop.  */
244*404b540aSrobert       return MOVE_POSSIBLE;
245*404b540aSrobert     }
246*404b540aSrobert 
247*404b540aSrobert   if (TREE_CODE (stmt) != MODIFY_EXPR)
248*404b540aSrobert     return MOVE_IMPOSSIBLE;
249*404b540aSrobert 
250*404b540aSrobert   if (stmt_ends_bb_p (stmt))
251*404b540aSrobert     return MOVE_IMPOSSIBLE;
252*404b540aSrobert 
253*404b540aSrobert   if (stmt_ann (stmt)->has_volatile_ops)
254*404b540aSrobert     return MOVE_IMPOSSIBLE;
255*404b540aSrobert 
256*404b540aSrobert   lhs = TREE_OPERAND (stmt, 0);
257*404b540aSrobert   if (TREE_CODE (lhs) == SSA_NAME
258*404b540aSrobert       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
259*404b540aSrobert     return MOVE_IMPOSSIBLE;
260*404b540aSrobert 
261*404b540aSrobert   rhs = TREE_OPERAND (stmt, 1);
262*404b540aSrobert 
263*404b540aSrobert   if (TREE_SIDE_EFFECTS (rhs))
264*404b540aSrobert     return MOVE_IMPOSSIBLE;
265*404b540aSrobert 
266*404b540aSrobert   if (TREE_CODE (lhs) != SSA_NAME
267*404b540aSrobert       || tree_could_trap_p (rhs))
268*404b540aSrobert     return MOVE_PRESERVE_EXECUTION;
269*404b540aSrobert 
270*404b540aSrobert   if (get_call_expr_in (stmt))
271*404b540aSrobert     {
272*404b540aSrobert       /* While pure or const call is guaranteed to have no side effects, we
273*404b540aSrobert 	 cannot move it arbitrarily.  Consider code like
274*404b540aSrobert 
275*404b540aSrobert 	 char *s = something ();
276*404b540aSrobert 
277*404b540aSrobert 	 while (1)
278*404b540aSrobert 	   {
279*404b540aSrobert 	     if (s)
280*404b540aSrobert 	       t = strlen (s);
281*404b540aSrobert 	     else
282*404b540aSrobert 	       t = 0;
283*404b540aSrobert 	   }
284*404b540aSrobert 
285*404b540aSrobert 	 Here the strlen call cannot be moved out of the loop, even though
286*404b540aSrobert 	 s is invariant.  In addition to possibly creating a call with
287*404b540aSrobert 	 invalid arguments, moving out a function call that is not executed
288*404b540aSrobert 	 may cause performance regressions in case the call is costly and
289*404b540aSrobert 	 not executed at all.  */
290*404b540aSrobert       return MOVE_PRESERVE_EXECUTION;
291*404b540aSrobert     }
292*404b540aSrobert   return MOVE_POSSIBLE;
293*404b540aSrobert }
294*404b540aSrobert 
295*404b540aSrobert /* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
296*404b540aSrobert    loop to that we could move the expression using DEF if it did not have
297*404b540aSrobert    other operands, i.e. the outermost loop enclosing LOOP in that the value
298*404b540aSrobert    of DEF is invariant.  */
299*404b540aSrobert 
300*404b540aSrobert static struct loop *
outermost_invariant_loop(tree def,struct loop * loop)301*404b540aSrobert outermost_invariant_loop (tree def, struct loop *loop)
302*404b540aSrobert {
303*404b540aSrobert   tree def_stmt;
304*404b540aSrobert   basic_block def_bb;
305*404b540aSrobert   struct loop *max_loop;
306*404b540aSrobert 
307*404b540aSrobert   if (TREE_CODE (def) != SSA_NAME)
308*404b540aSrobert     return superloop_at_depth (loop, 1);
309*404b540aSrobert 
310*404b540aSrobert   def_stmt = SSA_NAME_DEF_STMT (def);
311*404b540aSrobert   def_bb = bb_for_stmt (def_stmt);
312*404b540aSrobert   if (!def_bb)
313*404b540aSrobert     return superloop_at_depth (loop, 1);
314*404b540aSrobert 
315*404b540aSrobert   max_loop = find_common_loop (loop, def_bb->loop_father);
316*404b540aSrobert 
317*404b540aSrobert   if (LIM_DATA (def_stmt) && LIM_DATA (def_stmt)->max_loop)
318*404b540aSrobert     max_loop = find_common_loop (max_loop,
319*404b540aSrobert 				 LIM_DATA (def_stmt)->max_loop->outer);
320*404b540aSrobert   if (max_loop == loop)
321*404b540aSrobert     return NULL;
322*404b540aSrobert   max_loop = superloop_at_depth (loop, max_loop->depth + 1);
323*404b540aSrobert 
324*404b540aSrobert   return max_loop;
325*404b540aSrobert }
326*404b540aSrobert 
327*404b540aSrobert /* Returns the outermost superloop of LOOP in that the expression EXPR is
328*404b540aSrobert    invariant.  */
329*404b540aSrobert 
330*404b540aSrobert static struct loop *
outermost_invariant_loop_expr(tree expr,struct loop * loop)331*404b540aSrobert outermost_invariant_loop_expr (tree expr, struct loop *loop)
332*404b540aSrobert {
333*404b540aSrobert   enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
334*404b540aSrobert   unsigned i, nops;
335*404b540aSrobert   struct loop *max_loop = superloop_at_depth (loop, 1), *aloop;
336*404b540aSrobert 
337*404b540aSrobert   if (TREE_CODE (expr) == SSA_NAME
338*404b540aSrobert       || TREE_CODE (expr) == INTEGER_CST
339*404b540aSrobert       || is_gimple_min_invariant (expr))
340*404b540aSrobert     return outermost_invariant_loop (expr, loop);
341*404b540aSrobert 
342*404b540aSrobert   if (class != tcc_unary
343*404b540aSrobert       && class != tcc_binary
344*404b540aSrobert       && class != tcc_expression
345*404b540aSrobert       && class != tcc_comparison)
346*404b540aSrobert     return NULL;
347*404b540aSrobert 
348*404b540aSrobert   nops = TREE_CODE_LENGTH (TREE_CODE (expr));
349*404b540aSrobert   for (i = 0; i < nops; i++)
350*404b540aSrobert     {
351*404b540aSrobert       aloop = outermost_invariant_loop_expr (TREE_OPERAND (expr, i), loop);
352*404b540aSrobert       if (!aloop)
353*404b540aSrobert 	return NULL;
354*404b540aSrobert 
355*404b540aSrobert       if (flow_loop_nested_p (max_loop, aloop))
356*404b540aSrobert 	max_loop = aloop;
357*404b540aSrobert     }
358*404b540aSrobert 
359*404b540aSrobert   return max_loop;
360*404b540aSrobert }
361*404b540aSrobert 
362*404b540aSrobert /* DATA is a structure containing information associated with a statement
363*404b540aSrobert    inside LOOP.  DEF is one of the operands of this statement.
364*404b540aSrobert 
365*404b540aSrobert    Find the outermost loop enclosing LOOP in that value of DEF is invariant
366*404b540aSrobert    and record this in DATA->max_loop field.  If DEF itself is defined inside
367*404b540aSrobert    this loop as well (i.e. we need to hoist it out of the loop if we want
368*404b540aSrobert    to hoist the statement represented by DATA), record the statement in that
369*404b540aSrobert    DEF is defined to the DATA->depends list.  Additionally if ADD_COST is true,
370*404b540aSrobert    add the cost of the computation of DEF to the DATA->cost.
371*404b540aSrobert 
372*404b540aSrobert    If DEF is not invariant in LOOP, return false.  Otherwise return TRUE.  */
373*404b540aSrobert 
374*404b540aSrobert static bool
add_dependency(tree def,struct lim_aux_data * data,struct loop * loop,bool add_cost)375*404b540aSrobert add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
376*404b540aSrobert 		bool add_cost)
377*404b540aSrobert {
378*404b540aSrobert   tree def_stmt = SSA_NAME_DEF_STMT (def);
379*404b540aSrobert   basic_block def_bb = bb_for_stmt (def_stmt);
380*404b540aSrobert   struct loop *max_loop;
381*404b540aSrobert   struct depend *dep;
382*404b540aSrobert 
383*404b540aSrobert   if (!def_bb)
384*404b540aSrobert     return true;
385*404b540aSrobert 
386*404b540aSrobert   max_loop = outermost_invariant_loop (def, loop);
387*404b540aSrobert   if (!max_loop)
388*404b540aSrobert     return false;
389*404b540aSrobert 
390*404b540aSrobert   if (flow_loop_nested_p (data->max_loop, max_loop))
391*404b540aSrobert     data->max_loop = max_loop;
392*404b540aSrobert 
393*404b540aSrobert   if (!LIM_DATA (def_stmt))
394*404b540aSrobert     return true;
395*404b540aSrobert 
396*404b540aSrobert   if (add_cost
397*404b540aSrobert       /* Only add the cost if the statement defining DEF is inside LOOP,
398*404b540aSrobert 	 i.e. if it is likely that by moving the invariants dependent
399*404b540aSrobert 	 on it, we will be able to avoid creating a new register for
400*404b540aSrobert 	 it (since it will be only used in these dependent invariants).  */
401*404b540aSrobert       && def_bb->loop_father == loop)
402*404b540aSrobert     data->cost += LIM_DATA (def_stmt)->cost;
403*404b540aSrobert 
404*404b540aSrobert   dep = XNEW (struct depend);
405*404b540aSrobert   dep->stmt = def_stmt;
406*404b540aSrobert   dep->next = data->depends;
407*404b540aSrobert   data->depends = dep;
408*404b540aSrobert 
409*404b540aSrobert   return true;
410*404b540aSrobert }
411*404b540aSrobert 
412*404b540aSrobert /* Returns an estimate for a cost of statement STMT.  TODO -- the values here
413*404b540aSrobert    are just ad-hoc constants.  The estimates should be based on target-specific
414*404b540aSrobert    values.  */
415*404b540aSrobert 
416*404b540aSrobert static unsigned
stmt_cost(tree stmt)417*404b540aSrobert stmt_cost (tree stmt)
418*404b540aSrobert {
419*404b540aSrobert   tree rhs;
420*404b540aSrobert   unsigned cost = 1;
421*404b540aSrobert 
422*404b540aSrobert   /* Always try to create possibilities for unswitching.  */
423*404b540aSrobert   if (TREE_CODE (stmt) == COND_EXPR)
424*404b540aSrobert     return LIM_EXPENSIVE;
425*404b540aSrobert 
426*404b540aSrobert   rhs = TREE_OPERAND (stmt, 1);
427*404b540aSrobert 
428*404b540aSrobert   /* Hoisting memory references out should almost surely be a win.  */
429*404b540aSrobert   if (stmt_references_memory_p (stmt))
430*404b540aSrobert     cost += 20;
431*404b540aSrobert 
432*404b540aSrobert   switch (TREE_CODE (rhs))
433*404b540aSrobert     {
434*404b540aSrobert     case CALL_EXPR:
435*404b540aSrobert       /* We should be hoisting calls if possible.  */
436*404b540aSrobert 
437*404b540aSrobert       /* Unless the call is a builtin_constant_p; this always folds to a
438*404b540aSrobert 	 constant, so moving it is useless.  */
439*404b540aSrobert       rhs = get_callee_fndecl (rhs);
440*404b540aSrobert       if (DECL_BUILT_IN_CLASS (rhs) == BUILT_IN_NORMAL
441*404b540aSrobert 	  && DECL_FUNCTION_CODE (rhs) == BUILT_IN_CONSTANT_P)
442*404b540aSrobert 	return 0;
443*404b540aSrobert 
444*404b540aSrobert       cost += 20;
445*404b540aSrobert       break;
446*404b540aSrobert 
447*404b540aSrobert     case MULT_EXPR:
448*404b540aSrobert     case TRUNC_DIV_EXPR:
449*404b540aSrobert     case CEIL_DIV_EXPR:
450*404b540aSrobert     case FLOOR_DIV_EXPR:
451*404b540aSrobert     case ROUND_DIV_EXPR:
452*404b540aSrobert     case EXACT_DIV_EXPR:
453*404b540aSrobert     case CEIL_MOD_EXPR:
454*404b540aSrobert     case FLOOR_MOD_EXPR:
455*404b540aSrobert     case ROUND_MOD_EXPR:
456*404b540aSrobert     case TRUNC_MOD_EXPR:
457*404b540aSrobert     case RDIV_EXPR:
458*404b540aSrobert       /* Division and multiplication are usually expensive.  */
459*404b540aSrobert       cost += 20;
460*404b540aSrobert       break;
461*404b540aSrobert 
462*404b540aSrobert     default:
463*404b540aSrobert       break;
464*404b540aSrobert     }
465*404b540aSrobert 
466*404b540aSrobert   return cost;
467*404b540aSrobert }
468*404b540aSrobert 
469*404b540aSrobert /* Determine the outermost loop to that it is possible to hoist a statement
470*404b540aSrobert    STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
471*404b540aSrobert    the outermost loop in that the value computed by STMT is invariant.
472*404b540aSrobert    If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
473*404b540aSrobert    we preserve the fact whether STMT is executed.  It also fills other related
474*404b540aSrobert    information to LIM_DATA (STMT).
475*404b540aSrobert 
476*404b540aSrobert    The function returns false if STMT cannot be hoisted outside of the loop it
477*404b540aSrobert    is defined in, and true otherwise.  */
478*404b540aSrobert 
479*404b540aSrobert static bool
determine_max_movement(tree stmt,bool must_preserve_exec)480*404b540aSrobert determine_max_movement (tree stmt, bool must_preserve_exec)
481*404b540aSrobert {
482*404b540aSrobert   basic_block bb = bb_for_stmt (stmt);
483*404b540aSrobert   struct loop *loop = bb->loop_father;
484*404b540aSrobert   struct loop *level;
485*404b540aSrobert   struct lim_aux_data *lim_data = LIM_DATA (stmt);
486*404b540aSrobert   tree val;
487*404b540aSrobert   ssa_op_iter iter;
488*404b540aSrobert 
489*404b540aSrobert   if (must_preserve_exec)
490*404b540aSrobert     level = ALWAYS_EXECUTED_IN (bb);
491*404b540aSrobert   else
492*404b540aSrobert     level = superloop_at_depth (loop, 1);
493*404b540aSrobert   lim_data->max_loop = level;
494*404b540aSrobert 
495*404b540aSrobert   FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
496*404b540aSrobert     if (!add_dependency (val, lim_data, loop, true))
497*404b540aSrobert       return false;
498*404b540aSrobert 
499*404b540aSrobert   FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
500*404b540aSrobert     if (!add_dependency (val, lim_data, loop, false))
501*404b540aSrobert       return false;
502*404b540aSrobert 
503*404b540aSrobert   lim_data->cost += stmt_cost (stmt);
504*404b540aSrobert 
505*404b540aSrobert   return true;
506*404b540aSrobert }
507*404b540aSrobert 
508*404b540aSrobert /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
509*404b540aSrobert    and that one of the operands of this statement is computed by STMT.
510*404b540aSrobert    Ensure that STMT (together with all the statements that define its
511*404b540aSrobert    operands) is hoisted at least out of the loop LEVEL.  */
512*404b540aSrobert 
513*404b540aSrobert static void
set_level(tree stmt,struct loop * orig_loop,struct loop * level)514*404b540aSrobert set_level (tree stmt, struct loop *orig_loop, struct loop *level)
515*404b540aSrobert {
516*404b540aSrobert   struct loop *stmt_loop = bb_for_stmt (stmt)->loop_father;
517*404b540aSrobert   struct depend *dep;
518*404b540aSrobert 
519*404b540aSrobert   stmt_loop = find_common_loop (orig_loop, stmt_loop);
520*404b540aSrobert   if (LIM_DATA (stmt) && LIM_DATA (stmt)->tgt_loop)
521*404b540aSrobert     stmt_loop = find_common_loop (stmt_loop,
522*404b540aSrobert 				  LIM_DATA (stmt)->tgt_loop->outer);
523*404b540aSrobert   if (flow_loop_nested_p (stmt_loop, level))
524*404b540aSrobert     return;
525*404b540aSrobert 
526*404b540aSrobert   gcc_assert (LIM_DATA (stmt));
527*404b540aSrobert   gcc_assert (level == LIM_DATA (stmt)->max_loop
528*404b540aSrobert 	      || flow_loop_nested_p (LIM_DATA (stmt)->max_loop, level));
529*404b540aSrobert 
530*404b540aSrobert   LIM_DATA (stmt)->tgt_loop = level;
531*404b540aSrobert   for (dep = LIM_DATA (stmt)->depends; dep; dep = dep->next)
532*404b540aSrobert     set_level (dep->stmt, orig_loop, level);
533*404b540aSrobert }
534*404b540aSrobert 
535*404b540aSrobert /* Determines an outermost loop from that we want to hoist the statement STMT.
536*404b540aSrobert    For now we chose the outermost possible loop.  TODO -- use profiling
537*404b540aSrobert    information to set it more sanely.  */
538*404b540aSrobert 
539*404b540aSrobert static void
set_profitable_level(tree stmt)540*404b540aSrobert set_profitable_level (tree stmt)
541*404b540aSrobert {
542*404b540aSrobert   set_level (stmt, bb_for_stmt (stmt)->loop_father, LIM_DATA (stmt)->max_loop);
543*404b540aSrobert }
544*404b540aSrobert 
545*404b540aSrobert /* Returns true if STMT is not a pure call.  */
546*404b540aSrobert 
547*404b540aSrobert static bool
nonpure_call_p(tree stmt)548*404b540aSrobert nonpure_call_p (tree stmt)
549*404b540aSrobert {
550*404b540aSrobert   tree call = get_call_expr_in (stmt);
551*404b540aSrobert 
552*404b540aSrobert   if (!call)
553*404b540aSrobert     return false;
554*404b540aSrobert 
555*404b540aSrobert   return TREE_SIDE_EFFECTS (call) != 0;
556*404b540aSrobert }
557*404b540aSrobert 
558*404b540aSrobert /* Releases the memory occupied by DATA.  */
559*404b540aSrobert 
560*404b540aSrobert static void
free_lim_aux_data(struct lim_aux_data * data)561*404b540aSrobert free_lim_aux_data (struct lim_aux_data *data)
562*404b540aSrobert {
563*404b540aSrobert   struct depend *dep, *next;
564*404b540aSrobert 
565*404b540aSrobert   for (dep = data->depends; dep; dep = next)
566*404b540aSrobert     {
567*404b540aSrobert       next = dep->next;
568*404b540aSrobert       free (dep);
569*404b540aSrobert     }
570*404b540aSrobert   free (data);
571*404b540aSrobert }
572*404b540aSrobert 
573*404b540aSrobert /* Determine the outermost loops in that statements in basic block BB are
574*404b540aSrobert    invariant, and record them to the LIM_DATA associated with the statements.
575*404b540aSrobert    Callback for walk_dominator_tree.  */
576*404b540aSrobert 
577*404b540aSrobert static void
determine_invariantness_stmt(struct dom_walk_data * dw_data ATTRIBUTE_UNUSED,basic_block bb)578*404b540aSrobert determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
579*404b540aSrobert 			      basic_block bb)
580*404b540aSrobert {
581*404b540aSrobert   enum move_pos pos;
582*404b540aSrobert   block_stmt_iterator bsi;
583*404b540aSrobert   tree stmt, rhs;
584*404b540aSrobert   bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
585*404b540aSrobert   struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
586*404b540aSrobert 
587*404b540aSrobert   if (!bb->loop_father->outer)
588*404b540aSrobert     return;
589*404b540aSrobert 
590*404b540aSrobert   if (dump_file && (dump_flags & TDF_DETAILS))
591*404b540aSrobert     fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
592*404b540aSrobert 	     bb->index, bb->loop_father->num, bb->loop_father->depth);
593*404b540aSrobert 
594*404b540aSrobert   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
595*404b540aSrobert     {
596*404b540aSrobert       stmt = bsi_stmt (bsi);
597*404b540aSrobert 
598*404b540aSrobert       pos = movement_possibility (stmt);
599*404b540aSrobert       if (pos == MOVE_IMPOSSIBLE)
600*404b540aSrobert 	{
601*404b540aSrobert 	  if (nonpure_call_p (stmt))
602*404b540aSrobert 	    {
603*404b540aSrobert 	      maybe_never = true;
604*404b540aSrobert 	      outermost = NULL;
605*404b540aSrobert 	    }
606*404b540aSrobert 	  continue;
607*404b540aSrobert 	}
608*404b540aSrobert 
609*404b540aSrobert       /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
610*404b540aSrobert 	 to be hoisted out of loop, saving expensive divide.  */
611*404b540aSrobert       if (pos == MOVE_POSSIBLE
612*404b540aSrobert 	  && (rhs = TREE_OPERAND (stmt, 1)) != NULL
613*404b540aSrobert 	  && TREE_CODE (rhs) == RDIV_EXPR
614*404b540aSrobert 	  && flag_unsafe_math_optimizations
615*404b540aSrobert 	  && !flag_trapping_math
616*404b540aSrobert 	  && outermost_invariant_loop_expr (TREE_OPERAND (rhs, 1),
617*404b540aSrobert 					    loop_containing_stmt (stmt)) != NULL
618*404b540aSrobert 	  && outermost_invariant_loop_expr (rhs,
619*404b540aSrobert 					    loop_containing_stmt (stmt)) == NULL)
620*404b540aSrobert 	{
621*404b540aSrobert 	  tree lhs, stmt1, stmt2, var, name;
622*404b540aSrobert 
623*404b540aSrobert 	  lhs = TREE_OPERAND (stmt, 0);
624*404b540aSrobert 
625*404b540aSrobert 	  /* stmt must be MODIFY_EXPR.  */
626*404b540aSrobert 	  var = create_tmp_var (TREE_TYPE (rhs), "reciptmp");
627*404b540aSrobert 	  add_referenced_var (var);
628*404b540aSrobert 
629*404b540aSrobert 	  stmt1 = build2 (MODIFY_EXPR, void_type_node, var,
630*404b540aSrobert 			  build2 (RDIV_EXPR, TREE_TYPE (rhs),
631*404b540aSrobert 				  build_real (TREE_TYPE (rhs), dconst1),
632*404b540aSrobert 				  TREE_OPERAND (rhs, 1)));
633*404b540aSrobert 	  name = make_ssa_name (var, stmt1);
634*404b540aSrobert 	  TREE_OPERAND (stmt1, 0) = name;
635*404b540aSrobert 	  stmt2 = build2 (MODIFY_EXPR, void_type_node, lhs,
636*404b540aSrobert 			  build2 (MULT_EXPR, TREE_TYPE (rhs),
637*404b540aSrobert 				  name, TREE_OPERAND (rhs, 0)));
638*404b540aSrobert 
639*404b540aSrobert 	  /* Replace division stmt with reciprocal and multiply stmts.
640*404b540aSrobert 	     The multiply stmt is not invariant, so update iterator
641*404b540aSrobert 	     and avoid rescanning.  */
642*404b540aSrobert 	  bsi_replace (&bsi, stmt1, true);
643*404b540aSrobert 	  bsi_insert_after (&bsi, stmt2, BSI_NEW_STMT);
644*404b540aSrobert 	  SSA_NAME_DEF_STMT (lhs) = stmt2;
645*404b540aSrobert 
646*404b540aSrobert 	  /* Continue processing with invariant reciprocal statement.  */
647*404b540aSrobert 	  stmt = stmt1;
648*404b540aSrobert 	}
649*404b540aSrobert 
650*404b540aSrobert       stmt_ann (stmt)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
651*404b540aSrobert       LIM_DATA (stmt)->always_executed_in = outermost;
652*404b540aSrobert 
653*404b540aSrobert       if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
654*404b540aSrobert 	continue;
655*404b540aSrobert 
656*404b540aSrobert       if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
657*404b540aSrobert 	{
658*404b540aSrobert 	  LIM_DATA (stmt)->max_loop = NULL;
659*404b540aSrobert 	  continue;
660*404b540aSrobert 	}
661*404b540aSrobert 
662*404b540aSrobert       if (dump_file && (dump_flags & TDF_DETAILS))
663*404b540aSrobert 	{
664*404b540aSrobert 	  print_generic_stmt_indented (dump_file, stmt, 0, 2);
665*404b540aSrobert 	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
666*404b540aSrobert 		   LIM_DATA (stmt)->max_loop->depth,
667*404b540aSrobert 		   LIM_DATA (stmt)->cost);
668*404b540aSrobert 	}
669*404b540aSrobert 
670*404b540aSrobert       if (LIM_DATA (stmt)->cost >= LIM_EXPENSIVE)
671*404b540aSrobert 	set_profitable_level (stmt);
672*404b540aSrobert     }
673*404b540aSrobert }
674*404b540aSrobert 
675*404b540aSrobert /* For each statement determines the outermost loop in that it is invariant,
676*404b540aSrobert    statements on whose motion it depends and the cost of the computation.
677*404b540aSrobert    This information is stored to the LIM_DATA structure associated with
678*404b540aSrobert    each statement.  */
679*404b540aSrobert 
680*404b540aSrobert static void
determine_invariantness(void)681*404b540aSrobert determine_invariantness (void)
682*404b540aSrobert {
683*404b540aSrobert   struct dom_walk_data walk_data;
684*404b540aSrobert 
685*404b540aSrobert   memset (&walk_data, 0, sizeof (struct dom_walk_data));
686*404b540aSrobert   walk_data.before_dom_children_before_stmts = determine_invariantness_stmt;
687*404b540aSrobert 
688*404b540aSrobert   init_walk_dominator_tree (&walk_data);
689*404b540aSrobert   walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
690*404b540aSrobert   fini_walk_dominator_tree (&walk_data);
691*404b540aSrobert }
692*404b540aSrobert 
693*404b540aSrobert /* Commits edge insertions and updates loop structures.  */
694*404b540aSrobert 
695*404b540aSrobert void
loop_commit_inserts(void)696*404b540aSrobert loop_commit_inserts (void)
697*404b540aSrobert {
698*404b540aSrobert   unsigned old_last_basic_block, i;
699*404b540aSrobert   basic_block bb;
700*404b540aSrobert 
701*404b540aSrobert   old_last_basic_block = last_basic_block;
702*404b540aSrobert   bsi_commit_edge_inserts ();
703*404b540aSrobert   for (i = old_last_basic_block; i < (unsigned) last_basic_block; i++)
704*404b540aSrobert     {
705*404b540aSrobert       bb = BASIC_BLOCK (i);
706*404b540aSrobert       add_bb_to_loop (bb,
707*404b540aSrobert 		      find_common_loop (single_pred (bb)->loop_father,
708*404b540aSrobert 					single_succ (bb)->loop_father));
709*404b540aSrobert     }
710*404b540aSrobert }
711*404b540aSrobert 
712*404b540aSrobert /* Hoist the statements in basic block BB out of the loops prescribed by
713*404b540aSrobert    data stored in LIM_DATA structures associated with each statement.  Callback
714*404b540aSrobert    for walk_dominator_tree.  */
715*404b540aSrobert 
716*404b540aSrobert static void
move_computations_stmt(struct dom_walk_data * dw_data ATTRIBUTE_UNUSED,basic_block bb)717*404b540aSrobert move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
718*404b540aSrobert 			basic_block bb)
719*404b540aSrobert {
720*404b540aSrobert   struct loop *level;
721*404b540aSrobert   block_stmt_iterator bsi;
722*404b540aSrobert   tree stmt;
723*404b540aSrobert   unsigned cost = 0;
724*404b540aSrobert 
725*404b540aSrobert   if (!bb->loop_father->outer)
726*404b540aSrobert     return;
727*404b540aSrobert 
728*404b540aSrobert   for (bsi = bsi_start (bb); !bsi_end_p (bsi); )
729*404b540aSrobert     {
730*404b540aSrobert       stmt = bsi_stmt (bsi);
731*404b540aSrobert 
732*404b540aSrobert       if (!LIM_DATA (stmt))
733*404b540aSrobert 	{
734*404b540aSrobert 	  bsi_next (&bsi);
735*404b540aSrobert 	  continue;
736*404b540aSrobert 	}
737*404b540aSrobert 
738*404b540aSrobert       cost = LIM_DATA (stmt)->cost;
739*404b540aSrobert       level = LIM_DATA (stmt)->tgt_loop;
740*404b540aSrobert       free_lim_aux_data (LIM_DATA (stmt));
741*404b540aSrobert       stmt_ann (stmt)->common.aux = NULL;
742*404b540aSrobert 
743*404b540aSrobert       if (!level)
744*404b540aSrobert 	{
745*404b540aSrobert 	  bsi_next (&bsi);
746*404b540aSrobert 	  continue;
747*404b540aSrobert 	}
748*404b540aSrobert 
749*404b540aSrobert       /* We do not really want to move conditionals out of the loop; we just
750*404b540aSrobert 	 placed it here to force its operands to be moved if necessary.  */
751*404b540aSrobert       if (TREE_CODE (stmt) == COND_EXPR)
752*404b540aSrobert 	continue;
753*404b540aSrobert 
754*404b540aSrobert       if (dump_file && (dump_flags & TDF_DETAILS))
755*404b540aSrobert 	{
756*404b540aSrobert 	  fprintf (dump_file, "Moving statement\n");
757*404b540aSrobert 	  print_generic_stmt (dump_file, stmt, 0);
758*404b540aSrobert 	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
759*404b540aSrobert 		   cost, level->num);
760*404b540aSrobert 	}
761*404b540aSrobert       bsi_insert_on_edge (loop_preheader_edge (level), stmt);
762*404b540aSrobert       bsi_remove (&bsi, false);
763*404b540aSrobert     }
764*404b540aSrobert }
765*404b540aSrobert 
766*404b540aSrobert /* Hoist the statements out of the loops prescribed by data stored in
767*404b540aSrobert    LIM_DATA structures associated with each statement.*/
768*404b540aSrobert 
769*404b540aSrobert static void
move_computations(void)770*404b540aSrobert move_computations (void)
771*404b540aSrobert {
772*404b540aSrobert   struct dom_walk_data walk_data;
773*404b540aSrobert 
774*404b540aSrobert   memset (&walk_data, 0, sizeof (struct dom_walk_data));
775*404b540aSrobert   walk_data.before_dom_children_before_stmts = move_computations_stmt;
776*404b540aSrobert 
777*404b540aSrobert   init_walk_dominator_tree (&walk_data);
778*404b540aSrobert   walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
779*404b540aSrobert   fini_walk_dominator_tree (&walk_data);
780*404b540aSrobert 
781*404b540aSrobert   loop_commit_inserts ();
782*404b540aSrobert   if (need_ssa_update_p ())
783*404b540aSrobert     rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
784*404b540aSrobert }
785*404b540aSrobert 
786*404b540aSrobert /* Checks whether the statement defining variable *INDEX can be hoisted
787*404b540aSrobert    out of the loop passed in DATA.  Callback for for_each_index.  */
788*404b540aSrobert 
789*404b540aSrobert static bool
may_move_till(tree ref,tree * index,void * data)790*404b540aSrobert may_move_till (tree ref, tree *index, void *data)
791*404b540aSrobert {
792*404b540aSrobert   struct loop *loop = data, *max_loop;
793*404b540aSrobert 
794*404b540aSrobert   /* If REF is an array reference, check also that the step and the lower
795*404b540aSrobert      bound is invariant in LOOP.  */
796*404b540aSrobert   if (TREE_CODE (ref) == ARRAY_REF)
797*404b540aSrobert     {
798*404b540aSrobert       tree step = array_ref_element_size (ref);
799*404b540aSrobert       tree lbound = array_ref_low_bound (ref);
800*404b540aSrobert 
801*404b540aSrobert       max_loop = outermost_invariant_loop_expr (step, loop);
802*404b540aSrobert       if (!max_loop)
803*404b540aSrobert 	return false;
804*404b540aSrobert 
805*404b540aSrobert       max_loop = outermost_invariant_loop_expr (lbound, loop);
806*404b540aSrobert       if (!max_loop)
807*404b540aSrobert 	return false;
808*404b540aSrobert     }
809*404b540aSrobert 
810*404b540aSrobert   max_loop = outermost_invariant_loop (*index, loop);
811*404b540aSrobert   if (!max_loop)
812*404b540aSrobert     return false;
813*404b540aSrobert 
814*404b540aSrobert   return true;
815*404b540aSrobert }
816*404b540aSrobert 
817*404b540aSrobert /* Forces statements defining (invariant) SSA names in expression EXPR to be
818*404b540aSrobert    moved out of the LOOP.  ORIG_LOOP is the loop in that EXPR is used.  */
819*404b540aSrobert 
820*404b540aSrobert static void
force_move_till_expr(tree expr,struct loop * orig_loop,struct loop * loop)821*404b540aSrobert force_move_till_expr (tree expr, struct loop *orig_loop, struct loop *loop)
822*404b540aSrobert {
823*404b540aSrobert   enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
824*404b540aSrobert   unsigned i, nops;
825*404b540aSrobert 
826*404b540aSrobert   if (TREE_CODE (expr) == SSA_NAME)
827*404b540aSrobert     {
828*404b540aSrobert       tree stmt = SSA_NAME_DEF_STMT (expr);
829*404b540aSrobert       if (IS_EMPTY_STMT (stmt))
830*404b540aSrobert 	return;
831*404b540aSrobert 
832*404b540aSrobert       set_level (stmt, orig_loop, loop);
833*404b540aSrobert       return;
834*404b540aSrobert     }
835*404b540aSrobert 
836*404b540aSrobert   if (class != tcc_unary
837*404b540aSrobert       && class != tcc_binary
838*404b540aSrobert       && class != tcc_expression
839*404b540aSrobert       && class != tcc_comparison)
840*404b540aSrobert     return;
841*404b540aSrobert 
842*404b540aSrobert   nops = TREE_CODE_LENGTH (TREE_CODE (expr));
843*404b540aSrobert   for (i = 0; i < nops; i++)
844*404b540aSrobert     force_move_till_expr (TREE_OPERAND (expr, i), orig_loop, loop);
845*404b540aSrobert }
846*404b540aSrobert 
847*404b540aSrobert /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
848*404b540aSrobert    the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback for
849*404b540aSrobert    for_each_index.  */
850*404b540aSrobert 
851*404b540aSrobert struct fmt_data
852*404b540aSrobert {
853*404b540aSrobert   struct loop *loop;
854*404b540aSrobert   struct loop *orig_loop;
855*404b540aSrobert };
856*404b540aSrobert 
857*404b540aSrobert static bool
force_move_till(tree ref,tree * index,void * data)858*404b540aSrobert force_move_till (tree ref, tree *index, void *data)
859*404b540aSrobert {
860*404b540aSrobert   tree stmt;
861*404b540aSrobert   struct fmt_data *fmt_data = data;
862*404b540aSrobert 
863*404b540aSrobert   if (TREE_CODE (ref) == ARRAY_REF)
864*404b540aSrobert     {
865*404b540aSrobert       tree step = array_ref_element_size (ref);
866*404b540aSrobert       tree lbound = array_ref_low_bound (ref);
867*404b540aSrobert 
868*404b540aSrobert       force_move_till_expr (step, fmt_data->orig_loop, fmt_data->loop);
869*404b540aSrobert       force_move_till_expr (lbound, fmt_data->orig_loop, fmt_data->loop);
870*404b540aSrobert     }
871*404b540aSrobert 
872*404b540aSrobert   if (TREE_CODE (*index) != SSA_NAME)
873*404b540aSrobert     return true;
874*404b540aSrobert 
875*404b540aSrobert   stmt = SSA_NAME_DEF_STMT (*index);
876*404b540aSrobert   if (IS_EMPTY_STMT (stmt))
877*404b540aSrobert     return true;
878*404b540aSrobert 
879*404b540aSrobert   set_level (stmt, fmt_data->orig_loop, fmt_data->loop);
880*404b540aSrobert 
881*404b540aSrobert   return true;
882*404b540aSrobert }
883*404b540aSrobert 
884*404b540aSrobert /* Records memory reference location *REF to the list MEM_REFS.  The reference
885*404b540aSrobert    occurs in statement STMT.  */
886*404b540aSrobert 
887*404b540aSrobert static void
record_mem_ref_loc(struct mem_ref_loc ** mem_refs,tree stmt,tree * ref)888*404b540aSrobert record_mem_ref_loc (struct mem_ref_loc **mem_refs, tree stmt, tree *ref)
889*404b540aSrobert {
890*404b540aSrobert   struct mem_ref_loc *aref = XNEW (struct mem_ref_loc);
891*404b540aSrobert 
892*404b540aSrobert   aref->stmt = stmt;
893*404b540aSrobert   aref->ref = ref;
894*404b540aSrobert 
895*404b540aSrobert   aref->next = *mem_refs;
896*404b540aSrobert   *mem_refs = aref;
897*404b540aSrobert }
898*404b540aSrobert 
899*404b540aSrobert /* Releases list of memory reference locations MEM_REFS.  */
900*404b540aSrobert 
901*404b540aSrobert static void
free_mem_ref_locs(struct mem_ref_loc * mem_refs)902*404b540aSrobert free_mem_ref_locs (struct mem_ref_loc *mem_refs)
903*404b540aSrobert {
904*404b540aSrobert   struct mem_ref_loc *act;
905*404b540aSrobert 
906*404b540aSrobert   while (mem_refs)
907*404b540aSrobert     {
908*404b540aSrobert       act = mem_refs;
909*404b540aSrobert       mem_refs = mem_refs->next;
910*404b540aSrobert       free (act);
911*404b540aSrobert     }
912*404b540aSrobert }
913*404b540aSrobert 
914*404b540aSrobert /* Rewrites memory references in list MEM_REFS by variable TMP_VAR.  */
915*404b540aSrobert 
916*404b540aSrobert static void
rewrite_mem_refs(tree tmp_var,struct mem_ref_loc * mem_refs)917*404b540aSrobert rewrite_mem_refs (tree tmp_var, struct mem_ref_loc *mem_refs)
918*404b540aSrobert {
919*404b540aSrobert   tree var;
920*404b540aSrobert   ssa_op_iter iter;
921*404b540aSrobert 
922*404b540aSrobert   for (; mem_refs; mem_refs = mem_refs->next)
923*404b540aSrobert     {
924*404b540aSrobert       FOR_EACH_SSA_TREE_OPERAND (var, mem_refs->stmt, iter, SSA_OP_ALL_VIRTUALS)
925*404b540aSrobert 	mark_sym_for_renaming (SSA_NAME_VAR (var));
926*404b540aSrobert 
927*404b540aSrobert       *mem_refs->ref = tmp_var;
928*404b540aSrobert       update_stmt (mem_refs->stmt);
929*404b540aSrobert     }
930*404b540aSrobert }
931*404b540aSrobert 
932*404b540aSrobert /* The name and the length of the currently generated variable
933*404b540aSrobert    for lsm.  */
934*404b540aSrobert #define MAX_LSM_NAME_LENGTH 40
935*404b540aSrobert static char lsm_tmp_name[MAX_LSM_NAME_LENGTH + 1];
936*404b540aSrobert static int lsm_tmp_name_length;
937*404b540aSrobert 
938*404b540aSrobert /* Adds S to lsm_tmp_name.  */
939*404b540aSrobert 
940*404b540aSrobert static void
lsm_tmp_name_add(const char * s)941*404b540aSrobert lsm_tmp_name_add (const char *s)
942*404b540aSrobert {
943*404b540aSrobert   int l = strlen (s) + lsm_tmp_name_length;
944*404b540aSrobert   if (l > MAX_LSM_NAME_LENGTH)
945*404b540aSrobert     return;
946*404b540aSrobert 
947*404b540aSrobert   strcpy (lsm_tmp_name + lsm_tmp_name_length, s);
948*404b540aSrobert   lsm_tmp_name_length = l;
949*404b540aSrobert }
950*404b540aSrobert 
951*404b540aSrobert /* Stores the name for temporary variable that replaces REF to
952*404b540aSrobert    lsm_tmp_name.  */
953*404b540aSrobert 
954*404b540aSrobert static void
gen_lsm_tmp_name(tree ref)955*404b540aSrobert gen_lsm_tmp_name (tree ref)
956*404b540aSrobert {
957*404b540aSrobert   const char *name;
958*404b540aSrobert 
959*404b540aSrobert   switch (TREE_CODE (ref))
960*404b540aSrobert     {
961*404b540aSrobert     case MISALIGNED_INDIRECT_REF:
962*404b540aSrobert     case ALIGN_INDIRECT_REF:
963*404b540aSrobert     case INDIRECT_REF:
964*404b540aSrobert       gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
965*404b540aSrobert       lsm_tmp_name_add ("_");
966*404b540aSrobert       break;
967*404b540aSrobert 
968*404b540aSrobert     case BIT_FIELD_REF:
969*404b540aSrobert     case VIEW_CONVERT_EXPR:
970*404b540aSrobert     case ARRAY_RANGE_REF:
971*404b540aSrobert       gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
972*404b540aSrobert       break;
973*404b540aSrobert 
974*404b540aSrobert     case REALPART_EXPR:
975*404b540aSrobert       gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
976*404b540aSrobert       lsm_tmp_name_add ("_RE");
977*404b540aSrobert       break;
978*404b540aSrobert 
979*404b540aSrobert     case IMAGPART_EXPR:
980*404b540aSrobert       gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
981*404b540aSrobert       lsm_tmp_name_add ("_IM");
982*404b540aSrobert       break;
983*404b540aSrobert 
984*404b540aSrobert     case COMPONENT_REF:
985*404b540aSrobert       gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
986*404b540aSrobert       lsm_tmp_name_add ("_");
987*404b540aSrobert       name = get_name (TREE_OPERAND (ref, 1));
988*404b540aSrobert       if (!name)
989*404b540aSrobert 	name = "F";
990*404b540aSrobert       lsm_tmp_name_add ("_");
991*404b540aSrobert       lsm_tmp_name_add (name);
992*404b540aSrobert 
993*404b540aSrobert     case ARRAY_REF:
994*404b540aSrobert       gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
995*404b540aSrobert       lsm_tmp_name_add ("_I");
996*404b540aSrobert       break;
997*404b540aSrobert 
998*404b540aSrobert     case SSA_NAME:
999*404b540aSrobert       ref = SSA_NAME_VAR (ref);
1000*404b540aSrobert       /* Fallthru.  */
1001*404b540aSrobert 
1002*404b540aSrobert     case VAR_DECL:
1003*404b540aSrobert     case PARM_DECL:
1004*404b540aSrobert       name = get_name (ref);
1005*404b540aSrobert       if (!name)
1006*404b540aSrobert 	name = "D";
1007*404b540aSrobert       lsm_tmp_name_add (name);
1008*404b540aSrobert       break;
1009*404b540aSrobert 
1010*404b540aSrobert     case STRING_CST:
1011*404b540aSrobert       lsm_tmp_name_add ("S");
1012*404b540aSrobert       break;
1013*404b540aSrobert 
1014*404b540aSrobert     case RESULT_DECL:
1015*404b540aSrobert       lsm_tmp_name_add ("R");
1016*404b540aSrobert       break;
1017*404b540aSrobert 
1018*404b540aSrobert     default:
1019*404b540aSrobert       gcc_unreachable ();
1020*404b540aSrobert     }
1021*404b540aSrobert }
1022*404b540aSrobert 
1023*404b540aSrobert /* Determines name for temporary variable that replaces REF.
1024*404b540aSrobert    The name is accumulated into the lsm_tmp_name variable.  */
1025*404b540aSrobert 
1026*404b540aSrobert static char *
get_lsm_tmp_name(tree ref)1027*404b540aSrobert get_lsm_tmp_name (tree ref)
1028*404b540aSrobert {
1029*404b540aSrobert   lsm_tmp_name_length = 0;
1030*404b540aSrobert   gen_lsm_tmp_name (ref);
1031*404b540aSrobert   lsm_tmp_name_add ("_lsm");
1032*404b540aSrobert   return lsm_tmp_name;
1033*404b540aSrobert }
1034*404b540aSrobert 
1035*404b540aSrobert /* Records request for store motion of memory reference REF from LOOP.
1036*404b540aSrobert    MEM_REFS is the list of occurrences of the reference REF inside LOOP;
1037*404b540aSrobert    these references are rewritten by a new temporary variable.
1038*404b540aSrobert    Exits from the LOOP are stored in EXITS, there are N_EXITS of them.
1039*404b540aSrobert    The initialization of the temporary variable is put to the preheader
1040*404b540aSrobert    of the loop, and assignments to the reference from the temporary variable
1041*404b540aSrobert    are emitted to exits.  */
1042*404b540aSrobert 
1043*404b540aSrobert static void
schedule_sm(struct loop * loop,edge * exits,unsigned n_exits,tree ref,struct mem_ref_loc * mem_refs)1044*404b540aSrobert schedule_sm (struct loop *loop, edge *exits, unsigned n_exits, tree ref,
1045*404b540aSrobert 	     struct mem_ref_loc *mem_refs)
1046*404b540aSrobert {
1047*404b540aSrobert   struct mem_ref_loc *aref;
1048*404b540aSrobert   tree tmp_var;
1049*404b540aSrobert   unsigned i;
1050*404b540aSrobert   tree load, store;
1051*404b540aSrobert   struct fmt_data fmt_data;
1052*404b540aSrobert 
1053*404b540aSrobert   if (dump_file && (dump_flags & TDF_DETAILS))
1054*404b540aSrobert     {
1055*404b540aSrobert       fprintf (dump_file, "Executing store motion of ");
1056*404b540aSrobert       print_generic_expr (dump_file, ref, 0);
1057*404b540aSrobert       fprintf (dump_file, " from loop %d\n", loop->num);
1058*404b540aSrobert     }
1059*404b540aSrobert 
1060*404b540aSrobert   tmp_var = make_rename_temp (TREE_TYPE (ref),
1061*404b540aSrobert 			      get_lsm_tmp_name (ref));
1062*404b540aSrobert 
1063*404b540aSrobert   fmt_data.loop = loop;
1064*404b540aSrobert   fmt_data.orig_loop = loop;
1065*404b540aSrobert   for_each_index (&ref, force_move_till, &fmt_data);
1066*404b540aSrobert 
1067*404b540aSrobert   rewrite_mem_refs (tmp_var, mem_refs);
1068*404b540aSrobert   for (aref = mem_refs; aref; aref = aref->next)
1069*404b540aSrobert     if (LIM_DATA (aref->stmt))
1070*404b540aSrobert       LIM_DATA (aref->stmt)->sm_done = true;
1071*404b540aSrobert 
1072*404b540aSrobert   /* Emit the load & stores.  */
1073*404b540aSrobert   load = build2 (MODIFY_EXPR, void_type_node, tmp_var, ref);
1074*404b540aSrobert   get_stmt_ann (load)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
1075*404b540aSrobert   LIM_DATA (load)->max_loop = loop;
1076*404b540aSrobert   LIM_DATA (load)->tgt_loop = loop;
1077*404b540aSrobert 
1078*404b540aSrobert   /* Put this into the latch, so that we are sure it will be processed after
1079*404b540aSrobert      all dependencies.  */
1080*404b540aSrobert   bsi_insert_on_edge (loop_latch_edge (loop), load);
1081*404b540aSrobert 
1082*404b540aSrobert   for (i = 0; i < n_exits; i++)
1083*404b540aSrobert     {
1084*404b540aSrobert       store = build2 (MODIFY_EXPR, void_type_node,
1085*404b540aSrobert 		      unshare_expr (ref), tmp_var);
1086*404b540aSrobert       bsi_insert_on_edge (exits[i], store);
1087*404b540aSrobert     }
1088*404b540aSrobert }
1089*404b540aSrobert 
1090*404b540aSrobert /* Check whether memory reference REF can be hoisted out of the LOOP.  If this
1091*404b540aSrobert    is true, prepare the statements that load the value of the memory reference
1092*404b540aSrobert    to a temporary variable in the loop preheader, store it back on the loop
1093*404b540aSrobert    exits, and replace all the references inside LOOP by this temporary variable.
1094*404b540aSrobert    LOOP has N_EXITS stored in EXITS.  CLOBBERED_VOPS is the bitmap of virtual
1095*404b540aSrobert    operands that are clobbered by a call or accessed through multiple references
1096*404b540aSrobert    in loop.  */
1097*404b540aSrobert 
1098*404b540aSrobert static void
determine_lsm_ref(struct loop * loop,edge * exits,unsigned n_exits,bitmap clobbered_vops,struct mem_ref * ref)1099*404b540aSrobert determine_lsm_ref (struct loop *loop, edge *exits, unsigned n_exits,
1100*404b540aSrobert 		   bitmap clobbered_vops, struct mem_ref *ref)
1101*404b540aSrobert {
1102*404b540aSrobert   struct mem_ref_loc *aref;
1103*404b540aSrobert   struct loop *must_exec;
1104*404b540aSrobert 
1105*404b540aSrobert   /* In case the memory is not stored to, there is nothing for SM to do.  */
1106*404b540aSrobert   if (!ref->is_stored)
1107*404b540aSrobert     return;
1108*404b540aSrobert 
1109*404b540aSrobert   /* If the reference is aliased with any different ref, or killed by call
1110*404b540aSrobert      in function, then fail.  */
1111*404b540aSrobert   if (bitmap_intersect_p (ref->vops, clobbered_vops))
1112*404b540aSrobert     return;
1113*404b540aSrobert 
1114*404b540aSrobert   if (tree_could_trap_p (ref->mem))
1115*404b540aSrobert     {
1116*404b540aSrobert       /* If the memory access is unsafe (i.e. it might trap), ensure that some
1117*404b540aSrobert 	 of the statements in that it occurs is always executed when the loop
1118*404b540aSrobert 	 is entered.  This way we know that by moving the load from the
1119*404b540aSrobert 	 reference out of the loop we will not cause the error that would not
1120*404b540aSrobert 	 occur otherwise.
1121*404b540aSrobert 
1122*404b540aSrobert 	 TODO -- in fact we would like to check for anticipability of the
1123*404b540aSrobert 	 reference, i.e. that on each path from loop entry to loop exit at
1124*404b540aSrobert 	 least one of the statements containing the memory reference is
1125*404b540aSrobert 	 executed.  */
1126*404b540aSrobert 
1127*404b540aSrobert       for (aref = ref->locs; aref; aref = aref->next)
1128*404b540aSrobert 	{
1129*404b540aSrobert 	  if (!LIM_DATA (aref->stmt))
1130*404b540aSrobert 	    continue;
1131*404b540aSrobert 
1132*404b540aSrobert 	  must_exec = LIM_DATA (aref->stmt)->always_executed_in;
1133*404b540aSrobert 	  if (!must_exec)
1134*404b540aSrobert 	    continue;
1135*404b540aSrobert 
1136*404b540aSrobert 	  if (must_exec == loop
1137*404b540aSrobert 	      || flow_loop_nested_p (must_exec, loop))
1138*404b540aSrobert 	    break;
1139*404b540aSrobert 	}
1140*404b540aSrobert 
1141*404b540aSrobert       if (!aref)
1142*404b540aSrobert 	return;
1143*404b540aSrobert     }
1144*404b540aSrobert 
1145*404b540aSrobert   schedule_sm (loop, exits, n_exits, ref->mem, ref->locs);
1146*404b540aSrobert }
1147*404b540aSrobert 
1148*404b540aSrobert /* Hoists memory references MEM_REFS out of LOOP.  CLOBBERED_VOPS is the list
1149*404b540aSrobert    of vops clobbered by call in loop or accessed by multiple memory references.
1150*404b540aSrobert    EXITS is the list of N_EXITS exit edges of the LOOP.  */
1151*404b540aSrobert 
1152*404b540aSrobert static void
hoist_memory_references(struct loop * loop,struct mem_ref * mem_refs,bitmap clobbered_vops,edge * exits,unsigned n_exits)1153*404b540aSrobert hoist_memory_references (struct loop *loop, struct mem_ref *mem_refs,
1154*404b540aSrobert 			 bitmap clobbered_vops, edge *exits, unsigned n_exits)
1155*404b540aSrobert {
1156*404b540aSrobert   struct mem_ref *ref;
1157*404b540aSrobert 
1158*404b540aSrobert   for (ref = mem_refs; ref; ref = ref->next)
1159*404b540aSrobert     determine_lsm_ref (loop, exits, n_exits, clobbered_vops, ref);
1160*404b540aSrobert }
1161*404b540aSrobert 
1162*404b540aSrobert /* Checks whether LOOP (with N_EXITS exits stored in EXITS array) is suitable
1163*404b540aSrobert    for a store motion optimization (i.e. whether we can insert statement
1164*404b540aSrobert    on its exits).  */
1165*404b540aSrobert 
1166*404b540aSrobert static bool
loop_suitable_for_sm(struct loop * loop ATTRIBUTE_UNUSED,edge * exits,unsigned n_exits)1167*404b540aSrobert loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED, edge *exits,
1168*404b540aSrobert 		      unsigned n_exits)
1169*404b540aSrobert {
1170*404b540aSrobert   unsigned i;
1171*404b540aSrobert 
1172*404b540aSrobert   for (i = 0; i < n_exits; i++)
1173*404b540aSrobert     if (exits[i]->flags & EDGE_ABNORMAL)
1174*404b540aSrobert       return false;
1175*404b540aSrobert 
1176*404b540aSrobert   return true;
1177*404b540aSrobert }
1178*404b540aSrobert 
1179*404b540aSrobert /* A hash function for struct mem_ref object OBJ.  */
1180*404b540aSrobert 
1181*404b540aSrobert static hashval_t
memref_hash(const void * obj)1182*404b540aSrobert memref_hash (const void *obj)
1183*404b540aSrobert {
1184*404b540aSrobert   const struct mem_ref *mem = obj;
1185*404b540aSrobert 
1186*404b540aSrobert   return mem->hash;
1187*404b540aSrobert }
1188*404b540aSrobert 
1189*404b540aSrobert /* An equality function for struct mem_ref object OBJ1 with
1190*404b540aSrobert    memory reference OBJ2.  */
1191*404b540aSrobert 
1192*404b540aSrobert static int
memref_eq(const void * obj1,const void * obj2)1193*404b540aSrobert memref_eq (const void *obj1, const void *obj2)
1194*404b540aSrobert {
1195*404b540aSrobert   const struct mem_ref *mem1 = obj1;
1196*404b540aSrobert 
1197*404b540aSrobert   return operand_equal_p (mem1->mem, (tree) obj2, 0);
1198*404b540aSrobert }
1199*404b540aSrobert 
1200*404b540aSrobert /* Gathers memory references in statement STMT in LOOP, storing the
1201*404b540aSrobert    information about them in MEM_REFS hash table.  Note vops accessed through
1202*404b540aSrobert    unrecognized statements in CLOBBERED_VOPS.  The newly created references
1203*404b540aSrobert    are also stored to MEM_REF_LIST.  */
1204*404b540aSrobert 
1205*404b540aSrobert static void
gather_mem_refs_stmt(struct loop * loop,htab_t mem_refs,bitmap clobbered_vops,tree stmt,struct mem_ref ** mem_ref_list)1206*404b540aSrobert gather_mem_refs_stmt (struct loop *loop, htab_t mem_refs,
1207*404b540aSrobert 		      bitmap clobbered_vops, tree stmt,
1208*404b540aSrobert 		      struct mem_ref **mem_ref_list)
1209*404b540aSrobert {
1210*404b540aSrobert   tree *lhs, *rhs, *mem = NULL;
1211*404b540aSrobert   hashval_t hash;
1212*404b540aSrobert   PTR *slot;
1213*404b540aSrobert   struct mem_ref *ref = NULL;
1214*404b540aSrobert   ssa_op_iter oi;
1215*404b540aSrobert   tree vname;
1216*404b540aSrobert   bool is_stored;
1217*404b540aSrobert 
1218*404b540aSrobert   if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
1219*404b540aSrobert     return;
1220*404b540aSrobert 
1221*404b540aSrobert   /* Recognize MEM = (SSA_NAME | invariant) and SSA_NAME = MEM patterns.  */
1222*404b540aSrobert   if (TREE_CODE (stmt) != MODIFY_EXPR)
1223*404b540aSrobert     goto fail;
1224*404b540aSrobert 
1225*404b540aSrobert   lhs = &TREE_OPERAND (stmt, 0);
1226*404b540aSrobert   rhs = &TREE_OPERAND (stmt, 1);
1227*404b540aSrobert 
1228*404b540aSrobert   if (TREE_CODE (*lhs) == SSA_NAME)
1229*404b540aSrobert     {
1230*404b540aSrobert       if (!is_gimple_addressable (*rhs))
1231*404b540aSrobert 	goto fail;
1232*404b540aSrobert 
1233*404b540aSrobert       mem = rhs;
1234*404b540aSrobert       is_stored = false;
1235*404b540aSrobert     }
1236*404b540aSrobert   else if (TREE_CODE (*rhs) == SSA_NAME
1237*404b540aSrobert 	   || is_gimple_min_invariant (*rhs))
1238*404b540aSrobert     {
1239*404b540aSrobert       mem = lhs;
1240*404b540aSrobert       is_stored = true;
1241*404b540aSrobert     }
1242*404b540aSrobert   else
1243*404b540aSrobert     goto fail;
1244*404b540aSrobert 
1245*404b540aSrobert   /* If we cannot create an SSA name for the result, give up.  */
1246*404b540aSrobert   if (!is_gimple_reg_type (TREE_TYPE (*mem))
1247*404b540aSrobert       || TREE_THIS_VOLATILE (*mem))
1248*404b540aSrobert     goto fail;
1249*404b540aSrobert 
1250*404b540aSrobert   /* If we cannot move the reference out of the loop, fail.  */
1251*404b540aSrobert   if (!for_each_index (mem, may_move_till, loop))
1252*404b540aSrobert     goto fail;
1253*404b540aSrobert 
1254*404b540aSrobert   hash = iterative_hash_expr (*mem, 0);
1255*404b540aSrobert   slot = htab_find_slot_with_hash (mem_refs, *mem, hash, INSERT);
1256*404b540aSrobert 
1257*404b540aSrobert   if (*slot)
1258*404b540aSrobert     ref = *slot;
1259*404b540aSrobert   else
1260*404b540aSrobert     {
1261*404b540aSrobert       ref = XNEW (struct mem_ref);
1262*404b540aSrobert       ref->mem = *mem;
1263*404b540aSrobert       ref->hash = hash;
1264*404b540aSrobert       ref->locs = NULL;
1265*404b540aSrobert       ref->is_stored = false;
1266*404b540aSrobert       ref->vops = BITMAP_ALLOC (NULL);
1267*404b540aSrobert       ref->next = *mem_ref_list;
1268*404b540aSrobert       *mem_ref_list = ref;
1269*404b540aSrobert       *slot = ref;
1270*404b540aSrobert     }
1271*404b540aSrobert   ref->is_stored |= is_stored;
1272*404b540aSrobert 
1273*404b540aSrobert   FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi,
1274*404b540aSrobert 			     SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
1275*404b540aSrobert     bitmap_set_bit (ref->vops, DECL_UID (SSA_NAME_VAR (vname)));
1276*404b540aSrobert   record_mem_ref_loc (&ref->locs, stmt, mem);
1277*404b540aSrobert   return;
1278*404b540aSrobert 
1279*404b540aSrobert fail:
1280*404b540aSrobert   FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi,
1281*404b540aSrobert 			     SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
1282*404b540aSrobert     bitmap_set_bit (clobbered_vops, DECL_UID (SSA_NAME_VAR (vname)));
1283*404b540aSrobert }
1284*404b540aSrobert 
1285*404b540aSrobert /* Gathers memory references in LOOP.  Notes vops accessed through unrecognized
1286*404b540aSrobert    statements in CLOBBERED_VOPS.  The list of the references found by
1287*404b540aSrobert    the function is returned.  */
1288*404b540aSrobert 
1289*404b540aSrobert static struct mem_ref *
gather_mem_refs(struct loop * loop,bitmap clobbered_vops)1290*404b540aSrobert gather_mem_refs (struct loop *loop, bitmap clobbered_vops)
1291*404b540aSrobert {
1292*404b540aSrobert   basic_block *body = get_loop_body (loop);
1293*404b540aSrobert   block_stmt_iterator bsi;
1294*404b540aSrobert   unsigned i;
1295*404b540aSrobert   struct mem_ref *mem_ref_list = NULL;
1296*404b540aSrobert   htab_t mem_refs = htab_create (100, memref_hash, memref_eq, NULL);
1297*404b540aSrobert 
1298*404b540aSrobert   for (i = 0; i < loop->num_nodes; i++)
1299*404b540aSrobert     {
1300*404b540aSrobert       for (bsi = bsi_start (body[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1301*404b540aSrobert 	gather_mem_refs_stmt (loop, mem_refs, clobbered_vops, bsi_stmt (bsi),
1302*404b540aSrobert 			      &mem_ref_list);
1303*404b540aSrobert     }
1304*404b540aSrobert 
1305*404b540aSrobert   free (body);
1306*404b540aSrobert 
1307*404b540aSrobert   htab_delete (mem_refs);
1308*404b540aSrobert   return mem_ref_list;
1309*404b540aSrobert }
1310*404b540aSrobert 
1311*404b540aSrobert /* Finds the vops accessed by more than one of the memory references described
1312*404b540aSrobert    in MEM_REFS and marks them in CLOBBERED_VOPS.  */
1313*404b540aSrobert 
1314*404b540aSrobert static void
find_more_ref_vops(struct mem_ref * mem_refs,bitmap clobbered_vops)1315*404b540aSrobert find_more_ref_vops (struct mem_ref *mem_refs, bitmap clobbered_vops)
1316*404b540aSrobert {
1317*404b540aSrobert   bitmap_head tmp, all_vops;
1318*404b540aSrobert   struct mem_ref *ref;
1319*404b540aSrobert 
1320*404b540aSrobert   bitmap_initialize (&tmp, &bitmap_default_obstack);
1321*404b540aSrobert   bitmap_initialize (&all_vops, &bitmap_default_obstack);
1322*404b540aSrobert 
1323*404b540aSrobert   for (ref = mem_refs; ref; ref = ref->next)
1324*404b540aSrobert     {
1325*404b540aSrobert       /* The vops that are already in all_vops are accessed by more than
1326*404b540aSrobert 	 one memory reference.  */
1327*404b540aSrobert       bitmap_and (&tmp, &all_vops, ref->vops);
1328*404b540aSrobert       bitmap_ior_into (clobbered_vops, &tmp);
1329*404b540aSrobert       bitmap_clear (&tmp);
1330*404b540aSrobert 
1331*404b540aSrobert       bitmap_ior_into (&all_vops, ref->vops);
1332*404b540aSrobert     }
1333*404b540aSrobert 
1334*404b540aSrobert   bitmap_clear (&all_vops);
1335*404b540aSrobert }
1336*404b540aSrobert 
1337*404b540aSrobert /* Releases the memory occupied by REF.  */
1338*404b540aSrobert 
1339*404b540aSrobert static void
free_mem_ref(struct mem_ref * ref)1340*404b540aSrobert free_mem_ref (struct mem_ref *ref)
1341*404b540aSrobert {
1342*404b540aSrobert   free_mem_ref_locs (ref->locs);
1343*404b540aSrobert   BITMAP_FREE (ref->vops);
1344*404b540aSrobert   free (ref);
1345*404b540aSrobert }
1346*404b540aSrobert 
1347*404b540aSrobert /* Releases the memory occupied by REFS.  */
1348*404b540aSrobert 
1349*404b540aSrobert static void
free_mem_refs(struct mem_ref * refs)1350*404b540aSrobert free_mem_refs (struct mem_ref *refs)
1351*404b540aSrobert {
1352*404b540aSrobert   struct mem_ref *ref, *next;
1353*404b540aSrobert 
1354*404b540aSrobert   for (ref = refs; ref; ref = next)
1355*404b540aSrobert     {
1356*404b540aSrobert       next = ref->next;
1357*404b540aSrobert       free_mem_ref (ref);
1358*404b540aSrobert     }
1359*404b540aSrobert }
1360*404b540aSrobert 
1361*404b540aSrobert /* Try to perform store motion for all memory references modified inside
1362*404b540aSrobert    LOOP.  */
1363*404b540aSrobert 
1364*404b540aSrobert static void
determine_lsm_loop(struct loop * loop)1365*404b540aSrobert determine_lsm_loop (struct loop *loop)
1366*404b540aSrobert {
1367*404b540aSrobert   unsigned n_exits;
1368*404b540aSrobert   edge *exits = get_loop_exit_edges (loop, &n_exits);
1369*404b540aSrobert   bitmap clobbered_vops;
1370*404b540aSrobert   struct mem_ref *mem_refs;
1371*404b540aSrobert 
1372*404b540aSrobert   if (!loop_suitable_for_sm (loop, exits, n_exits))
1373*404b540aSrobert     {
1374*404b540aSrobert       free (exits);
1375*404b540aSrobert       return;
1376*404b540aSrobert     }
1377*404b540aSrobert 
1378*404b540aSrobert   /* Find the memory references in LOOP.  */
1379*404b540aSrobert   clobbered_vops = BITMAP_ALLOC (NULL);
1380*404b540aSrobert   mem_refs = gather_mem_refs (loop, clobbered_vops);
1381*404b540aSrobert 
1382*404b540aSrobert   /* Find the vops that are used for more than one reference.  */
1383*404b540aSrobert   find_more_ref_vops (mem_refs, clobbered_vops);
1384*404b540aSrobert 
1385*404b540aSrobert   /* Hoist all suitable memory references.  */
1386*404b540aSrobert   hoist_memory_references (loop, mem_refs, clobbered_vops, exits, n_exits);
1387*404b540aSrobert 
1388*404b540aSrobert   free_mem_refs (mem_refs);
1389*404b540aSrobert   free (exits);
1390*404b540aSrobert   BITMAP_FREE (clobbered_vops);
1391*404b540aSrobert }
1392*404b540aSrobert 
1393*404b540aSrobert /* Try to perform store motion for all memory references modified inside
1394*404b540aSrobert    any of LOOPS.  */
1395*404b540aSrobert 
1396*404b540aSrobert static void
determine_lsm(struct loops * loops)1397*404b540aSrobert determine_lsm (struct loops *loops)
1398*404b540aSrobert {
1399*404b540aSrobert   struct loop *loop;
1400*404b540aSrobert 
1401*404b540aSrobert   if (!loops->tree_root->inner)
1402*404b540aSrobert     return;
1403*404b540aSrobert 
1404*404b540aSrobert   /* Pass the loops from the outermost and perform the store motion as
1405*404b540aSrobert      suitable.  */
1406*404b540aSrobert 
1407*404b540aSrobert   loop = loops->tree_root->inner;
1408*404b540aSrobert   while (1)
1409*404b540aSrobert     {
1410*404b540aSrobert       determine_lsm_loop (loop);
1411*404b540aSrobert 
1412*404b540aSrobert       if (loop->inner)
1413*404b540aSrobert 	{
1414*404b540aSrobert 	  loop = loop->inner;
1415*404b540aSrobert 	  continue;
1416*404b540aSrobert 	}
1417*404b540aSrobert       while (!loop->next)
1418*404b540aSrobert 	{
1419*404b540aSrobert 	  loop = loop->outer;
1420*404b540aSrobert 	  if (loop == loops->tree_root)
1421*404b540aSrobert 	    {
1422*404b540aSrobert 	      loop_commit_inserts ();
1423*404b540aSrobert 	      return;
1424*404b540aSrobert 	    }
1425*404b540aSrobert 	}
1426*404b540aSrobert       loop = loop->next;
1427*404b540aSrobert     }
1428*404b540aSrobert }
1429*404b540aSrobert 
1430*404b540aSrobert /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
1431*404b540aSrobert    for each such basic block bb records the outermost loop for that execution
1432*404b540aSrobert    of its header implies execution of bb.  CONTAINS_CALL is the bitmap of
1433*404b540aSrobert    blocks that contain a nonpure call.  */
1434*404b540aSrobert 
1435*404b540aSrobert static void
fill_always_executed_in(struct loop * loop,sbitmap contains_call)1436*404b540aSrobert fill_always_executed_in (struct loop *loop, sbitmap contains_call)
1437*404b540aSrobert {
1438*404b540aSrobert   basic_block bb = NULL, *bbs, last = NULL;
1439*404b540aSrobert   unsigned i;
1440*404b540aSrobert   edge e;
1441*404b540aSrobert   struct loop *inn_loop = loop;
1442*404b540aSrobert 
1443*404b540aSrobert   if (!loop->header->aux)
1444*404b540aSrobert     {
1445*404b540aSrobert       bbs = get_loop_body_in_dom_order (loop);
1446*404b540aSrobert 
1447*404b540aSrobert       for (i = 0; i < loop->num_nodes; i++)
1448*404b540aSrobert 	{
1449*404b540aSrobert 	  edge_iterator ei;
1450*404b540aSrobert 	  bb = bbs[i];
1451*404b540aSrobert 
1452*404b540aSrobert 	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1453*404b540aSrobert 	    last = bb;
1454*404b540aSrobert 
1455*404b540aSrobert 	  if (TEST_BIT (contains_call, bb->index))
1456*404b540aSrobert 	    break;
1457*404b540aSrobert 
1458*404b540aSrobert 	  FOR_EACH_EDGE (e, ei, bb->succs)
1459*404b540aSrobert 	    if (!flow_bb_inside_loop_p (loop, e->dest))
1460*404b540aSrobert 	      break;
1461*404b540aSrobert 	  if (e)
1462*404b540aSrobert 	    break;
1463*404b540aSrobert 
1464*404b540aSrobert 	  /* A loop might be infinite (TODO use simple loop analysis
1465*404b540aSrobert 	     to disprove this if possible).  */
1466*404b540aSrobert 	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
1467*404b540aSrobert 	    break;
1468*404b540aSrobert 
1469*404b540aSrobert 	  if (!flow_bb_inside_loop_p (inn_loop, bb))
1470*404b540aSrobert 	    break;
1471*404b540aSrobert 
1472*404b540aSrobert 	  if (bb->loop_father->header == bb)
1473*404b540aSrobert 	    {
1474*404b540aSrobert 	      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1475*404b540aSrobert 		break;
1476*404b540aSrobert 
1477*404b540aSrobert 	      /* In a loop that is always entered we may proceed anyway.
1478*404b540aSrobert 		 But record that we entered it and stop once we leave it.  */
1479*404b540aSrobert 	      inn_loop = bb->loop_father;
1480*404b540aSrobert 	    }
1481*404b540aSrobert 	}
1482*404b540aSrobert 
1483*404b540aSrobert       while (1)
1484*404b540aSrobert 	{
1485*404b540aSrobert 	  last->aux = loop;
1486*404b540aSrobert 	  if (last == loop->header)
1487*404b540aSrobert 	    break;
1488*404b540aSrobert 	  last = get_immediate_dominator (CDI_DOMINATORS, last);
1489*404b540aSrobert 	}
1490*404b540aSrobert 
1491*404b540aSrobert       free (bbs);
1492*404b540aSrobert     }
1493*404b540aSrobert 
1494*404b540aSrobert   for (loop = loop->inner; loop; loop = loop->next)
1495*404b540aSrobert     fill_always_executed_in (loop, contains_call);
1496*404b540aSrobert }
1497*404b540aSrobert 
1498*404b540aSrobert /* Compute the global information needed by the loop invariant motion pass.
1499*404b540aSrobert    LOOPS is the loop tree.  */
1500*404b540aSrobert 
1501*404b540aSrobert static void
tree_ssa_lim_initialize(struct loops * loops)1502*404b540aSrobert tree_ssa_lim_initialize (struct loops *loops)
1503*404b540aSrobert {
1504*404b540aSrobert   sbitmap contains_call = sbitmap_alloc (last_basic_block);
1505*404b540aSrobert   block_stmt_iterator bsi;
1506*404b540aSrobert   struct loop *loop;
1507*404b540aSrobert   basic_block bb;
1508*404b540aSrobert 
1509*404b540aSrobert   sbitmap_zero (contains_call);
1510*404b540aSrobert   FOR_EACH_BB (bb)
1511*404b540aSrobert     {
1512*404b540aSrobert       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1513*404b540aSrobert 	{
1514*404b540aSrobert 	  if (nonpure_call_p (bsi_stmt (bsi)))
1515*404b540aSrobert 	    break;
1516*404b540aSrobert 	}
1517*404b540aSrobert 
1518*404b540aSrobert       if (!bsi_end_p (bsi))
1519*404b540aSrobert 	SET_BIT (contains_call, bb->index);
1520*404b540aSrobert     }
1521*404b540aSrobert 
1522*404b540aSrobert   for (loop = loops->tree_root->inner; loop; loop = loop->next)
1523*404b540aSrobert     fill_always_executed_in (loop, contains_call);
1524*404b540aSrobert 
1525*404b540aSrobert   sbitmap_free (contains_call);
1526*404b540aSrobert }
1527*404b540aSrobert 
1528*404b540aSrobert /* Cleans up after the invariant motion pass.  */
1529*404b540aSrobert 
1530*404b540aSrobert static void
tree_ssa_lim_finalize(void)1531*404b540aSrobert tree_ssa_lim_finalize (void)
1532*404b540aSrobert {
1533*404b540aSrobert   basic_block bb;
1534*404b540aSrobert 
1535*404b540aSrobert   FOR_EACH_BB (bb)
1536*404b540aSrobert     {
1537*404b540aSrobert       bb->aux = NULL;
1538*404b540aSrobert     }
1539*404b540aSrobert }
1540*404b540aSrobert 
1541*404b540aSrobert /* Moves invariants from LOOPS.  Only "expensive" invariants are moved out --
1542*404b540aSrobert    i.e. those that are likely to be win regardless of the register pressure.  */
1543*404b540aSrobert 
1544*404b540aSrobert void
tree_ssa_lim(struct loops * loops)1545*404b540aSrobert tree_ssa_lim (struct loops *loops)
1546*404b540aSrobert {
1547*404b540aSrobert   tree_ssa_lim_initialize (loops);
1548*404b540aSrobert 
1549*404b540aSrobert   /* For each statement determine the outermost loop in that it is
1550*404b540aSrobert      invariant and cost for computing the invariant.  */
1551*404b540aSrobert   determine_invariantness ();
1552*404b540aSrobert 
1553*404b540aSrobert   /* For each memory reference determine whether it is possible to hoist it
1554*404b540aSrobert      out of the loop.  Force the necessary invariants to be moved out of the
1555*404b540aSrobert      loops as well.  */
1556*404b540aSrobert   determine_lsm (loops);
1557*404b540aSrobert 
1558*404b540aSrobert   /* Move the expressions that are expensive enough.  */
1559*404b540aSrobert   move_computations ();
1560*404b540aSrobert 
1561*404b540aSrobert   tree_ssa_lim_finalize ();
1562*404b540aSrobert }
1563