/* Loop invariant motion.
   Copyright (C) 2003-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "domwalk.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "trans-mem.h"
#include "gimple-fold.h"
#include "tree-scalar-evolution.h"
#include "tree-ssa-loop-niter.h"

/* TODO:  Support for predicated code motion.  I.e.

   while (1)
     {
       if (cond)
	 {
	   a = inv;
	   something;
	 }
     }

   where COND and INV are invariants, but evaluating INV may trap or be
   invalid for some other reason if !COND.  This may be transformed to

   if (cond)
     a = inv;
   while (1)
     {
       if (cond)
	 something;
     }  */

/* The auxiliary data kept for each statement.  */

struct lim_aux_data
{
  struct loop *max_loop;	/* The outermost loop in which the statement
				   is invariant.  */

  struct loop *tgt_loop;	/* The loop out of which we want to move the
				   invariant.  */

  struct loop *always_executed_in;
				/* The outermost loop for which we are sure
				   the statement is executed if the loop
				   is entered.  */

  unsigned cost;		/* Cost of the computation performed by the
				   statement.  */

  unsigned ref;			/* The simple_mem_ref in this stmt or 0.  */

  vec<gimple *> depends;	/* Vector of statements that must also be
				   hoisted out of the loop when this statement
				   is hoisted; i.e. those that define the
				   operands of the statement and are inside of
				   the MAX_LOOP loop.  */
};
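
/* For example (an illustrative note, not from the original sources):
   if the statement is x = y + z and y is defined inside MAX_LOOP by
   another invariant statement S, then S is recorded in DEPENDS, so
   hoisting x forces S to be hoisted as well.  */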

/* Maps statements to their lim_aux_data.  */

static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;

/* Description of a memory reference location.  */

struct mem_ref_loc
{
  tree *ref;			/* The reference itself.  */
  gimple *stmt;			/* The statement in which it occurs.  */
};


/* Description of a memory reference.  */

struct im_mem_ref
{
  unsigned id;			/* ID assigned to the memory reference
				   (its index in memory_accesses.refs_list).  */
  hashval_t hash;		/* Its hash value.  */

  /* The memory access itself and associated caching of alias-oracle
     query meta-data.  */
  ao_ref mem;

  bitmap stored;		/* The set of loops in which this memory
				   location is stored to.  */
  vec<mem_ref_loc>		accesses_in_loop;
				/* The locations of the accesses.  Vector
				   indexed by the loop number.  */

  /* The following sets are computed on demand.  We keep both the set and
     its complement, so that we know whether the information was
     already computed or not.  */
  bitmap_head indep_loop;	/* The set of loops in which the memory
				   reference is independent, meaning:
				   If it is stored in the loop, this store
				     is independent of all other loads and
				     stores.
				   If it is only loaded, then it is independent
				     of all stores in the loop.  */
  bitmap_head dep_loop;		/* The complement of INDEP_LOOP.  */
};

/* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
   to record (in)dependence against stores in the loop and its subloops, the
   second to record (in)dependence against all references in the loop
   and its subloops.  */
#define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))
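
/* For example (illustrative): for loop number 3, LOOP_DEP_BIT (3, false)
   == 6 is the bit recording (in)dependence against the stores in the loop,
   and LOOP_DEP_BIT (3, true) == 7 the bit recording (in)dependence against
   all references in the loop.  */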

/* Mem_ref hashtable helpers.  */

struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
{
  typedef tree_node *compare_type;
  static inline hashval_t hash (const im_mem_ref *);
  static inline bool equal (const im_mem_ref *, const tree_node *);
};

/* A hash function for struct im_mem_ref object MEM.  */

inline hashval_t
mem_ref_hasher::hash (const im_mem_ref *mem)
{
  return mem->hash;
}

/* An equality function for struct im_mem_ref object MEM1 with
   memory reference OBJ2.  */

inline bool
mem_ref_hasher::equal (const im_mem_ref *mem1, const tree_node *obj2)
{
  return operand_equal_p (mem1->mem.ref, (const_tree) obj2, 0);
}


/* Description of memory accesses in loops.  */

static struct
{
  /* The hash table of memory references accessed in loops.  */
  hash_table<mem_ref_hasher> *refs;

  /* The list of memory references.  */
  vec<im_mem_ref *> refs_list;

  /* The set of memory references accessed in each loop.  */
  vec<bitmap_head> refs_in_loop;

  /* The set of memory references stored in each loop.  */
  vec<bitmap_head> refs_stored_in_loop;

  /* The set of memory references stored in each loop, including subloops.  */
  vec<bitmap_head> all_refs_stored_in_loop;

  /* Cache for expanding memory addresses.  */
  hash_map<tree, name_expansion *> *ttae_cache;
} memory_accesses;

/* Obstack for the bitmaps in the above data structures.  */
static bitmap_obstack lim_bitmap_obstack;
static obstack mem_ref_obstack;

static bool ref_indep_loop_p (struct loop *, im_mem_ref *);
static bool ref_always_accessed_p (struct loop *, im_mem_ref *, bool);

/* Minimum cost of an expensive expression.  */
#define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))

/* The outermost loop for which execution of the header guarantees that the
   block will be executed.  */
#define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))

/* ID of the shared unanalyzable mem.  */
#define UNANALYZABLE_MEM_ID 0

/* Whether the reference was analyzable.  */
#define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)

static struct lim_aux_data *
init_lim_data (gimple *stmt)
{
  lim_aux_data *p = XCNEW (struct lim_aux_data);
  lim_aux_data_map->put (stmt, p);

  return p;
}

static struct lim_aux_data *
get_lim_data (gimple *stmt)
{
  lim_aux_data **p = lim_aux_data_map->get (stmt);
  if (!p)
    return NULL;

  return *p;
}

/* Releases the memory occupied by DATA.  */

static void
free_lim_aux_data (struct lim_aux_data *data)
{
  data->depends.release ();
  free (data);
}

static void
clear_lim_data (gimple *stmt)
{
  lim_aux_data **p = lim_aux_data_map->get (stmt);
  if (!p)
    return;

  free_lim_aux_data (*p);
  *p = NULL;
}


/* The possibilities of statement movement.  */
enum move_pos
  {
    MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
    MOVE_PRESERVE_EXECUTION,	/* Must not cause the non-executed statement
				   to become executed -- memory accesses, ... */
    MOVE_POSSIBLE		/* Unlimited movement.  */
  };


/* If it is possible to hoist the statement STMT unconditionally,
   returns MOVE_POSSIBLE.
   If it is possible to hoist the statement STMT, but we must avoid making
   it executed if it would not be executed in the original program (e.g.
   because it may trap), return MOVE_PRESERVE_EXECUTION.
   Otherwise return MOVE_IMPOSSIBLE.  */

enum move_pos
movement_possibility (gimple *stmt)
{
  tree lhs;
  enum move_pos ret = MOVE_POSSIBLE;

  if (flag_unswitch_loops
      && gimple_code (stmt) == GIMPLE_COND)
    {
      /* If we perform unswitching, force the operands of the invariant
	 condition to be moved out of the loop.  */
      return MOVE_POSSIBLE;
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_phi_num_args (stmt) <= 2
      && !virtual_operand_p (gimple_phi_result (stmt))
      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
    return MOVE_POSSIBLE;

  if (gimple_get_lhs (stmt) == NULL_TREE)
    return MOVE_IMPOSSIBLE;

  if (gimple_vdef (stmt))
    return MOVE_IMPOSSIBLE;

  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || gimple_has_side_effects (stmt)
      || stmt_could_throw_p (stmt))
    return MOVE_IMPOSSIBLE;

  if (is_gimple_call (stmt))
    {
      /* While a pure or const call is guaranteed to have no side effects, we
	 cannot move it arbitrarily.  Consider code like

	 char *s = something ();

	 while (1)
	   {
	     if (s)
	       t = strlen (s);
	     else
	       t = 0;
	   }

	 Here the strlen call cannot be moved out of the loop, even though
	 s is invariant.  In addition to possibly creating a call with
	 invalid arguments, moving out a function call that is not executed
	 may cause performance regressions in case the call is costly and
	 not executed at all.  */
      ret = MOVE_PRESERVE_EXECUTION;
      lhs = gimple_call_lhs (stmt);
    }
  else if (is_gimple_assign (stmt))
    lhs = gimple_assign_lhs (stmt);
  else
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) != SSA_NAME
      || gimple_could_trap_p (stmt))
    return MOVE_PRESERVE_EXECUTION;

  /* Non local loads in a transaction cannot be hoisted out.  Well,
     unless the load happens on every path out of the loop, but we
     don't take this into account yet.  */
  if (flag_tm
      && gimple_in_transaction (stmt)
      && gimple_assign_single_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);
      if (DECL_P (rhs) && is_global_var (rhs))
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, "Cannot hoist conditional load of ");
	      print_generic_expr (dump_file, rhs, TDF_SLIM);
	      fprintf (dump_file, " because it is in a transaction.\n");
	    }
	  return MOVE_IMPOSSIBLE;
	}
    }

  return ret;
}

/* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
   loop to which we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in which the value
   of DEF is invariant.  */

static struct loop *
outermost_invariant_loop (tree def, struct loop *loop)
{
  gimple *def_stmt;
  basic_block def_bb;
  struct loop *max_loop;
  struct lim_aux_data *lim_data;

  if (!def)
    return superloop_at_depth (loop, 1);

  if (TREE_CODE (def) != SSA_NAME)
    {
      gcc_assert (is_gimple_min_invariant (def));
      return superloop_at_depth (loop, 1);
    }

  def_stmt = SSA_NAME_DEF_STMT (def);
  def_bb = gimple_bb (def_stmt);
  if (!def_bb)
    return superloop_at_depth (loop, 1);

  max_loop = find_common_loop (loop, def_bb->loop_father);

  lim_data = get_lim_data (def_stmt);
  if (lim_data != NULL && lim_data->max_loop != NULL)
    max_loop = find_common_loop (max_loop,
				 loop_outer (lim_data->max_loop));
  if (max_loop == loop)
    return NULL;
  max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);

  return max_loop;
}
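
/* For example (an illustrative note, not from the original sources):
   if DEF is defined in a loop A of depth 1 that encloses LOOP of depth 3,
   the innermost common loop is A, and the function returns LOOP's
   superloop of depth 2 -- the outermost of LOOP's superloops in which
   the value of DEF is invariant.  */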

/* DATA is a structure containing information associated with a statement
   inside LOOP.  DEF is one of the operands of this statement.

   Find the outermost loop enclosing LOOP in which the value of DEF is
   invariant and record this in the DATA->max_loop field.  If DEF itself is
   defined inside this loop as well (i.e. we need to hoist it out of the loop
   if we want to hoist the statement represented by DATA), record the
   statement that defines DEF in the DATA->depends list.  Additionally, if
   ADD_COST is true, add the cost of the computation of DEF to DATA->cost.

   If DEF is not invariant in LOOP, return false.  Otherwise return true.  */

static bool
add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
		bool add_cost)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);
  basic_block def_bb = gimple_bb (def_stmt);
  struct loop *max_loop;
  struct lim_aux_data *def_data;

  if (!def_bb)
    return true;

  max_loop = outermost_invariant_loop (def, loop);
  if (!max_loop)
    return false;

  if (flow_loop_nested_p (data->max_loop, max_loop))
    data->max_loop = max_loop;

  def_data = get_lim_data (def_stmt);
  if (!def_data)
    return true;

  if (add_cost
      /* Only add the cost if the statement defining DEF is inside LOOP,
	 i.e. if it is likely that by moving the invariants dependent
	 on it, we will be able to avoid creating a new register for
	 it (since it will be only used in these dependent invariants).  */
      && def_bb->loop_father == loop)
    data->cost += def_data->cost;

  data->depends.safe_push (def_stmt);

  return true;
}

/* Returns an estimate for the cost of statement STMT.  The values here
   are just ad-hoc constants, similar to costs for inlining.  */

static unsigned
stmt_cost (gimple *stmt)
{
  /* Always try to create possibilities for unswitching.  */
  if (gimple_code (stmt) == GIMPLE_COND
      || gimple_code (stmt) == GIMPLE_PHI)
    return LIM_EXPENSIVE;

  /* We should be hoisting calls if possible.  */
  if (is_gimple_call (stmt))
    {
      tree fndecl;

      /* Unless the call is a builtin_constant_p; this always folds to a
	 constant, so moving it is useless.  */
      fndecl = gimple_call_fndecl (stmt);
      if (fndecl
	  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
	  && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
	return 0;

      return LIM_EXPENSIVE;
    }

  /* Hoisting memory references out should almost surely be a win.  */
  if (gimple_references_memory_p (stmt))
    return LIM_EXPENSIVE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return 1;

  switch (gimple_assign_rhs_code (stmt))
    {
    case MULT_EXPR:
    case WIDEN_MULT_EXPR:
    case WIDEN_MULT_PLUS_EXPR:
    case WIDEN_MULT_MINUS_EXPR:
    case DOT_PROD_EXPR:
    case FMA_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
    case TRUNC_MOD_EXPR:
    case RDIV_EXPR:
      /* Division and multiplication are usually expensive.  */
      return LIM_EXPENSIVE;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case WIDEN_LSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      /* Shifts and rotates are usually expensive.  */
      return LIM_EXPENSIVE;

    case CONSTRUCTOR:
      /* Make vector construction cost proportional to the number
         of elements.  */
      return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));

    case SSA_NAME:
    case PAREN_EXPR:
      /* Whether or not something is wrapped inside a PAREN_EXPR
         should not change move cost.  Nor should an intermediate
	 unpropagated SSA name copy.  */
      return 0;

    default:
      return 1;
    }
}
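
/* For example (illustrative, SSA names invented): x_1 = a_2 * b_3 is
   charged LIM_EXPENSIVE, x_1 = a_2 + b_3 falls through to the default
   cost of 1, and a plain copy x_1 = a_2 costs 0.  */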

/* Finds the outermost loop between OUTER and LOOP in which the memory
   reference REF is independent.  If REF is not independent in LOOP, NULL
   is returned instead.  */

static struct loop *
outermost_indep_loop (struct loop *outer, struct loop *loop, im_mem_ref *ref)
{
  struct loop *aloop;

  if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
    return NULL;

  for (aloop = outer;
       aloop != loop;
       aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
    if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
	&& ref_indep_loop_p (aloop, ref))
      return aloop;

  if (ref_indep_loop_p (loop, ref))
    return loop;
  else
    return NULL;
}

/* If there is a simple load or store to a memory reference in STMT, returns
   the location of the memory reference, and sets IS_STORE according to
   whether it is a store or load.  Otherwise, returns NULL.  */

static tree *
simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
{
  tree *lhs, *rhs;

  /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
  if (!gimple_assign_single_p (stmt))
    return NULL;

  lhs = gimple_assign_lhs_ptr (stmt);
  rhs = gimple_assign_rhs1_ptr (stmt);

  if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
    {
      *is_store = false;
      return rhs;
    }
  else if (gimple_vdef (stmt)
	   && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
    {
      *is_store = true;
      return lhs;
    }
  else
    return NULL;
}

/* From a controlling predicate in DOM determine the arguments from
   the PHI node PHI that are chosen if the predicate evaluates to
   true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
   they are non-NULL.  Returns true if the arguments can be determined,
   else return false.  */

static bool
extract_true_false_args_from_phi (basic_block dom, gphi *phi,
				  tree *true_arg_p, tree *false_arg_p)
{
  edge te, fe;
  if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
					     &te, &fe))
    return false;

  if (true_arg_p)
    *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
  if (false_arg_p)
    *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);

  return true;
}
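
/* For example (illustrative, SSA names invented): for the diamond

     if (cond_5)	<-- last stmt of DOM
       ...		<-- true edge
     else
       ...		<-- false edge
     x_1 = PHI <a_2 (true edge), b_3 (false edge)>

   *TRUE_ARG_P is set to a_2 and *FALSE_ARG_P to b_3.  */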

/* Determine the outermost loop to which it is possible to hoist a statement
   STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
   the outermost loop in which the value computed by STMT is invariant.
   If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
   we preserve the fact whether STMT is executed.  It also fills other related
   information to LIM_DATA (STMT).

   The function returns false if STMT cannot be hoisted outside of the loop it
   is defined in, and true otherwise.  */

static bool
determine_max_movement (gimple *stmt, bool must_preserve_exec)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *loop = bb->loop_father;
  struct loop *level;
  struct lim_aux_data *lim_data = get_lim_data (stmt);
  tree val;
  ssa_op_iter iter;

  if (must_preserve_exec)
    level = ALWAYS_EXECUTED_IN (bb);
  else
    level = superloop_at_depth (loop, 1);
  lim_data->max_loop = level;

  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      use_operand_p use_p;
      unsigned min_cost = UINT_MAX;
      unsigned total_cost = 0;
      struct lim_aux_data *def_data;

      /* We will end up promoting dependencies to be unconditionally
	 evaluated.  For this reason the PHI cost (and thus the
	 cost we remove from the loop by doing the invariant motion)
	 is that of the cheapest PHI argument dependency chain.  */
      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  val = USE_FROM_PTR (use_p);

	  if (TREE_CODE (val) != SSA_NAME)
	    {
	      /* Assign cost 1 to constants.  */
	      min_cost = MIN (min_cost, 1);
	      total_cost += 1;
	      continue;
	    }
	  if (!add_dependency (val, lim_data, loop, false))
	    return false;

	  gimple *def_stmt = SSA_NAME_DEF_STMT (val);
	  if (gimple_bb (def_stmt)
	      && gimple_bb (def_stmt)->loop_father == loop)
	    {
	      def_data = get_lim_data (def_stmt);
	      if (def_data)
		{
		  min_cost = MIN (min_cost, def_data->cost);
		  total_cost += def_data->cost;
		}
	    }
	}

      min_cost = MIN (min_cost, total_cost);
      lim_data->cost += min_cost;

      if (gimple_phi_num_args (phi) > 1)
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond;
	  if (gsi_end_p (gsi_last_bb (dom)))
	    return false;
	  cond = gsi_stmt (gsi_last_bb (dom));
	  if (gimple_code (cond) != GIMPLE_COND)
	    return false;
	  /* Verify that this is an extended form of a diamond and
	     the PHI arguments are completely controlled by the
	     predicate in DOM.  */
	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
	    return false;

	  /* Fold in dependencies and cost of the condition.  */
	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
	    {
	      if (!add_dependency (val, lim_data, loop, false))
		return false;
	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
	      if (def_data)
		lim_data->cost += def_data->cost;
	    }

	  /* We want to avoid unconditionally executing very expensive
	     operations.  As costs for our dependencies cannot be
	     negative, just claim we are not invariant for this case.
	     We also are not sure whether the control-flow inside the
	     loop will vanish.  */
	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
	      && !(min_cost != 0
		   && total_cost / min_cost <= 2))
	    return false;
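	  /* Illustration of the guard above (not from the original
	     sources): with LIM_EXPENSIVE == 20 and two argument chains
	     of cost 1 and 50, TOTAL_COST is 51 and MIN_COST is 1;
	     51 - 1 >= 40 and 51 / 1 > 2, so we give up instead of
	     unconditionally evaluating the expensive chain.  */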

	  /* Assume that the control-flow in the loop will vanish.
	     ???  We should verify this and not artificially increase
	     the cost if that is not the case.  */
	  lim_data->cost += stmt_cost (stmt);
	}

      return true;
    }
  else
    FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
      if (!add_dependency (val, lim_data, loop, true))
	return false;

  if (gimple_vuse (stmt))
    {
      im_mem_ref *ref
	= lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
      if (ref
	  && MEM_ANALYZABLE (ref))
	{
	  lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
						     loop, ref);
	  if (!lim_data->max_loop)
	    return false;
	}
      else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
	return false;
    }

  lim_data->cost += stmt_cost (stmt);

  return true;
}

/* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
   and that one of the operands of this statement is computed by STMT.
   Ensure that STMT (together with all the statements that define its
   operands) is hoisted at least out of the loop LEVEL.  */

static void
set_level (gimple *stmt, struct loop *orig_loop, struct loop *level)
{
  struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
  struct lim_aux_data *lim_data;
  gimple *dep_stmt;
  unsigned i;

  stmt_loop = find_common_loop (orig_loop, stmt_loop);
  lim_data = get_lim_data (stmt);
  if (lim_data != NULL && lim_data->tgt_loop != NULL)
    stmt_loop = find_common_loop (stmt_loop,
				  loop_outer (lim_data->tgt_loop));
  if (flow_loop_nested_p (stmt_loop, level))
    return;

  gcc_assert (level == lim_data->max_loop
	      || flow_loop_nested_p (lim_data->max_loop, level));

  lim_data->tgt_loop = level;
  FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
    set_level (dep_stmt, orig_loop, level);
}

/* Determines the outermost loop from which we want to hoist the statement
   STMT.  For now we choose the outermost possible loop.  TODO -- use
   profiling information to set it more sanely.  */

static void
set_profitable_level (gimple *stmt)
{
  set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
}

/* Returns true if STMT is a call that has side effects.  */

static bool
nonpure_call_p (gimple *stmt)
{
  if (gimple_code (stmt) != GIMPLE_CALL)
    return false;

  return gimple_has_side_effects (stmt);
}

/* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */

static gimple *
rewrite_reciprocal (gimple_stmt_iterator *bsi)
{
  gassign *stmt, *stmt1, *stmt2;
  tree name, lhs, type;
  tree real_one;
  gimple_stmt_iterator gsi;

  stmt = as_a <gassign *> (gsi_stmt (*bsi));
  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);

  real_one = build_one_cst (type);

  name = make_temp_ssa_name (type, NULL, "reciptmp");
  stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
			       gimple_assign_rhs2 (stmt));
  stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
			       gimple_assign_rhs1 (stmt));

  /* Replace division stmt with reciprocal and multiply stmts.
     The multiply stmt is not invariant, so update iterator
     and avoid rescanning.  */
  gsi = *bsi;
  gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
  gsi_replace (&gsi, stmt2, true);

  /* Continue processing with invariant reciprocal statement.  */
  return stmt1;
}
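
/* For example (illustrative, SSA names invented): inside a loop where
   b_2 is invariant but a_1 is not,

     x_3 = a_1 / b_2;

   becomes

     reciptmp_4 = 1.0 / b_2;	<-- invariant, candidate for hoisting
     x_3 = a_1 * reciptmp_4;

   so the expensive division can be moved out of the loop.  */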

/* Check if the pattern at *BSI is a bittest of the form
   (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */

static gimple *
rewrite_bittest (gimple_stmt_iterator *bsi)
{
  gassign *stmt;
  gimple *stmt1;
  gassign *stmt2;
  gimple *use_stmt;
  gcond *cond_stmt;
  tree lhs, name, t, a, b;
  use_operand_p use;

  stmt = as_a <gassign *> (gsi_stmt (*bsi));
  lhs = gimple_assign_lhs (stmt);

  /* Verify that the single use of lhs is a comparison against zero.  */
  if (TREE_CODE (lhs) != SSA_NAME
      || !single_imm_use (lhs, &use, &use_stmt))
    return stmt;
  cond_stmt = dyn_cast <gcond *> (use_stmt);
  if (!cond_stmt)
    return stmt;
  if (gimple_cond_lhs (cond_stmt) != lhs
      || (gimple_cond_code (cond_stmt) != NE_EXPR
	  && gimple_cond_code (cond_stmt) != EQ_EXPR)
      || !integer_zerop (gimple_cond_rhs (cond_stmt)))
    return stmt;

  /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
  stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  if (gimple_code (stmt1) != GIMPLE_ASSIGN)
    return stmt;

  /* There is a conversion in between possibly inserted by fold.  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
    {
      t = gimple_assign_rhs1 (stmt1);
      if (TREE_CODE (t) != SSA_NAME
	  || !has_single_use (t))
	return stmt;
      stmt1 = SSA_NAME_DEF_STMT (t);
      if (gimple_code (stmt1) != GIMPLE_ASSIGN)
	return stmt;
    }

  /* Verify that B is loop invariant but A is not.  Verify that with
     all the stmt walking we are still in the same loop.  */
  if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
      || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
    return stmt;

  a = gimple_assign_rhs1 (stmt1);
  b = gimple_assign_rhs2 (stmt1);

  if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
      && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
    {
      gimple_stmt_iterator rsi;

      /* 1 << B */
      t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
		       build_int_cst (TREE_TYPE (a), 1), b);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
      stmt1 = gimple_build_assign (name, t);

      /* A & (1 << B) */
      t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
      stmt2 = gimple_build_assign (name, t);

      /* Replace the SSA_NAME we compare against zero.  Adjust
	 the type of zero accordingly.  */
      SET_USE (use, name);
      gimple_cond_set_rhs (cond_stmt,
			   build_int_cst_type (TREE_TYPE (name),
					       0));

      /* Don't use gsi_replace here, none of the new assignments sets
	 the variable originally set in stmt.  Move bsi to stmt1, and
	 then remove the original stmt, so that we get a chance to
	 retain debug info for it.  */
      rsi = *bsi;
      gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
      gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
      gimple *to_release = gsi_stmt (rsi);
      gsi_remove (&rsi, true);
      release_defs (to_release);

      return stmt1;
    }

  return stmt;
}
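
/* For example (illustrative, SSA names invented): inside a loop where
   b_2 is invariant but a_1 is not,

     tmp_3 = a_1 >> b_2;
     tmp_4 = tmp_3 & 1;
     if (tmp_4 != 0) ...

   becomes

     shifttmp_5 = 1 << b_2;	<-- invariant, candidate for hoisting
     shifttmp_6 = a_1 & shifttmp_5;
     if (shifttmp_6 != 0) ...

   so the mask computation can be moved out of the loop.  */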
928*38fd1498Szrj 
929*38fd1498Szrj /* For each statement determines the outermost loop in that it is invariant,
930*38fd1498Szrj    -   statements on whose motion it depends and the cost of the computation.
931*38fd1498Szrj    -   This information is stored to the LIM_DATA structure associated with
932*38fd1498Szrj    -   each statement.  */
933*38fd1498Szrj class invariantness_dom_walker : public dom_walker
934*38fd1498Szrj {
935*38fd1498Szrj public:
invariantness_dom_walker(cdi_direction direction)936*38fd1498Szrj   invariantness_dom_walker (cdi_direction direction)
937*38fd1498Szrj     : dom_walker (direction) {}
938*38fd1498Szrj 
939*38fd1498Szrj   virtual edge before_dom_children (basic_block);
940*38fd1498Szrj };
941*38fd1498Szrj 
942*38fd1498Szrj /* Determine the outermost loops in that statements in basic block BB are
943*38fd1498Szrj    invariant, and record them to the LIM_DATA associated with the statements.
944*38fd1498Szrj    Callback for dom_walker.  */
945*38fd1498Szrj 
946*38fd1498Szrj edge
before_dom_children(basic_block bb)947*38fd1498Szrj invariantness_dom_walker::before_dom_children (basic_block bb)
948*38fd1498Szrj {
949*38fd1498Szrj   enum move_pos pos;
950*38fd1498Szrj   gimple_stmt_iterator bsi;
951*38fd1498Szrj   gimple *stmt;
952*38fd1498Szrj   bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
953*38fd1498Szrj   struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
954*38fd1498Szrj   struct lim_aux_data *lim_data;
955*38fd1498Szrj 
956*38fd1498Szrj   if (!loop_outer (bb->loop_father))
957*38fd1498Szrj     return NULL;
958*38fd1498Szrj 
959*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
960*38fd1498Szrj     fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
961*38fd1498Szrj 	     bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
962*38fd1498Szrj 
963*38fd1498Szrj   /* Look at PHI nodes, but only if there is at most two.
964*38fd1498Szrj      ???  We could relax this further by post-processing the inserted
965*38fd1498Szrj      code and transforming adjacent cond-exprs with the same predicate
966*38fd1498Szrj      to control flow again.  */
967*38fd1498Szrj   bsi = gsi_start_phis (bb);
968*38fd1498Szrj   if (!gsi_end_p (bsi)
969*38fd1498Szrj       && ((gsi_next (&bsi), gsi_end_p (bsi))
970*38fd1498Szrj 	  || (gsi_next (&bsi), gsi_end_p (bsi))))
971*38fd1498Szrj     for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
972*38fd1498Szrj       {
973*38fd1498Szrj 	stmt = gsi_stmt (bsi);
974*38fd1498Szrj 
975*38fd1498Szrj 	pos = movement_possibility (stmt);
976*38fd1498Szrj 	if (pos == MOVE_IMPOSSIBLE)
977*38fd1498Szrj 	  continue;
978*38fd1498Szrj 
979*38fd1498Szrj 	lim_data = get_lim_data (stmt);
980*38fd1498Szrj 	if (! lim_data)
981*38fd1498Szrj 	  lim_data = init_lim_data (stmt);
982*38fd1498Szrj 	lim_data->always_executed_in = outermost;
983*38fd1498Szrj 
984*38fd1498Szrj 	if (!determine_max_movement (stmt, false))
985*38fd1498Szrj 	  {
986*38fd1498Szrj 	    lim_data->max_loop = NULL;
987*38fd1498Szrj 	    continue;
988*38fd1498Szrj 	  }
989*38fd1498Szrj 
990*38fd1498Szrj 	if (dump_file && (dump_flags & TDF_DETAILS))
991*38fd1498Szrj 	  {
992*38fd1498Szrj 	    print_gimple_stmt (dump_file, stmt, 2);
993*38fd1498Szrj 	    fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
994*38fd1498Szrj 		     loop_depth (lim_data->max_loop),
995*38fd1498Szrj 		     lim_data->cost);
996*38fd1498Szrj 	  }
997*38fd1498Szrj 
998*38fd1498Szrj 	if (lim_data->cost >= LIM_EXPENSIVE)
999*38fd1498Szrj 	  set_profitable_level (stmt);
1000*38fd1498Szrj       }
1001*38fd1498Szrj 
1002*38fd1498Szrj   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1003*38fd1498Szrj     {
1004*38fd1498Szrj       stmt = gsi_stmt (bsi);
1005*38fd1498Szrj 
1006*38fd1498Szrj       pos = movement_possibility (stmt);
1007*38fd1498Szrj       if (pos == MOVE_IMPOSSIBLE)
1008*38fd1498Szrj 	{
1009*38fd1498Szrj 	  if (nonpure_call_p (stmt))
1010*38fd1498Szrj 	    {
1011*38fd1498Szrj 	      maybe_never = true;
1012*38fd1498Szrj 	      outermost = NULL;
1013*38fd1498Szrj 	    }
1014*38fd1498Szrj 	  /* Make sure to note always_executed_in for stores to make
1015*38fd1498Szrj 	     store-motion work.  */
1016*38fd1498Szrj 	  else if (stmt_makes_single_store (stmt))
1017*38fd1498Szrj 	    {
1018*38fd1498Szrj 	      struct lim_aux_data *lim_data = get_lim_data (stmt);
1019*38fd1498Szrj 	      if (! lim_data)
1020*38fd1498Szrj 		lim_data = init_lim_data (stmt);
1021*38fd1498Szrj 	      lim_data->always_executed_in = outermost;
1022*38fd1498Szrj 	    }
1023*38fd1498Szrj 	  continue;
1024*38fd1498Szrj 	}
1025*38fd1498Szrj 
1026*38fd1498Szrj       if (is_gimple_assign (stmt)
1027*38fd1498Szrj 	  && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1028*38fd1498Szrj 	      == GIMPLE_BINARY_RHS))
1029*38fd1498Szrj 	{
1030*38fd1498Szrj 	  tree op0 = gimple_assign_rhs1 (stmt);
1031*38fd1498Szrj 	  tree op1 = gimple_assign_rhs2 (stmt);
1032*38fd1498Szrj 	  struct loop *ol1 = outermost_invariant_loop (op1,
1033*38fd1498Szrj 					loop_containing_stmt (stmt));
1034*38fd1498Szrj 
1035*38fd1498Szrj 	  /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1036*38fd1498Szrj 	     to be hoisted out of loop, saving expensive divide.  */
1037*38fd1498Szrj 	  if (pos == MOVE_POSSIBLE
1038*38fd1498Szrj 	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1039*38fd1498Szrj 	      && flag_unsafe_math_optimizations
1040*38fd1498Szrj 	      && !flag_trapping_math
1041*38fd1498Szrj 	      && ol1 != NULL
1042*38fd1498Szrj 	      && outermost_invariant_loop (op0, ol1) == NULL)
1043*38fd1498Szrj 	    stmt = rewrite_reciprocal (&bsi);
1044*38fd1498Szrj 
1045*38fd1498Szrj 	  /* If the shift count is invariant, convert (A >> B) & 1 to
1046*38fd1498Szrj 	     A & (1 << B) allowing the bit mask to be hoisted out of the loop
1047*38fd1498Szrj 	     saving an expensive shift.  */
1048*38fd1498Szrj 	  if (pos == MOVE_POSSIBLE
1049*38fd1498Szrj 	      && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1050*38fd1498Szrj 	      && integer_onep (op1)
1051*38fd1498Szrj 	      && TREE_CODE (op0) == SSA_NAME
1052*38fd1498Szrj 	      && has_single_use (op0))
1053*38fd1498Szrj 	    stmt = rewrite_bittest (&bsi);
1054*38fd1498Szrj 	}
1055*38fd1498Szrj 
1056*38fd1498Szrj       lim_data = get_lim_data (stmt);
1057*38fd1498Szrj       if (! lim_data)
1058*38fd1498Szrj 	lim_data = init_lim_data (stmt);
1059*38fd1498Szrj       lim_data->always_executed_in = outermost;
1060*38fd1498Szrj 
1061*38fd1498Szrj       if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1062*38fd1498Szrj 	continue;
1063*38fd1498Szrj 
1064*38fd1498Szrj       if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1065*38fd1498Szrj 	{
1066*38fd1498Szrj 	  lim_data->max_loop = NULL;
1067*38fd1498Szrj 	  continue;
1068*38fd1498Szrj 	}
1069*38fd1498Szrj 
1070*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
1071*38fd1498Szrj 	{
1072*38fd1498Szrj 	  print_gimple_stmt (dump_file, stmt, 2);
1073*38fd1498Szrj 	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1074*38fd1498Szrj 		   loop_depth (lim_data->max_loop),
1075*38fd1498Szrj 		   lim_data->cost);
1076*38fd1498Szrj 	}
1077*38fd1498Szrj 
1078*38fd1498Szrj       if (lim_data->cost >= LIM_EXPENSIVE)
1079*38fd1498Szrj 	set_profitable_level (stmt);
1080*38fd1498Szrj     }
1081*38fd1498Szrj   return NULL;
1082*38fd1498Szrj }
1083*38fd1498Szrj 
1084*38fd1498Szrj /* Hoist the statements in basic block BB out of the loops prescribed by
1085*38fd1498Szrj    data stored in LIM_DATA structures associated with each statement.  Callback
1086*38fd1498Szrj    for walk_dominator_tree.  */
1087*38fd1498Szrj 
1088*38fd1498Szrj unsigned int
move_computations_worker(basic_block bb)1089*38fd1498Szrj move_computations_worker (basic_block bb)
1090*38fd1498Szrj {
1091*38fd1498Szrj   struct loop *level;
1092*38fd1498Szrj   unsigned cost = 0;
1093*38fd1498Szrj   struct lim_aux_data *lim_data;
1094*38fd1498Szrj   unsigned int todo = 0;
1095*38fd1498Szrj 
1096*38fd1498Szrj   if (!loop_outer (bb->loop_father))
1097*38fd1498Szrj     return todo;
1098*38fd1498Szrj 
1099*38fd1498Szrj   for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1100*38fd1498Szrj     {
1101*38fd1498Szrj       gassign *new_stmt;
1102*38fd1498Szrj       gphi *stmt = bsi.phi ();
1103*38fd1498Szrj 
1104*38fd1498Szrj       lim_data = get_lim_data (stmt);
1105*38fd1498Szrj       if (lim_data == NULL)
1106*38fd1498Szrj 	{
1107*38fd1498Szrj 	  gsi_next (&bsi);
1108*38fd1498Szrj 	  continue;
1109*38fd1498Szrj 	}
1110*38fd1498Szrj 
1111*38fd1498Szrj       cost = lim_data->cost;
1112*38fd1498Szrj       level = lim_data->tgt_loop;
1113*38fd1498Szrj       clear_lim_data (stmt);
1114*38fd1498Szrj 
1115*38fd1498Szrj       if (!level)
1116*38fd1498Szrj 	{
1117*38fd1498Szrj 	  gsi_next (&bsi);
1118*38fd1498Szrj 	  continue;
1119*38fd1498Szrj 	}
1120*38fd1498Szrj 
1121*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
1122*38fd1498Szrj 	{
1123*38fd1498Szrj 	  fprintf (dump_file, "Moving PHI node\n");
1124*38fd1498Szrj 	  print_gimple_stmt (dump_file, stmt, 0);
1125*38fd1498Szrj 	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1126*38fd1498Szrj 		   cost, level->num);
1127*38fd1498Szrj 	}
1128*38fd1498Szrj 
1129*38fd1498Szrj       if (gimple_phi_num_args (stmt) == 1)
1130*38fd1498Szrj 	{
1131*38fd1498Szrj 	  tree arg = PHI_ARG_DEF (stmt, 0);
1132*38fd1498Szrj 	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1133*38fd1498Szrj 					  TREE_CODE (arg), arg);
1134*38fd1498Szrj 	}
1135*38fd1498Szrj       else
1136*38fd1498Szrj 	{
1137*38fd1498Szrj 	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1138*38fd1498Szrj 	  gimple *cond = gsi_stmt (gsi_last_bb (dom));
1139*38fd1498Szrj 	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1140*38fd1498Szrj 	  /* Get the PHI arguments corresponding to the true and false
1141*38fd1498Szrj 	     edges of COND.  */
1142*38fd1498Szrj 	  extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1143*38fd1498Szrj 	  gcc_assert (arg0 && arg1);
1144*38fd1498Szrj 	  t = build2 (gimple_cond_code (cond), boolean_type_node,
1145*38fd1498Szrj 		      gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1146*38fd1498Szrj 	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1147*38fd1498Szrj 					  COND_EXPR, t, arg0, arg1);
1148*38fd1498Szrj 	  todo |= TODO_cleanup_cfg;
1149*38fd1498Szrj 	}
1150*38fd1498Szrj       if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
1151*38fd1498Szrj 	  && (!ALWAYS_EXECUTED_IN (bb)
1152*38fd1498Szrj 	      || (ALWAYS_EXECUTED_IN (bb) != level
1153*38fd1498Szrj 		  && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1154*38fd1498Szrj 	{
1155*38fd1498Szrj 	  tree lhs = gimple_assign_lhs (new_stmt);
1156*38fd1498Szrj 	  SSA_NAME_RANGE_INFO (lhs) = NULL;
1157*38fd1498Szrj 	}
1158*38fd1498Szrj       gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1159*38fd1498Szrj       remove_phi_node (&bsi, false);
1160*38fd1498Szrj     }
1161*38fd1498Szrj 
1162*38fd1498Szrj   for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1163*38fd1498Szrj     {
1164*38fd1498Szrj       edge e;
1165*38fd1498Szrj 
1166*38fd1498Szrj       gimple *stmt = gsi_stmt (bsi);
1167*38fd1498Szrj 
1168*38fd1498Szrj       lim_data = get_lim_data (stmt);
1169*38fd1498Szrj       if (lim_data == NULL)
1170*38fd1498Szrj 	{
1171*38fd1498Szrj 	  gsi_next (&bsi);
1172*38fd1498Szrj 	  continue;
1173*38fd1498Szrj 	}
1174*38fd1498Szrj 
1175*38fd1498Szrj       cost = lim_data->cost;
1176*38fd1498Szrj       level = lim_data->tgt_loop;
1177*38fd1498Szrj       clear_lim_data (stmt);
1178*38fd1498Szrj 
1179*38fd1498Szrj       if (!level)
1180*38fd1498Szrj 	{
1181*38fd1498Szrj 	  gsi_next (&bsi);
1182*38fd1498Szrj 	  continue;
1183*38fd1498Szrj 	}
1184*38fd1498Szrj 
1185*38fd1498Szrj       /* We do not really want to move conditionals out of the loop; we just
1186*38fd1498Szrj 	 placed it here to force its operands to be moved if necessary.  */
1187*38fd1498Szrj       if (gimple_code (stmt) == GIMPLE_COND)
1188*38fd1498Szrj 	continue;
1189*38fd1498Szrj 
1190*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
1191*38fd1498Szrj 	{
1192*38fd1498Szrj 	  fprintf (dump_file, "Moving statement\n");
1193*38fd1498Szrj 	  print_gimple_stmt (dump_file, stmt, 0);
1194*38fd1498Szrj 	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1195*38fd1498Szrj 		   cost, level->num);
1196*38fd1498Szrj 	}
1197*38fd1498Szrj 
1198*38fd1498Szrj       e = loop_preheader_edge (level);
1199*38fd1498Szrj       gcc_assert (!gimple_vdef (stmt));
1200*38fd1498Szrj       if (gimple_vuse (stmt))
1201*38fd1498Szrj 	{
1202*38fd1498Szrj 	  /* The new VUSE is the one from the virtual PHI in the loop
1203*38fd1498Szrj 	     header or the one already present.  */
1204*38fd1498Szrj 	  gphi_iterator gsi2;
1205*38fd1498Szrj 	  for (gsi2 = gsi_start_phis (e->dest);
1206*38fd1498Szrj 	       !gsi_end_p (gsi2); gsi_next (&gsi2))
1207*38fd1498Szrj 	    {
1208*38fd1498Szrj 	      gphi *phi = gsi2.phi ();
1209*38fd1498Szrj 	      if (virtual_operand_p (gimple_phi_result (phi)))
1210*38fd1498Szrj 		{
1211*38fd1498Szrj 		  gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
1212*38fd1498Szrj 		  break;
1213*38fd1498Szrj 		}
1214*38fd1498Szrj 	    }
1215*38fd1498Szrj 	}
1216*38fd1498Szrj       gsi_remove (&bsi, false);
1217*38fd1498Szrj       if (gimple_has_lhs (stmt)
1218*38fd1498Szrj 	  && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1219*38fd1498Szrj 	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
1220*38fd1498Szrj 	  && (!ALWAYS_EXECUTED_IN (bb)
1221*38fd1498Szrj 	      || !(ALWAYS_EXECUTED_IN (bb) == level
1222*38fd1498Szrj 		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1223*38fd1498Szrj 	{
1224*38fd1498Szrj 	  tree lhs = gimple_get_lhs (stmt);
1225*38fd1498Szrj 	  SSA_NAME_RANGE_INFO (lhs) = NULL;
1226*38fd1498Szrj 	}
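      /* E.g. (a sketch, hypothetical names): under  if (n_1 > 0)  the range
	 i_2 >= 0 may have been recorded for  i_2 = n_1 - 1;  once the
	 statement is hoisted out of that guard the range need not hold on
	 all paths, which is why the range info was dropped above.  */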
1227*38fd1498Szrj       /* In case this is a stmt that is not unconditionally executed
1228*38fd1498Szrj          when the target loop header is executed and the stmt may
1229*38fd1498Szrj 	 invoke undefined integer or pointer overflow, rewrite it to
1230*38fd1498Szrj 	 unsigned arithmetic.  */
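      /* A sketch of that rewrite (not the exact gimple produced), using
	 hypothetical SSA names: the signed statement

	   tmp_4 = i_2 + 1;

	 is emitted on the preheader edge as the overflow-defined sequence

	   tmp.9_5 = (unsigned int) i_2;
	   tmp.9_6 = tmp.9_5 + 1;
	   tmp_4 = (int) tmp.9_6;  */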
1231*38fd1498Szrj       if (is_gimple_assign (stmt)
1232*38fd1498Szrj 	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1233*38fd1498Szrj 	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1234*38fd1498Szrj 	  && arith_code_with_undefined_signed_overflow
1235*38fd1498Szrj 	       (gimple_assign_rhs_code (stmt))
1236*38fd1498Szrj 	  && (!ALWAYS_EXECUTED_IN (bb)
1237*38fd1498Szrj 	      || !(ALWAYS_EXECUTED_IN (bb) == level
1238*38fd1498Szrj 		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1239*38fd1498Szrj 	gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1240*38fd1498Szrj       else
1241*38fd1498Szrj 	gsi_insert_on_edge (e, stmt);
1242*38fd1498Szrj     }
1243*38fd1498Szrj 
1244*38fd1498Szrj   return todo;
1245*38fd1498Szrj }
1246*38fd1498Szrj 
1247*38fd1498Szrj /* Hoist the statements out of the loops prescribed by data stored in
1248*38fd1498Szrj    LIM_DATA structures associated with each statement.  */
1249*38fd1498Szrj 
1250*38fd1498Szrj static unsigned int
1251*38fd1498Szrj move_computations (void)
1252*38fd1498Szrj {
1253*38fd1498Szrj   int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
1254*38fd1498Szrj   int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
1255*38fd1498Szrj   unsigned todo = 0;
1256*38fd1498Szrj 
1257*38fd1498Szrj   for (int i = 0; i < n; ++i)
1258*38fd1498Szrj     todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (cfun, rpo[i]));
1259*38fd1498Szrj 
1260*38fd1498Szrj   free (rpo);
1261*38fd1498Szrj 
1262*38fd1498Szrj   gsi_commit_edge_inserts ();
1263*38fd1498Szrj   if (need_ssa_update_p (cfun))
1264*38fd1498Szrj     rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1265*38fd1498Szrj 
1266*38fd1498Szrj   return todo;
1267*38fd1498Szrj }
1268*38fd1498Szrj 
1269*38fd1498Szrj /* Checks whether the statement defining variable *INDEX can be hoisted
1270*38fd1498Szrj    out of the loop passed in DATA.  Callback for for_each_index.  */
1271*38fd1498Szrj 
1272*38fd1498Szrj static bool
1273*38fd1498Szrj may_move_till (tree ref, tree *index, void *data)
1274*38fd1498Szrj {
1275*38fd1498Szrj   struct loop *loop = (struct loop *) data, *max_loop;
1276*38fd1498Szrj 
1277*38fd1498Szrj   /* If REF is an array reference, check also that the step and the lower
1278*38fd1498Szrj      bound are invariant in LOOP.  */
1279*38fd1498Szrj   if (TREE_CODE (ref) == ARRAY_REF)
1280*38fd1498Szrj     {
1281*38fd1498Szrj       tree step = TREE_OPERAND (ref, 3);
1282*38fd1498Szrj       tree lbound = TREE_OPERAND (ref, 2);
1283*38fd1498Szrj 
1284*38fd1498Szrj       max_loop = outermost_invariant_loop (step, loop);
1285*38fd1498Szrj       if (!max_loop)
1286*38fd1498Szrj 	return false;
1287*38fd1498Szrj 
1288*38fd1498Szrj       max_loop = outermost_invariant_loop (lbound, loop);
1289*38fd1498Szrj       if (!max_loop)
1290*38fd1498Szrj 	return false;
1291*38fd1498Szrj     }
1292*38fd1498Szrj 
1293*38fd1498Szrj   max_loop = outermost_invariant_loop (*index, loop);
1294*38fd1498Szrj   if (!max_loop)
1295*38fd1498Szrj     return false;
1296*38fd1498Szrj 
1297*38fd1498Szrj   return true;
1298*38fd1498Szrj }
1299*38fd1498Szrj 
1300*38fd1498Szrj /* If OP is an SSA name, force the statement that defines it to be
1301*38fd1498Szrj    moved out of the LOOP.  ORIG_LOOP is the loop in which OP is used.  */
1302*38fd1498Szrj 
1303*38fd1498Szrj static void
1304*38fd1498Szrj force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
1305*38fd1498Szrj {
1306*38fd1498Szrj   gimple *stmt;
1307*38fd1498Szrj 
1308*38fd1498Szrj   if (!op
1309*38fd1498Szrj       || is_gimple_min_invariant (op))
1310*38fd1498Szrj     return;
1311*38fd1498Szrj 
1312*38fd1498Szrj   gcc_assert (TREE_CODE (op) == SSA_NAME);
1313*38fd1498Szrj 
1314*38fd1498Szrj   stmt = SSA_NAME_DEF_STMT (op);
1315*38fd1498Szrj   if (gimple_nop_p (stmt))
1316*38fd1498Szrj     return;
1317*38fd1498Szrj 
1318*38fd1498Szrj   set_level (stmt, orig_loop, loop);
1319*38fd1498Szrj }
1320*38fd1498Szrj 
1321*38fd1498Szrj /* Forces the statements defining invariants in REF (and *INDEX) to be moved
1322*38fd1498Szrj    out of the LOOP.  The reference REF is used in the loop ORIG_LOOP.
1323*38fd1498Szrj    Callback for for_each_index.  */
1324*38fd1498Szrj 
1325*38fd1498Szrj struct fmt_data
1326*38fd1498Szrj {
1327*38fd1498Szrj   struct loop *loop;
1328*38fd1498Szrj   struct loop *orig_loop;
1329*38fd1498Szrj };
1330*38fd1498Szrj 
1331*38fd1498Szrj static bool
1332*38fd1498Szrj force_move_till (tree ref, tree *index, void *data)
1333*38fd1498Szrj {
1334*38fd1498Szrj   struct fmt_data *fmt_data = (struct fmt_data *) data;
1335*38fd1498Szrj 
1336*38fd1498Szrj   if (TREE_CODE (ref) == ARRAY_REF)
1337*38fd1498Szrj     {
1338*38fd1498Szrj       tree step = TREE_OPERAND (ref, 3);
1339*38fd1498Szrj       tree lbound = TREE_OPERAND (ref, 2);
1340*38fd1498Szrj 
1341*38fd1498Szrj       force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1342*38fd1498Szrj       force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1343*38fd1498Szrj     }
1344*38fd1498Szrj 
1345*38fd1498Szrj   force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1346*38fd1498Szrj 
1347*38fd1498Szrj   return true;
1348*38fd1498Szrj }
1349*38fd1498Szrj 
1350*38fd1498Szrj /* A function to free the mem_ref object MEM.  */
1351*38fd1498Szrj 
1352*38fd1498Szrj static void
1353*38fd1498Szrj memref_free (struct im_mem_ref *mem)
1354*38fd1498Szrj {
1355*38fd1498Szrj   mem->accesses_in_loop.release ();
1356*38fd1498Szrj }
1357*38fd1498Szrj 
1358*38fd1498Szrj /* Allocates and returns a memory reference description for MEM whose hash
1359*38fd1498Szrj    value is HASH and id is ID.  */
1360*38fd1498Szrj 
1361*38fd1498Szrj static im_mem_ref *
1362*38fd1498Szrj mem_ref_alloc (tree mem, unsigned hash, unsigned id)
1363*38fd1498Szrj {
1364*38fd1498Szrj   im_mem_ref *ref = XOBNEW (&mem_ref_obstack, struct im_mem_ref);
1365*38fd1498Szrj   ao_ref_init (&ref->mem, mem);
1366*38fd1498Szrj   ref->id = id;
1367*38fd1498Szrj   ref->hash = hash;
1368*38fd1498Szrj   ref->stored = NULL;
1369*38fd1498Szrj   bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
1370*38fd1498Szrj   bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1371*38fd1498Szrj   ref->accesses_in_loop.create (1);
1372*38fd1498Szrj 
1373*38fd1498Szrj   return ref;
1374*38fd1498Szrj }
1375*38fd1498Szrj 
1376*38fd1498Szrj /* Records memory reference location *LOC to the memory reference
1377*38fd1498Szrj    description REF.  The reference occurs in statement STMT.  */
1378*38fd1498Szrj 
1379*38fd1498Szrj static void
1380*38fd1498Szrj record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1381*38fd1498Szrj {
1382*38fd1498Szrj   mem_ref_loc aref;
1383*38fd1498Szrj   aref.stmt = stmt;
1384*38fd1498Szrj   aref.ref = loc;
1385*38fd1498Szrj   ref->accesses_in_loop.safe_push (aref);
1386*38fd1498Szrj }
1387*38fd1498Szrj 
1388*38fd1498Szrj /* Set the LOOP bit in REF's stored bitmap, allocating the bitmap if
1389*38fd1498Szrj    necessary.  Return whether the bit changed.  */
1390*38fd1498Szrj 
1391*38fd1498Szrj static bool
1392*38fd1498Szrj set_ref_stored_in_loop (im_mem_ref *ref, struct loop *loop)
1393*38fd1498Szrj {
1394*38fd1498Szrj   if (!ref->stored)
1395*38fd1498Szrj     ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1396*38fd1498Szrj   return bitmap_set_bit (ref->stored, loop->num);
1397*38fd1498Szrj }
1398*38fd1498Szrj 
1399*38fd1498Szrj /* Marks reference REF as stored in LOOP.  */
1400*38fd1498Szrj 
1401*38fd1498Szrj static void
1402*38fd1498Szrj mark_ref_stored (im_mem_ref *ref, struct loop *loop)
1403*38fd1498Szrj {
1404*38fd1498Szrj   while (loop != current_loops->tree_root
1405*38fd1498Szrj 	 && set_ref_stored_in_loop (ref, loop))
1406*38fd1498Szrj     loop = loop_outer (loop);
1407*38fd1498Szrj }
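/* E.g. a store recorded in loop 3, nested in loop 2, nested in loop 1,
   sets the stored bits {3, 2, 1} on the first call; recording a second
   store in loop 3 stops immediately because bitmap_set_bit returns false
   for the already-set bit.  */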
1408*38fd1498Szrj 
1409*38fd1498Szrj /* Gathers memory references in statement STMT in LOOP, storing the
1410*38fd1498Szrj    information about them in the memory_accesses structure.  Marks
1411*38fd1498Szrj    the vops accessed through unrecognized statements there as
1412*38fd1498Szrj    well.  */
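/* E.g. two loads of  *p_1  in the loop body hash to the same im_mem_ref
   and share one id, while a statement the pass cannot decompose into a
   single reference (an aggregate copy, an asm touching memory, ...) is
   recorded under the shared UNANALYZABLE_MEM_ID entry instead.  */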
1413*38fd1498Szrj 
1414*38fd1498Szrj static void
1415*38fd1498Szrj gather_mem_refs_stmt (struct loop *loop, gimple *stmt)
1416*38fd1498Szrj {
1417*38fd1498Szrj   tree *mem = NULL;
1418*38fd1498Szrj   hashval_t hash;
1419*38fd1498Szrj   im_mem_ref **slot;
1420*38fd1498Szrj   im_mem_ref *ref;
1421*38fd1498Szrj   bool is_stored;
1422*38fd1498Szrj   unsigned id;
1423*38fd1498Szrj 
1424*38fd1498Szrj   if (!gimple_vuse (stmt))
1425*38fd1498Szrj     return;
1426*38fd1498Szrj 
1427*38fd1498Szrj   mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1428*38fd1498Szrj   if (!mem)
1429*38fd1498Szrj     {
1430*38fd1498Szrj       /* We use the shared mem_ref for all unanalyzable refs.  */
1431*38fd1498Szrj       id = UNANALYZABLE_MEM_ID;
1432*38fd1498Szrj       ref = memory_accesses.refs_list[id];
1433*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
1434*38fd1498Szrj 	{
1435*38fd1498Szrj 	  fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1436*38fd1498Szrj 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1437*38fd1498Szrj 	}
1438*38fd1498Szrj       is_stored = gimple_vdef (stmt);
1439*38fd1498Szrj     }
1440*38fd1498Szrj   else
1441*38fd1498Szrj     {
1442*38fd1498Szrj       hash = iterative_hash_expr (*mem, 0);
1443*38fd1498Szrj       slot = memory_accesses.refs->find_slot_with_hash (*mem, hash, INSERT);
1444*38fd1498Szrj       if (*slot)
1445*38fd1498Szrj 	{
1446*38fd1498Szrj 	  ref = *slot;
1447*38fd1498Szrj 	  id = ref->id;
1448*38fd1498Szrj 	}
1449*38fd1498Szrj       else
1450*38fd1498Szrj 	{
1451*38fd1498Szrj 	  id = memory_accesses.refs_list.length ();
1452*38fd1498Szrj 	  ref = mem_ref_alloc (*mem, hash, id);
1453*38fd1498Szrj 	  memory_accesses.refs_list.safe_push (ref);
1454*38fd1498Szrj 	  *slot = ref;
1455*38fd1498Szrj 
1456*38fd1498Szrj 	  if (dump_file && (dump_flags & TDF_DETAILS))
1457*38fd1498Szrj 	    {
1458*38fd1498Szrj 	      fprintf (dump_file, "Memory reference %u: ", id);
1459*38fd1498Szrj 	      print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1460*38fd1498Szrj 	      fprintf (dump_file, "\n");
1461*38fd1498Szrj 	    }
1462*38fd1498Szrj 	}
1463*38fd1498Szrj 
1464*38fd1498Szrj       record_mem_ref_loc (ref, stmt, mem);
1465*38fd1498Szrj     }
1466*38fd1498Szrj   bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
1467*38fd1498Szrj   if (is_stored)
1468*38fd1498Szrj     {
1469*38fd1498Szrj       bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1470*38fd1498Szrj       mark_ref_stored (ref, loop);
1471*38fd1498Szrj     }
1472*38fd1498Szrj   init_lim_data (stmt)->ref = ref->id;
1473*38fd1498Szrj   return;
1474*38fd1498Szrj }
1475*38fd1498Szrj 
1476*38fd1498Szrj static unsigned *bb_loop_postorder;
1477*38fd1498Szrj 
1478*38fd1498Szrj /* qsort comparison function to sort blocks by their loop fathers' postorder.  */
1479*38fd1498Szrj 
1480*38fd1498Szrj static int
1481*38fd1498Szrj sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_)
1482*38fd1498Szrj {
1483*38fd1498Szrj   basic_block bb1 = *(basic_block *)const_cast<void *>(bb1_);
1484*38fd1498Szrj   basic_block bb2 = *(basic_block *)const_cast<void *>(bb2_);
1485*38fd1498Szrj   struct loop *loop1 = bb1->loop_father;
1486*38fd1498Szrj   struct loop *loop2 = bb2->loop_father;
1487*38fd1498Szrj   if (loop1->num == loop2->num)
1488*38fd1498Szrj     return bb1->index - bb2->index;
1489*38fd1498Szrj   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1490*38fd1498Szrj }
1491*38fd1498Szrj 
1492*38fd1498Szrj /* qsort comparison function to sort ref locs by their loop fathers' postorder.  */
1493*38fd1498Szrj 
1494*38fd1498Szrj static int
1495*38fd1498Szrj sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_)
1496*38fd1498Szrj {
1497*38fd1498Szrj   mem_ref_loc *loc1 = (mem_ref_loc *)const_cast<void *>(loc1_);
1498*38fd1498Szrj   mem_ref_loc *loc2 = (mem_ref_loc *)const_cast<void *>(loc2_);
1499*38fd1498Szrj   struct loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1500*38fd1498Szrj   struct loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1501*38fd1498Szrj   if (loop1->num == loop2->num)
1502*38fd1498Szrj     return 0;
1503*38fd1498Szrj   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1504*38fd1498Szrj }
1505*38fd1498Szrj 
1506*38fd1498Szrj /* Gathers memory references in loops.  */
1507*38fd1498Szrj 
1508*38fd1498Szrj static void
1509*38fd1498Szrj analyze_memory_references (void)
1510*38fd1498Szrj {
1511*38fd1498Szrj   gimple_stmt_iterator bsi;
1512*38fd1498Szrj   basic_block bb, *bbs;
1513*38fd1498Szrj   struct loop *loop, *outer;
1514*38fd1498Szrj   unsigned i, n;
1515*38fd1498Szrj 
1516*38fd1498Szrj   /* Collect all basic blocks in loops and sort them by their
1517*38fd1498Szrj      loops' postorder.  */
1518*38fd1498Szrj   i = 0;
1519*38fd1498Szrj   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1520*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
1521*38fd1498Szrj     if (bb->loop_father != current_loops->tree_root)
1522*38fd1498Szrj       bbs[i++] = bb;
1523*38fd1498Szrj   n = i;
1524*38fd1498Szrj   qsort (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp);
1525*38fd1498Szrj 
1526*38fd1498Szrj   /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1527*38fd1498Szrj      That results in better locality for all the bitmaps.  */
1528*38fd1498Szrj   for (i = 0; i < n; ++i)
1529*38fd1498Szrj     {
1530*38fd1498Szrj       basic_block bb = bbs[i];
1531*38fd1498Szrj       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1532*38fd1498Szrj         gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1533*38fd1498Szrj     }
1534*38fd1498Szrj 
1535*38fd1498Szrj   /* Sort the location list of gathered memory references by their
1536*38fd1498Szrj      loop postorder number.  */
1537*38fd1498Szrj   im_mem_ref *ref;
1538*38fd1498Szrj   FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1539*38fd1498Szrj     ref->accesses_in_loop.qsort (sort_locs_in_loop_postorder_cmp);
1540*38fd1498Szrj 
1541*38fd1498Szrj   free (bbs);
1542*38fd1498Szrj //  free (bb_loop_postorder);
1543*38fd1498Szrj 
1544*38fd1498Szrj   /* Propagate the information about accessed memory references up
1545*38fd1498Szrj      the loop hierarchy.  */
1546*38fd1498Szrj   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1547*38fd1498Szrj     {
1548*38fd1498Szrj       /* Finalize the overall touched references (including subloops).  */
1549*38fd1498Szrj       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1550*38fd1498Szrj 		       &memory_accesses.refs_stored_in_loop[loop->num]);
1551*38fd1498Szrj 
1552*38fd1498Szrj       /* Propagate the information about accessed memory references up
1553*38fd1498Szrj 	 the loop hierarchy.  */
1554*38fd1498Szrj       outer = loop_outer (loop);
1555*38fd1498Szrj       if (outer == current_loops->tree_root)
1556*38fd1498Szrj 	continue;
1557*38fd1498Szrj 
1558*38fd1498Szrj       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1559*38fd1498Szrj 		       &memory_accesses.all_refs_stored_in_loop[loop->num]);
1560*38fd1498Szrj     }
1561*38fd1498Szrj }
1562*38fd1498Szrj 
1563*38fd1498Szrj /* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
1564*38fd1498Szrj    tree_to_aff_combination_expand.  */
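/* E.g. (a sketch) a[i_1] and a[i_1 + 2] with 4-byte elements expand to
   offsets  4*i_1  and  4*i_1 + 8  from the same base; their difference (8)
   is at least size1 (4 bytes), so aff_comb_cannot_overlap_p concludes the
   locations cannot overlap and the refs do not alias.  */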
1565*38fd1498Szrj 
1566*38fd1498Szrj static bool
1567*38fd1498Szrj mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1568*38fd1498Szrj 		      hash_map<tree, name_expansion *> **ttae_cache)
1569*38fd1498Szrj {
1570*38fd1498Szrj   /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1571*38fd1498Szrj      object and their offsets differ in such a way that the locations cannot
1572*38fd1498Szrj      overlap, then they cannot alias.  */
1573*38fd1498Szrj   poly_widest_int size1, size2;
1574*38fd1498Szrj   aff_tree off1, off2;
1575*38fd1498Szrj 
1576*38fd1498Szrj   /* Perform basic offset and type-based disambiguation.  */
1577*38fd1498Szrj   if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
1578*38fd1498Szrj     return false;
1579*38fd1498Szrj 
1580*38fd1498Szrj   /* The expansion of addresses may be a bit expensive, thus we only do
1581*38fd1498Szrj      the check at -O2 and higher optimization levels.  */
1582*38fd1498Szrj   if (optimize < 2)
1583*38fd1498Szrj     return true;
1584*38fd1498Szrj 
1585*38fd1498Szrj   get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1586*38fd1498Szrj   get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1587*38fd1498Szrj   aff_combination_expand (&off1, ttae_cache);
1588*38fd1498Szrj   aff_combination_expand (&off2, ttae_cache);
1589*38fd1498Szrj   aff_combination_scale (&off1, -1);
1590*38fd1498Szrj   aff_combination_add (&off2, &off1);
1591*38fd1498Szrj 
1592*38fd1498Szrj   if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1593*38fd1498Szrj     return false;
1594*38fd1498Szrj 
1595*38fd1498Szrj   return true;
1596*38fd1498Szrj }
1597*38fd1498Szrj 
1598*38fd1498Szrj /* Compare function for bsearch searching for reference locations
1599*38fd1498Szrj    in a loop.  */
1600*38fd1498Szrj 
1601*38fd1498Szrj static int
1602*38fd1498Szrj find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_)
1603*38fd1498Szrj {
1604*38fd1498Szrj   struct loop *loop = (struct loop *)const_cast<void *>(loop_);
1605*38fd1498Szrj   mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1606*38fd1498Szrj   struct loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1607*38fd1498Szrj   if (loop->num == loc_loop->num
1608*38fd1498Szrj       || flow_loop_nested_p (loop, loc_loop))
1609*38fd1498Szrj     return 0;
1610*38fd1498Szrj   return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1611*38fd1498Szrj 	  ? -1 : 1);
1612*38fd1498Szrj }
1613*38fd1498Szrj 
1614*38fd1498Szrj /* Iterates over all locations of REF in LOOP and its subloops calling
1615*38fd1498Szrj    fn.operator() with the location as argument.  When that operator
1616*38fd1498Szrj    returns true, the iteration stops and true is returned.
1617*38fd1498Szrj    Otherwise false is returned.  */
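/* A sketch of the lookup, with hypothetical loop numbers: because
   accesses_in_loop is sorted by loop-father postorder, the locations in
   LOOP and its sub-loops form one contiguous run,

     [ other | L7 L7 L3 L3 L3 | other ]

   bsearch lands somewhere inside the run and the walk below extends left
   and right until flow_bb_inside_loop_p fails.  */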
1618*38fd1498Szrj 
1619*38fd1498Szrj template <typename FN>
1620*38fd1498Szrj static bool
1621*38fd1498Szrj for_all_locs_in_loop (struct loop *loop, im_mem_ref *ref, FN fn)
1622*38fd1498Szrj {
1623*38fd1498Szrj   unsigned i;
1624*38fd1498Szrj   mem_ref_loc *loc;
1625*38fd1498Szrj 
1626*38fd1498Szrj   /* Search for the cluster of locs in the accesses_in_loop vector
1627*38fd1498Szrj      which is sorted by the postorder index of the loop father.  */
1628*38fd1498Szrj   loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp);
1629*38fd1498Szrj   if (!loc)
1630*38fd1498Szrj     return false;
1631*38fd1498Szrj 
1632*38fd1498Szrj   /* We have found one location inside loop or its sub-loops.  Iterate
1633*38fd1498Szrj      both forward and backward to cover the whole cluster.  */
1634*38fd1498Szrj   i = loc - ref->accesses_in_loop.address ();
1635*38fd1498Szrj   while (i > 0)
1636*38fd1498Szrj     {
1637*38fd1498Szrj       --i;
1638*38fd1498Szrj       mem_ref_loc *l = &ref->accesses_in_loop[i];
1639*38fd1498Szrj       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1640*38fd1498Szrj 	break;
1641*38fd1498Szrj       if (fn (l))
1642*38fd1498Szrj 	return true;
1643*38fd1498Szrj     }
1644*38fd1498Szrj   for (i = loc - ref->accesses_in_loop.address ();
1645*38fd1498Szrj        i < ref->accesses_in_loop.length (); ++i)
1646*38fd1498Szrj     {
1647*38fd1498Szrj       mem_ref_loc *l = &ref->accesses_in_loop[i];
1648*38fd1498Szrj       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1649*38fd1498Szrj 	break;
1650*38fd1498Szrj       if (fn (l))
1651*38fd1498Szrj 	return true;
1652*38fd1498Szrj     }
1653*38fd1498Szrj 
1654*38fd1498Szrj   return false;
1655*38fd1498Szrj }
1656*38fd1498Szrj 
1657*38fd1498Szrj /* Rewrites the memory reference at location LOC to TMP_VAR.  */
1658*38fd1498Szrj 
1659*38fd1498Szrj struct rewrite_mem_ref_loc
1660*38fd1498Szrj {
1661*38fd1498Szrj   rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1662*38fd1498Szrj   bool operator () (mem_ref_loc *loc);
1663*38fd1498Szrj   tree tmp_var;
1664*38fd1498Szrj };
1665*38fd1498Szrj 
1666*38fd1498Szrj bool
1667*38fd1498Szrj rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
1668*38fd1498Szrj {
1669*38fd1498Szrj   *loc->ref = tmp_var;
1670*38fd1498Szrj   update_stmt (loc->stmt);
1671*38fd1498Szrj   return false;
1672*38fd1498Szrj }
1673*38fd1498Szrj 
1674*38fd1498Szrj /* Rewrites all references to REF in LOOP by variable TMP_VAR.  */
1675*38fd1498Szrj 
1676*38fd1498Szrj static void
1677*38fd1498Szrj rewrite_mem_refs (struct loop *loop, im_mem_ref *ref, tree tmp_var)
1678*38fd1498Szrj {
1679*38fd1498Szrj   for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1680*38fd1498Szrj }
1681*38fd1498Szrj 
1682*38fd1498Szrj /* Stores the first reference location in LOCP.  */
1683*38fd1498Szrj 
1684*38fd1498Szrj struct first_mem_ref_loc_1
1685*38fd1498Szrj {
1686*38fd1498Szrj   first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
1687*38fd1498Szrj   bool operator () (mem_ref_loc *loc);
1688*38fd1498Szrj   mem_ref_loc **locp;
1689*38fd1498Szrj };
1690*38fd1498Szrj 
1691*38fd1498Szrj bool
1692*38fd1498Szrj first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
1693*38fd1498Szrj {
1694*38fd1498Szrj   *locp = loc;
1695*38fd1498Szrj   return true;
1696*38fd1498Szrj }
1697*38fd1498Szrj 
1698*38fd1498Szrj /* Returns the first reference location to REF in LOOP.  */
1699*38fd1498Szrj 
1700*38fd1498Szrj static mem_ref_loc *
1701*38fd1498Szrj first_mem_ref_loc (struct loop *loop, im_mem_ref *ref)
1702*38fd1498Szrj {
1703*38fd1498Szrj   mem_ref_loc *locp = NULL;
1704*38fd1498Szrj   for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1705*38fd1498Szrj   return locp;
1706*38fd1498Szrj }
1707*38fd1498Szrj 
1708*38fd1498Szrj struct prev_flag_edges {
1709*38fd1498Szrj   /* Edge to insert new flag comparison code.  */
1710*38fd1498Szrj   edge append_cond_position;
1711*38fd1498Szrj 
1712*38fd1498Szrj   /* Edge for fall through from previous flag comparison.  */
1713*38fd1498Szrj   edge last_cond_fallthru;
1714*38fd1498Szrj };
1715*38fd1498Szrj 
1716*38fd1498Szrj /* Helper function for execute_sm.  Emit code to store TMP_VAR into
1717*38fd1498Szrj    MEM along edge EX.
1718*38fd1498Szrj 
1719*38fd1498Szrj    The store is only done if MEM has changed.  We do this so no
1720*38fd1498Szrj    changes to MEM occur on code paths that did not originally store
1721*38fd1498Szrj    into it.
1722*38fd1498Szrj 
1723*38fd1498Szrj    The common case for execute_sm will transform:
1724*38fd1498Szrj 
1725*38fd1498Szrj      for (...) {
1726*38fd1498Szrj        if (foo)
1727*38fd1498Szrj          stuff;
1728*38fd1498Szrj        else
1729*38fd1498Szrj          MEM = TMP_VAR;
1730*38fd1498Szrj      }
1731*38fd1498Szrj 
1732*38fd1498Szrj    into:
1733*38fd1498Szrj 
1734*38fd1498Szrj      lsm = MEM;
1735*38fd1498Szrj      for (...) {
1736*38fd1498Szrj        if (foo)
1737*38fd1498Szrj          stuff;
1738*38fd1498Szrj        else
1739*38fd1498Szrj          lsm = TMP_VAR;
1740*38fd1498Szrj      }
1741*38fd1498Szrj      MEM = lsm;
1742*38fd1498Szrj 
1743*38fd1498Szrj   This function will generate:
1744*38fd1498Szrj 
1745*38fd1498Szrj      lsm = MEM;
1746*38fd1498Szrj 
1747*38fd1498Szrj      lsm_flag = false;
1748*38fd1498Szrj      ...
1749*38fd1498Szrj      for (...) {
1750*38fd1498Szrj        if (foo)
1751*38fd1498Szrj          stuff;
1752*38fd1498Szrj        else {
1753*38fd1498Szrj          lsm = TMP_VAR;
1754*38fd1498Szrj          lsm_flag = true;
1755*38fd1498Szrj        }
1756*38fd1498Szrj      }
1757*38fd1498Szrj      if (lsm_flag)	<--
1758*38fd1498Szrj        MEM = lsm;	<--
1759*38fd1498Szrj */
1760*38fd1498Szrj 
1761*38fd1498Szrj static void
1762*38fd1498Szrj execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
1763*38fd1498Szrj 		       edge preheader, hash_set <basic_block> *flag_bbs)
1764*38fd1498Szrj {
1765*38fd1498Szrj   basic_block new_bb, then_bb, old_dest;
1766*38fd1498Szrj   bool loop_has_only_one_exit;
1767*38fd1498Szrj   edge then_old_edge, orig_ex = ex;
1768*38fd1498Szrj   gimple_stmt_iterator gsi;
1769*38fd1498Szrj   gimple *stmt;
1770*38fd1498Szrj   struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
1771*38fd1498Szrj   bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1772*38fd1498Szrj 
1773*38fd1498Szrj   profile_count count_sum = profile_count::zero ();
1774*38fd1498Szrj   int nbbs = 0, ncount = 0;
1775*38fd1498Szrj   profile_probability flag_probability = profile_probability::uninitialized ();
1776*38fd1498Szrj 
1777*38fd1498Szrj   /* Flag is set in FLAG_BBS. Determine probability that flag will be true
1778*38fd1498Szrj      at loop exit.
1779*38fd1498Szrj 
1780*38fd1498Szrj      This code may look fancy, but it cannot update the profile very
1781*38fd1498Szrj      realistically because we do not know the probability that the flag
1782*38fd1498Szrj      will be true at a given loop exit.
1783*38fd1498Szrj 
1784*38fd1498Szrj      We look for two interesting extremes
1785*38fd1498Szrj        - when exit is dominated by block setting the flag, we know it will
1786*38fd1498Szrj          always be true.  This is a common case.
1787*38fd1498Szrj        - when all blocks setting the flag have very low frequency we know
1788*38fd1498Szrj          it will likely be false.
1789*38fd1498Szrj      In all other cases we default to 2/3 for flag being true.  */
1790*38fd1498Szrj 
1791*38fd1498Szrj   for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
1792*38fd1498Szrj        it != flag_bbs->end (); ++it)
1793*38fd1498Szrj     {
1794*38fd1498Szrj        if ((*it)->count.initialized_p ())
1795*38fd1498Szrj          count_sum += (*it)->count, ncount ++;
1796*38fd1498Szrj        if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
1797*38fd1498Szrj 	 flag_probability = profile_probability::always ();
1798*38fd1498Szrj        nbbs++;
1799*38fd1498Szrj     }
1800*38fd1498Szrj 
1801*38fd1498Szrj   profile_probability cap = profile_probability::always ().apply_scale (2, 3);
1802*38fd1498Szrj 
1803*38fd1498Szrj   if (flag_probability.initialized_p ())
1804*38fd1498Szrj     ;
1805*38fd1498Szrj   else if (ncount == nbbs
1806*38fd1498Szrj 	   && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
1807*38fd1498Szrj     {
1808*38fd1498Szrj       flag_probability = count_sum.probability_in (preheader->count ());
1809*38fd1498Szrj       if (flag_probability > cap)
1810*38fd1498Szrj 	flag_probability = cap;
1811*38fd1498Szrj     }
1812*38fd1498Szrj 
1813*38fd1498Szrj   if (!flag_probability.initialized_p ())
1814*38fd1498Szrj     flag_probability = cap;
1815*38fd1498Szrj 
1816*38fd1498Szrj   /* ?? Insert store after previous store if applicable.  See note
1817*38fd1498Szrj      below.  */
1818*38fd1498Szrj   if (prev_edges)
1819*38fd1498Szrj     ex = prev_edges->append_cond_position;
1820*38fd1498Szrj 
1821*38fd1498Szrj   loop_has_only_one_exit = single_pred_p (ex->dest);
1822*38fd1498Szrj 
1823*38fd1498Szrj   if (loop_has_only_one_exit)
1824*38fd1498Szrj     ex = split_block_after_labels (ex->dest);
1825*38fd1498Szrj   else
1826*38fd1498Szrj     {
1827*38fd1498Szrj       for (gphi_iterator gpi = gsi_start_phis (ex->dest);
1828*38fd1498Szrj 	   !gsi_end_p (gpi); gsi_next (&gpi))
1829*38fd1498Szrj 	{
1830*38fd1498Szrj 	  gphi *phi = gpi.phi ();
1831*38fd1498Szrj 	  if (virtual_operand_p (gimple_phi_result (phi)))
1832*38fd1498Szrj 	    continue;
1833*38fd1498Szrj 
1834*38fd1498Szrj 	  /* When the destination has a non-virtual PHI node with multiple
1835*38fd1498Szrj 	     predecessors make sure we preserve the PHI structure by
1836*38fd1498Szrj 	     forcing a forwarder block so that hoisting of that PHI will
1837*38fd1498Szrj 	     still work.  */
1838*38fd1498Szrj 	  split_edge (ex);
1839*38fd1498Szrj 	  break;
1840*38fd1498Szrj 	}
1841*38fd1498Szrj     }
1842*38fd1498Szrj 
1843*38fd1498Szrj   old_dest = ex->dest;
1844*38fd1498Szrj   new_bb = split_edge (ex);
1845*38fd1498Szrj   then_bb = create_empty_bb (new_bb);
1846*38fd1498Szrj   then_bb->count = new_bb->count.apply_probability (flag_probability);
1847*38fd1498Szrj   if (irr)
1848*38fd1498Szrj     then_bb->flags = BB_IRREDUCIBLE_LOOP;
1849*38fd1498Szrj   add_bb_to_loop (then_bb, new_bb->loop_father);
1850*38fd1498Szrj 
1851*38fd1498Szrj   gsi = gsi_start_bb (new_bb);
1852*38fd1498Szrj   stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
1853*38fd1498Szrj 			    NULL_TREE, NULL_TREE);
1854*38fd1498Szrj   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1855*38fd1498Szrj 
1856*38fd1498Szrj   gsi = gsi_start_bb (then_bb);
1857*38fd1498Szrj   /* Insert actual store.  */
1858*38fd1498Szrj   stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
1859*38fd1498Szrj   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1860*38fd1498Szrj 
1861*38fd1498Szrj   edge e1 = single_succ_edge (new_bb);
1862*38fd1498Szrj   edge e2 = make_edge (new_bb, then_bb,
1863*38fd1498Szrj 	               EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1864*38fd1498Szrj   e2->probability = flag_probability;
1865*38fd1498Szrj 
1866*38fd1498Szrj   e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
1867*38fd1498Szrj   e1->flags &= ~EDGE_FALLTHRU;
1868*38fd1498Szrj 
1869*38fd1498Szrj   e1->probability = flag_probability.invert ();
1870*38fd1498Szrj 
1871*38fd1498Szrj   then_old_edge = make_single_succ_edge (then_bb, old_dest,
1872*38fd1498Szrj 			     EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1873*38fd1498Szrj 
1874*38fd1498Szrj   set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
1875*38fd1498Szrj 
1876*38fd1498Szrj   if (prev_edges)
1877*38fd1498Szrj     {
1878*38fd1498Szrj       basic_block prevbb = prev_edges->last_cond_fallthru->src;
1879*38fd1498Szrj       redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
1880*38fd1498Szrj       set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
1881*38fd1498Szrj       set_immediate_dominator (CDI_DOMINATORS, old_dest,
1882*38fd1498Szrj 			       recompute_dominator (CDI_DOMINATORS, old_dest));
1883*38fd1498Szrj     }
1884*38fd1498Szrj 
1885*38fd1498Szrj   /* ?? Because stores may alias, they must happen in the exact
1886*38fd1498Szrj      sequence in which they originally happened.  Save the position right after
1887*38fd1498Szrj      the (_lsm) store we just created so we can continue appending after
1888*38fd1498Szrj      it and maintain the original order.  */
1889*38fd1498Szrj   {
1890*38fd1498Szrj     struct prev_flag_edges *p;
1891*38fd1498Szrj 
1892*38fd1498Szrj     if (orig_ex->aux)
1893*38fd1498Szrj       orig_ex->aux = NULL;
1894*38fd1498Szrj     alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
1895*38fd1498Szrj     p = (struct prev_flag_edges *) orig_ex->aux;
1896*38fd1498Szrj     p->append_cond_position = then_old_edge;
1897*38fd1498Szrj     p->last_cond_fallthru = find_edge (new_bb, old_dest);
1898*38fd1498Szrj     orig_ex->aux = (void *) p;
1899*38fd1498Szrj   }
1900*38fd1498Szrj 
1901*38fd1498Szrj   if (!loop_has_only_one_exit)
1902*38fd1498Szrj     for (gphi_iterator gpi = gsi_start_phis (old_dest);
1903*38fd1498Szrj 	 !gsi_end_p (gpi); gsi_next (&gpi))
1904*38fd1498Szrj       {
1905*38fd1498Szrj 	gphi *phi = gpi.phi ();
1906*38fd1498Szrj 	unsigned i;
1907*38fd1498Szrj 
1908*38fd1498Szrj 	for (i = 0; i < gimple_phi_num_args (phi); i++)
1909*38fd1498Szrj 	  if (gimple_phi_arg_edge (phi, i)->src == new_bb)
1910*38fd1498Szrj 	    {
1911*38fd1498Szrj 	      tree arg = gimple_phi_arg_def (phi, i);
1912*38fd1498Szrj 	      add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
1913*38fd1498Szrj 	      update_stmt (phi);
1914*38fd1498Szrj 	    }
1915*38fd1498Szrj       }
1916*38fd1498Szrj }
1917*38fd1498Szrj 
1918*38fd1498Szrj /* When REF is stored to at the location, set the flag indicating the store.  */
1919*38fd1498Szrj 
1920*38fd1498Szrj struct sm_set_flag_if_changed
1921*38fd1498Szrj {
1922*38fd1498Szrj   sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
1923*38fd1498Szrj 	 : flag (flag_), bbs (bbs_) {}
1924*38fd1498Szrj   bool operator () (mem_ref_loc *loc);
1925*38fd1498Szrj   tree flag;
1926*38fd1498Szrj   hash_set <basic_block> *bbs;
1927*38fd1498Szrj };
1928*38fd1498Szrj 
1929*38fd1498Szrj bool
1930*38fd1498Szrj sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
1931*38fd1498Szrj {
1932*38fd1498Szrj   /* Only set the flag for writes.  */
1933*38fd1498Szrj   if (is_gimple_assign (loc->stmt)
1934*38fd1498Szrj       && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
1935*38fd1498Szrj     {
1936*38fd1498Szrj       gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
1937*38fd1498Szrj       gimple *stmt = gimple_build_assign (flag, boolean_true_node);
1938*38fd1498Szrj       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1939*38fd1498Szrj       bbs->add (gimple_bb (stmt));
1940*38fd1498Szrj     }
1941*38fd1498Szrj   return false;
1942*38fd1498Szrj }
1943*38fd1498Szrj 
1944*38fd1498Szrj /* Helper function for execute_sm.  On every location where REF is
1945*38fd1498Szrj    set, set an appropriate flag indicating the store.  */
1946*38fd1498Szrj 
1947*38fd1498Szrj static tree
1948*38fd1498Szrj execute_sm_if_changed_flag_set (struct loop *loop, im_mem_ref *ref,
1949*38fd1498Szrj 				hash_set <basic_block> *bbs)
1950*38fd1498Szrj {
1951*38fd1498Szrj   tree flag;
1952*38fd1498Szrj   char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
1953*38fd1498Szrj   flag = create_tmp_reg (boolean_type_node, str);
1954*38fd1498Szrj   for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
1955*38fd1498Szrj   return flag;
1956*38fd1498Szrj }
1957*38fd1498Szrj 
1958*38fd1498Szrj /* Executes store motion of memory reference REF from LOOP.
1959*38fd1498Szrj    Exits from the LOOP are stored in EXITS.  The initialization of the
1960*38fd1498Szrj    temporary variable is put in the preheader of the loop, and assignments
1961*38fd1498Szrj    to the reference from the temporary variable are emitted at the exits.  */
1962*38fd1498Szrj 
1963*38fd1498Szrj static void
1964*38fd1498Szrj execute_sm (struct loop *loop, vec<edge> exits, im_mem_ref *ref)
1965*38fd1498Szrj {
1966*38fd1498Szrj   tree tmp_var, store_flag = NULL_TREE;
1967*38fd1498Szrj   unsigned i;
1968*38fd1498Szrj   gassign *load;
1969*38fd1498Szrj   struct fmt_data fmt_data;
1970*38fd1498Szrj   edge ex;
1971*38fd1498Szrj   struct lim_aux_data *lim_data;
1972*38fd1498Szrj   bool multi_threaded_model_p = false;
1973*38fd1498Szrj   gimple_stmt_iterator gsi;
1974*38fd1498Szrj   hash_set<basic_block> flag_bbs;
1975*38fd1498Szrj 
1976*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
1977*38fd1498Szrj     {
1978*38fd1498Szrj       fprintf (dump_file, "Executing store motion of ");
1979*38fd1498Szrj       print_generic_expr (dump_file, ref->mem.ref);
1980*38fd1498Szrj       fprintf (dump_file, " from loop %d\n", loop->num);
1981*38fd1498Szrj     }
1982*38fd1498Szrj 
1983*38fd1498Szrj   tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
1984*38fd1498Szrj 			    get_lsm_tmp_name (ref->mem.ref, ~0));
1985*38fd1498Szrj 
1986*38fd1498Szrj   fmt_data.loop = loop;
1987*38fd1498Szrj   fmt_data.orig_loop = loop;
1988*38fd1498Szrj   for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
1989*38fd1498Szrj 
1990*38fd1498Szrj   if (bb_in_transaction (loop_preheader_edge (loop)->src)
1991*38fd1498Szrj       || (! PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES)
1992*38fd1498Szrj 	  && ! ref_always_accessed_p (loop, ref, true)))
1993*38fd1498Szrj     multi_threaded_model_p = true;
1994*38fd1498Szrj 
1995*38fd1498Szrj   if (multi_threaded_model_p)
1996*38fd1498Szrj     store_flag = execute_sm_if_changed_flag_set (loop, ref, &flag_bbs);
1997*38fd1498Szrj 
1998*38fd1498Szrj   rewrite_mem_refs (loop, ref, tmp_var);
1999*38fd1498Szrj 
2000*38fd1498Szrj   /* Emit the load code right before the first location of REF in the
2001*38fd1498Szrj      loop, tagged with lim_data, so that move_computations is sure to
2002*38fd1498Szrj      hoist it to the preheader after all of its dependencies.  */
2003*38fd1498Szrj   gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2004*38fd1498Szrj 
2005*38fd1498Szrj   /* FIXME/TODO: For the multi-threaded variant, we could avoid this
2006*38fd1498Szrj      load altogether, since the store is predicated by a flag.  We
2007*38fd1498Szrj      could do the load only if it was originally in the loop.  */
2008*38fd1498Szrj   load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
2009*38fd1498Szrj   lim_data = init_lim_data (load);
2010*38fd1498Szrj   lim_data->max_loop = loop;
2011*38fd1498Szrj   lim_data->tgt_loop = loop;
2012*38fd1498Szrj   gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2013*38fd1498Szrj 
2014*38fd1498Szrj   if (multi_threaded_model_p)
2015*38fd1498Szrj     {
2016*38fd1498Szrj       load = gimple_build_assign (store_flag, boolean_false_node);
2017*38fd1498Szrj       lim_data = init_lim_data (load);
2018*38fd1498Szrj       lim_data->max_loop = loop;
2019*38fd1498Szrj       lim_data->tgt_loop = loop;
2020*38fd1498Szrj       gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2021*38fd1498Szrj     }
2022*38fd1498Szrj 
2023*38fd1498Szrj   /* Sink the store to every exit from the loop.  */
2024*38fd1498Szrj   FOR_EACH_VEC_ELT (exits, i, ex)
2025*38fd1498Szrj     if (!multi_threaded_model_p)
2026*38fd1498Szrj       {
2027*38fd1498Szrj 	gassign *store;
2028*38fd1498Szrj 	store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
2029*38fd1498Szrj 	gsi_insert_on_edge (ex, store);
2030*38fd1498Szrj       }
2031*38fd1498Szrj     else
2032*38fd1498Szrj       execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag,
2033*38fd1498Szrj 			     loop_preheader_edge (loop), &flag_bbs);
2034*38fd1498Szrj }
2035*38fd1498Szrj 
2036*38fd1498Szrj /* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
2037*38fd1498Szrj    edges of the LOOP.  */
2038*38fd1498Szrj 
2039*38fd1498Szrj static void
2040*38fd1498Szrj hoist_memory_references (struct loop *loop, bitmap mem_refs,
2041*38fd1498Szrj 			 vec<edge> exits)
2042*38fd1498Szrj {
2043*38fd1498Szrj   im_mem_ref *ref;
2044*38fd1498Szrj   unsigned  i;
2045*38fd1498Szrj   bitmap_iterator bi;
2046*38fd1498Szrj 
2047*38fd1498Szrj   EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2048*38fd1498Szrj     {
2049*38fd1498Szrj       ref = memory_accesses.refs_list[i];
2050*38fd1498Szrj       execute_sm (loop, exits, ref);
2051*38fd1498Szrj     }
2052*38fd1498Szrj }
2053*38fd1498Szrj 
2054*38fd1498Szrj struct ref_always_accessed
2055*38fd1498Szrj {
2056*38fd1498Szrj   ref_always_accessed (struct loop *loop_, bool stored_p_)
2057*38fd1498Szrj       : loop (loop_), stored_p (stored_p_) {}
2058*38fd1498Szrj   bool operator () (mem_ref_loc *loc);
2059*38fd1498Szrj   struct loop *loop;
2060*38fd1498Szrj   bool stored_p;
2061*38fd1498Szrj };
2062*38fd1498Szrj 
2063*38fd1498Szrj bool
2064*38fd1498Szrj ref_always_accessed::operator () (mem_ref_loc *loc)
2065*38fd1498Szrj {
2066*38fd1498Szrj   struct loop *must_exec;
2067*38fd1498Szrj 
2068*38fd1498Szrj   if (!get_lim_data (loc->stmt))
2069*38fd1498Szrj     return false;
2070*38fd1498Szrj 
2071*38fd1498Szrj   /* If we require an always executed store, make sure the statement
2072*38fd1498Szrj      stores to the reference.  */
2073*38fd1498Szrj   if (stored_p)
2074*38fd1498Szrj     {
2075*38fd1498Szrj       tree lhs = gimple_get_lhs (loc->stmt);
2076*38fd1498Szrj       if (!lhs
2077*38fd1498Szrj 	  || lhs != *loc->ref)
2078*38fd1498Szrj 	return false;
2079*38fd1498Szrj     }
2080*38fd1498Szrj 
2081*38fd1498Szrj   must_exec = get_lim_data (loc->stmt)->always_executed_in;
2082*38fd1498Szrj   if (!must_exec)
2083*38fd1498Szrj     return false;
2084*38fd1498Szrj 
2085*38fd1498Szrj   if (must_exec == loop
2086*38fd1498Szrj       || flow_loop_nested_p (must_exec, loop))
2087*38fd1498Szrj     return true;
2088*38fd1498Szrj 
2089*38fd1498Szrj   return false;
2090*38fd1498Szrj }
2091*38fd1498Szrj 
2092*38fd1498Szrj /* Returns true if REF is always accessed in LOOP.  If STORED_P is true
2093*38fd1498Szrj    make sure REF is always stored to in LOOP.  */
2094*38fd1498Szrj 
2095*38fd1498Szrj static bool
2096*38fd1498Szrj ref_always_accessed_p (struct loop *loop, im_mem_ref *ref, bool stored_p)
2097*38fd1498Szrj {
2098*38fd1498Szrj   return for_all_locs_in_loop (loop, ref,
2099*38fd1498Szrj 			       ref_always_accessed (loop, stored_p));
2100*38fd1498Szrj }
2101*38fd1498Szrj 
2102*38fd1498Szrj /* Returns true if REF1 and REF2 are independent.  */
2103*38fd1498Szrj 
2104*38fd1498Szrj static bool
2105*38fd1498Szrj refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2)
2106*38fd1498Szrj {
2107*38fd1498Szrj   if (ref1 == ref2)
2108*38fd1498Szrj     return true;
2109*38fd1498Szrj 
2110*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
2111*38fd1498Szrj     fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2112*38fd1498Szrj 	     ref1->id, ref2->id);
2113*38fd1498Szrj 
2114*38fd1498Szrj   if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache))
2115*38fd1498Szrj     {
2116*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
2117*38fd1498Szrj 	fprintf (dump_file, "dependent.\n");
2118*38fd1498Szrj       return false;
2119*38fd1498Szrj     }
2120*38fd1498Szrj   else
2121*38fd1498Szrj     {
2122*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
2123*38fd1498Szrj 	fprintf (dump_file, "independent.\n");
2124*38fd1498Szrj       return true;
2125*38fd1498Szrj     }
2126*38fd1498Szrj }
2127*38fd1498Szrj 
2128*38fd1498Szrj /* Mark REF dependent on stores or loads (according to STORED_P) in LOOP
2129*38fd1498Szrj    and its super-loops.  */
2130*38fd1498Szrj 
2131*38fd1498Szrj static void
2132*38fd1498Szrj record_dep_loop (struct loop *loop, im_mem_ref *ref, bool stored_p)
2133*38fd1498Szrj {
2134*38fd1498Szrj   /* We can propagate dependent-in-loop bits up the loop
2135*38fd1498Szrj      hierarchy to all outer loops.  */
2136*38fd1498Szrj   while (loop != current_loops->tree_root
2137*38fd1498Szrj 	 && bitmap_set_bit (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2138*38fd1498Szrj     loop = loop_outer (loop);
2139*38fd1498Szrj }
2140*38fd1498Szrj 
2141*38fd1498Szrj /* Returns true if REF is independent of all other memory
2142*38fd1498Szrj    references in LOOP.  */
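/* A sketch of the caching below, assuming the LOOP_DEP_BIT encoding
   defined earlier in this file: a set bit in ref->indep_loop means
   independence in that loop was already proven, a set bit in
   ref->dep_loop means a dependence is already known, so repeated queries
   and the recursion over inner loops answer from the bitmaps instead of
   re-walking all references.  */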
2143*38fd1498Szrj 
2144*38fd1498Szrj static bool
2145*38fd1498Szrj ref_indep_loop_p_1 (struct loop *loop, im_mem_ref *ref, bool stored_p)
2146*38fd1498Szrj {
2147*38fd1498Szrj   stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
2148*38fd1498Szrj 
2149*38fd1498Szrj   bool indep_p = true;
2150*38fd1498Szrj   bitmap refs_to_check;
2151*38fd1498Szrj 
2152*38fd1498Szrj   if (stored_p)
2153*38fd1498Szrj     refs_to_check = &memory_accesses.refs_in_loop[loop->num];
2154*38fd1498Szrj   else
2155*38fd1498Szrj     refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
2156*38fd1498Szrj 
2157*38fd1498Szrj   if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
2158*38fd1498Szrj     indep_p = false;
2159*38fd1498Szrj   else
2160*38fd1498Szrj     {
2161*38fd1498Szrj       if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2162*38fd1498Szrj 	return true;
2163*38fd1498Szrj       if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2164*38fd1498Szrj 	return false;
2165*38fd1498Szrj 
2166*38fd1498Szrj       struct loop *inner = loop->inner;
2167*38fd1498Szrj       while (inner)
2168*38fd1498Szrj 	{
2169*38fd1498Szrj 	  if (!ref_indep_loop_p_1 (inner, ref, stored_p))
2170*38fd1498Szrj 	    {
2171*38fd1498Szrj 	      indep_p = false;
2172*38fd1498Szrj 	      break;
2173*38fd1498Szrj 	    }
2174*38fd1498Szrj 	  inner = inner->next;
2175*38fd1498Szrj 	}
2176*38fd1498Szrj 
2177*38fd1498Szrj       if (indep_p)
2178*38fd1498Szrj 	{
2179*38fd1498Szrj 	  unsigned i;
2180*38fd1498Szrj 	  bitmap_iterator bi;
2181*38fd1498Szrj 	  EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2182*38fd1498Szrj 	    {
2183*38fd1498Szrj 	      im_mem_ref *aref = memory_accesses.refs_list[i];
2184*38fd1498Szrj 	      if (!refs_independent_p (ref, aref))
2185*38fd1498Szrj 		{
2186*38fd1498Szrj 		  indep_p = false;
2187*38fd1498Szrj 		  break;
2188*38fd1498Szrj 		}
2189*38fd1498Szrj 	    }
2190*38fd1498Szrj 	}
2191*38fd1498Szrj     }
2192*38fd1498Szrj 
2193*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
2194*38fd1498Szrj     fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
2195*38fd1498Szrj 	     ref->id, loop->num, indep_p ? "independent" : "dependent");
2196*38fd1498Szrj 
2197*38fd1498Szrj   /* Record the computed result in the cache.  */
2198*38fd1498Szrj   if (indep_p)
2199*38fd1498Szrj     {
2200*38fd1498Szrj       if (bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))
2201*38fd1498Szrj 	  && stored_p)
2202*38fd1498Szrj 	{
2203*38fd1498Szrj 	  /* If it's independent of all refs then it's independent
2204*38fd1498Szrj 	     of stores, too.  */
2205*38fd1498Szrj 	  bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, false));
2206*38fd1498Szrj 	}
2207*38fd1498Szrj     }
2208*38fd1498Szrj   else
2209*38fd1498Szrj     {
2210*38fd1498Szrj       record_dep_loop (loop, ref, stored_p);
2211*38fd1498Szrj       if (!stored_p)
2212*38fd1498Szrj 	{
2213*38fd1498Szrj 	  /* If it's dependent on stores it's dependent on
2214*38fd1498Szrj 	     all refs, too.  */
2215*38fd1498Szrj 	  record_dep_loop (loop, ref, true);
2216*38fd1498Szrj 	}
2217*38fd1498Szrj     }
2218*38fd1498Szrj 
2219*38fd1498Szrj   return indep_p;
2220*38fd1498Szrj }
2221*38fd1498Szrj 
2222*38fd1498Szrj /* Returns true if REF is independent of all other memory references in
2223*38fd1498Szrj    LOOP.  */
2224*38fd1498Szrj 
2225*38fd1498Szrj static bool
2226*38fd1498Szrj ref_indep_loop_p (struct loop *loop, im_mem_ref *ref)
2227*38fd1498Szrj {
2228*38fd1498Szrj   gcc_checking_assert (MEM_ANALYZABLE (ref));
2229*38fd1498Szrj 
2230*38fd1498Szrj   return ref_indep_loop_p_1 (loop, ref, false);
2231*38fd1498Szrj }
2232*38fd1498Szrj 
2233*38fd1498Szrj /* Returns true if we can perform store motion of REF from LOOP.  */
2234*38fd1498Szrj 
2235*38fd1498Szrj static bool
2236*38fd1498Szrj can_sm_ref_p (struct loop *loop, im_mem_ref *ref)
2237*38fd1498Szrj {
2238*38fd1498Szrj   tree base;
2239*38fd1498Szrj 
2240*38fd1498Szrj   /* Can't hoist unanalyzable refs.  */
2241*38fd1498Szrj   if (!MEM_ANALYZABLE (ref))
2242*38fd1498Szrj     return false;
2243*38fd1498Szrj 
2244*38fd1498Szrj   /* It should be movable.  */
2245*38fd1498Szrj   if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
2246*38fd1498Szrj       || TREE_THIS_VOLATILE (ref->mem.ref)
2247*38fd1498Szrj       || !for_each_index (&ref->mem.ref, may_move_till, loop))
2248*38fd1498Szrj     return false;
2249*38fd1498Szrj 
2250*38fd1498Szrj   /* If it can throw, fail; we do not properly update EH info.  */
2251*38fd1498Szrj   if (tree_could_throw_p (ref->mem.ref))
2252*38fd1498Szrj     return false;
2253*38fd1498Szrj 
2254*38fd1498Szrj   /* If it can trap, it must always be executed in LOOP.
2255*38fd1498Szrj      Readonly memory locations may trap when storing to them, but
2256*38fd1498Szrj      tree_could_trap_p is a predicate for rvalues, so check that
2257*38fd1498Szrj      explicitly.  */
2258*38fd1498Szrj   base = get_base_address (ref->mem.ref);
2259*38fd1498Szrj   if ((tree_could_trap_p (ref->mem.ref)
2260*38fd1498Szrj        || (DECL_P (base) && TREE_READONLY (base)))
2261*38fd1498Szrj       && !ref_always_accessed_p (loop, ref, true))
2262*38fd1498Szrj     return false;
2263*38fd1498Szrj 
2264*38fd1498Szrj   /* And it must be independent of all other memory references
2265*38fd1498Szrj      in LOOP.  */
2266*38fd1498Szrj   if (!ref_indep_loop_p (loop, ref))
2267*38fd1498Szrj     return false;
2268*38fd1498Szrj 
2269*38fd1498Szrj   return true;
2270*38fd1498Szrj }
2271*38fd1498Szrj 
2272*38fd1498Szrj /* Marks in REFS_TO_SM the references in LOOP for which store motion
2273*38fd1498Szrj    should be performed.  SM_EXECUTED is the set of references for which
2274*38fd1498Szrj    store motion was performed in one of the outer loops.  */
2275*38fd1498Szrj 
2276*38fd1498Szrj static void
2277*38fd1498Szrj find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2278*38fd1498Szrj {
2279*38fd1498Szrj   bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
2280*38fd1498Szrj   unsigned i;
2281*38fd1498Szrj   bitmap_iterator bi;
2282*38fd1498Szrj   im_mem_ref *ref;
2283*38fd1498Szrj 
2284*38fd1498Szrj   EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2285*38fd1498Szrj     {
2286*38fd1498Szrj       ref = memory_accesses.refs_list[i];
2287*38fd1498Szrj       if (can_sm_ref_p (loop, ref))
2288*38fd1498Szrj 	bitmap_set_bit (refs_to_sm, i);
2289*38fd1498Szrj     }
2290*38fd1498Szrj }
2291*38fd1498Szrj 
2292*38fd1498Szrj /* Checks whether LOOP (with exits stored in EXITS array) is suitable
2293*38fd1498Szrj    for a store motion optimization (i.e. whether we can insert statements
2294*38fd1498Szrj    on its exits).  */
2295*38fd1498Szrj 
2296*38fd1498Szrj static bool
2297*38fd1498Szrj loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
2298*38fd1498Szrj 		      vec<edge> exits)
2299*38fd1498Szrj {
2300*38fd1498Szrj   unsigned i;
2301*38fd1498Szrj   edge ex;
2302*38fd1498Szrj 
2303*38fd1498Szrj   FOR_EACH_VEC_ELT (exits, i, ex)
2304*38fd1498Szrj     if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2305*38fd1498Szrj       return false;
2306*38fd1498Szrj 
2307*38fd1498Szrj   return true;
2308*38fd1498Szrj }
2309*38fd1498Szrj 
2310*38fd1498Szrj /* Try to perform store motion for all memory references modified inside
2311*38fd1498Szrj    LOOP.  SM_EXECUTED is the bitmap of the memory references for which
2312*38fd1498Szrj    store motion was executed in one of the outer loops.  */
2313*38fd1498Szrj 
2314*38fd1498Szrj static void
2315*38fd1498Szrj store_motion_loop (struct loop *loop, bitmap sm_executed)
2316*38fd1498Szrj {
2317*38fd1498Szrj   vec<edge> exits = get_loop_exit_edges (loop);
2318*38fd1498Szrj   struct loop *subloop;
2319*38fd1498Szrj   bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
2320*38fd1498Szrj 
2321*38fd1498Szrj   if (loop_suitable_for_sm (loop, exits))
2322*38fd1498Szrj     {
2323*38fd1498Szrj       find_refs_for_sm (loop, sm_executed, sm_in_loop);
2324*38fd1498Szrj       hoist_memory_references (loop, sm_in_loop, exits);
2325*38fd1498Szrj     }
2326*38fd1498Szrj   exits.release ();
2327*38fd1498Szrj 
2328*38fd1498Szrj   bitmap_ior_into (sm_executed, sm_in_loop);
2329*38fd1498Szrj   for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2330*38fd1498Szrj     store_motion_loop (subloop, sm_executed);
2331*38fd1498Szrj   bitmap_and_compl_into (sm_executed, sm_in_loop);
2332*38fd1498Szrj   BITMAP_FREE (sm_in_loop);
2333*38fd1498Szrj }
2334*38fd1498Szrj 
2335*38fd1498Szrj /* Try to perform store motion for all memory references modified inside
2336*38fd1498Szrj    loops.  */
2337*38fd1498Szrj 
2338*38fd1498Szrj static void
2339*38fd1498Szrj store_motion (void)
2340*38fd1498Szrj {
2341*38fd1498Szrj   struct loop *loop;
2342*38fd1498Szrj   bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
2343*38fd1498Szrj 
2344*38fd1498Szrj   for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2345*38fd1498Szrj     store_motion_loop (loop, sm_executed);
2346*38fd1498Szrj 
2347*38fd1498Szrj   BITMAP_FREE (sm_executed);
2348*38fd1498Szrj   gsi_commit_edge_inserts ();
2349*38fd1498Szrj }
2350*38fd1498Szrj 
2351*38fd1498Szrj /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
2352*38fd1498Szrj    for each such basic block bb records the outermost loop for which execution
2353*38fd1498Szrj    of its header implies execution of bb.  CONTAINS_CALL is the bitmap of
2354*38fd1498Szrj    blocks that contain a nonpure call.  */
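/* E.g. (a sketch) for a loop whose only exit test is in the latch,

     header bb1;  if (c_1) bb2; else bb3;  latch bb4

   bb1 and bb4 dominate the latch and get ALWAYS_EXECUTED_IN == loop,
   while the conditional blocks bb2 and bb3 get nothing from this loop.  */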
2355*38fd1498Szrj 
2356*38fd1498Szrj static void
2357*38fd1498Szrj fill_always_executed_in_1 (struct loop *loop, sbitmap contains_call)
2358*38fd1498Szrj {
2359*38fd1498Szrj   basic_block bb = NULL, *bbs, last = NULL;
2360*38fd1498Szrj   unsigned i;
2361*38fd1498Szrj   edge e;
2362*38fd1498Szrj   struct loop *inn_loop = loop;
2363*38fd1498Szrj 
2364*38fd1498Szrj   if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
2365*38fd1498Szrj     {
2366*38fd1498Szrj       bbs = get_loop_body_in_dom_order (loop);
2367*38fd1498Szrj 
2368*38fd1498Szrj       for (i = 0; i < loop->num_nodes; i++)
2369*38fd1498Szrj 	{
2370*38fd1498Szrj 	  edge_iterator ei;
2371*38fd1498Szrj 	  bb = bbs[i];
2372*38fd1498Szrj 
2373*38fd1498Szrj 	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2374*38fd1498Szrj 	    last = bb;
2375*38fd1498Szrj 
2376*38fd1498Szrj 	  if (bitmap_bit_p (contains_call, bb->index))
2377*38fd1498Szrj 	    break;
2378*38fd1498Szrj 
2379*38fd1498Szrj 	  FOR_EACH_EDGE (e, ei, bb->succs)
2380*38fd1498Szrj 	    {
2381*38fd1498Szrj 	      /* If there is an exit from this BB.  */
2382*38fd1498Szrj 	      if (!flow_bb_inside_loop_p (loop, e->dest))
2383*38fd1498Szrj 		break;
2384*38fd1498Szrj 	      /* Or we enter a possibly non-finite loop.  */
2385*38fd1498Szrj 	      if (flow_loop_nested_p (bb->loop_father,
2386*38fd1498Szrj 				      e->dest->loop_father)
2387*38fd1498Szrj 		  && ! finite_loop_p (e->dest->loop_father))
2388*38fd1498Szrj 		break;
2389*38fd1498Szrj 	    }
2390*38fd1498Szrj 	  if (e)
2391*38fd1498Szrj 	    break;
2392*38fd1498Szrj 
2393*38fd1498Szrj 	  /* A loop might be infinite (TODO use simple loop analysis
2394*38fd1498Szrj 	     to disprove this if possible).  */
2395*38fd1498Szrj 	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
2396*38fd1498Szrj 	    break;
2397*38fd1498Szrj 
2398*38fd1498Szrj 	  if (!flow_bb_inside_loop_p (inn_loop, bb))
2399*38fd1498Szrj 	    break;
2400*38fd1498Szrj 
2401*38fd1498Szrj 	  if (bb->loop_father->header == bb)
2402*38fd1498Szrj 	    {
2403*38fd1498Szrj 	      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2404*38fd1498Szrj 		break;
2405*38fd1498Szrj 
2406*38fd1498Szrj 	      /* In a loop that is always entered we may proceed anyway.
2407*38fd1498Szrj 		 But record that we entered it and stop once we leave it.  */
2408*38fd1498Szrj 	      inn_loop = bb->loop_father;
2409*38fd1498Szrj 	    }
2410*38fd1498Szrj 	}
2411*38fd1498Szrj 
2412*38fd1498Szrj       while (1)
2413*38fd1498Szrj 	{
2414*38fd1498Szrj 	  SET_ALWAYS_EXECUTED_IN (last, loop);
2415*38fd1498Szrj 	  if (last == loop->header)
2416*38fd1498Szrj 	    break;
2417*38fd1498Szrj 	  last = get_immediate_dominator (CDI_DOMINATORS, last);
2418*38fd1498Szrj 	}
2419*38fd1498Szrj 
2420*38fd1498Szrj       free (bbs);
2421*38fd1498Szrj     }
2422*38fd1498Szrj 
2423*38fd1498Szrj   for (loop = loop->inner; loop; loop = loop->next)
2424*38fd1498Szrj     fill_always_executed_in_1 (loop, contains_call);
2425*38fd1498Szrj }
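
/* Illustration (a sketch, not code from this file): in

     while (...)          <- loop L
       {
         x = ...;         <- header block, ALWAYS_EXECUTED_IN == L
         if (cond)
           y = ...;       <- conditional block, not always executed in L
       }

   only blocks that dominate the latch can be marked; the walk up the
   dominator tree from LAST above records exactly those.  */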

/* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
   for each such basic block bb records the outermost loop for which
   execution of its header implies execution of bb.  */

static void
fill_always_executed_in (void)
{
  basic_block bb;
  struct loop *loop;

  auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
  bitmap_clear (contains_call);
  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          if (nonpure_call_p (gsi_stmt (gsi)))
            break;
        }

      if (!gsi_end_p (gsi))
        bitmap_set_bit (contains_call, bb->index);
    }
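
  /* A nonpure call may never return: it can call exit, longjmp, or loop
     forever.  Blocks after such a call are therefore not guaranteed to
     execute even when the loop header does, which is why CONTAINS_CALL
     stops the walk in fill_always_executed_in_1.  */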

  for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
    fill_always_executed_in_1 (loop, contains_call);
}


/* Compute the global information needed by the loop invariant motion pass.  */

static void
tree_ssa_lim_initialize (void)
{
  struct loop *loop;
  unsigned i;

  bitmap_obstack_initialize (&lim_bitmap_obstack);
  gcc_obstack_init (&mem_ref_obstack);
  lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;

  if (flag_tm)
    compute_transaction_bits ();

  alloc_aux_for_edges (0);

  memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
  memory_accesses.refs_list.create (100);
  /* Allocate a special, unanalyzable mem-ref with ID zero.  */
  memory_accesses.refs_list.quick_push
    (mem_ref_alloc (error_mark_node, 0, UNANALYZABLE_MEM_ID));

  memory_accesses.refs_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
  memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
  memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));

  for (i = 0; i < number_of_loops (cfun); i++)
    {
      bitmap_initialize (&memory_accesses.refs_in_loop[i],
                         &lim_bitmap_obstack);
      bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
                         &lim_bitmap_obstack);
      bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
                         &lim_bitmap_obstack);
    }

  memory_accesses.ttae_cache = NULL;

  /* Initialize bb_loop_postorder with a mapping from loop->num to
     its postorder index.  */
  i = 0;
  bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    bb_loop_postorder[loop->num] = i++;
}
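
/* Illustration of the mapping built just above (a sketch, not code from
   this file): for an outer loop with num 1 containing an inner loop with
   num 2, LI_FROM_INNERMOST visits loop 2 first, so bb_loop_postorder[2]
   == 0 and bb_loop_postorder[1] == 1; inner loops always receive smaller
   indices than the loops that contain them.  */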

/* Cleans up after the invariant motion pass.  */

static void
tree_ssa_lim_finalize (void)
{
  basic_block bb;
  unsigned i;
  im_mem_ref *ref;

  free_aux_for_edges ();

  FOR_EACH_BB_FN (bb, cfun)
    SET_ALWAYS_EXECUTED_IN (bb, NULL);

  bitmap_obstack_release (&lim_bitmap_obstack);
  delete lim_aux_data_map;

  delete memory_accesses.refs;
  memory_accesses.refs = NULL;

  FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
    memref_free (ref);
  memory_accesses.refs_list.release ();
  obstack_free (&mem_ref_obstack, NULL);

  memory_accesses.refs_in_loop.release ();
  memory_accesses.refs_stored_in_loop.release ();
  memory_accesses.all_refs_stored_in_loop.release ();

  if (memory_accesses.ttae_cache)
    free_affine_expand_cache (&memory_accesses.ttae_cache);

  free (bb_loop_postorder);
}

/* Moves invariants from loops.  Only "expensive" invariants are moved out,
   i.e. those that are likely to be a win regardless of the register
   pressure.  */

static unsigned int
tree_ssa_lim (void)
{
  unsigned int todo;

  tree_ssa_lim_initialize ();

  /* Gathers information about memory accesses in the loops.  */
  analyze_memory_references ();

  /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
  fill_always_executed_in ();

  /* For each statement determine the outermost loop in which it is
     invariant and the cost of computing the invariant.  */
  invariantness_dom_walker (CDI_DOMINATORS)
    .walk (cfun->cfg->x_entry_block_ptr);

  /* Execute store motion.  Force the necessary invariants to be moved
     out of the loops as well.  */
  store_motion ();

  /* Move the expressions that are expensive enough.  */
  todo = move_computations ();

  tree_ssa_lim_finalize ();

  return todo;
}
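
/* An illustrative sketch (not code from this file) of what "expensive"
   means here: an invariant division

     for (i = 0; i < n; i++)
       a[i] = x / y;

   is rewritten as

     t = x / y;
     for (i = 0; i < n; i++)
       a[i] = t;

   while a trivially cheap invariant may be left in place, since hoisting
   it would only lengthen live ranges and increase register pressure.  */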

/* Loop invariant motion pass.  */

namespace {

const pass_data pass_data_lim =
{
  GIMPLE_PASS, /* type */
  "lim", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LIM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lim : public gimple_opt_pass
{
public:
  pass_lim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lim, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lim (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_loop_im != 0; }
  virtual unsigned int execute (function *);

}; // class pass_lim

unsigned int
pass_lim::execute (function *fun)
{
  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);

  if (number_of_loops (fun) <= 1)
    return 0;
  unsigned int todo = tree_ssa_lim ();

  if (!in_loop_pipeline)
    loop_optimizer_finalize ();
  else
    scev_reset ();
  return todo;
}

} // anon namespace

gimple_opt_pass *
make_pass_lim (gcc::context *ctxt)
{
  return new pass_lim (ctxt);
}