xref: /netbsd-src/external/gpl3/gcc/dist/gcc/tree-ssa-loop-im.cc (revision c42dbd0ed2e61fe6eda8590caa852ccf34719964)
1 /* Loop invariant motion.
2    Copyright (C) 2003-2022 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "gimple-pretty-print.h"
30 #include "fold-const.h"
31 #include "cfganal.h"
32 #include "tree-eh.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "tree-cfg.h"
36 #include "tree-ssa-loop-manip.h"
37 #include "tree-ssa-loop.h"
38 #include "tree-into-ssa.h"
39 #include "cfgloop.h"
40 #include "tree-affine.h"
41 #include "tree-ssa-propagate.h"
42 #include "trans-mem.h"
43 #include "gimple-fold.h"
44 #include "tree-scalar-evolution.h"
45 #include "tree-ssa-loop-niter.h"
46 #include "alias.h"
47 #include "builtins.h"
48 #include "tree-dfa.h"
49 #include "tree-ssa.h"
50 #include "dbgcnt.h"
51 
52 /* TODO:  Support for predicated code motion.  I.e.
53 
54    while (1)
55      {
56        if (cond)
57 	 {
58 	   a = inv;
59 	   something;
60 	 }
61      }
62 
63    Where COND and INV are invariants, but evaluating INV may trap or be
64    invalid for some other reason if !COND.  This may be transformed to
65 
66    if (cond)
67      a = inv;
68    while (1)
69      {
70        if (cond)
71 	 something;
72      }  */
73 
74 /* The auxiliary data kept for each statement.  */
75 
76 struct lim_aux_data
77 {
78   class loop *max_loop;	/* The outermost loop in which the statement
79 				   is invariant.  */
80 
81   class loop *tgt_loop;	/* The loop out of which we want to move the
82 				   invariant.  */
83 
84   class loop *always_executed_in;
85 				/* The outermost loop for which we are sure
86 				   the statement is executed if the loop
87 				   is entered.  */
88 
89   unsigned cost;		/* Cost of the computation performed by the
90 				   statement.  */
91 
92   unsigned ref;			/* The simple_mem_ref in this stmt or 0.  */
93 
94   vec<gimple *> depends;	/* Vector of statements that must also be
95 				   hoisted out of the loop when this statement
96 				   is hoisted; i.e. those that define the
97 				   operands of the statement and are inside of
98 				   the MAX_LOOP loop.  */
99 };
100 
101 /* Maps statements to their lim_aux_data.  */
102 
103 static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
104 
105 /* Description of a memory reference location.  */
106 
107 struct mem_ref_loc
108 {
109   tree *ref;			/* The reference itself.  */
110   gimple *stmt;			/* The statement in which it occurs.  */
111 };
112 
113 
114 /* Description of a memory reference.  */
115 
116 class im_mem_ref
117 {
118 public:
119   unsigned id : 30;		/* ID assigned to the memory reference
120 				   (its index in memory_accesses.refs_list)  */
121   unsigned ref_canonical : 1;   /* Whether mem.ref was canonicalized.  */
122   unsigned ref_decomposed : 1;  /* Whether the ref was hashed from mem.  */
123   hashval_t hash;		/* Its hash value.  */
124 
125   /* The memory access itself and associated caching of alias-oracle
126      query meta-data.  We are using mem.ref == error_mark_node for the
127      case where the reference is represented by its single access stmt
128      in accesses_in_loop[0].  */
129   ao_ref mem;
130 
131   bitmap stored;		/* The set of loops in which this memory location
132 				   is stored to.  */
133   bitmap loaded;		/* The set of loops in which this memory location
134 				   is loaded from.  */
135   vec<mem_ref_loc>		accesses_in_loop;
136 				/* The locations of the accesses.  */
137 
138   /* The following set is computed on demand.  */
139   bitmap_head dep_loop;		/* The set of loops in which the memory
140 				   reference is {in,}dependent in
141 				   different modes.  */
142 };
143 
144 /* We use six bits per loop in the ref->dep_loop bitmap to record
145    the dep_kind x dep_state combinations.  */
146 
147 enum dep_kind { lim_raw, sm_war, sm_waw };
148 enum dep_state { dep_unknown, dep_independent, dep_dependent };
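
/* An illustrative sketch of the encoding (derived from
   record_loop_dependence and query_loop_dependence below, not a separate
   data structure): for the loop numbered N, bits 6*N .. 6*N+5 of
   ref->dep_loop are used, two per dep_kind:

     bit 6*N + 2*KIND	   -- REF is known independent in the loop for KIND
     bit 6*N + 2*KIND + 1  -- REF is known dependent in the loop for KIND

   For example, a recorded sm_war dependence for loop 3 sets bit
   6*3 + 2*1 + 1 == 21; when neither bit of a pair is set the cached state
   is dep_unknown.  */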
149 
150 /* The coldest outermost loop for a given loop.  */
151 vec<class loop *> coldest_outermost_loop;
152 /* The hotter outer loop nearest to a given loop.  */
153 vec<class loop *> hotter_than_inner_loop;
154 
155 /* Populate the loop dependence cache of REF for LOOP, KIND with STATE.  */
156 
157 static void
158 record_loop_dependence (class loop *loop, im_mem_ref *ref,
159 			dep_kind kind, dep_state state)
160 {
161   gcc_assert (state != dep_unknown);
162   unsigned bit = 6 * loop->num + kind * 2 + (state == dep_dependent ? 1 : 0);
163   bitmap_set_bit (&ref->dep_loop, bit);
164 }
165 
166 /* Query the loop dependence cache of REF for LOOP, KIND.  */
167 
168 static dep_state
169 query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
170 {
171   unsigned first_bit = 6 * loop->num + kind * 2;
172   if (bitmap_bit_p (&ref->dep_loop, first_bit))
173     return dep_independent;
174   else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
175     return dep_dependent;
176   return dep_unknown;
177 }
178 
179 /* Mem_ref hashtable helpers.  */
180 
181 struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
182 {
183   typedef ao_ref *compare_type;
184   static inline hashval_t hash (const im_mem_ref *);
185   static inline bool equal (const im_mem_ref *, const ao_ref *);
186 };
187 
188 /* A hash function for class im_mem_ref object OBJ.  */
189 
190 inline hashval_t
191 mem_ref_hasher::hash (const im_mem_ref *mem)
192 {
193   return mem->hash;
194 }
195 
196 /* An equality function for class im_mem_ref object MEM1 with
197    memory reference OBJ2.  */
198 
199 inline bool
200 mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
201 {
202   if (obj2->max_size_known_p ())
203     return (mem1->ref_decomposed
204 	    && ((TREE_CODE (mem1->mem.base) == MEM_REF
205 		 && TREE_CODE (obj2->base) == MEM_REF
206 		 && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
207 				     TREE_OPERAND (obj2->base, 0), 0)
208 		 && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset,
209 			      mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset))
210 		|| (operand_equal_p (mem1->mem.base, obj2->base, 0)
211 		    && known_eq (mem1->mem.offset, obj2->offset)))
212 	    && known_eq (mem1->mem.size, obj2->size)
213 	    && known_eq (mem1->mem.max_size, obj2->max_size)
214 	    && mem1->mem.volatile_p == obj2->volatile_p
215 	    && (mem1->mem.ref_alias_set == obj2->ref_alias_set
216 		/* We are not canonicalizing alias-sets, but for the
217 		   special case where we didn't canonicalize yet and the
218 		   incoming ref is an alias-set zero MEM we already pick
219 		   the correct one.  */
220 		|| (!mem1->ref_canonical
221 		    && (TREE_CODE (obj2->ref) == MEM_REF
222 			|| TREE_CODE (obj2->ref) == TARGET_MEM_REF)
223 		    && obj2->ref_alias_set == 0)
224 		/* Likewise if there's a canonical ref with alias-set zero.  */
225 		|| (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
226 	    && types_compatible_p (TREE_TYPE (mem1->mem.ref),
227 				   TREE_TYPE (obj2->ref)));
228   else
229     return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
230 }
231 
232 
233 /* Description of memory accesses in loops.  */
234 
235 static struct
236 {
237   /* The hash table of memory references accessed in loops.  */
238   hash_table<mem_ref_hasher> *refs;
239 
240   /* The list of memory references.  */
241   vec<im_mem_ref *> refs_list;
242 
243   /* The set of memory references accessed in each loop.  */
244   vec<bitmap_head> refs_loaded_in_loop;
245 
246   /* The set of memory references stored in each loop.  */
247   vec<bitmap_head> refs_stored_in_loop;
248 
249   /* The set of memory references stored in each loop, including subloops.  */
250   vec<bitmap_head> all_refs_stored_in_loop;
251 
252   /* Cache for expanding memory addresses.  */
253   hash_map<tree, name_expansion *> *ttae_cache;
254 } memory_accesses;
255 
256 /* Obstack for the bitmaps in the above data structures.  */
257 static bitmap_obstack lim_bitmap_obstack;
258 static obstack mem_ref_obstack;
259 
260 static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
261 static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
262 static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);
263 
264 /* Minimum cost of an expensive expression.  */
265 #define LIM_EXPENSIVE ((unsigned) param_lim_expensive)
266 
267 /* The outermost loop for which execution of the header guarantees that the
268    block will be executed.  */
269 #define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
270 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
271 
272 /* ID of the shared unanalyzable mem.  */
273 #define UNANALYZABLE_MEM_ID 0
274 
275 /* Whether the reference was analyzable.  */
276 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
277 
278 static struct lim_aux_data *
279 init_lim_data (gimple *stmt)
280 {
281   lim_aux_data *p = XCNEW (struct lim_aux_data);
282   lim_aux_data_map->put (stmt, p);
283 
284   return p;
285 }
286 
287 static struct lim_aux_data *
288 get_lim_data (gimple *stmt)
289 {
290   lim_aux_data **p = lim_aux_data_map->get (stmt);
291   if (!p)
292     return NULL;
293 
294   return *p;
295 }
296 
297 /* Releases the memory occupied by DATA.  */
298 
299 static void
300 free_lim_aux_data (struct lim_aux_data *data)
301 {
302   data->depends.release ();
303   free (data);
304 }
305 
306 static void
307 clear_lim_data (gimple *stmt)
308 {
309   lim_aux_data **p = lim_aux_data_map->get (stmt);
310   if (!p)
311     return;
312 
313   free_lim_aux_data (*p);
314   *p = NULL;
315 }
316 
317 
318 /* The possibilities of statement movement.  */
319 enum move_pos
320   {
321     MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
322     MOVE_PRESERVE_EXECUTION,	/* Must not cause a non-executed statement
323 				   to become executed -- memory accesses, ... */
324     MOVE_POSSIBLE		/* Unlimited movement.  */
325   };
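
/* A rough sketch of the classification (illustrative examples only; the
   precise rules are implemented by movement_possibility_1 below):

     a_1 = b_2 + c_3;	-- MOVE_POSSIBLE, a pure computation
     t_4 = *p_5;	-- MOVE_PRESERVE_EXECUTION, the load may trap and so
			   must not run on paths where it originally did not
     *p_5 = t_4;	-- MOVE_IMPOSSIBLE, the store has a VDEF  */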
326 
327 
328 /* If it is possible to hoist the statement STMT unconditionally,
329    returns MOVE_POSSIBLE.
330    If it is possible to hoist the statement STMT, but we must avoid making
331    it executed if it would not be executed in the original program (e.g.
332    because it may trap), return MOVE_PRESERVE_EXECUTION.
333    Otherwise return MOVE_IMPOSSIBLE.  */
334 
335 static enum move_pos
336 movement_possibility_1 (gimple *stmt)
337 {
338   tree lhs;
339   enum move_pos ret = MOVE_POSSIBLE;
340 
341   if (flag_unswitch_loops
342       && gimple_code (stmt) == GIMPLE_COND)
343     {
344       /* If we perform unswitching, force the operands of the invariant
345 	 condition to be moved out of the loop.  */
346       return MOVE_POSSIBLE;
347     }
348 
349   if (gimple_code (stmt) == GIMPLE_PHI
350       && gimple_phi_num_args (stmt) <= 2
351       && !virtual_operand_p (gimple_phi_result (stmt))
352       && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
353     return MOVE_POSSIBLE;
354 
355   if (gimple_get_lhs (stmt) == NULL_TREE)
356     return MOVE_IMPOSSIBLE;
357 
358   if (gimple_vdef (stmt))
359     return MOVE_IMPOSSIBLE;
360 
361   if (stmt_ends_bb_p (stmt)
362       || gimple_has_volatile_ops (stmt)
363       || gimple_has_side_effects (stmt)
364       || stmt_could_throw_p (cfun, stmt))
365     return MOVE_IMPOSSIBLE;
366 
367   if (is_gimple_call (stmt))
368     {
369       /* While a pure or const call is guaranteed to have no side effects, we
370 	 cannot move it arbitrarily.  Consider code like
371 
372 	 char *s = something ();
373 
374 	 while (1)
375 	   {
376 	     if (s)
377 	       t = strlen (s);
378 	     else
379 	       t = 0;
380 	   }
381 
382 	 Here the strlen call cannot be moved out of the loop, even though
383 	 s is invariant.  In addition to possibly creating a call with
384 	 invalid arguments, moving out a function call that is not executed
385 	 may cause performance regressions in case the call is costly and
386 	 not executed at all.  */
387       ret = MOVE_PRESERVE_EXECUTION;
388       lhs = gimple_call_lhs (stmt);
389     }
390   else if (is_gimple_assign (stmt))
391     lhs = gimple_assign_lhs (stmt);
392   else
393     return MOVE_IMPOSSIBLE;
394 
395   if (TREE_CODE (lhs) == SSA_NAME
396       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
397     return MOVE_IMPOSSIBLE;
398 
399   if (TREE_CODE (lhs) != SSA_NAME
400       || gimple_could_trap_p (stmt))
401     return MOVE_PRESERVE_EXECUTION;
402 
403   /* Non local loads in a transaction cannot be hoisted out.  Well,
404      unless the load happens on every path out of the loop, but we
405      don't take this into account yet.  */
406   if (flag_tm
407       && gimple_in_transaction (stmt)
408       && gimple_assign_single_p (stmt))
409     {
410       tree rhs = gimple_assign_rhs1 (stmt);
411       if (DECL_P (rhs) && is_global_var (rhs))
412 	{
413 	  if (dump_file)
414 	    {
415 	      fprintf (dump_file, "Cannot hoist conditional load of ");
416 	      print_generic_expr (dump_file, rhs, TDF_SLIM);
417 	      fprintf (dump_file, " because it is in a transaction.\n");
418 	    }
419 	  return MOVE_IMPOSSIBLE;
420 	}
421     }
422 
423   return ret;
424 }
425 
426 static enum move_pos
427 movement_possibility (gimple *stmt)
428 {
429   enum move_pos pos = movement_possibility_1 (stmt);
430   if (pos == MOVE_POSSIBLE)
431     {
432       use_operand_p use_p;
433       ssa_op_iter ssa_iter;
434       FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, ssa_iter, SSA_OP_USE)
435 	if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME
436 	    && ssa_name_maybe_undef_p (USE_FROM_PTR (use_p)))
437 	  return MOVE_PRESERVE_EXECUTION;
438     }
439   return pos;
440 }
441 
442 
443 /* Compare the profile count of BB with that of LOOP's preheader.  The
444    comparison is three-state as stated in profile-count.h; FALSE is returned
445    if the inequality cannot be decided.  */
446 bool
447 bb_colder_than_loop_preheader (basic_block bb, class loop *loop)
448 {
449   gcc_assert (bb && loop);
450   return bb->count < loop_preheader_edge (loop)->src->count;
451 }
452 
453 /* Check the coldest loop between OUTERMOST_LOOP and LOOP by comparing profile
454    count.
455   It performs a three-step check:
456   1) Check whether CURR_BB is cold in its own loop_father; if it is cold, just
457   return NULL, which means it should not be moved out at all;
458   2) CURR_BB is NOT cold, check if the pre-computed COLDEST_LOOP is outside of
459   OUTERMOST_LOOP; if it is inside of OUTERMOST_LOOP, return the COLDEST_LOOP;
460   3) If COLDEST_LOOP is outside of OUTERMOST_LOOP, check whether there is a
461   hotter loop between OUTERMOST_LOOP and LOOP in the pre-computed
462   HOTTER_THAN_INNER_LOOP; if so, return its nested inner loop, otherwise return
463   OUTERMOST_LOOP.
464   Otherwise, the coldest_loop is inside of OUTERMOST_LOOP, so just return it as
465   the hoist target.  */
466 
467 static class loop *
468 get_coldest_out_loop (class loop *outermost_loop, class loop *loop,
469 		      basic_block curr_bb)
470 {
471   gcc_assert (outermost_loop == loop
472 	      || flow_loop_nested_p (outermost_loop, loop));
473 
474   /* If bb_colder_than_loop_preheader returns false due to the three-state
475     comparison, OUTERMOST_LOOP is eventually returned to preserve the behavior.
476     Otherwise, return the coldest loop between OUTERMOST_LOOP and LOOP.  */
477   if (curr_bb && bb_colder_than_loop_preheader (curr_bb, loop))
478     return NULL;
479 
480   class loop *coldest_loop = coldest_outermost_loop[loop->num];
481   if (loop_depth (coldest_loop) < loop_depth (outermost_loop))
482     {
483       class loop *hotter_loop = hotter_than_inner_loop[loop->num];
484       if (!hotter_loop
485 	  || loop_depth (hotter_loop) < loop_depth (outermost_loop))
486 	return outermost_loop;
487 
488       /*  hotter_loop is between OUTERMOST_LOOP and LOOP like:
489 	[loop tree root, ..., coldest_loop, ..., outermost_loop, ...,
490 	hotter_loop, second_coldest_loop, ..., loop]
491 	return second_coldest_loop to be the hoist target.  */
492       class loop *aloop;
493       for (aloop = hotter_loop->inner; aloop; aloop = aloop->next)
494 	if (aloop == loop || flow_loop_nested_p (aloop, loop))
495 	  return aloop;
496     }
497   return coldest_loop;
498 }
499 
500 /* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
501    loop to which we could move the expression using DEF if it did not have
502    other operands, i.e. the outermost loop enclosing LOOP in which the value
503    of DEF is invariant.  */
504 
505 static class loop *
506 outermost_invariant_loop (tree def, class loop *loop)
507 {
508   gimple *def_stmt;
509   basic_block def_bb;
510   class loop *max_loop;
511   struct lim_aux_data *lim_data;
512 
513   if (!def)
514     return superloop_at_depth (loop, 1);
515 
516   if (TREE_CODE (def) != SSA_NAME)
517     {
518       gcc_assert (is_gimple_min_invariant (def));
519       return superloop_at_depth (loop, 1);
520     }
521 
522   def_stmt = SSA_NAME_DEF_STMT (def);
523   def_bb = gimple_bb (def_stmt);
524   if (!def_bb)
525     return superloop_at_depth (loop, 1);
526 
527   max_loop = find_common_loop (loop, def_bb->loop_father);
528 
529   lim_data = get_lim_data (def_stmt);
530   if (lim_data != NULL && lim_data->max_loop != NULL)
531     max_loop = find_common_loop (max_loop,
532 				 loop_outer (lim_data->max_loop));
533   if (max_loop == loop)
534     return NULL;
535   max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
536 
537   return max_loop;
538 }
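
/* Illustration for outermost_invariant_loop above (a sketch, not taken from
   the sources): given a nest  loop1 { loop2 { use of x_1 } }  where x_1 is
   defined before loop1, the function returns loop1 (the outermost superloop
   of LOOP at depth 1); if x_1 were defined in loop1 but outside loop2, it
   would return loop2; and if x_1 were defined in loop2 itself with no LIM
   data promising to hoist it, it would return NULL.  */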
539 
540 /* DATA is a structure containing information associated with a statement
541    inside LOOP.  DEF is one of the operands of this statement.
542 
543    Find the outermost loop enclosing LOOP in which the value of DEF is invariant
544    and record this in the DATA->max_loop field.  If DEF itself is defined inside
545    this loop as well (i.e. we need to hoist it out of the loop if we want
546    to hoist the statement represented by DATA), record the statement in which
547    DEF is defined in the DATA->depends list.  Additionally, if ADD_COST is true,
548    add the cost of the computation of DEF to the DATA->cost.
549 
550    If DEF is not invariant in LOOP, return false.  Otherwise return TRUE.  */
551 
552 static bool
553 add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
554 		bool add_cost)
555 {
556   gimple *def_stmt = SSA_NAME_DEF_STMT (def);
557   basic_block def_bb = gimple_bb (def_stmt);
558   class loop *max_loop;
559   struct lim_aux_data *def_data;
560 
561   if (!def_bb)
562     return true;
563 
564   max_loop = outermost_invariant_loop (def, loop);
565   if (!max_loop)
566     return false;
567 
568   if (flow_loop_nested_p (data->max_loop, max_loop))
569     data->max_loop = max_loop;
570 
571   def_data = get_lim_data (def_stmt);
572   if (!def_data)
573     return true;
574 
575   if (add_cost
576       /* Only add the cost if the statement defining DEF is inside LOOP,
577 	 i.e. if it is likely that by moving the invariants dependent
578 	 on it, we will be able to avoid creating a new register for
579 	 it (since it will be only used in these dependent invariants).  */
580       && def_bb->loop_father == loop)
581     data->cost += def_data->cost;
582 
583   data->depends.safe_push (def_stmt);
584 
585   return true;
586 }
587 
588 /* Returns an estimate of the cost of statement STMT.  The values here
589    are just ad-hoc constants, similar to costs for inlining.  */
590 
591 static unsigned
592 stmt_cost (gimple *stmt)
593 {
594   /* Always try to create possibilities for unswitching.  */
595   if (gimple_code (stmt) == GIMPLE_COND
596       || gimple_code (stmt) == GIMPLE_PHI)
597     return LIM_EXPENSIVE;
598 
599   /* We should be hoisting calls if possible.  */
600   if (is_gimple_call (stmt))
601     {
602       tree fndecl;
603 
604       /* Unless the call is a builtin_constant_p; this always folds to a
605 	 constant, so moving it is useless.  */
606       fndecl = gimple_call_fndecl (stmt);
607       if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
608 	return 0;
609 
610       return LIM_EXPENSIVE;
611     }
612 
613   /* Hoisting memory references out should almost surely be a win.  */
614   if (gimple_references_memory_p (stmt))
615     return LIM_EXPENSIVE;
616 
617   if (gimple_code (stmt) != GIMPLE_ASSIGN)
618     return 1;
619 
620   switch (gimple_assign_rhs_code (stmt))
621     {
622     case MULT_EXPR:
623     case WIDEN_MULT_EXPR:
624     case WIDEN_MULT_PLUS_EXPR:
625     case WIDEN_MULT_MINUS_EXPR:
626     case DOT_PROD_EXPR:
627     case TRUNC_DIV_EXPR:
628     case CEIL_DIV_EXPR:
629     case FLOOR_DIV_EXPR:
630     case ROUND_DIV_EXPR:
631     case EXACT_DIV_EXPR:
632     case CEIL_MOD_EXPR:
633     case FLOOR_MOD_EXPR:
634     case ROUND_MOD_EXPR:
635     case TRUNC_MOD_EXPR:
636     case RDIV_EXPR:
637       /* Division and multiplication are usually expensive.  */
638       return LIM_EXPENSIVE;
639 
640     case LSHIFT_EXPR:
641     case RSHIFT_EXPR:
642     case WIDEN_LSHIFT_EXPR:
643     case LROTATE_EXPR:
644     case RROTATE_EXPR:
645       /* Shifts and rotates are usually expensive.  */
646       return LIM_EXPENSIVE;
647 
648     case CONSTRUCTOR:
649       /* Make vector construction cost proportional to the number
650          of elements.  */
651       return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
652 
653     case SSA_NAME:
654     case PAREN_EXPR:
655       /* Whether or not something is wrapped inside a PAREN_EXPR
656          should not change move cost.  Nor should an intermediate
657 	 unpropagated SSA name copy.  */
658       return 0;
659 
660     default:
661       return 1;
662     }
663 }
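
/* For illustration (a sketch; the exact value of LIM_EXPENSIVE comes from
   the lim-expensive --param):

     x_1 = a_2 / b_3;	-- LIM_EXPENSIVE, divisions are assumed costly
     x_1 = y_2;		-- 0, a plain SSA copy is free to leave in place
     x_1 = a_2 + b_3;	-- 1, the default for cheap assignments  */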
664 
665 /* Finds the outermost loop between OUTER and LOOP in which the memory reference
666    REF is independent.  If REF is not independent in LOOP, NULL is returned
667    instead.  */
668 
669 static class loop *
670 outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
671 {
672   class loop *aloop;
673 
674   if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
675     return NULL;
676 
677   for (aloop = outer;
678        aloop != loop;
679        aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
680     if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
681 	&& ref_indep_loop_p (aloop, ref, lim_raw))
682       return aloop;
683 
684   if (ref_indep_loop_p (loop, ref, lim_raw))
685     return loop;
686   else
687     return NULL;
688 }
689 
690 /* If there is a simple load or store to a memory reference in STMT, returns
691    the location of the memory reference, and sets IS_STORE according to whether
692    it is a store or load.  Otherwise, returns NULL.  */
693 
694 static tree *
695 simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
696 {
697   tree *lhs, *rhs;
698 
699   /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
700   if (!gimple_assign_single_p (stmt))
701     return NULL;
702 
703   lhs = gimple_assign_lhs_ptr (stmt);
704   rhs = gimple_assign_rhs1_ptr (stmt);
705 
706   if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
707     {
708       *is_store = false;
709       return rhs;
710     }
711   else if (gimple_vdef (stmt)
712 	   && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
713     {
714       *is_store = true;
715       return lhs;
716     }
717   else
718     return NULL;
719 }
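
/* Illustrative examples for simple_mem_ref_in_stmt (a sketch):

     x_1 = a[i_2];	-- returns &rhs, *IS_STORE = false (a simple load)
     a[i_2] = x_1;	-- returns &lhs, *IS_STORE = true  (a simple store)
     a = b;		-- aggregate copy, neither side is an SSA name or an
			   invariant, so NULL is returned  */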
720 
721 /* From a controlling predicate in DOM determine the arguments from
722    the PHI node PHI that are chosen if the predicate evaluates to
723    true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
724    they are non-NULL.  Returns true if the arguments can be determined,
725    else return false.  */
726 
727 static bool
728 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
729 				  tree *true_arg_p, tree *false_arg_p)
730 {
731   edge te, fe;
732   if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
733 					     &te, &fe))
734     return false;
735 
736   if (true_arg_p)
737     *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
738   if (false_arg_p)
739     *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
740 
741   return true;
742 }
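
/* A sketch of the shape handled by extract_true_false_args_from_phi:

     DOM:  if (cond_1) goto bb_t; else goto bb_f;
	   ...
     PHI:  x_3 = PHI <x_1(E_T), x_2(E_F)>

   where E_T and E_F are the edges controlled by the true and false outcome
   of the condition; *TRUE_ARG_P gets x_1 and *FALSE_ARG_P gets x_2.  */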
743 
744 /* Determine the outermost loop to which it is possible to hoist a statement
745    STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
746    the outermost loop in which the value computed by STMT is invariant.
747    If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
748    we preserve the fact whether STMT is executed.  It also fills in other related
749    information in LIM_DATA (STMT).
750 
751    The function returns false if STMT cannot be hoisted outside of the loop it
752    is defined in, and true otherwise.  */
753 
754 static bool
755 determine_max_movement (gimple *stmt, bool must_preserve_exec)
756 {
757   basic_block bb = gimple_bb (stmt);
758   class loop *loop = bb->loop_father;
759   class loop *level;
760   struct lim_aux_data *lim_data = get_lim_data (stmt);
761   tree val;
762   ssa_op_iter iter;
763 
764   if (must_preserve_exec)
765     level = ALWAYS_EXECUTED_IN (bb);
766   else
767     level = superloop_at_depth (loop, 1);
768   lim_data->max_loop = get_coldest_out_loop (level, loop, bb);
769   if (!lim_data->max_loop)
770     return false;
771 
772   if (gphi *phi = dyn_cast <gphi *> (stmt))
773     {
774       use_operand_p use_p;
775       unsigned min_cost = UINT_MAX;
776       unsigned total_cost = 0;
777       struct lim_aux_data *def_data;
778 
779       /* We will end up promoting dependencies to be unconditionally
780 	 evaluated.  For this reason the PHI cost (and thus the
781 	 cost we remove from the loop by doing the invariant motion)
782 	 is that of the cheapest PHI argument dependency chain.  */
783       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
784 	{
785 	  val = USE_FROM_PTR (use_p);
786 
787 	  if (TREE_CODE (val) != SSA_NAME)
788 	    {
789 	      /* Assign cost 1 to constants.  */
790 	      min_cost = MIN (min_cost, 1);
791 	      total_cost += 1;
792 	      continue;
793 	    }
794 	  if (!add_dependency (val, lim_data, loop, false))
795 	    return false;
796 
797 	  gimple *def_stmt = SSA_NAME_DEF_STMT (val);
798 	  if (gimple_bb (def_stmt)
799 	      && gimple_bb (def_stmt)->loop_father == loop)
800 	    {
801 	      def_data = get_lim_data (def_stmt);
802 	      if (def_data)
803 		{
804 		  min_cost = MIN (min_cost, def_data->cost);
805 		  total_cost += def_data->cost;
806 		}
807 	    }
808 	}
809 
810       min_cost = MIN (min_cost, total_cost);
811       lim_data->cost += min_cost;
812 
813       if (gimple_phi_num_args (phi) > 1)
814 	{
815 	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
816 	  gimple *cond;
817 	  if (gsi_end_p (gsi_last_bb (dom)))
818 	    return false;
819 	  cond = gsi_stmt (gsi_last_bb (dom));
820 	  if (gimple_code (cond) != GIMPLE_COND)
821 	    return false;
822 	  /* Verify that this is an extended form of a diamond and
823 	     the PHI arguments are completely controlled by the
824 	     predicate in DOM.  */
825 	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
826 	    return false;
827 
828 	  /* Fold in dependencies and cost of the condition.  */
829 	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
830 	    {
831 	      if (!add_dependency (val, lim_data, loop, false))
832 		return false;
833 	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
834 	      if (def_data)
835 		lim_data->cost += def_data->cost;
836 	    }
837 
838 	  /* We want to avoid unconditionally executing very expensive
839 	     operations.  As costs for our dependencies cannot be
840 	     negative, just claim we are not invariant for this case.
841 	     We also are not sure whether the control-flow inside the
842 	     loop will vanish.  */
843 	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
844 	      && !(min_cost != 0
845 		   && total_cost / min_cost <= 2))
846 	    return false;
847 
848 	  /* Assume that the control-flow in the loop will vanish.
849 	     ???  We should verify this and not artificially increase
850 	     the cost if that is not the case.  */
851 	  lim_data->cost += stmt_cost (stmt);
852 	}
853 
854       return true;
855     }
856   else
857     FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
858       if (!add_dependency (val, lim_data, loop, true))
859 	return false;
860 
861   if (gimple_vuse (stmt))
862     {
863       im_mem_ref *ref
864 	= lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
865       if (ref
866 	  && MEM_ANALYZABLE (ref))
867 	{
868 	  lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
869 						     loop, ref);
870 	  if (!lim_data->max_loop)
871 	    return false;
872 	}
873       else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
874 	return false;
875     }
876 
877   lim_data->cost += stmt_cost (stmt);
878 
879   return true;
880 }
881 
882 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
883    and that one of the operands of this statement is computed by STMT.
884    Ensure that STMT (together with all the statements that define its
885    operands) is hoisted at least out of the loop LEVEL.  */
886 
887 static void
888 set_level (gimple *stmt, class loop *orig_loop, class loop *level)
889 {
890   class loop *stmt_loop = gimple_bb (stmt)->loop_father;
891   struct lim_aux_data *lim_data;
892   gimple *dep_stmt;
893   unsigned i;
894 
895   stmt_loop = find_common_loop (orig_loop, stmt_loop);
896   lim_data = get_lim_data (stmt);
897   if (lim_data != NULL && lim_data->tgt_loop != NULL)
898     stmt_loop = find_common_loop (stmt_loop,
899 				  loop_outer (lim_data->tgt_loop));
900   if (flow_loop_nested_p (stmt_loop, level))
901     return;
902 
903   gcc_assert (level == lim_data->max_loop
904 	      || flow_loop_nested_p (lim_data->max_loop, level));
905 
906   lim_data->tgt_loop = level;
907   FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
908     set_level (dep_stmt, orig_loop, level);
909 }
910 
911 /* Determines the outermost loop out of which we want to hoist the statement
912    STMT.  For now we choose the outermost possible loop.  TODO -- use profiling
913    information to set it more sanely.  */
914 
915 static void
916 set_profitable_level (gimple *stmt)
917 {
918   set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
919 }
920 
921 /* Returns true if STMT is a call that has side effects.  */
922 
923 static bool
924 nonpure_call_p (gimple *stmt)
925 {
926   if (gimple_code (stmt) != GIMPLE_CALL)
927     return false;
928 
929   return gimple_has_side_effects (stmt);
930 }
931 
932 /* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */
933 
934 static gimple *
935 rewrite_reciprocal (gimple_stmt_iterator *bsi)
936 {
937   gassign *stmt, *stmt1, *stmt2;
938   tree name, lhs, type;
939   tree real_one;
940   gimple_stmt_iterator gsi;
941 
942   stmt = as_a <gassign *> (gsi_stmt (*bsi));
943   lhs = gimple_assign_lhs (stmt);
944   type = TREE_TYPE (lhs);
945 
946   real_one = build_one_cst (type);
947 
948   name = make_temp_ssa_name (type, NULL, "reciptmp");
949   stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
950 			       gimple_assign_rhs2 (stmt));
951   stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
952 			       gimple_assign_rhs1 (stmt));
953 
954   /* Replace division stmt with reciprocal and multiply stmts.
955      The multiply stmt is not invariant, so update iterator
956      and avoid rescanning.  */
957   gsi = *bsi;
958   gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
959   gsi_replace (&gsi, stmt2, true);
960 
961   /* Continue processing with invariant reciprocal statement.  */
962   return stmt1;
963 }
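
/* For example (an illustrative GIMPLE sketch of the rewrite above):

     x_1 = a_2 / b_3;

   becomes

     reciptmp_4 = 1.0 / b_3;	<- returned, candidate for hoisting
     x_1 = reciptmp_4 * a_2;	<- stays in place  */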
964 
965 /* Check if the pattern at *BSI is a bittest of the form
966    (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */
967 
968 static gimple *
969 rewrite_bittest (gimple_stmt_iterator *bsi)
970 {
971   gassign *stmt;
972   gimple *stmt1;
973   gassign *stmt2;
974   gimple *use_stmt;
975   gcond *cond_stmt;
976   tree lhs, name, t, a, b;
977   use_operand_p use;
978 
979   stmt = as_a <gassign *> (gsi_stmt (*bsi));
980   lhs = gimple_assign_lhs (stmt);
981 
982   /* Verify that the single use of lhs is a comparison against zero.  */
983   if (TREE_CODE (lhs) != SSA_NAME
984       || !single_imm_use (lhs, &use, &use_stmt))
985     return stmt;
986   cond_stmt = dyn_cast <gcond *> (use_stmt);
987   if (!cond_stmt)
988     return stmt;
989   if (gimple_cond_lhs (cond_stmt) != lhs
990       || (gimple_cond_code (cond_stmt) != NE_EXPR
991 	  && gimple_cond_code (cond_stmt) != EQ_EXPR)
992       || !integer_zerop (gimple_cond_rhs (cond_stmt)))
993     return stmt;
994 
995   /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
996   stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
997   if (gimple_code (stmt1) != GIMPLE_ASSIGN)
998     return stmt;
999 
1000   /* There is a conversion in between possibly inserted by fold.  */
1001   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
1002     {
1003       t = gimple_assign_rhs1 (stmt1);
1004       if (TREE_CODE (t) != SSA_NAME
1005 	  || !has_single_use (t))
1006 	return stmt;
1007       stmt1 = SSA_NAME_DEF_STMT (t);
1008       if (gimple_code (stmt1) != GIMPLE_ASSIGN)
1009 	return stmt;
1010     }
1011 
1012   /* Verify that B is loop invariant but A is not.  Verify that with
1013      all the stmt walking we are still in the same loop.  */
1014   if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
1015       || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
1016     return stmt;
1017 
1018   a = gimple_assign_rhs1 (stmt1);
1019   b = gimple_assign_rhs2 (stmt1);
1020 
1021   if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
1022       && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
1023     {
1024       gimple_stmt_iterator rsi;
1025 
1026       /* 1 << B */
1027       t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
1028 		       build_int_cst (TREE_TYPE (a), 1), b);
1029       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1030       stmt1 = gimple_build_assign (name, t);
1031 
1032       /* A & (1 << B) */
1033       t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
1034       name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1035       stmt2 = gimple_build_assign (name, t);
1036 
1037       /* Replace the SSA_NAME we compare against zero.  Adjust
1038 	 the type of zero accordingly.  */
1039       SET_USE (use, name);
1040       gimple_cond_set_rhs (cond_stmt,
1041 			   build_int_cst_type (TREE_TYPE (name),
1042 					       0));
1043 
1044       /* Don't use gsi_replace here, none of the new assignments sets
1045 	 the variable originally set in stmt.  Move bsi to stmt1, and
1046 	 then remove the original stmt, so that we get a chance to
1047 	 retain debug info for it.  */
1048       rsi = *bsi;
1049       gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1050       gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
1051       gimple *to_release = gsi_stmt (rsi);
1052       gsi_remove (&rsi, true);
1053       release_defs (to_release);
1054 
1055       return stmt1;
1056     }
1057 
1058   return stmt;
1059 }
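
/* For example (an illustrative sketch of the bittest rewrite above):

     t_1 = a_2 >> b_3;
     c_4 = t_1 & 1;
     if (c_4 != 0) ...

   becomes

     shifttmp_5 = 1 << b_3;	<- returned, hoistable when B is invariant
     shifttmp_6 = a_2 & shifttmp_5;
     if (shifttmp_6 != 0) ...  */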
1060 
1061 /* Determine the outermost loops in which statements in basic block BB are
1062    invariant, and record them in the LIM_DATA associated with the
1063    statements.  */
1064 
1065 static void
1066 compute_invariantness (basic_block bb)
1067 {
1068   enum move_pos pos;
1069   gimple_stmt_iterator bsi;
1070   gimple *stmt;
1071   bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1072   class loop *outermost = ALWAYS_EXECUTED_IN (bb);
1073   struct lim_aux_data *lim_data;
1074 
1075   if (!loop_outer (bb->loop_father))
1076     return;
1077 
1078   if (dump_file && (dump_flags & TDF_DETAILS))
1079     fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1080 	     bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1081 
1082   /* Look at PHI nodes, but only if there are at most two.
1083      ???  We could relax this further by post-processing the inserted
1084      code and transforming adjacent cond-exprs with the same predicate
1085      to control flow again.  */
1086   bsi = gsi_start_phis (bb);
1087   if (!gsi_end_p (bsi)
1088       && ((gsi_next (&bsi), gsi_end_p (bsi))
1089 	  || (gsi_next (&bsi), gsi_end_p (bsi))))
1090     for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1091       {
1092 	stmt = gsi_stmt (bsi);
1093 
1094 	pos = movement_possibility (stmt);
1095 	if (pos == MOVE_IMPOSSIBLE)
1096 	  continue;
1097 
1098 	lim_data = get_lim_data (stmt);
1099 	if (! lim_data)
1100 	  lim_data = init_lim_data (stmt);
1101 	lim_data->always_executed_in = outermost;
1102 
1103 	if (!determine_max_movement (stmt, false))
1104 	  {
1105 	    lim_data->max_loop = NULL;
1106 	    continue;
1107 	  }
1108 
1109 	if (dump_file && (dump_flags & TDF_DETAILS))
1110 	  {
1111 	    print_gimple_stmt (dump_file, stmt, 2);
1112 	    fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1113 		     loop_depth (lim_data->max_loop),
1114 		     lim_data->cost);
1115 	  }
1116 
1117 	if (lim_data->cost >= LIM_EXPENSIVE)
1118 	  set_profitable_level (stmt);
1119       }
1120 
1121   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1122     {
1123       stmt = gsi_stmt (bsi);
1124 
1125       pos = movement_possibility (stmt);
1126       if (pos == MOVE_IMPOSSIBLE)
1127 	{
1128 	  if (nonpure_call_p (stmt))
1129 	    {
1130 	      maybe_never = true;
1131 	      outermost = NULL;
1132 	    }
1133 	  /* Make sure to note always_executed_in for stores to make
1134 	     store-motion work.  */
1135 	  else if (stmt_makes_single_store (stmt))
1136 	    {
1137 	      struct lim_aux_data *lim_data = get_lim_data (stmt);
1138 	      if (! lim_data)
1139 		lim_data = init_lim_data (stmt);
1140 	      lim_data->always_executed_in = outermost;
1141 	    }
1142 	  continue;
1143 	}
1144 
1145       if (is_gimple_assign (stmt)
1146 	  && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1147 	      == GIMPLE_BINARY_RHS))
1148 	{
1149 	  tree op0 = gimple_assign_rhs1 (stmt);
1150 	  tree op1 = gimple_assign_rhs2 (stmt);
1151 	  class loop *ol1 = outermost_invariant_loop (op1,
1152 					loop_containing_stmt (stmt));
1153 
1154 	  /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1155 	     to be hoisted out of loop, saving expensive divide.  */
1156 	  if (pos == MOVE_POSSIBLE
1157 	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1158 	      && flag_unsafe_math_optimizations
1159 	      && !flag_trapping_math
1160 	      && ol1 != NULL
1161 	      && outermost_invariant_loop (op0, ol1) == NULL)
1162 	    stmt = rewrite_reciprocal (&bsi);
1163 
1164 	  /* If the shift count is invariant, convert (A >> B) & 1 to
1165 	     A & (1 << B) allowing the bit mask to be hoisted out of the loop
1166 	     saving an expensive shift.  */
1167 	  if (pos == MOVE_POSSIBLE
1168 	      && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1169 	      && integer_onep (op1)
1170 	      && TREE_CODE (op0) == SSA_NAME
1171 	      && has_single_use (op0))
1172 	    stmt = rewrite_bittest (&bsi);
1173 	}
1174 
1175       lim_data = get_lim_data (stmt);
1176       if (! lim_data)
1177 	lim_data = init_lim_data (stmt);
1178       lim_data->always_executed_in = outermost;
1179 
1180       if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1181 	continue;
1182 
1183       if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1184 	{
1185 	  lim_data->max_loop = NULL;
1186 	  continue;
1187 	}
1188 
1189       if (dump_file && (dump_flags & TDF_DETAILS))
1190 	{
1191 	  print_gimple_stmt (dump_file, stmt, 2);
1192 	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1193 		   loop_depth (lim_data->max_loop),
1194 		   lim_data->cost);
1195 	}
1196 
1197       if (lim_data->cost >= LIM_EXPENSIVE)
1198 	set_profitable_level (stmt);
1199     }
1200 }
1201 
1202 /* Hoist the statements in basic block BB out of the loops prescribed by
1203    data stored in LIM_DATA structures associated with each statement.  Callback
1204    for walk_dominator_tree.  */
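
/* An illustrative sketch (not from the sources) of how a two-argument PHI
   is rewritten below before being placed on the preheader edge of LEVEL:

     # x_3 = PHI <a_1(T), b_2(F)>	controlled by  if (p_4 < q_5)

   becomes

     x_3 = p_4 < q_5 ? a_1 : b_2;	(a COND_EXPR assignment)

   while a single-argument PHI simply becomes  x_3 = a_1;  */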
1205 
1206 unsigned int
1207 move_computations_worker (basic_block bb)
1208 {
1209   class loop *level;
1210   unsigned cost = 0;
1211   struct lim_aux_data *lim_data;
1212   unsigned int todo = 0;
1213 
1214   if (!loop_outer (bb->loop_father))
1215     return todo;
1216 
1217   for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1218     {
1219       gassign *new_stmt;
1220       gphi *stmt = bsi.phi ();
1221 
1222       lim_data = get_lim_data (stmt);
1223       if (lim_data == NULL)
1224 	{
1225 	  gsi_next (&bsi);
1226 	  continue;
1227 	}
1228 
1229       cost = lim_data->cost;
1230       level = lim_data->tgt_loop;
1231       clear_lim_data (stmt);
1232 
1233       if (!level)
1234 	{
1235 	  gsi_next (&bsi);
1236 	  continue;
1237 	}
1238 
1239       if (dump_file && (dump_flags & TDF_DETAILS))
1240 	{
1241 	  fprintf (dump_file, "Moving PHI node\n");
1242 	  print_gimple_stmt (dump_file, stmt, 0);
1243 	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1244 		   cost, level->num);
1245 	}
1246 
1247       if (gimple_phi_num_args (stmt) == 1)
1248 	{
1249 	  tree arg = PHI_ARG_DEF (stmt, 0);
1250 	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1251 					  TREE_CODE (arg), arg);
1252 	}
1253       else
1254 	{
1255 	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1256 	  gimple *cond = gsi_stmt (gsi_last_bb (dom));
1257 	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1258 	  /* Get the PHI arguments corresponding to the true and false
1259 	     edges of COND.  */
1260 	  extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1261 	  gcc_assert (arg0 && arg1);
1262 	  t = build2 (gimple_cond_code (cond), boolean_type_node,
1263 		      gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1264 	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1265 					  COND_EXPR, t, arg0, arg1);
1266 	  todo |= TODO_cleanup_cfg;
1267 	}
1268       if (!ALWAYS_EXECUTED_IN (bb)
1269 	  || (ALWAYS_EXECUTED_IN (bb) != level
1270 	      && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))
1271 	reset_flow_sensitive_info (gimple_assign_lhs (new_stmt));
1272       gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1273       remove_phi_node (&bsi, false);
1274     }
1275 
1276   for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1277     {
1278       edge e;
1279 
1280       gimple *stmt = gsi_stmt (bsi);
1281 
1282       lim_data = get_lim_data (stmt);
1283       if (lim_data == NULL)
1284 	{
1285 	  gsi_next (&bsi);
1286 	  continue;
1287 	}
1288 
1289       cost = lim_data->cost;
1290       level = lim_data->tgt_loop;
1291       clear_lim_data (stmt);
1292 
1293       if (!level)
1294 	{
1295 	  gsi_next (&bsi);
1296 	  continue;
1297 	}
1298 
1299       /* We do not really want to move conditionals out of the loop; we just
1300 	 placed them here to force their operands to be moved if necessary.  */
1301       if (gimple_code (stmt) == GIMPLE_COND)
1302 	{
1303 	  gsi_next (&bsi);
1304 	  continue;
1305 	}
1306 
1307       if (dump_file && (dump_flags & TDF_DETAILS))
1308 	{
1309 	  fprintf (dump_file, "Moving statement\n");
1310 	  print_gimple_stmt (dump_file, stmt, 0);
1311 	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1312 		   cost, level->num);
1313 	}
1314 
1315       e = loop_preheader_edge (level);
1316       gcc_assert (!gimple_vdef (stmt));
1317       if (gimple_vuse (stmt))
1318 	{
1319 	  /* The new VUSE is the one from the virtual PHI in the loop
1320 	     header or the one already present.  */
1321 	  gphi_iterator gsi2;
1322 	  for (gsi2 = gsi_start_phis (e->dest);
1323 	       !gsi_end_p (gsi2); gsi_next (&gsi2))
1324 	    {
1325 	      gphi *phi = gsi2.phi ();
1326 	      if (virtual_operand_p (gimple_phi_result (phi)))
1327 		{
1328 		  SET_USE (gimple_vuse_op (stmt),
1329 			   PHI_ARG_DEF_FROM_EDGE (phi, e));
1330 		  break;
1331 		}
1332 	    }
1333 	}
1334       gsi_remove (&bsi, false);
1335       if (gimple_has_lhs (stmt)
1336 	  && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1337 	  && (!ALWAYS_EXECUTED_IN (bb)
1338 	      || !(ALWAYS_EXECUTED_IN (bb) == level
1339 		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1340 	reset_flow_sensitive_info (gimple_get_lhs (stmt));
1341       /* In case this is a stmt that is not unconditionally executed
1342          when the target loop header is executed and the stmt may
1343 	 invoke undefined integer or pointer overflow, rewrite it to
1344 	 unsigned arithmetic.  */
1345       if (is_gimple_assign (stmt)
1346 	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1347 	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1348 	  && arith_code_with_undefined_signed_overflow
1349 	       (gimple_assign_rhs_code (stmt))
1350 	  && (!ALWAYS_EXECUTED_IN (bb)
1351 	      || !(ALWAYS_EXECUTED_IN (bb) == level
1352 		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1353 	gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1354       else
1355 	gsi_insert_on_edge (e, stmt);
1356     }
1357 
1358   return todo;
1359 }
1360 
1361 /* Checks whether the statement defining variable *INDEX can be hoisted
1362    out of the loop passed in DATA.  Callback for for_each_index.  */
1363 
1364 static bool
1365 may_move_till (tree ref, tree *index, void *data)
1366 {
1367   class loop *loop = (class loop *) data, *max_loop;
1368 
1369   /* If REF is an array reference, check also that the step and the lower
1370      bound are invariant in LOOP.  */
1371   if (TREE_CODE (ref) == ARRAY_REF)
1372     {
1373       tree step = TREE_OPERAND (ref, 3);
1374       tree lbound = TREE_OPERAND (ref, 2);
1375 
1376       max_loop = outermost_invariant_loop (step, loop);
1377       if (!max_loop)
1378 	return false;
1379 
1380       max_loop = outermost_invariant_loop (lbound, loop);
1381       if (!max_loop)
1382 	return false;
1383     }
1384 
1385   max_loop = outermost_invariant_loop (*index, loop);
1386   if (!max_loop)
1387     return false;
1388 
1389   return true;
1390 }
1391 
1392 /* If OP is an SSA_NAME, force the statement that defines it to be
1393    moved out of the LOOP.  ORIG_LOOP is the loop in which OP is used.  */
1394 
1395 static void
1396 force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
1397 {
1398   gimple *stmt;
1399 
1400   if (!op
1401       || is_gimple_min_invariant (op))
1402     return;
1403 
1404   gcc_assert (TREE_CODE (op) == SSA_NAME);
1405 
1406   stmt = SSA_NAME_DEF_STMT (op);
1407   if (gimple_nop_p (stmt))
1408     return;
1409 
1410   set_level (stmt, orig_loop, loop);
1411 }
1412 
1413 /* Forces statements defining invariants in REF (and *INDEX) to be moved out of
1414    the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback for
1415    for_each_index.  */
1416 
1417 struct fmt_data
1418 {
1419   class loop *loop;
1420   class loop *orig_loop;
1421 };
1422 
1423 static bool
1424 force_move_till (tree ref, tree *index, void *data)
1425 {
1426   struct fmt_data *fmt_data = (struct fmt_data *) data;
1427 
1428   if (TREE_CODE (ref) == ARRAY_REF)
1429     {
1430       tree step = TREE_OPERAND (ref, 3);
1431       tree lbound = TREE_OPERAND (ref, 2);
1432 
1433       force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1434       force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1435     }
1436 
1437   force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1438 
1439   return true;
1440 }
1441 
1442 /* A function to free the mem_ref object MEM.  */
1443 
1444 static void
1445 memref_free (class im_mem_ref *mem)
1446 {
1447   mem->accesses_in_loop.release ();
1448 }
1449 
1450 /* Allocates and returns a memory reference description for MEM whose hash
1451    value is HASH and id is ID.  */
1452 
1453 static im_mem_ref *
1454 mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
1455 {
1456   im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1457   if (mem)
1458     ref->mem = *mem;
1459   else
1460     ao_ref_init (&ref->mem, error_mark_node);
1461   ref->id = id;
1462   ref->ref_canonical = false;
1463   ref->ref_decomposed = false;
1464   ref->hash = hash;
1465   ref->stored = NULL;
1466   ref->loaded = NULL;
1467   bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1468   ref->accesses_in_loop.create (1);
1469 
1470   return ref;
1471 }
1472 
1473 /* Records memory reference location *LOC to the memory reference
1474    description REF.  The reference occurs in statement STMT.  */
1475 
1476 static void
1477 record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1478 {
1479   mem_ref_loc aref;
1480   aref.stmt = stmt;
1481   aref.ref = loc;
1482   ref->accesses_in_loop.safe_push (aref);
1483 }
1484 
1485 /* Set the LOOP bit in REF's stored bitmap, allocating the bitmap if
1486    necessary.  Return whether a bit was changed.  */
1487 
1488 static bool
1489 set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
1490 {
1491   if (!ref->stored)
1492     ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1493   return bitmap_set_bit (ref->stored, loop->num);
1494 }
1495 
1496 /* Marks reference REF as stored in LOOP.  */
1497 
1498 static void
1499 mark_ref_stored (im_mem_ref *ref, class loop *loop)
1500 {
1501   while (loop != current_loops->tree_root
1502 	 && set_ref_stored_in_loop (ref, loop))
1503     loop = loop_outer (loop);
1504 }
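
/* Note (a descriptive aside): the loop above stops as soon as
   set_ref_stored_in_loop reports that the bit was already set, because any
   earlier marking walk that set this loop's bit also proceeded to all the
   enclosing loops.  mark_ref_loaded below relies on the same invariant for
   the loaded bitmap.  */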
1505 
1506 /* Set the LOOP bit in REF's loaded bitmap, allocating the bitmap if
1507    necessary.  Return whether a bit was changed.  */
1508 
1509 static bool
1510 set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
1511 {
1512   if (!ref->loaded)
1513     ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
1514   return bitmap_set_bit (ref->loaded, loop->num);
1515 }
1516 
1517 /* Marks reference REF as loaded in LOOP.  */
1518 
1519 static void
1520 mark_ref_loaded (im_mem_ref *ref, class loop *loop)
1521 {
1522   while (loop != current_loops->tree_root
1523 	 && set_ref_loaded_in_loop (ref, loop))
1524     loop = loop_outer (loop);
1525 }
1526 
1527 /* Gathers memory references in statement STMT in LOOP, storing the
1528    information about them in the memory_accesses structure.  Marks
1529    the vops accessed through unrecognized statements there as
1530    well.  */
1531 
1532 static void
1533 gather_mem_refs_stmt (class loop *loop, gimple *stmt)
1534 {
1535   tree *mem = NULL;
1536   hashval_t hash;
1537   im_mem_ref **slot;
1538   im_mem_ref *ref;
1539   bool is_stored;
1540   unsigned id;
1541 
1542   if (!gimple_vuse (stmt))
1543     return;
1544 
1545   mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1546   if (!mem && is_gimple_assign (stmt))
1547     {
1548       /* For aggregate copies record distinct references but use them
1549 	 only for disambiguation purposes.  */
1550       id = memory_accesses.refs_list.length ();
1551       ref = mem_ref_alloc (NULL, 0, id);
1552       memory_accesses.refs_list.safe_push (ref);
1553       if (dump_file && (dump_flags & TDF_DETAILS))
1554 	{
1555 	  fprintf (dump_file, "Unhandled memory reference %u: ", id);
1556 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1557 	}
1558       record_mem_ref_loc (ref, stmt, mem);
1559       is_stored = gimple_vdef (stmt);
1560     }
1561   else if (!mem)
1562     {
1563       /* We use the shared mem_ref for all unanalyzable refs.  */
1564       id = UNANALYZABLE_MEM_ID;
1565       ref = memory_accesses.refs_list[id];
1566       if (dump_file && (dump_flags & TDF_DETAILS))
1567 	{
1568 	  fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1569 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1570 	}
1571       is_stored = gimple_vdef (stmt);
1572     }
1573   else
1574     {
1575       /* We are looking for equal refs that might differ in structure
1576          such as a.b vs. MEM[&a + 4].  So we key off the ao_ref but
1577 	 make sure we can canonicalize the ref in the hashtable if
1578 	 non-operand_equal_p refs are found.  For the lookup we mark
1579 	 the case we want strict equality with aor.max_size == -1.  */
1580       ao_ref aor;
1581       ao_ref_init (&aor, *mem);
1582       ao_ref_base (&aor);
1583       ao_ref_alias_set (&aor);
1584       HOST_WIDE_INT offset, size, max_size;
1585       poly_int64 saved_maxsize = aor.max_size, mem_off;
1586       tree mem_base;
1587       bool ref_decomposed;
1588       if (aor.max_size_known_p ()
1589 	  && aor.offset.is_constant (&offset)
1590 	  && aor.size.is_constant (&size)
1591 	  && aor.max_size.is_constant (&max_size)
1592 	  && size == max_size
1593 	  && (size % BITS_PER_UNIT) == 0
1594 	  /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
1595 	     size.  Make sure this is consistent with the extraction.  */
1596 	  && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
1597 	  && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
1598 		       aor.size)
1599 	  && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
1600 	{
1601 	  ref_decomposed = true;
1602 	  tree base = ao_ref_base (&aor);
1603 	  poly_int64 moffset;
1604 	  HOST_WIDE_INT mcoffset;
1605 	  if (TREE_CODE (base) == MEM_REF
1606 	      && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset)
1607 	      && moffset.is_constant (&mcoffset))
1608 	    {
1609 	      hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0);
1610 	      hash = iterative_hash_host_wide_int (mcoffset, hash);
1611 	    }
1612 	  else
1613 	    {
1614 	      hash = iterative_hash_expr (base, 0);
1615 	      hash = iterative_hash_host_wide_int (offset, hash);
1616 	    }
1617 	  hash = iterative_hash_host_wide_int (size, hash);
1618 	}
1619       else
1620 	{
1621 	  ref_decomposed = false;
1622 	  hash = iterative_hash_expr (aor.ref, 0);
1623 	  aor.max_size = -1;
1624 	}
1625       slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
1626       aor.max_size = saved_maxsize;
1627       if (*slot)
1628 	{
1629 	  if (!(*slot)->ref_canonical
1630 	      && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
1631 	    {
1632 	      /* If we didn't yet canonicalize the hashtable ref (which
1633 	         we'll end up using for code insertion) and hit a second
1634 		 equal ref that is not structurally equivalent create
1635 		 a canonical ref which is a bare MEM_REF.  */
1636 	      if (TREE_CODE (*mem) == MEM_REF
1637 		  || TREE_CODE (*mem) == TARGET_MEM_REF)
1638 		{
1639 		  (*slot)->mem.ref = *mem;
1640 		  (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
1641 		}
1642 	      else
1643 		{
1644 		  tree ref_alias_type = reference_alias_ptr_type (*mem);
1645 		  unsigned int ref_align = get_object_alignment (*mem);
1646 		  tree ref_type = TREE_TYPE (*mem);
1647 		  tree tmp = build1 (ADDR_EXPR, ptr_type_node,
1648 				     unshare_expr (mem_base));
1649 		  if (TYPE_ALIGN (ref_type) != ref_align)
1650 		    ref_type = build_aligned_type (ref_type, ref_align);
1651 		  (*slot)->mem.ref
1652 		    = fold_build2 (MEM_REF, ref_type, tmp,
1653 				   build_int_cst (ref_alias_type, mem_off));
1654 		  if ((*slot)->mem.volatile_p)
1655 		    TREE_THIS_VOLATILE ((*slot)->mem.ref) = 1;
1656 		  gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
1657 				       && is_gimple_mem_ref_addr
1658 				            (TREE_OPERAND ((*slot)->mem.ref,
1659 							   0)));
1660 		  (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
1661 		}
1662 	      (*slot)->ref_canonical = true;
1663 	    }
1664 	  ref = *slot;
1665 	  id = ref->id;
1666 	}
1667       else
1668 	{
1669 	  id = memory_accesses.refs_list.length ();
1670 	  ref = mem_ref_alloc (&aor, hash, id);
1671 	  ref->ref_decomposed = ref_decomposed;
1672 	  memory_accesses.refs_list.safe_push (ref);
1673 	  *slot = ref;
1674 
1675 	  if (dump_file && (dump_flags & TDF_DETAILS))
1676 	    {
1677 	      fprintf (dump_file, "Memory reference %u: ", id);
1678 	      print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1679 	      fprintf (dump_file, "\n");
1680 	    }
1681 	}
1682 
1683       record_mem_ref_loc (ref, stmt, mem);
1684     }
1685   if (is_stored)
1686     {
1687       bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1688       mark_ref_stored (ref, loop);
1689     }
1690   /* A non-simple memory op is also treated as a read when it is a write.  */
1691   if (!is_stored || id == UNANALYZABLE_MEM_ID
1692       || ref->mem.ref == error_mark_node)
1693     {
1694       bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
1695       mark_ref_loaded (ref, loop);
1696     }
1697   init_lim_data (stmt)->ref = ref->id;
1698   return;
1699 }
1700 
1701 static unsigned *bb_loop_postorder;
1702 
1703 /* qsort comparator to sort blocks after their loop father's postorder.  */
1704 
1705 static int
1706 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1707 				void *bb_loop_postorder_)
1708 {
1709   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1710   basic_block bb1 = *(const basic_block *)bb1_;
1711   basic_block bb2 = *(const basic_block *)bb2_;
1712   class loop *loop1 = bb1->loop_father;
1713   class loop *loop2 = bb2->loop_father;
1714   if (loop1->num == loop2->num)
1715     return bb1->index - bb2->index;
1716   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1717 }
1718 
1719 /* qsort comparator to sort ref locs after their loop father's postorder.  */
1720 
1721 static int
1722 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1723 				 void *bb_loop_postorder_)
1724 {
1725   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1726   const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1727   const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
1728   class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1729   class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1730   if (loop1->num == loop2->num)
1731     return 0;
1732   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1733 }
1734 
1735 /* Gathers memory references in loops.  */
1736 
1737 static void
1738 analyze_memory_references (bool store_motion)
1739 {
1740   gimple_stmt_iterator bsi;
1741   basic_block bb, *bbs;
1742   class loop *outer;
1743   unsigned i, n;
1744 
1745   /* Collect all basic-blocks in loops and sort them after their
1746      loop father's postorder.  */
1747   i = 0;
1748   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1749   FOR_EACH_BB_FN (bb, cfun)
1750     if (bb->loop_father != current_loops->tree_root)
1751       bbs[i++] = bb;
1752   n = i;
1753   gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
1754 	      bb_loop_postorder);
1755 
1756   /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1757      That results in better locality for all the bitmaps.  It also
1758      automatically sorts the location list of gathered memory references
1759      after their loop postorder number, allowing us to binary-search it.  */
1760   for (i = 0; i < n; ++i)
1761     {
1762       basic_block bb = bbs[i];
1763       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1764         gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1765     }
1766 
1767   /* Verify the list of gathered memory references is sorted after their
1768      loop postorder number.  */
1769   if (flag_checking)
1770     {
1771       im_mem_ref *ref;
1772       FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1773 	for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
1774 	  gcc_assert (sort_locs_in_loop_postorder_cmp
1775 			(&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
1776 			 bb_loop_postorder) <= 0);
1777     }
1778 
1779   free (bbs);
1780 
1781   if (!store_motion)
1782     return;
1783 
1784   /* Propagate the information about accessed memory references up
1785      the loop hierarchy.  */
1786   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1787     {
1788       /* Finalize the overall touched references (including subloops).  */
1789       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1790 		       &memory_accesses.refs_stored_in_loop[loop->num]);
1791 
1792       /* Propagate the information about accessed memory references up
1793 	 the loop hierarchy.  */
1794       outer = loop_outer (loop);
1795       if (outer == current_loops->tree_root)
1796 	continue;
1797 
1798       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1799 		       &memory_accesses.all_refs_stored_in_loop[loop->num]);
1800     }
1801 }
1802 
1803 /* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
1804    tree_to_aff_combination_expand.  */
1805 
1806 static bool
1807 mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1808 		      hash_map<tree, name_expansion *> **ttae_cache,
1809 		      bool tbaa_p)
1810 {
1811   gcc_checking_assert (mem1->mem.ref != error_mark_node
1812 		       && mem2->mem.ref != error_mark_node);
1813 
1814   /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1815      object and their offsets differ in such a way that the locations cannot
1816      overlap, then they cannot alias.  */
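  /* For example (a hypothetical pair of references): for a[i] and a[i + 1]
     the expanded affine offsets differ by exactly one element, so with each
     access covering one element the check below proves that the two
     locations cannot overlap.  */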
1817   poly_widest_int size1, size2;
1818   aff_tree off1, off2;
1819 
1820   /* Perform basic offset and type-based disambiguation.  */
1821   if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
1822     return false;
1823 
1824   /* The expansion of addresses may be a bit expensive, thus we only do
1825      the check at -O2 and higher optimization levels.  */
1826   if (optimize < 2)
1827     return true;
1828 
1829   get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1830   get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1831   aff_combination_expand (&off1, ttae_cache);
1832   aff_combination_expand (&off2, ttae_cache);
1833   aff_combination_scale (&off1, -1);
1834   aff_combination_add (&off2, &off1);
1835 
1836   if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1837     return false;
1838 
1839   return true;
1840 }
1841 
1842 /* Compare function for bsearch searching for reference locations
1843    in a loop.  */
1844 
1845 static int
1846 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1847 			  void *bb_loop_postorder_)
1848 {
1849   unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1850   class loop *loop = (class loop *)const_cast<void *>(loop_);
1851   mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1852   class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1853   if (loop->num  == loc_loop->num
1854       || flow_loop_nested_p (loop, loc_loop))
1855     return 0;
1856   return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1857 	  ? -1 : 1);
1858 }
1859 
1860 /* Iterates over all locations of REF in LOOP and its subloops calling
1861    fn.operator() with the location as argument.  When that operator
1862    returns true the iteration is stopped and true is returned.
1863    Otherwise false is returned.  */
1864 
1865 template <typename FN>
1866 static bool
1867 for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
1868 {
1869   unsigned i;
1870   mem_ref_loc *loc;
1871 
1872   /* Search for the cluster of locs in the accesses_in_loop vector
1873      which is sorted after the postorder index of the loop father.  */
1874   loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
1875 				       bb_loop_postorder);
1876   if (!loc)
1877     return false;
1878 
1879   /* We have found one location inside loop or its sub-loops.  Iterate
1880      both forward and backward to cover the whole cluster.  */
1881   i = loc - ref->accesses_in_loop.address ();
1882   while (i > 0)
1883     {
1884       --i;
1885       mem_ref_loc *l = &ref->accesses_in_loop[i];
1886       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1887 	break;
1888       if (fn (l))
1889 	return true;
1890     }
1891   for (i = loc - ref->accesses_in_loop.address ();
1892        i < ref->accesses_in_loop.length (); ++i)
1893     {
1894       mem_ref_loc *l = &ref->accesses_in_loop[i];
1895       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1896 	break;
1897       if (fn (l))
1898 	return true;
1899     }
1900 
1901   return false;
1902 }
1903 
1904 /* Rewrites location LOC by TMP_VAR.  */
1905 
1906 class rewrite_mem_ref_loc
1907 {
1908 public:
1909   rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1910   bool operator () (mem_ref_loc *loc);
1911   tree tmp_var;
1912 };
1913 
1914 bool
1915 rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
1916 {
1917   *loc->ref = tmp_var;
1918   update_stmt (loc->stmt);
1919   return false;
1920 }
1921 
1922 /* Rewrites all references to REF in LOOP by variable TMP_VAR.  */
1923 
1924 static void
1925 rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
1926 {
1927   for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1928 }
1929 
1930 /* Stores the first reference location in LOCP.  */
1931 
1932 class first_mem_ref_loc_1
1933 {
1934 public:
1935   first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
1936   bool operator () (mem_ref_loc *loc);
1937   mem_ref_loc **locp;
1938 };
1939 
1940 bool
1941 first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
1942 {
1943   *locp = loc;
1944   return true;
1945 }
1946 
1947 /* Returns the first reference location to REF in LOOP.  */
1948 
1949 static mem_ref_loc *
1950 first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
1951 {
1952   mem_ref_loc *locp = NULL;
1953   for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1954   return locp;
1955 }
1956 
1957 /* Helper function for execute_sm.  Emit code to store TMP_VAR into
1958    MEM along edge EX.
1959 
1960    The store is only done if MEM has changed.  We do this so no
1961    changes to MEM occur on code paths that did not originally store
1962    into it.
1963 
1964    The common case for execute_sm will transform:
1965 
1966      for (...) {
1967        if (foo)
1968          stuff;
1969        else
1970          MEM = TMP_VAR;
1971      }
1972 
1973    into:
1974 
1975      lsm = MEM;
1976      for (...) {
1977        if (foo)
1978          stuff;
1979        else
1980          lsm = TMP_VAR;
1981      }
1982      MEM = lsm;
1983 
1984   This function will generate:
1985 
1986      lsm = MEM;
1987 
1988      lsm_flag = false;
1989      ...
1990      for (...) {
1991        if (foo)
1992          stuff;
1993        else {
1994          lsm = TMP_VAR;
1995          lsm_flag = true;
1996        }
1997      }
1998      if (lsm_flag)	<--
1999        MEM = lsm;	<-- (X)
2000 
2001   In case MEM and TMP_VAR are NULL the function will return the then
2002   block so the caller can insert (X) and other related stmts.
2003 */
2004 
2005 static basic_block
2006 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
2007 		       edge preheader, hash_set <basic_block> *flag_bbs,
2008 		       edge &append_cond_position, edge &last_cond_fallthru)
2009 {
2010   basic_block new_bb, then_bb, old_dest;
2011   bool loop_has_only_one_exit;
2012   edge then_old_edge;
2013   gimple_stmt_iterator gsi;
2014   gimple *stmt;
2015   bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
2016 
2017   profile_count count_sum = profile_count::zero ();
2018   int nbbs = 0, ncount = 0;
2019   profile_probability flag_probability = profile_probability::uninitialized ();
2020 
2021   /* Flag is set in FLAG_BBS. Determine probability that flag will be true
2022      at loop exit.
2023 
2024      This code may look fancy, but it cannot update profile very realistically
2025      because we do not know the probability that flag will be true at a
2026      given loop exit.
2027 
2028      We look for two interesting extremes:
2029        - when exit is dominated by block setting the flag, we know it will
2030          always be true.  This is a common case.
2031        - when all blocks setting the flag have very low frequency we know
2032          it will likely be false.
2033      In all other cases we default to 2/3 for flag being true.  */
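  /* A worked example of the middle case below (with made-up counts): if the
     blocks setting the flag have a combined count of 100 and the preheader
     count is 400, flag_probability becomes 1/4; a combined count of 350
     would exceed the 2/3 cap and is clamped to 2/3.  */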
2034 
2035   for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
2036        it != flag_bbs->end (); ++it)
2037     {
2038        if ((*it)->count.initialized_p ())
2039          count_sum += (*it)->count, ncount ++;
2040        if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
2041 	 flag_probability = profile_probability::always ();
2042        nbbs++;
2043     }
2044 
2045   profile_probability cap = profile_probability::always ().apply_scale (2, 3);
2046 
2047   if (flag_probability.initialized_p ())
2048     ;
2049   else if (ncount == nbbs
2050 	   && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
2051     {
2052       flag_probability = count_sum.probability_in (preheader->count ());
2053       if (flag_probability > cap)
2054 	flag_probability = cap;
2055     }
2056 
2057   if (!flag_probability.initialized_p ())
2058     flag_probability = cap;
2059 
2060   /* ?? Insert store after previous store if applicable.  See note
2061      below.  */
2062   if (append_cond_position)
2063     ex = append_cond_position;
2064 
2065   loop_has_only_one_exit = single_pred_p (ex->dest);
2066 
2067   if (loop_has_only_one_exit)
2068     ex = split_block_after_labels (ex->dest);
2069   else
2070     {
2071       for (gphi_iterator gpi = gsi_start_phis (ex->dest);
2072 	   !gsi_end_p (gpi); gsi_next (&gpi))
2073 	{
2074 	  gphi *phi = gpi.phi ();
2075 	  if (virtual_operand_p (gimple_phi_result (phi)))
2076 	    continue;
2077 
2078 	  /* When the destination has a non-virtual PHI node with multiple
2079 	     predecessors make sure we preserve the PHI structure by
2080 	     forcing a forwarder block so that hoisting of that PHI will
2081 	     still work.  */
2082 	  split_edge (ex);
2083 	  break;
2084 	}
2085     }
2086 
2087   old_dest = ex->dest;
2088   new_bb = split_edge (ex);
2089   then_bb = create_empty_bb (new_bb);
2090   then_bb->count = new_bb->count.apply_probability (flag_probability);
2091   if (irr)
2092     then_bb->flags = BB_IRREDUCIBLE_LOOP;
2093   add_bb_to_loop (then_bb, new_bb->loop_father);
2094 
2095   gsi = gsi_start_bb (new_bb);
2096   stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
2097 			    NULL_TREE, NULL_TREE);
2098   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2099 
2100   /* Insert actual store.  */
2101   if (mem)
2102     {
2103       gsi = gsi_start_bb (then_bb);
2104       stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
2105       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2106     }
2107 
2108   edge e1 = single_succ_edge (new_bb);
2109   edge e2 = make_edge (new_bb, then_bb,
2110 	               EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2111   e2->probability = flag_probability;
2112 
2113   e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
2114   e1->flags &= ~EDGE_FALLTHRU;
2115 
2116   e1->probability = flag_probability.invert ();
2117 
2118   then_old_edge = make_single_succ_edge (then_bb, old_dest,
2119 			     EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2120 
2121   set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2122 
2123   if (append_cond_position)
2124     {
2125       basic_block prevbb = last_cond_fallthru->src;
2126       redirect_edge_succ (last_cond_fallthru, new_bb);
2127       set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2128       set_immediate_dominator (CDI_DOMINATORS, old_dest,
2129 			       recompute_dominator (CDI_DOMINATORS, old_dest));
2130     }
2131 
2132   /* ?? Because stores may alias, they must happen in the exact
2133      sequence they originally happened.  Save the position right after
2134      the (_lsm) store we just created so we can continue appending after
2135      it and maintain the original order.  */
2136   append_cond_position = then_old_edge;
2137   last_cond_fallthru = find_edge (new_bb, old_dest);
2138 
2139   if (!loop_has_only_one_exit)
2140     for (gphi_iterator gpi = gsi_start_phis (old_dest);
2141 	 !gsi_end_p (gpi); gsi_next (&gpi))
2142       {
2143 	gphi *phi = gpi.phi ();
2144 	unsigned i;
2145 
2146 	for (i = 0; i < gimple_phi_num_args (phi); i++)
2147 	  if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2148 	    {
2149 	      tree arg = gimple_phi_arg_def (phi, i);
2150 	      add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2151 	      update_stmt (phi);
2152 	    }
2153       }
2154 
2155   return then_bb;
2156 }
2157 
2158 /* When REF is written at the location, set the flag indicating the store.  */
2159 
2160 class sm_set_flag_if_changed
2161 {
2162 public:
2163   sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2164 	 : flag (flag_), bbs (bbs_) {}
2165   bool operator () (mem_ref_loc *loc);
2166   tree flag;
2167   hash_set <basic_block> *bbs;
2168 };
2169 
2170 bool
2171 sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
2172 {
2173   /* Only set the flag for writes.  */
2174   if (is_gimple_assign (loc->stmt)
2175       && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2176     {
2177       gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
2178       gimple *stmt = gimple_build_assign (flag, boolean_true_node);
2179       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2180       bbs->add (gimple_bb (stmt));
2181     }
2182   return false;
2183 }
2184 
2185 /* Helper function for execute_sm.  On every location where REF is
2186    set, set an appropriate flag indicating the store.  */
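/* For illustration (hypothetical GIMPLE, with the names used in the example
   before execute_sm_if_changed): every store location

     MEM = x_1;

   becomes

     MEM = x_1;
     lsm_flag = true;

   and the MEM on the left-hand side is later replaced by the lsm temporary
   via rewrite_mem_refs.  */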
2187 
2188 static tree
2189 execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
2190 				hash_set <basic_block> *bbs)
2191 {
2192   tree flag;
2193   char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
2194   flag = create_tmp_reg (boolean_type_node, str);
2195   for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
2196   return flag;
2197 }
2198 
2199 struct sm_aux
2200 {
2201   tree tmp_var;
2202   tree store_flag;
2203   hash_set <basic_block> flag_bbs;
2204 };
2205 
2206 /* Executes store motion of memory reference REF from LOOP.  The
2207    initialization of the temporary variable is put to the preheader of the
2208    loop; the assignments to the reference on the loop exits are emitted
2209    later by execute_sm_exit, using the state recorded in AUX_MAP.  */
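/* A sketch of the overall effect on a hypothetical loop (the conditional
   variant using a store flag is shown in the comment before
   execute_sm_if_changed):

     for (...)
       MEM = MEM + x;

   becomes

     lsm = MEM;
     for (...)
       lsm = lsm + x;
     MEM = lsm;		<-- emitted on the exit edges by execute_sm_exit  */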
2210 
2211 static void
2212 execute_sm (class loop *loop, im_mem_ref *ref,
2213 	    hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
2214 	    bool use_other_flag_var)
2215 {
2216   gassign *load;
2217   struct fmt_data fmt_data;
2218   struct lim_aux_data *lim_data;
2219   bool multi_threaded_model_p = false;
2220   gimple_stmt_iterator gsi;
2221   sm_aux *aux = new sm_aux;
2222 
2223   if (dump_file && (dump_flags & TDF_DETAILS))
2224     {
2225       fprintf (dump_file, "Executing store motion of ");
2226       print_generic_expr (dump_file, ref->mem.ref);
2227       fprintf (dump_file, " from loop %d\n", loop->num);
2228     }
2229 
2230   aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2231 				 get_lsm_tmp_name (ref->mem.ref, ~0));
2232 
2233   fmt_data.loop = loop;
2234   fmt_data.orig_loop = loop;
2235   for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
2236 
2237   bool always_stored = ref_always_accessed_p (loop, ref, true);
2238   if (maybe_mt
2239       && (bb_in_transaction (loop_preheader_edge (loop)->src)
2240 	  || (! flag_store_data_races && ! always_stored)))
2241     multi_threaded_model_p = true;
2242 
2243   if (multi_threaded_model_p && !use_other_flag_var)
2244     aux->store_flag
2245       = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
2246   else
2247     aux->store_flag = NULL_TREE;
2248 
2249   /* Remember variable setup.  */
2250   aux_map.put (ref, aux);
2251 
2252   rewrite_mem_refs (loop, ref, aux->tmp_var);
2253 
2254   /* Emit the load code before the first location of the reference in the
2255      loop; the statement is marked so that move_computations will hoist it
2256      out of LOOP after all of its dependencies.  */
2257   gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2258 
2259   /* Avoid doing a load if there was no load of the ref in the loop.
2260      Esp. when the ref is not always stored we cannot optimize it
2261      away later.  But when it is not always stored we must use a conditional
2262      store then.  */
2263   if ((!always_stored && !multi_threaded_model_p)
2264       || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
2265     load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
2266   else
2267     {
2268       /* If not emitting a load mark the uninitialized state on the
2269 	 loop entry as not to be warned for.  */
2270       tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
2271       suppress_warning (uninit, OPT_Wuninitialized);
2272       load = gimple_build_assign (aux->tmp_var, uninit);
2273     }
2274   lim_data = init_lim_data (load);
2275   lim_data->max_loop = loop;
2276   lim_data->tgt_loop = loop;
2277   gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2278 
2279   if (aux->store_flag)
2280     {
2281       load = gimple_build_assign (aux->store_flag, boolean_false_node);
2282       lim_data = init_lim_data (load);
2283       lim_data->max_loop = loop;
2284       lim_data->tgt_loop = loop;
2285       gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2286     }
2287 }
2288 
2289 /* sm_ord is used for ordinary stores for which we can retain the order
2290        with respect to other stores
2291    sm_unord is used for conditionally executed stores which need to be
2292        able to execute in arbitrary order with respect to other stores
2293    sm_other is used for stores we do not try to apply store motion to.  */
2294 enum sm_kind { sm_ord, sm_unord, sm_other };
2295 struct seq_entry
2296 {
2297   seq_entry () {}
2298   seq_entry (unsigned f, sm_kind k, tree fr = NULL)
2299     : first (f), second (k), from (fr) {}
2300   unsigned first;
2301   sm_kind second;
2302   tree from;
2303 };
2304 
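/* Helper for hoist_memory_references.  Materialize the stores of the
   sequence SEQ on the loop exit edge EX, using the temporaries and flags
   recorded in AUX_MAP.  KIND selects between the order-preserving (sm_ord)
   and the unordered (sm_unord) materialization; APPEND_COND_POSITION and
   LAST_COND_FALLTHRU are handed down to execute_sm_if_changed so that
   conditional stores stay chained in their original order.  */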
2305 static void
2306 execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
2307 		 hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
2308 		 edge &append_cond_position, edge &last_cond_fallthru)
2309 {
2310   /* Sink the stores to exit from the loop.  */
2311   for (unsigned i = seq.length (); i > 0; --i)
2312     {
2313       im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
2314       if (seq[i-1].second == sm_other)
2315 	{
2316 	  gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
2317 	  if (dump_file && (dump_flags & TDF_DETAILS))
2318 	    {
2319 	      fprintf (dump_file, "Re-issuing dependent store of ");
2320 	      print_generic_expr (dump_file, ref->mem.ref);
2321 	      fprintf (dump_file, " from loop %d on exit %d -> %d\n",
2322 		       loop->num, ex->src->index, ex->dest->index);
2323 	    }
2324 	  gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref),
2325 						seq[i-1].from);
2326 	  gsi_insert_on_edge (ex, store);
2327 	}
2328       else
2329 	{
2330 	  sm_aux *aux = *aux_map.get (ref);
2331 	  if (!aux->store_flag || kind == sm_ord)
2332 	    {
2333 	      gassign *store;
2334 	      store = gimple_build_assign (unshare_expr (ref->mem.ref),
2335 					   aux->tmp_var);
2336 	      gsi_insert_on_edge (ex, store);
2337 	    }
2338 	  else
2339 	    execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
2340 				   aux->store_flag,
2341 				   loop_preheader_edge (loop), &aux->flag_bbs,
2342 				   append_cond_position, last_cond_fallthru);
2343 	}
2344     }
2345 }
2346 
2347 /* Push the SM candidate at index PTR in the sequence SEQ down until
2348    we hit the next SM candidate.  Return true if that went OK and
2349    false if we could not disambiguate against another unrelated ref.
2350    Update *AT to the index where the candidate now resides.  */
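/* For illustration, with a hypothetical SEQ = { A:sm_ord,
   B:sm_other (no from), C:sm_ord } and PTR = 2: C is swapped with B if the
   two refs can be disambiguated and then stops in front of A because A is
   sm_ord, leaving *AT = 1 and returning true; if C cannot be disambiguated
   against B, false is returned.  */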
2351 
2352 static bool
2353 sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
2354 {
2355   *at = ptr;
2356   for (; ptr > 0; --ptr)
2357     {
2358       seq_entry &new_cand = seq[ptr];
2359       seq_entry &against = seq[ptr-1];
2360       if (against.second == sm_ord
2361 	  || (against.second == sm_other && against.from != NULL_TREE))
2362 	/* Found the tail of the sequence.  */
2363 	break;
2364       /* We may not ignore self-dependences here.  */
2365       if (new_cand.first == against.first
2366 	  || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
2367 				  memory_accesses.refs_list[against.first],
2368 				  false))
2369 	/* ???  Prune new_cand from the list of refs to apply SM to.  */
2370 	return false;
2371       std::swap (new_cand, against);
2372       *at = ptr - 1;
2373     }
2374   return true;
2375 }
2376 
2377 /* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
2378    walking backwards from VDEF (or the end of BB if VDEF is NULL).  */
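/* Note on the return value (as checked by the callers): 1 means the walk
   completed, possibly after marking some refs in REFS_NOT_SUPPORTED; -1
   means an unanalyzable memory reference was hit; 0 means no virtual
   definition to start the walk from was found.  */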
2379 
2380 static int
2381 sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
2382 		 vec<seq_entry> &seq, bitmap refs_not_in_seq,
2383 		 bitmap refs_not_supported, bool forked,
2384 		 bitmap fully_visited)
2385 {
2386   if (!vdef)
2387     for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
2388 	 gsi_prev (&gsi))
2389       {
2390 	vdef = gimple_vdef (gsi_stmt (gsi));
2391 	if (vdef)
2392 	  break;
2393       }
2394   if (!vdef)
2395     {
2396       gphi *vphi = get_virtual_phi (bb);
2397       if (vphi)
2398 	vdef = gimple_phi_result (vphi);
2399     }
2400   if (!vdef)
2401     {
2402       if (single_pred_p (bb))
2403 	/* This handles the perfect nest case.  */
2404 	return sm_seq_valid_bb (loop, single_pred (bb), vdef,
2405 				seq, refs_not_in_seq, refs_not_supported,
2406 				forked, fully_visited);
2407       return 0;
2408     }
2409   do
2410     {
2411       gimple *def = SSA_NAME_DEF_STMT (vdef);
2412       if (gimple_bb (def) != bb)
2413 	{
2414 	  /* If we forked by processing a PHI do not allow our walk to
2415 	     merge again until we handle that robustly.  */
2416 	  if (forked)
2417 	    {
2418 	      /* Mark refs_not_in_seq as unsupported.  */
2419 	      bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2420 	      return 1;
2421 	    }
2422 	  /* Otherwise it doesn't really matter if we end up in different
2423 	     BBs.  */
2424 	  bb = gimple_bb (def);
2425 	}
2426       if (gphi *phi = dyn_cast <gphi *> (def))
2427 	{
2428 	  /* Handle CFG merges.  Until we handle forks (gimple_bb (def) != bb)
2429 	     this is still linear.
2430 	     Eventually we want to cache intermediate results per BB
2431 	     (but we can't easily cache for different exits?).  */
2432 	  /* Stop at PHIs with possible backedges.  */
2433 	  if (bb == bb->loop_father->header
2434 	      || bb->flags & BB_IRREDUCIBLE_LOOP)
2435 	    {
2436 	      /* Mark refs_not_in_seq as unsupported.  */
2437 	      bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2438 	      return 1;
2439 	    }
2440 	  if (gimple_phi_num_args (phi) == 1)
2441 	    return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2442 				    gimple_phi_arg_def (phi, 0), seq,
2443 				    refs_not_in_seq, refs_not_supported,
2444 				    false, fully_visited);
2445 	  if (bitmap_bit_p (fully_visited,
2446 			    SSA_NAME_VERSION (gimple_phi_result (phi))))
2447 	    return 1;
2448 	  auto_vec<seq_entry> first_edge_seq;
2449 	  auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
2450 	  int eret;
2451 	  bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
2452 	  eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2453 				  gimple_phi_arg_def (phi, 0),
2454 				  first_edge_seq,
2455 				  tem_refs_not_in_seq, refs_not_supported,
2456 				  true, fully_visited);
2457 	  if (eret != 1)
2458 	    return -1;
2459 	  /* Simplify our lives by pruning the sequence of !sm_ord.  */
2460 	  while (!first_edge_seq.is_empty ()
2461 		 && first_edge_seq.last ().second != sm_ord)
2462 	    first_edge_seq.pop ();
2463 	  for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
2464 	    {
2465 	      tree vuse = gimple_phi_arg_def (phi, i);
2466 	      edge e = gimple_phi_arg_edge (phi, i);
2467 	      auto_vec<seq_entry> edge_seq;
2468 	      bitmap_and_compl (tem_refs_not_in_seq,
2469 				refs_not_in_seq, refs_not_supported);
2470 	      /* If we've marked all refs we search for as unsupported
2471 		 we can stop processing and use the sequence as before
2472 		 the PHI.  */
2473 	      if (bitmap_empty_p (tem_refs_not_in_seq))
2474 		return 1;
2475 	      eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
2476 				      tem_refs_not_in_seq, refs_not_supported,
2477 				      true, fully_visited);
2478 	      if (eret != 1)
2479 		return -1;
2480 	      /* Simplify our lives by pruning the sequence of !sm_ord.  */
2481 	      while (!edge_seq.is_empty ()
2482 		     && edge_seq.last ().second != sm_ord)
2483 		edge_seq.pop ();
2484 	      unsigned min_len = MIN(first_edge_seq.length (),
2485 				     edge_seq.length ());
2486 	      /* Incrementally merge seqs into first_edge_seq.  */
2487 	      int first_uneq = -1;
2488 	      auto_vec<seq_entry, 2> extra_refs;
2489 	      for (unsigned int i = 0; i < min_len; ++i)
2490 		{
2491 		  /* ???  We can more intelligently merge when we face different
2492 		     order by additional sinking operations in one sequence.
2493 		     For now we simply mark them as to be processed by the
2494 		     not order-preserving SM code.  */
2495 		  if (first_edge_seq[i].first != edge_seq[i].first)
2496 		    {
2497 		      if (first_edge_seq[i].second == sm_ord)
2498 			bitmap_set_bit (refs_not_supported,
2499 					first_edge_seq[i].first);
2500 		      if (edge_seq[i].second == sm_ord)
2501 			bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2502 		      first_edge_seq[i].second = sm_other;
2503 		      first_edge_seq[i].from = NULL_TREE;
2504 		      /* Record the dropped refs for later processing.  */
2505 		      if (first_uneq == -1)
2506 			first_uneq = i;
2507 		      extra_refs.safe_push (seq_entry (edge_seq[i].first,
2508 						       sm_other, NULL_TREE));
2509 		    }
2510 		  /* sm_other prevails.  */
2511 		  else if (first_edge_seq[i].second != edge_seq[i].second)
2512 		    {
2513 		      /* Make sure the ref is marked as not supported.  */
2514 		      bitmap_set_bit (refs_not_supported,
2515 				      first_edge_seq[i].first);
2516 		      first_edge_seq[i].second = sm_other;
2517 		      first_edge_seq[i].from = NULL_TREE;
2518 		    }
2519 		  else if (first_edge_seq[i].second == sm_other
2520 			   && first_edge_seq[i].from != NULL_TREE
2521 			   && (edge_seq[i].from == NULL_TREE
2522 			       || !operand_equal_p (first_edge_seq[i].from,
2523 						    edge_seq[i].from, 0)))
2524 		    first_edge_seq[i].from = NULL_TREE;
2525 		}
2526 	      /* Any excess elements become sm_other since they are now
2527 		 conditionally executed.  */
2528 	      if (first_edge_seq.length () > edge_seq.length ())
2529 		{
2530 		  for (unsigned i = edge_seq.length ();
2531 		       i < first_edge_seq.length (); ++i)
2532 		    {
2533 		      if (first_edge_seq[i].second == sm_ord)
2534 			bitmap_set_bit (refs_not_supported,
2535 					first_edge_seq[i].first);
2536 		      first_edge_seq[i].second = sm_other;
2537 		    }
2538 		}
2539 	      else if (edge_seq.length () > first_edge_seq.length ())
2540 		{
2541 		  if (first_uneq == -1)
2542 		    first_uneq = first_edge_seq.length ();
2543 		  for (unsigned i = first_edge_seq.length ();
2544 		       i < edge_seq.length (); ++i)
2545 		    {
2546 		      if (edge_seq[i].second == sm_ord)
2547 			bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2548 		      extra_refs.safe_push (seq_entry (edge_seq[i].first,
2549 						       sm_other, NULL_TREE));
2550 		    }
2551 		}
2552 	      /* Put unmerged refs at first_uneq to force dependence checking
2553 		 on them.  */
2554 	      if (first_uneq != -1)
2555 		{
2556 		  /* Missing ordered_splice_at.  */
2557 		  if ((unsigned)first_uneq == first_edge_seq.length ())
2558 		    first_edge_seq.safe_splice (extra_refs);
2559 		  else
2560 		    {
2561 		      unsigned fes_length = first_edge_seq.length ();
2562 		      first_edge_seq.safe_grow (fes_length
2563 						+ extra_refs.length ());
2564 		      memmove (&first_edge_seq[first_uneq + extra_refs.length ()],
2565 			       &first_edge_seq[first_uneq],
2566 			       (fes_length - first_uneq) * sizeof (seq_entry));
2567 		      memcpy (&first_edge_seq[first_uneq],
2568 			      extra_refs.address (),
2569 			      extra_refs.length () * sizeof (seq_entry));
2570 		    }
2571 		}
2572 	    }
2573 	  /* Use the sequence from the first edge and push SMs down.  */
2574 	  for (unsigned i = 0; i < first_edge_seq.length (); ++i)
2575 	    {
2576 	      unsigned id = first_edge_seq[i].first;
2577 	      seq.safe_push (first_edge_seq[i]);
2578 	      unsigned new_idx;
2579 	      if ((first_edge_seq[i].second == sm_ord
2580 		   || (first_edge_seq[i].second == sm_other
2581 		       && first_edge_seq[i].from != NULL_TREE))
2582 		  && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
2583 		{
2584 		  if (first_edge_seq[i].second == sm_ord)
2585 		    bitmap_set_bit (refs_not_supported, id);
2586 		  /* Mark it sm_other.  */
2587 		  seq[new_idx].second = sm_other;
2588 		  seq[new_idx].from = NULL_TREE;
2589 		}
2590 	    }
2591 	  bitmap_set_bit (fully_visited,
2592 			  SSA_NAME_VERSION (gimple_phi_result (phi)));
2593 	  return 1;
2594 	}
2595       lim_aux_data *data = get_lim_data (def);
2596       gcc_assert (data);
2597       if (data->ref == UNANALYZABLE_MEM_ID)
2598 	return -1;
2599       /* Stop at memory references which we can't move.  */
2600       else if (memory_accesses.refs_list[data->ref]->mem.ref == error_mark_node
2601 	       || TREE_THIS_VOLATILE
2602 		    (memory_accesses.refs_list[data->ref]->mem.ref))
2603 	{
2604 	  /* Mark refs_not_in_seq as unsupported.  */
2605 	  bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2606 	  return 1;
2607 	}
2608       /* One of the stores we want to apply SM to and we've not yet seen.  */
2609       else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
2610 	{
2611 	  seq.safe_push (seq_entry (data->ref, sm_ord));
2612 
2613 	  /* 1) push it down the queue until a SMed
2614 	     and not ignored ref is reached, skipping all not SMed refs
2615 	     and ignored refs via non-TBAA disambiguation.  */
2616 	  unsigned new_idx;
2617 	  if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
2618 	      /* If that fails but we did not fork yet continue, we'll see
2619 		 to re-materialize all of the stores in the sequence then.
2620 		 Further stores will only be pushed up to this one.  */
2621 	      && forked)
2622 	    {
2623 	      bitmap_set_bit (refs_not_supported, data->ref);
2624 	      /* Mark it sm_other.  */
2625 	      seq[new_idx].second = sm_other;
2626 	    }
2627 
2628 	  /* 2) check whether we've seen all refs we want to SM and if so
2629 	     declare success for the active exit.  */
2630 	  if (bitmap_empty_p (refs_not_in_seq))
2631 	    return 1;
2632 	}
2633       else
2634 	/* Another store not part of the final sequence.  Simply push it.  */
2635 	seq.safe_push (seq_entry (data->ref, sm_other,
2636 				  gimple_assign_rhs1 (def)));
2637 
2638       vdef = gimple_vuse (def);
2639     }
2640   while (1);
2641 }
2642 
2643 /* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
2644    edges of the LOOP.  */
2645 
2646 static void
2647 hoist_memory_references (class loop *loop, bitmap mem_refs,
2648 			 const vec<edge> &exits)
2649 {
2650   im_mem_ref *ref;
2651   unsigned  i;
2652   bitmap_iterator bi;
2653 
2654   /* There's a special case we can use ordered re-materialization for
2655      conditionally executed stores, which is when all stores in the loop
2656      happen in the same basic-block.  In that case we know we'll reach
2657      all stores and thus can simply process that BB and emit a single
2658      conditional block of ordered materializations.  See PR102436.  */
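  /* For illustration, a hypothetical loop of the shape

       for (...)
	 if (cond)
	   {
	     A = ...;
	     B = ...;
	   }

     has all stores in one conditional basic-block, so a single flag-guarded
     block with the stores to A and B in their original order can be emitted
     on each exit.  */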
2659   basic_block single_store_bb = NULL;
2660   EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
2661 			    0, i, bi)
2662     {
2663       bool fail = false;
2664       ref = memory_accesses.refs_list[i];
2665       for (auto loc : ref->accesses_in_loop)
2666 	if (!gimple_vdef (loc.stmt))
2667 	  ;
2668 	else if (!single_store_bb)
2669 	  {
2670 	    single_store_bb = gimple_bb (loc.stmt);
2671 	    bool conditional = false;
2672 	    for (edge e : exits)
2673 	      if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
2674 		{
2675 		  /* Conditional as seen from e.  */
2676 		  conditional = true;
2677 		  break;
2678 		}
2679 	    if (!conditional)
2680 	      {
2681 		fail = true;
2682 		break;
2683 	      }
2684 	  }
2685 	else if (single_store_bb != gimple_bb (loc.stmt))
2686 	  {
2687 	    fail = true;
2688 	    break;
2689 	  }
2690       if (fail)
2691 	{
2692 	  single_store_bb = NULL;
2693 	  break;
2694 	}
2695     }
2696   if (single_store_bb)
2697     {
2698       /* Analyze the single block with stores.  */
2699       auto_bitmap fully_visited;
2700       auto_bitmap refs_not_supported;
2701       auto_bitmap refs_not_in_seq;
2702       auto_vec<seq_entry> seq;
2703       bitmap_copy (refs_not_in_seq, mem_refs);
2704       int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
2705 				 seq, refs_not_in_seq, refs_not_supported,
2706 				 false, fully_visited);
2707       if (res != 1)
2708 	{
2709 	  /* Unhandled refs can still fail this.  */
2710 	  bitmap_clear (mem_refs);
2711 	  return;
2712 	}
2713 
2714       /* We cannot handle sm_other since we neither remember the
2715 	 stored location nor the value at the point we execute them.  */
2716       for (unsigned i = 0; i < seq.length (); ++i)
2717 	{
2718 	  unsigned new_i;
2719 	  if (seq[i].second == sm_other
2720 	      && seq[i].from != NULL_TREE)
2721 	    seq[i].from = NULL_TREE;
2722 	  else if ((seq[i].second == sm_ord
2723 		    || (seq[i].second == sm_other
2724 			&& seq[i].from != NULL_TREE))
2725 		   && !sm_seq_push_down (seq, i, &new_i))
2726 	    {
2727 	      bitmap_set_bit (refs_not_supported, seq[new_i].first);
2728 	      seq[new_i].second = sm_other;
2729 	      seq[new_i].from = NULL_TREE;
2730 	    }
2731 	}
2732       bitmap_and_compl_into (mem_refs, refs_not_supported);
2733       if (bitmap_empty_p (mem_refs))
2734 	return;
2735 
2736       /* Prune seq.  */
2737       while (seq.last ().second == sm_other
2738 	     && seq.last ().from == NULL_TREE)
2739 	seq.pop ();
2740 
2741       hash_map<im_mem_ref *, sm_aux *> aux_map;
2742 
2743       /* Execute SM but delay the store materialization for ordered
2744 	 sequences on exit.  */
2745       bool first_p = true;
2746       EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2747 	{
2748 	  ref = memory_accesses.refs_list[i];
2749 	  execute_sm (loop, ref, aux_map, true, !first_p);
2750 	  first_p = false;
2751 	}
2752 
2753       /* Get at the single flag variable we eventually produced.  */
2754       im_mem_ref *ref
2755 	= memory_accesses.refs_list[bitmap_first_set_bit (mem_refs)];
2756       sm_aux *aux = *aux_map.get (ref);
2757 
2758       /* Materialize ordered store sequences on exits.  */
2759       edge e;
2760       FOR_EACH_VEC_ELT (exits, i, e)
2761 	{
2762 	  edge append_cond_position = NULL;
2763 	  edge last_cond_fallthru = NULL;
2764 	  edge insert_e = e;
2765 	  /* Construct the single flag variable control flow and insert
2766 	     the ordered seq of stores in the then block.  With
2767 	     -fstore-data-races we can do the stores unconditionally.  */
2768 	  if (aux->store_flag)
2769 	    insert_e
2770 	      = single_pred_edge
2771 		  (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
2772 					  aux->store_flag,
2773 					  loop_preheader_edge (loop),
2774 					  &aux->flag_bbs, append_cond_position,
2775 					  last_cond_fallthru));
2776 	  execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
2777 			   append_cond_position, last_cond_fallthru);
2778 	  gsi_commit_one_edge_insert (insert_e, NULL);
2779 	}
2780 
2781       for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2782 	   iter != aux_map.end (); ++iter)
2783 	delete (*iter).second;
2784 
2785       return;
2786     }
2787 
2788   /* To address PR57359, before actually applying store motion, check
2789      the candidates found for validity with regard to reordering
2790      relative to other stores, which until here we disambiguated using
2791      TBAA, which isn't valid for that purpose.
2792      What matters is the order of the last stores to the mem_refs
2793      with respect to the other stores of the loop at the point of the
2794      loop exits.  */
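  /* A hypothetical example of the problem: if the loop body contains

       *p = ...;
       *q = ...;

     and both are store-motion candidates, the stores emitted on the exits
     must keep *p before *q whenever the two may overlap at runtime, even
     if TBAA would have claimed they are independent.  */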
2795 
2796   /* For each exit compute the store order, pruning from mem_refs
2797      on the fly.  */
2798   /* The complexity of this is at least
2799      O(number of exits * number of SM refs) but more approaching
2800      O(number of exits * number of SM refs * number of stores).  */
2801   /* ???  Somehow do this in a single sweep over the loop body.  */
2802   auto_vec<std::pair<edge, vec<seq_entry> > > sms;
2803   auto_bitmap refs_not_supported (&lim_bitmap_obstack);
2804   edge e;
2805   FOR_EACH_VEC_ELT (exits, i, e)
2806     {
2807       vec<seq_entry> seq;
2808       seq.create (4);
2809       auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
2810       bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
2811       if (bitmap_empty_p (refs_not_in_seq))
2812 	{
2813 	  seq.release ();
2814 	  break;
2815 	}
2816       auto_bitmap fully_visited;
2817       int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
2818 				 seq, refs_not_in_seq,
2819 				 refs_not_supported, false,
2820 				 fully_visited);
2821       if (res != 1)
2822 	{
2823 	  bitmap_copy (refs_not_supported, mem_refs);
2824 	  seq.release ();
2825 	  break;
2826 	}
2827       sms.safe_push (std::make_pair (e, seq));
2828     }
2829 
2830   /* Prune now-unsupported mem_refs from earlier processed exits.  */
2831   bool changed = !bitmap_empty_p (refs_not_supported);
2832   while (changed)
2833     {
2834       changed = false;
2835       std::pair<edge, vec<seq_entry> > *seq;
2836       FOR_EACH_VEC_ELT (sms, i, seq)
2837 	{
2838 	  bool need_to_push = false;
2839 	  for (unsigned i = 0; i < seq->second.length (); ++i)
2840 	    {
2841 	      sm_kind kind = seq->second[i].second;
2842 	      if (kind == sm_other && seq->second[i].from == NULL_TREE)
2843 		break;
2844 	      unsigned id = seq->second[i].first;
2845 	      unsigned new_idx;
2846 	      if (kind == sm_ord
2847 		  && bitmap_bit_p (refs_not_supported, id))
2848 		{
2849 		  seq->second[i].second = sm_other;
2850 		  gcc_assert (seq->second[i].from == NULL_TREE);
2851 		  need_to_push = true;
2852 		}
2853 	      else if (need_to_push
2854 		       && !sm_seq_push_down (seq->second, i, &new_idx))
2855 		{
2856 		  /* We need to push down both sm_ord and sm_other
2857 		     but for the latter we need to disqualify all
2858 		     following refs.  */
2859 		  if (kind == sm_ord)
2860 		    {
2861 		      if (bitmap_set_bit (refs_not_supported, id))
2862 			changed = true;
2863 		      seq->second[new_idx].second = sm_other;
2864 		    }
2865 		  else
2866 		    {
2867 		      for (unsigned j = seq->second.length () - 1;
2868 			   j > new_idx; --j)
2869 			if (seq->second[j].second == sm_ord
2870 			    && bitmap_set_bit (refs_not_supported,
2871 					       seq->second[j].first))
2872 			  changed = true;
2873 		      seq->second.truncate (new_idx);
2874 		      break;
2875 		    }
2876 		}
2877 	    }
2878 	}
2879     }
2880   std::pair<edge, vec<seq_entry> > *seq;
2881   FOR_EACH_VEC_ELT (sms, i, seq)
2882     {
2883       /* Prune sm_other from the end.  */
2884       while (!seq->second.is_empty ()
2885 	     && seq->second.last ().second == sm_other)
2886 	seq->second.pop ();
2887       /* Prune duplicates from the start.  */
2888       auto_bitmap seen (&lim_bitmap_obstack);
2889       unsigned j, k;
2890       for (j = k = 0; j < seq->second.length (); ++j)
2891 	if (bitmap_set_bit (seen, seq->second[j].first))
2892 	  {
2893 	    if (k != j)
2894 	      seq->second[k] = seq->second[j];
2895 	    ++k;
2896 	  }
2897       seq->second.truncate (k);
2898       /* And verify.  */
2899       seq_entry *e;
2900       FOR_EACH_VEC_ELT (seq->second, j, e)
2901 	gcc_assert (e->second == sm_ord
2902 		    || (e->second == sm_other && e->from != NULL_TREE));
2903     }
2904 
2905   /* Verify dependence for refs we cannot handle with the order preserving
2906      code (refs_not_supported) or prune them from mem_refs.  */
2907   auto_vec<seq_entry> unord_refs;
2908   EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
2909     {
2910       ref = memory_accesses.refs_list[i];
2911       if (!ref_indep_loop_p (loop, ref, sm_waw))
2912 	bitmap_clear_bit (mem_refs, i);
2913       /* We've now verified that the store order for ref with respect to all
2914 	 other stores in the loop does not matter.  */
2915       else
2916 	unord_refs.safe_push (seq_entry (i, sm_unord));
2917     }
2918 
2919   hash_map<im_mem_ref *, sm_aux *> aux_map;
2920 
2921   /* Execute SM but delay the store materialization for ordered
2922      sequences on exit.  */
2923   EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2924     {
2925       ref = memory_accesses.refs_list[i];
2926       execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
2927 		  false);
2928     }
2929 
2930   /* Materialize ordered store sequences on exits.  */
2931   FOR_EACH_VEC_ELT (exits, i, e)
2932     {
2933       edge append_cond_position = NULL;
2934       edge last_cond_fallthru = NULL;
2935       if (i < sms.length ())
2936 	{
2937 	  gcc_assert (sms[i].first == e);
2938 	  execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord,
2939 			   append_cond_position, last_cond_fallthru);
2940 	  sms[i].second.release ();
2941 	}
2942       if (!unord_refs.is_empty ())
2943 	execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord,
2944 			 append_cond_position, last_cond_fallthru);
2945       /* Commit edge inserts here to preserve the order of stores
2946 	 when an exit exits multiple loops.  */
2947       gsi_commit_one_edge_insert (e, NULL);
2948     }
2949 
2950   for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2951        iter != aux_map.end (); ++iter)
2952     delete (*iter).second;
2953 }
2954 
2955 class ref_always_accessed
2956 {
2957 public:
2958   ref_always_accessed (class loop *loop_, bool stored_p_)
2959       : loop (loop_), stored_p (stored_p_) {}
2960   bool operator () (mem_ref_loc *loc);
2961   class loop *loop;
2962   bool stored_p;
2963 };
2964 
2965 bool
2966 ref_always_accessed::operator () (mem_ref_loc *loc)
2967 {
2968   class loop *must_exec;
2969 
2970   struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
2971   if (!lim_data)
2972     return false;
2973 
2974   /* If we require an always executed store make sure the statement
2975      is a store.  */
2976   if (stored_p)
2977     {
2978       tree lhs = gimple_get_lhs (loc->stmt);
2979       if (!lhs
2980 	  || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
2981 	return false;
2982     }
2983 
2984   must_exec = lim_data->always_executed_in;
2985   if (!must_exec)
2986     return false;
2987 
2988   if (must_exec == loop
2989       || flow_loop_nested_p (must_exec, loop))
2990     return true;
2991 
2992   return false;
2993 }
2994 
2995 /* Returns true if REF is always accessed in LOOP.  If STORED_P is true
2996    make sure REF is always stored to in LOOP.  */
2997 
2998 static bool
2999 ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
3000 {
3001   return for_all_locs_in_loop (loop, ref,
3002 			       ref_always_accessed (loop, stored_p));
3003 }
3004 
3005 /* Returns true if REF1 and REF2 are independent.  */
3006 
3007 static bool
3008 refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
3009 {
3010   if (ref1 == ref2)
3011     return true;
3012 
3013   if (dump_file && (dump_flags & TDF_DETAILS))
3014     fprintf (dump_file, "Querying dependency of refs %u and %u: ",
3015 	     ref1->id, ref2->id);
3016 
3017   if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
3018     {
3019       if (dump_file && (dump_flags & TDF_DETAILS))
3020 	fprintf (dump_file, "dependent.\n");
3021       return false;
3022     }
3023   else
3024     {
3025       if (dump_file && (dump_flags & TDF_DETAILS))
3026 	fprintf (dump_file, "independent.\n");
3027       return true;
3028     }
3029 }
3030 
3031 /* Returns true if REF is independent of all other accesses in LOOP.
3032    KIND specifies the kind of dependence to consider.
3033      lim_raw assumes REF is not stored in LOOP and disambiguates RAW
3034 	     dependences so if true REF can be hoisted out of LOOP
3035      sm_war disambiguates a store REF against all other loads to see
3036 	    whether the store can be sunk across loads out of LOOP
3037      sm_waw disambiguates a store REF against all other stores to see
3038 	    whether the store can be sunk across stores out of LOOP.  */
3039 
3040 static bool
3041 ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
3042 {
3043   bool indep_p = true;
3044   bitmap refs_to_check;
3045 
3046   if (kind == sm_war)
3047     refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
3048   else
3049     refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
3050 
3051   if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
3052       || ref->mem.ref == error_mark_node)
3053     indep_p = false;
3054   else
3055     {
3056       /* tri-state, { unknown, independent, dependent }  */
3057       dep_state state = query_loop_dependence (loop, ref, kind);
3058       if (state != dep_unknown)
3059 	return state == dep_independent ? true : false;
3060 
3061       class loop *inner = loop->inner;
3062       while (inner)
3063 	{
3064 	  if (!ref_indep_loop_p (inner, ref, kind))
3065 	    {
3066 	      indep_p = false;
3067 	      break;
3068 	    }
3069 	  inner = inner->next;
3070 	}
3071 
3072       if (indep_p)
3073 	{
3074 	  unsigned i;
3075 	  bitmap_iterator bi;
3076 	  EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
3077 	    {
3078 	      im_mem_ref *aref = memory_accesses.refs_list[i];
3079 	      if (aref->mem.ref == error_mark_node)
3080 		{
3081 		  gimple *stmt = aref->accesses_in_loop[0].stmt;
3082 		  if ((kind == sm_war
3083 		       && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
3084 						    kind != sm_waw))
3085 		      || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
3086 						   kind != sm_waw))
3087 		    {
3088 		      indep_p = false;
3089 		      break;
3090 		    }
3091 		}
3092 	      else if (!refs_independent_p (ref, aref, kind != sm_waw))
3093 		{
3094 		  indep_p = false;
3095 		  break;
3096 		}
3097 	    }
3098 	}
3099     }
3100 
3101   if (dump_file && (dump_flags & TDF_DETAILS))
3102     fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
3103 	     kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
3104 	     ref->id, loop->num, indep_p ? "independent" : "dependent");
3105 
3106   /* Record the computed result in the cache.  */
3107   record_loop_dependence (loop, ref, kind,
3108 			  indep_p ? dep_independent : dep_dependent);
3109 
3110   return indep_p;
3111 }
3112 
3113 class ref_in_loop_hot_body
3114 {
3115 public:
3116   ref_in_loop_hot_body (class loop *loop_) : l (loop_) {}
3117   bool operator () (mem_ref_loc *loc);
3118   class loop *l;
3119 };
3120 
3121 /* Check the coldest loop between loop L and the innermost loop.  If there is
3122    a cold loop between L and INNER_LOOP, store motion can be performed;
3123    otherwise no cold loop means no store motion.  get_coldest_out_loop also
3124    handles the case when L is INNER_LOOP itself.  */
3125 bool
3126 ref_in_loop_hot_body::operator () (mem_ref_loc *loc)
3127 {
3128   basic_block curr_bb = gimple_bb (loc->stmt);
3129   class loop *inner_loop = curr_bb->loop_father;
3130   return get_coldest_out_loop (l, inner_loop, curr_bb);
3131 }
3132 
3133 
3134 /* Returns true if we can perform store motion of REF from LOOP.  */
3135 
3136 static bool
3137 can_sm_ref_p (class loop *loop, im_mem_ref *ref)
3138 {
3139   tree base;
3140 
3141   /* Can't hoist unanalyzable refs.  */
3142   if (!MEM_ANALYZABLE (ref))
3143     return false;
3144 
3145   /* Can't hoist/sink aggregate copies.  */
3146   if (ref->mem.ref == error_mark_node)
3147     return false;
3148 
3149   /* It should be movable.  */
3150   if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
3151       || TREE_THIS_VOLATILE (ref->mem.ref)
3152       || !for_each_index (&ref->mem.ref, may_move_till, loop))
3153     return false;
3154 
3155   /* If it can throw, fail; we do not properly update EH info.  */
3156   if (tree_could_throw_p (ref->mem.ref))
3157     return false;
3158 
3159   /* If it can trap, it must be always executed in LOOP.
3160      Readonly memory locations may trap when storing to them, but
3161      tree_could_trap_p is a predicate for rvalues, so check that
3162      explicitly.  */
3163   base = get_base_address (ref->mem.ref);
3164   if ((tree_could_trap_p (ref->mem.ref)
3165        || (DECL_P (base) && TREE_READONLY (base)))
3166       /* ???  We can at least use false here, allowing loads?  We
3167 	 are forcing conditional stores if the ref is not always
3168 	 stored to later anyway.  So this would only guard
3169 	 the load we need to emit.  Thus when the ref is not
3170 	 loaded we can elide this completely?  */
3171       && !ref_always_accessed_p (loop, ref, true))
3172     return false;
3173 
3174   /* Verify all loads of ref can be hoisted.  */
3175   if (ref->loaded
3176       && bitmap_bit_p (ref->loaded, loop->num)
3177       && !ref_indep_loop_p (loop, ref, lim_raw))
3178     return false;
3179 
3180   /* Verify the candidate can be disambiguated against all loads,
3181      that is, we can elide all in-loop stores.  Disambiguation
3182      against stores is done later when we cannot guarantee preserving
3183      the order of stores.  */
3184   if (!ref_indep_loop_p (loop, ref, sm_war))
3185     return false;
3186 
3187   /* Verify whether the candidate is hot for LOOP.  Only do store motion if
3188     the candidate's profile count is hot.  A statement in a cold BB shouldn't
3189     be moved out of its loop_father.  */
3190   if (!for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body (loop)))
3191     return false;
3192 
3193   return true;
3194 }
3195 
3196 /* Marks in REFS_TO_SM the references in LOOP for which store motion should
3197    be performed.  SM_EXECUTED is the set of references for which store
3198    motion was performed in one of the outer loops.  */
3199 
3200 static void
3201 find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
3202 {
3203   bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
3204   unsigned i;
3205   bitmap_iterator bi;
3206   im_mem_ref *ref;
3207 
3208   EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
3209     {
3210       ref = memory_accesses.refs_list[i];
3211       if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
3212 	bitmap_set_bit (refs_to_sm, i);
3213     }
3214 }
3215 
3216 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
3217    for a store motion optimization (i.e. whether we can insert statements
3218    on its exits).  */
3219 
3220 static bool
3221 loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
3222 		      const vec<edge> &exits)
3223 {
3224   unsigned i;
3225   edge ex;
3226 
3227   FOR_EACH_VEC_ELT (exits, i, ex)
3228     if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
3229       return false;
3230 
3231   return true;
3232 }
3233 
3234 /* Try to perform store motion for all memory references modified inside
3235    LOOP.  SM_EXECUTED is the bitmap of the memory references for which
3236    store motion was executed in one of the outer loops.  */
3237 
3238 static void
3239 store_motion_loop (class loop *loop, bitmap sm_executed)
3240 {
3241   auto_vec<edge> exits = get_loop_exit_edges (loop);
3242   class loop *subloop;
3243   bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
3244 
3245   if (loop_suitable_for_sm (loop, exits))
3246     {
3247       find_refs_for_sm (loop, sm_executed, sm_in_loop);
3248       if (!bitmap_empty_p (sm_in_loop))
3249 	hoist_memory_references (loop, sm_in_loop, exits);
3250     }
3251 
3252   bitmap_ior_into (sm_executed, sm_in_loop);
3253   for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
3254     store_motion_loop (subloop, sm_executed);
3255   bitmap_and_compl_into (sm_executed, sm_in_loop);
3256   BITMAP_FREE (sm_in_loop);
3257 }
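
/* For instance, in a nest like

   for (i = 0; i < n; i++)
     for (j = 0; j < m; j++)
       g += a[i][j];

   (an illustrative example; G is a global that does not alias A) the
   reference to G is subjected to store motion while the outer loop is
   processed, is then recorded in SM_EXECUTED, and is therefore skipped
   by find_refs_for_sm when this function recurses into the inner
   loop.  */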
3258 
3259 /* Try to perform store motion for all memory references modified inside
3260    loops.  */
3261 
3262 static void
3263 do_store_motion (void)
3264 {
3265   class loop *loop;
3266   bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
3267 
3268   for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3269     store_motion_loop (loop, sm_executed);
3270 
3271   BITMAP_FREE (sm_executed);
3272 }
3273 
3274 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
3275    for each such basic block bb records the outermost loop for which
3276    execution of its header implies execution of bb.  CONTAINS_CALL is the
3277    bitmap of blocks that contain a nonpure call.  */
3278 
3279 static void
3280 fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
3281 {
3282   basic_block bb = NULL, last = NULL;
3283   edge e;
3284   class loop *inn_loop = loop;
3285 
3286   if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
3287     {
3288       auto_vec<basic_block, 64> worklist;
3289       worklist.reserve_exact (loop->num_nodes);
3290       worklist.quick_push (loop->header);
3291       do
3292 	{
3293 	  edge_iterator ei;
3294 	  bb = worklist.pop ();
3295 
3296 	  if (!flow_bb_inside_loop_p (inn_loop, bb))
3297 	    {
3298 	      /* When we are leaving a possibly infinite inner loop
3299 		 we have to stop processing.  */
3300 	      if (!finite_loop_p (inn_loop))
3301 		break;
3302 	      /* If the loop was finite we can continue with processing
3303 		 the loop we exited to.  */
3304 	      inn_loop = bb->loop_father;
3305 	    }
3306 
3307 	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
3308 	    last = bb;
3309 
3310 	  if (bitmap_bit_p (contains_call, bb->index))
3311 	    break;
3312 
3313 	  /* If LOOP exits from this BB, stop processing.  */
3314 	  FOR_EACH_EDGE (e, ei, bb->succs)
3315 	    if (!flow_bb_inside_loop_p (loop, e->dest))
3316 	      break;
3317 	  if (e)
3318 	    break;
3319 
3320 	  /* A loop might be infinite (TODO use simple loop analysis
3321 	     to disprove this if possible).  */
3322 	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
3323 	    break;
3324 
3325 	  if (bb->loop_father->header == bb)
3326 	    /* Record that we enter a subloop, since it might not
3327 	       be finite.  */
3328 	    /* ???  Entering a subloop that is not always executed makes
3329 	       fill_always_executed_in quadratic in the loop depth, since
3330 	       we walk those loops N times.  This is not a problem
3331 	       in practice though; see PR102253 for a worst-case testcase.  */
3332 	    inn_loop = bb->loop_father;
3333 
3334 	  /* Walk the body of LOOP sorted by dominance relation.  Additionally,
3335 	     if a basic block S dominates the latch, then only blocks dominated
3336 	     by S are after it.
3337 	     This is get_loop_body_in_dom_order using a worklist algorithm and
3338 	     stopping once we are no longer interested in visiting further
3339 	     blocks.  */
3340 	  unsigned old_len = worklist.length ();
3341 	  unsigned postpone = 0;
3342 	  for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
3343 	       son;
3344 	       son = next_dom_son (CDI_DOMINATORS, son))
3345 	    {
3346 	      if (!flow_bb_inside_loop_p (loop, son))
3347 		continue;
3348 	      if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
3349 		postpone = worklist.length ();
3350 	      worklist.quick_push (son);
3351 	    }
3352 	  if (postpone)
3353 	    /* Postponing the block that dominates the latch means
3354 	       processing it last and thus putting it earliest in the
3355 	       worklist (which is popped from the end).  */
3356 	    std::swap (worklist[old_len], worklist[postpone]);
3357 	}
3358       while (!worklist.is_empty ());
3359 
3360       while (1)
3361 	{
3362 	  if (dump_enabled_p ())
3363 	    dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
3364 			 last->index, loop->num);
3365 	  SET_ALWAYS_EXECUTED_IN (last, loop);
3366 	  if (last == loop->header)
3367 	    break;
3368 	  last = get_immediate_dominator (CDI_DOMINATORS, last);
3369 	}
3370     }
3371 
3372   for (loop = loop->inner; loop; loop = loop->next)
3373     fill_always_executed_in_1 (loop, contains_call);
3374 }
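
/* For example (made-up code, not from the sources), in

   do
     {
       x = ...;      <- always executed: this block dominates the latch
       if (c)
	 y = ...;    <- not always executed
       z = ...;      <- always executed: the join dominates the latch
     }
   while (--n);

   the header block and the join block get ALWAYS_EXECUTED_IN set to the
   loop while the conditionally executed block does not; a nonpure call
   or a loop exit met during the walk stops the marking at the last
   block seen that dominates the latch.  */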
3375 
3376 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
3377    for each basic block bb records the outermost loop for which
3378    execution of its header implies execution of bb.  */
3379 
3380 static void
3381 fill_always_executed_in (void)
3382 {
3383   basic_block bb;
3384   class loop *loop;
3385 
3386   auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
3387   bitmap_clear (contains_call);
3388   FOR_EACH_BB_FN (bb, cfun)
3389     {
3390       gimple_stmt_iterator gsi;
3391       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3392 	{
3393 	  if (nonpure_call_p (gsi_stmt (gsi)))
3394 	    break;
3395 	}
3396 
3397       if (!gsi_end_p (gsi))
3398 	bitmap_set_bit (contains_call, bb->index);
3399     }
3400 
3401   for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
3402     fill_always_executed_in_1 (loop, contains_call);
3403 }
3404 
3405 /* Find the loop with the coldest preheader for LOOP, and also the nearest
3406    hotter loop to LOOP.  Then recursively process each inner loop.  */
3407 
3408 void
3409 fill_coldest_and_hotter_out_loop (class loop *coldest_loop,
3410 				  class loop *hotter_loop, class loop *loop)
3411 {
3412   if (bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3413 				     coldest_loop))
3414     coldest_loop = loop;
3415 
3416   coldest_outermost_loop[loop->num] = coldest_loop;
3417 
3418   hotter_than_inner_loop[loop->num] = NULL;
3419   class loop *outer_loop = loop_outer (loop);
3420   if (hotter_loop
3421       && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3422 					hotter_loop))
3423     hotter_than_inner_loop[loop->num] = hotter_loop;
3424 
3425   if (outer_loop && outer_loop != current_loops->tree_root
3426       && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src,
3427 					outer_loop))
3428     hotter_than_inner_loop[loop->num] = outer_loop;
3429 
3430   if (dump_enabled_p ())
3431     {
3432       dump_printf (MSG_NOTE, "loop %d's coldest_outermost_loop is %d, ",
3433 		   loop->num, coldest_loop->num);
3434       if (hotter_than_inner_loop[loop->num])
3435 	dump_printf (MSG_NOTE, "hotter_than_inner_loop is %d\n",
3436 		     hotter_than_inner_loop[loop->num]->num);
3437       else
3438 	dump_printf (MSG_NOTE, "hotter_than_inner_loop is NULL\n");
3439     }
3440 
3441   class loop *inner_loop;
3442   for (inner_loop = loop->inner; inner_loop; inner_loop = inner_loop->next)
3443     fill_coldest_and_hotter_out_loop (coldest_loop,
3444 				      hotter_than_inner_loop[loop->num],
3445 				      inner_loop);
3446 }
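
/* A made-up example of what the two arrays record.  For a nest with
   profile counts

   loop 1, preheader count 100
     loop 2, preheader count 1000
       loop 3, preheader count 90

   coldest_outermost_loop[2] is loop 1 (loop 2's own preheader is
   hotter), coldest_outermost_loop[3] is loop 3 (its preheader is the
   coldest on the path from loop 1), and hotter_than_inner_loop[3] is
   loop 2, the nearest enclosing loop whose preheader is hotter than
   loop 3's.  The loop numbers and counts are illustrative only.  */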
3447 
3448 /* Compute the global information needed by the loop invariant motion pass.  */
3449 
3450 static void
3451 tree_ssa_lim_initialize (bool store_motion)
3452 {
3453   unsigned i;
3454 
3455   bitmap_obstack_initialize (&lim_bitmap_obstack);
3456   gcc_obstack_init (&mem_ref_obstack);
3457   lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
3458 
3459   if (flag_tm)
3460     compute_transaction_bits ();
3461 
3462   memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
3463   memory_accesses.refs_list.create (100);
3464   /* Allocate a special, unanalyzable mem-ref with ID zero.  */
3465   memory_accesses.refs_list.quick_push
3466     (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
3467 
3468   memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
3469   memory_accesses.refs_loaded_in_loop.quick_grow (number_of_loops (cfun));
3470   memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
3471   memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
3472   if (store_motion)
3473     {
3474       memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
3475       memory_accesses.all_refs_stored_in_loop.quick_grow
3476 						      (number_of_loops (cfun));
3477     }
3478 
3479   for (i = 0; i < number_of_loops (cfun); i++)
3480     {
3481       bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
3482 			 &lim_bitmap_obstack);
3483       bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
3484 			 &lim_bitmap_obstack);
3485       if (store_motion)
3486 	bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
3487 			   &lim_bitmap_obstack);
3488     }
3489 
3490   memory_accesses.ttae_cache = NULL;
3491 
3492   /* Initialize bb_loop_postorder with a mapping from loop->num to
3493      its postorder index.  */
3494   i = 0;
3495   bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
3496   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
3497     bb_loop_postorder[loop->num] = i++;
3498 }
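
/* The LI_FROM_INNERMOST walk above visits inner loops before the loops
   containing them, so for a nest loop 1 { loop 2 { loop 3 } } the
   resulting mapping is bb_loop_postorder[3] = 0, bb_loop_postorder[2] = 1
   and bb_loop_postorder[1] = 2 (an illustrative sketch; the actual loop
   numbers depend on how the loop tree was built).  */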
3499 
3500 /* Cleans up after the invariant motion pass.  */
3501 
3502 static void
3503 tree_ssa_lim_finalize (void)
3504 {
3505   basic_block bb;
3506   unsigned i;
3507   im_mem_ref *ref;
3508 
3509   FOR_EACH_BB_FN (bb, cfun)
3510     SET_ALWAYS_EXECUTED_IN (bb, NULL);
3511 
3512   bitmap_obstack_release (&lim_bitmap_obstack);
3513   delete lim_aux_data_map;
3514 
3515   delete memory_accesses.refs;
3516   memory_accesses.refs = NULL;
3517 
3518   FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
3519     memref_free (ref);
3520   memory_accesses.refs_list.release ();
3521   obstack_free (&mem_ref_obstack, NULL);
3522 
3523   memory_accesses.refs_loaded_in_loop.release ();
3524   memory_accesses.refs_stored_in_loop.release ();
3525   memory_accesses.all_refs_stored_in_loop.release ();
3526 
3527   if (memory_accesses.ttae_cache)
3528     free_affine_expand_cache (&memory_accesses.ttae_cache);
3529 
3530   free (bb_loop_postorder);
3531 
3532   coldest_outermost_loop.release ();
3533   hotter_than_inner_loop.release ();
3534 }
3535 
3536 /* Moves invariants from loops.  Only "expensive" invariants are moved out --
3537    i.e. those that are likely to be a win regardless of the register pressure.
3538    Only perform store motion if STORE_MOTION is true.  */
3539 
3540 unsigned int
3541 loop_invariant_motion_in_fun (function *fun, bool store_motion)
3542 {
3543   unsigned int todo = 0;
3544 
3545   tree_ssa_lim_initialize (store_motion);
3546 
3547   mark_ssa_maybe_undefs ();
3548 
3549   /* Gathers information about memory accesses in the loops.  */
3550   analyze_memory_references (store_motion);
3551 
3552   /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
3553   fill_always_executed_in ();
3554 
3555   /* Pre-compute coldest outermost loop and nearest hotter loop of each
3556      loop.  */
3557   class loop *loop;
3558   coldest_outermost_loop.create (number_of_loops (cfun));
3559   coldest_outermost_loop.safe_grow_cleared (number_of_loops (cfun));
3560   hotter_than_inner_loop.create (number_of_loops (cfun));
3561   hotter_than_inner_loop.safe_grow_cleared (number_of_loops (cfun));
3562   for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3563     fill_coldest_and_hotter_out_loop (loop, NULL, loop);
3564 
3565   int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3566   int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3567 
3568   /* For each statement determine the outermost loop in which it is
3569      invariant and the cost of computing the invariant.  */
3570   for (int i = 0; i < n; ++i)
3571     compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3572 
3573   /* Execute store motion.  Force the necessary invariants to be moved
3574      out of the loops as well.  */
3575   if (store_motion)
3576     do_store_motion ();
3577 
3578   free (rpo);
3579   rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3580   n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3581 
3582   /* Move the expressions that are expensive enough.  */
3583   for (int i = 0; i < n; ++i)
3584     todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3585 
3586   free (rpo);
3587 
3588   gsi_commit_edge_inserts ();
3589   if (need_ssa_update_p (fun))
3590     rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3591 
3592   tree_ssa_lim_finalize ();
3593 
3594   return todo;
3595 }
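
/* Sketch of the overall effect on a simple function (illustrative
   names, not actual pass output):

   for (i = 0; i < n; i++)
     a[i] = x * y;

   becomes roughly

   tmp = x * y;
   for (i = 0; i < n; i++)
     a[i] = tmp;

   with the invariant multiplication hoisted to the preheader; when
   STORE_MOTION is true, loop-invariant memory references are in
   addition rewritten so that their stores happen on the loop exits.  */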
3596 
3597 /* Loop invariant motion pass.  */
3598 
3599 namespace {
3600 
3601 const pass_data pass_data_lim =
3602 {
3603   GIMPLE_PASS, /* type */
3604   "lim", /* name */
3605   OPTGROUP_LOOP, /* optinfo_flags */
3606   TV_LIM, /* tv_id */
3607   PROP_cfg, /* properties_required */
3608   0, /* properties_provided */
3609   0, /* properties_destroyed */
3610   0, /* todo_flags_start */
3611   0, /* todo_flags_finish */
3612 };
3613 
3614 class pass_lim : public gimple_opt_pass
3615 {
3616 public:
3617   pass_lim (gcc::context *ctxt)
3618     : gimple_opt_pass (pass_data_lim, ctxt)
3619   {}
3620 
3621   /* opt_pass methods: */
3622   opt_pass * clone () { return new pass_lim (m_ctxt); }
3623   virtual bool gate (function *) { return flag_tree_loop_im != 0; }
3624   virtual unsigned int execute (function *);
3625 
3626 }; // class pass_lim
3627 
3628 unsigned int
3629 pass_lim::execute (function *fun)
3630 {
3631   bool in_loop_pipeline = scev_initialized_p ();
3632   if (!in_loop_pipeline)
3633     loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
3634 
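  /* Nothing to do if the function contains no real loops; loop 0 is
     the fake loop representing the whole function body.  */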
3635   if (number_of_loops (fun) <= 1)
3636     return 0;
3637   unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores);
3638 
3639   if (!in_loop_pipeline)
3640     loop_optimizer_finalize ();
3641   else
3642     scev_reset ();
3643   return todo;
3644 }
3645 
3646 } // anon namespace
3647 
3648 gimple_opt_pass *
3649 make_pass_lim (gcc::context *ctxt)
3650 {
3651   return new pass_lim (ctxt);
3652 }
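
/* The pass is registered under the name "lim" (see pass_data_lim above)
   and gated on flag_tree_loop_im, i.e. -ftree-loop-im; whether store
   motion is performed as well is controlled by flag_move_loop_stores
   (see pass_lim::execute).  Its per-function dump can be requested with
   the usual -fdump-tree-lim* options.  */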
3653 
3654 
3655