/* Loop invariant motion.
   Copyright (C) 2003-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "domwalk.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "trans-mem.h"
#include "gimple-fold.h"
#include "tree-scalar-evolution.h"
#include "tree-ssa-loop-niter.h"
#include "alias.h"
#include "builtins.h"
#include "tree-dfa.h"

/* TODO:  Support for predicated code motion.  I.e.

   while (1)
     {
       if (cond)
	 {
	   a = inv;
	   something;
	 }
     }

   Where COND and INV are invariants, but evaluating INV may trap or be
   invalid for some other reason if !COND.  This may be transformed to

   if (cond)
     a = inv;
   while (1)
     {
       if (cond)
	 something;
     }  */

/* The auxiliary data kept for each statement.  */

struct lim_aux_data
{
  class loop *max_loop;		/* The outermost loop in which the statement
				   is invariant.  */

  class loop *tgt_loop;		/* The loop out of which we want to move the
				   invariant.  */

  class loop *always_executed_in;
				/* The outermost loop for which we are sure
				   the statement is executed if the loop
				   is entered.  */

  unsigned cost;		/* Cost of the computation performed by the
				   statement.  */

  unsigned ref;			/* The simple_mem_ref in this stmt or 0.  */

  vec<gimple *> depends;	/* Vector of statements that must also be
				   hoisted out of the loop when this statement
				   is hoisted; i.e. those that define the
				   operands of the statement and are inside of
				   the MAX_LOOP loop.  */
};

/* Maps statements to their lim_aux_data.  */

static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;

/* Description of a memory reference location.  */

struct mem_ref_loc
{
  tree *ref;			/* The reference itself.  */
  gimple *stmt;			/* The statement in which it occurs.  */
};


/* Description of a memory reference.  */

class im_mem_ref
{
public:
  unsigned id : 30;		/* ID assigned to the memory reference
				   (its index in memory_accesses.refs_list)  */
  unsigned ref_canonical : 1;	/* Whether mem.ref was canonicalized.  */
  unsigned ref_decomposed : 1;	/* Whether the ref was hashed from mem.  */
  hashval_t hash;		/* Its hash value.  */

  /* The memory access itself and associated caching of alias-oracle
     query meta-data.  */
  ao_ref mem;

  bitmap stored;		/* The set of loops in which this memory
				   location is stored to.  */
  vec<mem_ref_loc> accesses_in_loop;
				/* The locations of the accesses.  Vector
				   indexed by the loop number.  */

  /* The following sets are computed on demand.  We keep both the set and
     its complement, so that we know whether the information was
     already computed or not.  */
  bitmap_head indep_loop;	/* The set of loops in which the memory
				   reference is independent, meaning:
				   If it is stored in the loop, this store
				   is independent of all other loads and
				   stores.
				   If it is only loaded, then it is independent
				   of all stores in the loop.  */
  bitmap_head dep_loop;		/* The complement of INDEP_LOOP.  */
};

/* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
   to record (in)dependence against stores in the loop and its subloops, the
   second to record (in)dependence against all references in the loop
   and its subloops.  */
#define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))
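/* For instance (purely illustrative numbers), the loop with number 4 owns
   bits 8 (LOOP_DEP_BIT (4, false)) and 9 (LOOP_DEP_BIT (4, true)) in these
   bitmaps.  */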

/* Mem_ref hashtable helpers.  */

struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
{
  typedef ao_ref *compare_type;
  static inline hashval_t hash (const im_mem_ref *);
  static inline bool equal (const im_mem_ref *, const ao_ref *);
};

/* A hash function for class im_mem_ref object MEM.  */

inline hashval_t
mem_ref_hasher::hash (const im_mem_ref *mem)
{
  return mem->hash;
}

/* An equality function for class im_mem_ref object MEM1 with
   memory reference OBJ2.  */

inline bool
mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
{
  if (obj2->max_size_known_p ())
    return (mem1->ref_decomposed
	    && operand_equal_p (mem1->mem.base, obj2->base, 0)
	    && known_eq (mem1->mem.offset, obj2->offset)
	    && known_eq (mem1->mem.size, obj2->size)
	    && known_eq (mem1->mem.max_size, obj2->max_size)
	    && mem1->mem.volatile_p == obj2->volatile_p
	    && (mem1->mem.ref_alias_set == obj2->ref_alias_set
		/* We are not canonicalizing alias-sets, but for the
		   special case where we didn't canonicalize yet and
		   the incoming ref is an alias-set zero MEM we pick
		   the correct one already.  */
		|| (!mem1->ref_canonical
		    && (TREE_CODE (obj2->ref) == MEM_REF
			|| TREE_CODE (obj2->ref) == TARGET_MEM_REF)
		    && obj2->ref_alias_set == 0)
		/* Likewise if there's a canonical ref with alias-set zero.  */
		|| (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
	    && types_compatible_p (TREE_TYPE (mem1->mem.ref),
				   TREE_TYPE (obj2->ref)));
  else
    return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
}


/* Description of memory accesses in loops.  */

static struct
{
  /* The hash table of memory references accessed in loops.  */
  hash_table<mem_ref_hasher> *refs;

  /* The list of memory references.  */
  vec<im_mem_ref *> refs_list;

  /* The set of memory references accessed in each loop.  */
  vec<bitmap_head> refs_in_loop;

  /* The set of memory references stored in each loop.  */
  vec<bitmap_head> refs_stored_in_loop;

  /* The set of memory references stored in each loop, including subloops.  */
  vec<bitmap_head> all_refs_stored_in_loop;

  /* Cache for expanding memory addresses.  */
  hash_map<tree, name_expansion *> *ttae_cache;
} memory_accesses;

/* Obstack for the bitmaps in the above data structures.  */
static bitmap_obstack lim_bitmap_obstack;
static obstack mem_ref_obstack;

static bool ref_indep_loop_p (class loop *, im_mem_ref *);
static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);

/* Minimum cost of an expensive expression.  */
#define LIM_EXPENSIVE ((unsigned) param_lim_expensive)

/* The outermost loop for which execution of the header guarantees that the
   block will be executed.  */
#define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))

/* ID of the shared unanalyzable mem.  */
#define UNANALYZABLE_MEM_ID 0

/* Whether the reference was analyzable.  */
#define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)

static struct lim_aux_data *
init_lim_data (gimple *stmt)
{
  lim_aux_data *p = XCNEW (struct lim_aux_data);
  lim_aux_data_map->put (stmt, p);

  return p;
}

static struct lim_aux_data *
get_lim_data (gimple *stmt)
{
  lim_aux_data **p = lim_aux_data_map->get (stmt);
  if (!p)
    return NULL;

  return *p;
}

/* Releases the memory occupied by DATA.  */

static void
free_lim_aux_data (struct lim_aux_data *data)
{
  data->depends.release ();
  free (data);
}

static void
clear_lim_data (gimple *stmt)
{
  lim_aux_data **p = lim_aux_data_map->get (stmt);
  if (!p)
    return;

  free_lim_aux_data (*p);
  *p = NULL;
}


/* The possibilities of statement movement.  */
enum move_pos
{
  MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
  MOVE_PRESERVE_EXECUTION,	/* Must not cause the non-executed statement
				   to become executed -- memory accesses, ...  */
  MOVE_POSSIBLE			/* Unlimited movement.  */
};
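
/* Illustrative examples (not exhaustive): a volatile access is
   MOVE_IMPOSSIBLE, a load that might trap (e.g. through a possibly-null
   pointer) is MOVE_PRESERVE_EXECUTION, and plain arithmetic on SSA names
   is typically MOVE_POSSIBLE.  */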

/* If it is possible to hoist the statement STMT unconditionally,
   returns MOVE_POSSIBLE.
   If it is possible to hoist the statement STMT, but we must avoid making
   it executed if it would not be executed in the original program (e.g.
   because it may trap), return MOVE_PRESERVE_EXECUTION.
   Otherwise return MOVE_IMPOSSIBLE.  */

enum move_pos
movement_possibility (gimple *stmt)
{
  tree lhs;
  enum move_pos ret = MOVE_POSSIBLE;

  if (flag_unswitch_loops
      && gimple_code (stmt) == GIMPLE_COND)
    {
      /* If we perform unswitching, force the operands of the invariant
	 condition to be moved out of the loop.  */
      return MOVE_POSSIBLE;
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_phi_num_args (stmt) <= 2
      && !virtual_operand_p (gimple_phi_result (stmt))
      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
    return MOVE_POSSIBLE;

  if (gimple_get_lhs (stmt) == NULL_TREE)
    return MOVE_IMPOSSIBLE;

  if (gimple_vdef (stmt))
    return MOVE_IMPOSSIBLE;

  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || gimple_has_side_effects (stmt)
      || stmt_could_throw_p (cfun, stmt))
    return MOVE_IMPOSSIBLE;

  if (is_gimple_call (stmt))
    {
      /* While a pure or const call is guaranteed to have no side effects, we
	 cannot move it arbitrarily.  Consider code like

	 char *s = something ();

	 while (1)
	   {
	     if (s)
	       t = strlen (s);
	     else
	       t = 0;
	   }

	 Here the strlen call cannot be moved out of the loop, even though
	 s is invariant.  In addition to possibly creating a call with
	 invalid arguments, moving out a function call that is not executed
	 may cause performance regressions in case the call is costly and
	 not executed at all.  */
      ret = MOVE_PRESERVE_EXECUTION;
      lhs = gimple_call_lhs (stmt);
    }
  else if (is_gimple_assign (stmt))
    lhs = gimple_assign_lhs (stmt);
  else
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) != SSA_NAME
      || gimple_could_trap_p (stmt))
    return MOVE_PRESERVE_EXECUTION;

  /* Non local loads in a transaction cannot be hoisted out.  Well,
     unless the load happens on every path out of the loop, but we
     don't take this into account yet.  */
  if (flag_tm
      && gimple_in_transaction (stmt)
      && gimple_assign_single_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);
      if (DECL_P (rhs) && is_global_var (rhs))
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, "Cannot hoist conditional load of ");
	      print_generic_expr (dump_file, rhs, TDF_SLIM);
	      fprintf (dump_file, " because it is in a transaction.\n");
	    }
	  return MOVE_IMPOSSIBLE;
	}
    }

  return ret;
}

/* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
   loop to which we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in which the value
   of DEF is invariant.  */
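/* For instance (illustrative depths), if LOOP is at depth 3 and DEF is
   defined in its depth-1 superloop, then -- absent any planned hoisting of
   DEF's own definition, accounted for via lim_data below -- the result is
   the depth-2 superloop of LOOP: the outermost enclosing loop in which DEF
   is already invariant.  */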

static class loop *
outermost_invariant_loop (tree def, class loop *loop)
{
  gimple *def_stmt;
  basic_block def_bb;
  class loop *max_loop;
  struct lim_aux_data *lim_data;

  if (!def)
    return superloop_at_depth (loop, 1);

  if (TREE_CODE (def) != SSA_NAME)
    {
      gcc_assert (is_gimple_min_invariant (def));
      return superloop_at_depth (loop, 1);
    }

  def_stmt = SSA_NAME_DEF_STMT (def);
  def_bb = gimple_bb (def_stmt);
  if (!def_bb)
    return superloop_at_depth (loop, 1);

  max_loop = find_common_loop (loop, def_bb->loop_father);

  lim_data = get_lim_data (def_stmt);
  if (lim_data != NULL && lim_data->max_loop != NULL)
    max_loop = find_common_loop (max_loop,
				 loop_outer (lim_data->max_loop));
  if (max_loop == loop)
    return NULL;
  max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);

  return max_loop;
}

/* DATA is a structure containing information associated with a statement
   inside LOOP.  DEF is one of the operands of this statement.

   Find the outermost loop enclosing LOOP in which the value of DEF is
   invariant and record this in the DATA->max_loop field.  If DEF itself is
   defined inside this loop as well (i.e. we need to hoist it out of the loop
   if we want to hoist the statement represented by DATA), record the
   statement in which DEF is defined in the DATA->depends list.  Additionally
   if ADD_COST is true, add the cost of the computation of DEF to DATA->cost.

   If DEF is not invariant in LOOP, return false.  Otherwise return TRUE.  */

static bool
add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
		bool add_cost)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);
  basic_block def_bb = gimple_bb (def_stmt);
  class loop *max_loop;
  struct lim_aux_data *def_data;

  if (!def_bb)
    return true;

  max_loop = outermost_invariant_loop (def, loop);
  if (!max_loop)
    return false;

  if (flow_loop_nested_p (data->max_loop, max_loop))
    data->max_loop = max_loop;

  def_data = get_lim_data (def_stmt);
  if (!def_data)
    return true;

  if (add_cost
      /* Only add the cost if the statement defining DEF is inside LOOP,
	 i.e. if it is likely that by moving the invariants dependent
	 on it, we will be able to avoid creating a new register for
	 it (since it will be only used in these dependent invariants).  */
      && def_bb->loop_father == loop)
    data->cost += def_data->cost;

  data->depends.safe_push (def_stmt);

  return true;
}

/* Returns an estimate for the cost of statement STMT.  The values here
   are just ad-hoc constants, similar to costs for inlining.  */

static unsigned
stmt_cost (gimple *stmt)
{
  /* Always try to create possibilities for unswitching.  */
  if (gimple_code (stmt) == GIMPLE_COND
      || gimple_code (stmt) == GIMPLE_PHI)
    return LIM_EXPENSIVE;

  /* We should be hoisting calls if possible.  */
  if (is_gimple_call (stmt))
    {
      tree fndecl;

      /* Unless the call is a builtin_constant_p; this always folds to a
	 constant, so moving it is useless.  */
      fndecl = gimple_call_fndecl (stmt);
      if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
	return 0;

      return LIM_EXPENSIVE;
    }

  /* Hoisting memory references out should almost surely be a win.  */
  if (gimple_references_memory_p (stmt))
    return LIM_EXPENSIVE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return 1;

  switch (gimple_assign_rhs_code (stmt))
    {
    case MULT_EXPR:
    case WIDEN_MULT_EXPR:
    case WIDEN_MULT_PLUS_EXPR:
    case WIDEN_MULT_MINUS_EXPR:
    case DOT_PROD_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
    case TRUNC_MOD_EXPR:
    case RDIV_EXPR:
      /* Division and multiplication are usually expensive.  */
      return LIM_EXPENSIVE;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case WIDEN_LSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      /* Shifts and rotates are usually expensive.  */
      return LIM_EXPENSIVE;

    case CONSTRUCTOR:
      /* Make vector construction cost proportional to the number
	 of elements.  */
      return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));

    case SSA_NAME:
    case PAREN_EXPR:
      /* Whether or not something is wrapped inside a PAREN_EXPR
	 should not change move cost.  Nor should an intermediate
	 unpropagated SSA name copy.  */
      return 0;

    default:
      return 1;
    }
}

/* Finds the outermost loop between OUTER and LOOP in which the memory
   reference REF is independent.  If REF is not independent in LOOP, NULL
   is returned instead.  */

static class loop *
outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
{
  class loop *aloop;

  if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
    return NULL;

  for (aloop = outer;
       aloop != loop;
       aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
    if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
	&& ref_indep_loop_p (aloop, ref))
      return aloop;

  if (ref_indep_loop_p (loop, ref))
    return loop;
  else
    return NULL;
}

/* If there is a simple load or store to a memory reference in STMT, returns
   the location of the memory reference, and sets IS_STORE according to whether
   it is a store or load.  Otherwise, returns NULL.  */

static tree *
simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
{
  tree *lhs, *rhs;

  /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
  if (!gimple_assign_single_p (stmt))
    return NULL;

  lhs = gimple_assign_lhs_ptr (stmt);
  rhs = gimple_assign_rhs1_ptr (stmt);

  if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
    {
      *is_store = false;
      return rhs;
    }
  else if (gimple_vdef (stmt)
	   && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
    {
      *is_store = true;
      return lhs;
    }
  else
    return NULL;
}

/* From a controlling predicate in DOM determine the arguments from
   the PHI node PHI that are chosen if the predicate evaluates to
   true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
   they are non-NULL.  Returns true if the arguments can be determined,
   else return false.  */

static bool
extract_true_false_args_from_phi (basic_block dom, gphi *phi,
				  tree *true_arg_p, tree *false_arg_p)
{
  edge te, fe;
  if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
					     &te, &fe))
    return false;

  if (true_arg_p)
    *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
  if (false_arg_p)
    *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);

  return true;
}

/* Determine the outermost loop to which it is possible to hoist a statement
   STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
   the outermost loop in which the value computed by STMT is invariant.
   If MUST_PRESERVE_EXEC is true, additionally choose the loop so that
   we preserve whether STMT is executed.  It also fills other related
   information to LIM_DATA (STMT).

   The function returns false if STMT cannot be hoisted outside of the loop it
   is defined in, and true otherwise.  */

static bool
determine_max_movement (gimple *stmt, bool must_preserve_exec)
{
  basic_block bb = gimple_bb (stmt);
  class loop *loop = bb->loop_father;
  class loop *level;
  struct lim_aux_data *lim_data = get_lim_data (stmt);
  tree val;
  ssa_op_iter iter;

  if (must_preserve_exec)
    level = ALWAYS_EXECUTED_IN (bb);
  else
    level = superloop_at_depth (loop, 1);
  lim_data->max_loop = level;

  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      use_operand_p use_p;
      unsigned min_cost = UINT_MAX;
      unsigned total_cost = 0;
      struct lim_aux_data *def_data;

      /* We will end up promoting dependencies to be unconditionally
	 evaluated.  For this reason the PHI cost (and thus the
	 cost we remove from the loop by doing the invariant motion)
	 is that of the cheapest PHI argument dependency chain.  */
      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  val = USE_FROM_PTR (use_p);

	  if (TREE_CODE (val) != SSA_NAME)
	    {
	      /* Assign const 1 to constants.  */
	      min_cost = MIN (min_cost, 1);
	      total_cost += 1;
	      continue;
	    }
	  if (!add_dependency (val, lim_data, loop, false))
	    return false;

	  gimple *def_stmt = SSA_NAME_DEF_STMT (val);
	  if (gimple_bb (def_stmt)
	      && gimple_bb (def_stmt)->loop_father == loop)
	    {
	      def_data = get_lim_data (def_stmt);
	      if (def_data)
		{
		  min_cost = MIN (min_cost, def_data->cost);
		  total_cost += def_data->cost;
		}
	    }
	}

      min_cost = MIN (min_cost, total_cost);
      lim_data->cost += min_cost;

      if (gimple_phi_num_args (phi) > 1)
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond;
	  if (gsi_end_p (gsi_last_bb (dom)))
	    return false;
	  cond = gsi_stmt (gsi_last_bb (dom));
	  if (gimple_code (cond) != GIMPLE_COND)
	    return false;
	  /* Verify that this is an extended form of a diamond and
	     the PHI arguments are completely controlled by the
	     predicate in DOM.  */
	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
	    return false;

	  /* Fold in dependencies and cost of the condition.  */
	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
	    {
	      if (!add_dependency (val, lim_data, loop, false))
		return false;
	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
	      if (def_data)
		lim_data->cost += def_data->cost;
	    }

	  /* We want to avoid unconditionally executing very expensive
	     operations.  As costs for our dependencies cannot be
	     negative, just claim we are not invariant for this case.
	     We also are not sure whether the control-flow inside the
	     loop will vanish.  */
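	  /* Worked example (assuming the default LIM_EXPENSIVE of 20):
	     with argument chains costing 1 and 50, total_cost - min_cost
	     == 49 >= 40 and total_cost / min_cost == 51 > 2, so we give
	     up; with costs 30 and 40, total_cost / min_cost == 2, so we
	     proceed despite total_cost - min_cost == 40.  */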
	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
	      && !(min_cost != 0
		   && total_cost / min_cost <= 2))
	    return false;

	  /* Assume that the control-flow in the loop will vanish.
	     ??? We should verify this and not artificially increase
	     the cost if that is not the case.  */
	  lim_data->cost += stmt_cost (stmt);
	}

      return true;
    }
  else
    FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
      if (!add_dependency (val, lim_data, loop, true))
	return false;

  if (gimple_vuse (stmt))
    {
      im_mem_ref *ref
	= lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
      if (ref
	  && MEM_ANALYZABLE (ref))
	{
	  lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
						     loop, ref);
	  if (!lim_data->max_loop)
	    return false;
	}
      else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
	return false;
    }

  lim_data->cost += stmt_cost (stmt);

  return true;
}

/* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
   and that one of the operands of this statement is computed by STMT.
   Ensure that STMT (together with all the statements that define its
   operands) is hoisted at least out of the loop LEVEL.  */

static void
set_level (gimple *stmt, class loop *orig_loop, class loop *level)
{
  class loop *stmt_loop = gimple_bb (stmt)->loop_father;
  struct lim_aux_data *lim_data;
  gimple *dep_stmt;
  unsigned i;

  stmt_loop = find_common_loop (orig_loop, stmt_loop);
  lim_data = get_lim_data (stmt);
  if (lim_data != NULL && lim_data->tgt_loop != NULL)
    stmt_loop = find_common_loop (stmt_loop,
				  loop_outer (lim_data->tgt_loop));
  if (flow_loop_nested_p (stmt_loop, level))
    return;

  gcc_assert (level == lim_data->max_loop
	      || flow_loop_nested_p (lim_data->max_loop, level));

  lim_data->tgt_loop = level;
  FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
    set_level (dep_stmt, orig_loop, level);
}

/* Determines an outermost loop out of which we want to hoist the statement
   STMT.  For now we choose the outermost possible loop.  TODO -- use
   profiling information to set it more sanely.  */

static void
set_profitable_level (gimple *stmt)
{
  set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
}

/* Returns true if STMT is a call that has side effects.  */

static bool
nonpure_call_p (gimple *stmt)
{
  if (gimple_code (stmt) != GIMPLE_CALL)
    return false;

  return gimple_has_side_effects (stmt);
}

/* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */
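/* Illustrative GIMPLE sketch (hypothetical SSA names): `x_1 = a_2 / b_3`
   becomes `reciptmp_4 = 1.0 / b_3; x_1 = a_2 * reciptmp_4`, so the
   reciprocal can be hoisted when b_3 is loop invariant.  */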

static gimple *
rewrite_reciprocal (gimple_stmt_iterator *bsi)
{
  gassign *stmt, *stmt1, *stmt2;
  tree name, lhs, type;
  tree real_one;
  gimple_stmt_iterator gsi;

  stmt = as_a <gassign *> (gsi_stmt (*bsi));
  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);

  real_one = build_one_cst (type);

  name = make_temp_ssa_name (type, NULL, "reciptmp");
  stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
			       gimple_assign_rhs2 (stmt));
  stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
			       gimple_assign_rhs1 (stmt));

  /* Replace division stmt with reciprocal and multiply stmts.
     The multiply stmt is not invariant, so update iterator
     and avoid rescanning.  */
  gsi = *bsi;
  gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
  gsi_replace (&gsi, stmt2, true);

  /* Continue processing with invariant reciprocal statement.  */
  return stmt1;
}

/* Check if the pattern at *BSI is a bittest of the form
   (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */
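/* A minimal before/after sketch (hypothetical SSA names):

     tmp_1 = a_2 >> b_3;          shifttmp_5 = 1 << b_3;
     tmp_4 = tmp_1 & 1;    ==>    shifttmp_6 = a_2 & shifttmp_5;
     if (tmp_4 != 0)              if (shifttmp_6 != 0)

   With B invariant and A not, the 1 << B computation can then be
   hoisted out of the loop.  */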

static gimple *
rewrite_bittest (gimple_stmt_iterator *bsi)
{
  gassign *stmt;
  gimple *stmt1;
  gassign *stmt2;
  gimple *use_stmt;
  gcond *cond_stmt;
  tree lhs, name, t, a, b;
  use_operand_p use;

  stmt = as_a <gassign *> (gsi_stmt (*bsi));
  lhs = gimple_assign_lhs (stmt);

  /* Verify that the single use of lhs is a comparison against zero.  */
  if (TREE_CODE (lhs) != SSA_NAME
      || !single_imm_use (lhs, &use, &use_stmt))
    return stmt;
  cond_stmt = dyn_cast <gcond *> (use_stmt);
  if (!cond_stmt)
    return stmt;
  if (gimple_cond_lhs (cond_stmt) != lhs
      || (gimple_cond_code (cond_stmt) != NE_EXPR
	  && gimple_cond_code (cond_stmt) != EQ_EXPR)
      || !integer_zerop (gimple_cond_rhs (cond_stmt)))
    return stmt;

  /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
  stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  if (gimple_code (stmt1) != GIMPLE_ASSIGN)
    return stmt;

  /* There is a conversion in between possibly inserted by fold.  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
    {
      t = gimple_assign_rhs1 (stmt1);
      if (TREE_CODE (t) != SSA_NAME
	  || !has_single_use (t))
	return stmt;
      stmt1 = SSA_NAME_DEF_STMT (t);
      if (gimple_code (stmt1) != GIMPLE_ASSIGN)
	return stmt;
    }

  /* Verify that B is loop invariant but A is not.  Verify that with
     all the stmt walking we are still in the same loop.  */
  if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
      || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
    return stmt;

  a = gimple_assign_rhs1 (stmt1);
  b = gimple_assign_rhs2 (stmt1);

  if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
      && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
    {
      gimple_stmt_iterator rsi;

      /* 1 << B */
      t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
		       build_int_cst (TREE_TYPE (a), 1), b);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
      stmt1 = gimple_build_assign (name, t);

      /* A & (1 << B) */
      t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
      stmt2 = gimple_build_assign (name, t);

      /* Replace the SSA_NAME we compare against zero.  Adjust
	 the type of zero accordingly.  */
      SET_USE (use, name);
      gimple_cond_set_rhs (cond_stmt,
			   build_int_cst_type (TREE_TYPE (name),
					       0));

      /* Don't use gsi_replace here, none of the new assignments sets
	 the variable originally set in stmt.  Move bsi to stmt1, and
	 then remove the original stmt, so that we get a chance to
	 retain debug info for it.  */
      rsi = *bsi;
      gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
      gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
      gimple *to_release = gsi_stmt (rsi);
      gsi_remove (&rsi, true);
      release_defs (to_release);

      return stmt1;
    }

  return stmt;
}

/* For each statement determines the outermost loop in which it is invariant,
   the statements on whose motion it depends and the cost of the computation.
   This information is stored to the LIM_DATA structure associated with
   each statement.  */
class invariantness_dom_walker : public dom_walker
{
public:
  invariantness_dom_walker (cdi_direction direction)
    : dom_walker (direction) {}

  virtual edge before_dom_children (basic_block);
};

/* Determine the outermost loops in which statements in basic block BB are
   invariant, and record them to the LIM_DATA associated with the statements.
   Callback for dom_walker.  */

edge
invariantness_dom_walker::before_dom_children (basic_block bb)
{
  enum move_pos pos;
  gimple_stmt_iterator bsi;
  gimple *stmt;
  bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
  class loop *outermost = ALWAYS_EXECUTED_IN (bb);
  struct lim_aux_data *lim_data;

  if (!loop_outer (bb->loop_father))
    return NULL;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
	     bb->index, bb->loop_father->num, loop_depth (bb->loop_father));

  /* Look at PHI nodes, but only if there are at most two.
     ??? We could relax this further by post-processing the inserted
     code and transforming adjacent cond-exprs with the same predicate
     to control flow again.  */
  bsi = gsi_start_phis (bb);
  if (!gsi_end_p (bsi)
      && ((gsi_next (&bsi), gsi_end_p (bsi))
	  || (gsi_next (&bsi), gsi_end_p (bsi))))
    for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
      {
	stmt = gsi_stmt (bsi);

	pos = movement_possibility (stmt);
	if (pos == MOVE_IMPOSSIBLE)
	  continue;

	lim_data = get_lim_data (stmt);
	if (! lim_data)
	  lim_data = init_lim_data (stmt);
	lim_data->always_executed_in = outermost;

	if (!determine_max_movement (stmt, false))
	  {
	    lim_data->max_loop = NULL;
	    continue;
	  }

	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    print_gimple_stmt (dump_file, stmt, 2);
	    fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		     loop_depth (lim_data->max_loop),
		     lim_data->cost);
	  }

	if (lim_data->cost >= LIM_EXPENSIVE)
	  set_profitable_level (stmt);
      }

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      stmt = gsi_stmt (bsi);

      pos = movement_possibility (stmt);
      if (pos == MOVE_IMPOSSIBLE)
	{
	  if (nonpure_call_p (stmt))
	    {
	      maybe_never = true;
	      outermost = NULL;
	    }
	  /* Make sure to note always_executed_in for stores to make
	     store-motion work.  */
	  else if (stmt_makes_single_store (stmt))
	    {
	      struct lim_aux_data *lim_data = get_lim_data (stmt);
	      if (! lim_data)
		lim_data = init_lim_data (stmt);
	      lim_data->always_executed_in = outermost;
	    }
	  continue;
	}

      if (is_gimple_assign (stmt)
	  && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
	      == GIMPLE_BINARY_RHS))
	{
	  tree op0 = gimple_assign_rhs1 (stmt);
	  tree op1 = gimple_assign_rhs2 (stmt);
	  class loop *ol1
	    = outermost_invariant_loop (op1, loop_containing_stmt (stmt));

	  /* If the divisor is invariant, convert a/b to a*(1/b), allowing
	     the reciprocal to be hoisted out of the loop, saving an
	     expensive divide.  */
	  if (pos == MOVE_POSSIBLE
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR
	      && flag_unsafe_math_optimizations
	      && !flag_trapping_math
	      && ol1 != NULL
	      && outermost_invariant_loop (op0, ol1) == NULL)
	    stmt = rewrite_reciprocal (&bsi);

	  /* If the shift count is invariant, convert (A >> B) & 1 to
	     A & (1 << B), allowing the bit mask to be hoisted out of the
	     loop and saving an expensive shift.  */
	  if (pos == MOVE_POSSIBLE
	      && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
	      && integer_onep (op1)
	      && TREE_CODE (op0) == SSA_NAME
	      && has_single_use (op0))
	    stmt = rewrite_bittest (&bsi);
	}

      lim_data = get_lim_data (stmt);
      if (! lim_data)
	lim_data = init_lim_data (stmt);
      lim_data->always_executed_in = outermost;

      if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
	continue;

      if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
	{
	  lim_data->max_loop = NULL;
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  print_gimple_stmt (dump_file, stmt, 2);
	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		   loop_depth (lim_data->max_loop),
		   lim_data->cost);
	}

      if (lim_data->cost >= LIM_EXPENSIVE)
	set_profitable_level (stmt);
    }
  return NULL;
}

/* Hoist the statements in basic block BB out of the loops prescribed by
   data stored in LIM_DATA structures associated with each statement.
   Called from move_computations in reverse post-order.  */

unsigned int
move_computations_worker (basic_block bb)
{
  class loop *level;
  unsigned cost = 0;
  struct lim_aux_data *lim_data;
  unsigned int todo = 0;

  if (!loop_outer (bb->loop_father))
    return todo;

  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
    {
      gassign *new_stmt;
      gphi *stmt = bsi.phi ();

      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
	{
	  gsi_next (&bsi);
	  continue;
	}

      cost = lim_data->cost;
      level = lim_data->tgt_loop;
      clear_lim_data (stmt);

      if (!level)
	{
	  gsi_next (&bsi);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving PHI node\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}

      if (gimple_phi_num_args (stmt) == 1)
	{
	  tree arg = PHI_ARG_DEF (stmt, 0);
	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
					  TREE_CODE (arg), arg);
	}
      else
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond = gsi_stmt (gsi_last_bb (dom));
	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
	  /* Get the PHI arguments corresponding to the true and false
	     edges of COND.  */
	  extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
	  gcc_assert (arg0 && arg1);
	  t = build2 (gimple_cond_code (cond), boolean_type_node,
		      gimple_cond_lhs (cond), gimple_cond_rhs (cond));
	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
					  COND_EXPR, t, arg0, arg1);
	  todo |= TODO_cleanup_cfg;
	}
      if (!ALWAYS_EXECUTED_IN (bb)
	  || (ALWAYS_EXECUTED_IN (bb) != level
	      && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))
	reset_flow_sensitive_info (gimple_assign_lhs (new_stmt));
      gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
      remove_phi_node (&bsi, false);
    }

  for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
    {
      edge e;

      gimple *stmt = gsi_stmt (bsi);

      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
	{
	  gsi_next (&bsi);
	  continue;
	}

      cost = lim_data->cost;
      level = lim_data->tgt_loop;
      clear_lim_data (stmt);

      if (!level)
	{
	  gsi_next (&bsi);
	  continue;
	}

      /* We do not really want to move conditionals out of the loop; we
	 only placed the conditional here to force its operands to be
	 moved if necessary.  */
      if (gimple_code (stmt) == GIMPLE_COND)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving statement\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}

      e = loop_preheader_edge (level);
      gcc_assert (!gimple_vdef (stmt));
      if (gimple_vuse (stmt))
	{
	  /* The new VUSE is the one from the virtual PHI in the loop
	     header or the one already present.  */
	  gphi_iterator gsi2;
	  for (gsi2 = gsi_start_phis (e->dest);
	       !gsi_end_p (gsi2); gsi_next (&gsi2))
	    {
	      gphi *phi = gsi2.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		{
		  SET_USE (gimple_vuse_op (stmt),
			   PHI_ARG_DEF_FROM_EDGE (phi, e));
		  break;
		}
	    }
	}
      gsi_remove (&bsi, false);
      if (gimple_has_lhs (stmt)
	  && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || !(ALWAYS_EXECUTED_IN (bb) == level
		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	reset_flow_sensitive_info (gimple_get_lhs (stmt));
      /* In case this is a stmt that is not unconditionally executed
	 when the target loop header is executed and the stmt may
	 invoke undefined integer or pointer overflow, rewrite it to
	 unsigned arithmetic.  */
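      /* Illustrative sketch (hypothetical SSA names): a hoisted signed
	 `tem_3 = i_1 + 1` may become
	 `tem.4_5 = (unsigned int) i_1; tem.4_6 = tem.4_5 + 1;
	  tem_3 = (int) tem.4_6;` on the preheader edge, so no signed
	 overflow is introduced on paths that did not execute it before.  */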
      if (is_gimple_assign (stmt)
	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
	  && arith_code_with_undefined_signed_overflow
	       (gimple_assign_rhs_code (stmt))
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || !(ALWAYS_EXECUTED_IN (bb) == level
		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
      else
	gsi_insert_on_edge (e, stmt);
    }

  return todo;
}

/* Hoist the statements out of the loops prescribed by data stored in
   LIM_DATA structures associated with each statement.  */

static unsigned int
move_computations (void)
{
  int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
  int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
  unsigned todo = 0;

  for (int i = 0; i < n; ++i)
    todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (cfun, rpo[i]));

  free (rpo);

  gsi_commit_edge_inserts ();
  if (need_ssa_update_p (cfun))
    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);

  return todo;
}

/* Checks whether the statement defining variable *INDEX can be hoisted
   out of the loop passed in DATA.  Callback for for_each_index.  */

static bool
may_move_till (tree ref, tree *index, void *data)
{
  class loop *loop = (class loop *) data, *max_loop;

  /* If REF is an array reference, check also that the step and the lower
     bound are invariant in LOOP.  */
  if (TREE_CODE (ref) == ARRAY_REF)
    {
      tree step = TREE_OPERAND (ref, 3);
      tree lbound = TREE_OPERAND (ref, 2);

      max_loop = outermost_invariant_loop (step, loop);
      if (!max_loop)
	return false;

      max_loop = outermost_invariant_loop (lbound, loop);
      if (!max_loop)
	return false;
    }

  max_loop = outermost_invariant_loop (*index, loop);
  if (!max_loop)
    return false;

  return true;
}
13151debfc3dSmrg
13161debfc3dSmrg /* If OP is an SSA_NAME, force the statement that defines it to be
13171debfc3dSmrg    moved out of the LOOP.  ORIG_LOOP is the loop in which OP is used.  */
13181debfc3dSmrg
13191debfc3dSmrg static void
1320*8feb0f0bSmrg force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
13211debfc3dSmrg {
13221debfc3dSmrg gimple *stmt;
13231debfc3dSmrg
13241debfc3dSmrg if (!op
13251debfc3dSmrg || is_gimple_min_invariant (op))
13261debfc3dSmrg return;
13271debfc3dSmrg
13281debfc3dSmrg gcc_assert (TREE_CODE (op) == SSA_NAME);
13291debfc3dSmrg
13301debfc3dSmrg stmt = SSA_NAME_DEF_STMT (op);
13311debfc3dSmrg if (gimple_nop_p (stmt))
13321debfc3dSmrg return;
13331debfc3dSmrg
13341debfc3dSmrg set_level (stmt, orig_loop, loop);
13351debfc3dSmrg }
13361debfc3dSmrg
13371debfc3dSmrg /* Forces the statements defining invariants in REF (and *INDEX) to be moved out of
13381debfc3dSmrg the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
13391debfc3dSmrg for_each_index. */
13401debfc3dSmrg
13411debfc3dSmrg struct fmt_data
13421debfc3dSmrg {
1343*8feb0f0bSmrg class loop *loop;
1344*8feb0f0bSmrg class loop *orig_loop;
13451debfc3dSmrg };
13461debfc3dSmrg
13471debfc3dSmrg static bool
13481debfc3dSmrg force_move_till (tree ref, tree *index, void *data)
13491debfc3dSmrg {
13501debfc3dSmrg struct fmt_data *fmt_data = (struct fmt_data *) data;
13511debfc3dSmrg
13521debfc3dSmrg if (TREE_CODE (ref) == ARRAY_REF)
13531debfc3dSmrg {
13541debfc3dSmrg tree step = TREE_OPERAND (ref, 3);
13551debfc3dSmrg tree lbound = TREE_OPERAND (ref, 2);
13561debfc3dSmrg
13571debfc3dSmrg force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
13581debfc3dSmrg force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
13591debfc3dSmrg }
13601debfc3dSmrg
13611debfc3dSmrg force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
13621debfc3dSmrg
13631debfc3dSmrg return true;
13641debfc3dSmrg }
13651debfc3dSmrg
13661debfc3dSmrg /* A function to free the memory reference description MEM.  */
13671debfc3dSmrg
13681debfc3dSmrg static void
1369*8feb0f0bSmrg memref_free (class im_mem_ref *mem)
13701debfc3dSmrg {
13711debfc3dSmrg mem->accesses_in_loop.release ();
13721debfc3dSmrg }
13731debfc3dSmrg
13741debfc3dSmrg /* Allocates and returns a memory reference description for MEM whose hash
13751debfc3dSmrg value is HASH and id is ID. */
13761debfc3dSmrg
13771debfc3dSmrg static im_mem_ref *
1378c0a68be4Smrg mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
13791debfc3dSmrg {
1380*8feb0f0bSmrg im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1381c0a68be4Smrg if (mem)
1382c0a68be4Smrg ref->mem = *mem;
1383c0a68be4Smrg else
1384c0a68be4Smrg ao_ref_init (&ref->mem, error_mark_node);
13851debfc3dSmrg ref->id = id;
1386c0a68be4Smrg ref->ref_canonical = false;
1387c0a68be4Smrg ref->ref_decomposed = false;
13881debfc3dSmrg ref->hash = hash;
13891debfc3dSmrg ref->stored = NULL;
13901debfc3dSmrg bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
13911debfc3dSmrg bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
13921debfc3dSmrg ref->accesses_in_loop.create (1);
13931debfc3dSmrg
13941debfc3dSmrg return ref;
13951debfc3dSmrg }
13961debfc3dSmrg
13971debfc3dSmrg /* Records memory reference location *LOC to the memory reference
13981debfc3dSmrg description REF. The reference occurs in statement STMT. */
13991debfc3dSmrg
14001debfc3dSmrg static void
14011debfc3dSmrg record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
14021debfc3dSmrg {
14031debfc3dSmrg mem_ref_loc aref;
14041debfc3dSmrg aref.stmt = stmt;
14051debfc3dSmrg aref.ref = loc;
14061debfc3dSmrg ref->accesses_in_loop.safe_push (aref);
14071debfc3dSmrg }
14081debfc3dSmrg
14091debfc3dSmrg /* Set the bit for LOOP in the stored bitmap of REF, allocating the
14101debfc3dSmrg    bitmap if necessary.  Return whether the bit changed.  */
14111debfc3dSmrg
14121debfc3dSmrg static bool
1413*8feb0f0bSmrg set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
14141debfc3dSmrg {
14151debfc3dSmrg if (!ref->stored)
14161debfc3dSmrg ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
14171debfc3dSmrg return bitmap_set_bit (ref->stored, loop->num);
14181debfc3dSmrg }
14191debfc3dSmrg
14201debfc3dSmrg /* Marks reference REF as stored in LOOP. */
14211debfc3dSmrg
14221debfc3dSmrg static void
1423*8feb0f0bSmrg mark_ref_stored (im_mem_ref *ref, class loop *loop)
14241debfc3dSmrg {
14251debfc3dSmrg while (loop != current_loops->tree_root
14261debfc3dSmrg && set_ref_stored_in_loop (ref, loop))
14271debfc3dSmrg loop = loop_outer (loop);
14281debfc3dSmrg }
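/* As an illustration of the walk above: for a store in loop_3 of

     loop_1
       loop_2
	 loop_3   <- *p = x;

   the stored bit is set for loop_3, loop_2 and loop_1, and the walk
   stops early as soon as bitmap_set_bit reports a bit that an earlier
   call already set.  */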
14291debfc3dSmrg
14301debfc3dSmrg /* Gathers memory references in statement STMT in LOOP, storing the
14311debfc3dSmrg information about them in the memory_accesses structure. Marks
14321debfc3dSmrg the vops accessed through unrecognized statements there as
14331debfc3dSmrg well. */
14341debfc3dSmrg
14351debfc3dSmrg static void
1436*8feb0f0bSmrg gather_mem_refs_stmt (class loop *loop, gimple *stmt)
14371debfc3dSmrg {
14381debfc3dSmrg tree *mem = NULL;
14391debfc3dSmrg hashval_t hash;
14401debfc3dSmrg im_mem_ref **slot;
14411debfc3dSmrg im_mem_ref *ref;
14421debfc3dSmrg bool is_stored;
14431debfc3dSmrg unsigned id;
14441debfc3dSmrg
14451debfc3dSmrg if (!gimple_vuse (stmt))
14461debfc3dSmrg return;
14471debfc3dSmrg
14481debfc3dSmrg mem = simple_mem_ref_in_stmt (stmt, &is_stored);
14491debfc3dSmrg if (!mem)
14501debfc3dSmrg {
14511debfc3dSmrg /* We use the shared mem_ref for all unanalyzable refs. */
14521debfc3dSmrg id = UNANALYZABLE_MEM_ID;
14531debfc3dSmrg ref = memory_accesses.refs_list[id];
14541debfc3dSmrg if (dump_file && (dump_flags & TDF_DETAILS))
14551debfc3dSmrg {
14561debfc3dSmrg fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
14571debfc3dSmrg print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
14581debfc3dSmrg }
14591debfc3dSmrg is_stored = gimple_vdef (stmt);
14601debfc3dSmrg }
14611debfc3dSmrg else
14621debfc3dSmrg {
1463c0a68be4Smrg /* We are looking for equal refs that might differ in structure
1464c0a68be4Smrg such as a.b vs. MEM[&a + 4]. So we key off the ao_ref but
1465c0a68be4Smrg make sure we can canonicalize the ref in the hashtable if
1466c0a68be4Smrg non-operand_equal_p refs are found. For the lookup we mark
1467c0a68be4Smrg the case we want strict equality with aor.max_size == -1. */
1468c0a68be4Smrg ao_ref aor;
1469c0a68be4Smrg ao_ref_init (&aor, *mem);
1470c0a68be4Smrg ao_ref_base (&aor);
1471c0a68be4Smrg ao_ref_alias_set (&aor);
1472c0a68be4Smrg HOST_WIDE_INT offset, size, max_size;
1473c0a68be4Smrg poly_int64 saved_maxsize = aor.max_size, mem_off;
1474c0a68be4Smrg tree mem_base;
1475c0a68be4Smrg bool ref_decomposed;
1476c0a68be4Smrg if (aor.max_size_known_p ()
1477c0a68be4Smrg && aor.offset.is_constant (&offset)
1478c0a68be4Smrg && aor.size.is_constant (&size)
1479c0a68be4Smrg && aor.max_size.is_constant (&max_size)
1480c0a68be4Smrg && size == max_size
1481c0a68be4Smrg && (size % BITS_PER_UNIT) == 0
1482c0a68be4Smrg /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
1483c0a68be4Smrg size. Make sure this is consistent with the extraction. */
1484c0a68be4Smrg && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
1485c0a68be4Smrg && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
1486c0a68be4Smrg aor.size)
1487c0a68be4Smrg && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
1488c0a68be4Smrg {
1489c0a68be4Smrg ref_decomposed = true;
1490c0a68be4Smrg hash = iterative_hash_expr (ao_ref_base (&aor), 0);
1491c0a68be4Smrg hash = iterative_hash_host_wide_int (offset, hash);
1492c0a68be4Smrg hash = iterative_hash_host_wide_int (size, hash);
1493c0a68be4Smrg }
1494c0a68be4Smrg else
1495c0a68be4Smrg {
1496c0a68be4Smrg ref_decomposed = false;
1497c0a68be4Smrg hash = iterative_hash_expr (aor.ref, 0);
1498c0a68be4Smrg aor.max_size = -1;
1499c0a68be4Smrg }
1500c0a68be4Smrg slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
1501c0a68be4Smrg aor.max_size = saved_maxsize;
15021debfc3dSmrg if (*slot)
15031debfc3dSmrg {
1504c0a68be4Smrg if (!(*slot)->ref_canonical
1505c0a68be4Smrg && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
1506c0a68be4Smrg {
1507c0a68be4Smrg /* If we didn't yet canonicalize the hashtable ref (which
1508c0a68be4Smrg we'll end up using for code insertion) and hit a second
1509c0a68be4Smrg equal ref that is not structurally equivalent create
1510c0a68be4Smrg a canonical ref which is a bare MEM_REF. */
1511c0a68be4Smrg if (TREE_CODE (*mem) == MEM_REF
1512c0a68be4Smrg || TREE_CODE (*mem) == TARGET_MEM_REF)
1513c0a68be4Smrg {
1514c0a68be4Smrg (*slot)->mem.ref = *mem;
1515c0a68be4Smrg (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
1516c0a68be4Smrg }
1517c0a68be4Smrg else
1518c0a68be4Smrg {
1519c0a68be4Smrg tree ref_alias_type = reference_alias_ptr_type (*mem);
1520c0a68be4Smrg unsigned int ref_align = get_object_alignment (*mem);
1521c0a68be4Smrg tree ref_type = TREE_TYPE (*mem);
1522*8feb0f0bSmrg tree tmp = build1 (ADDR_EXPR, ptr_type_node,
1523*8feb0f0bSmrg unshare_expr (mem_base));
1524c0a68be4Smrg if (TYPE_ALIGN (ref_type) != ref_align)
1525c0a68be4Smrg ref_type = build_aligned_type (ref_type, ref_align);
1526c0a68be4Smrg (*slot)->mem.ref
1527c0a68be4Smrg = fold_build2 (MEM_REF, ref_type, tmp,
1528c0a68be4Smrg build_int_cst (ref_alias_type, mem_off));
1529c0a68be4Smrg if ((*slot)->mem.volatile_p)
1530c0a68be4Smrg TREE_THIS_VOLATILE ((*slot)->mem.ref) = 1;
1531c0a68be4Smrg gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
1532c0a68be4Smrg && is_gimple_mem_ref_addr
1533c0a68be4Smrg (TREE_OPERAND ((*slot)->mem.ref,
1534c0a68be4Smrg 0)));
1535c0a68be4Smrg (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
1536c0a68be4Smrg }
1537c0a68be4Smrg (*slot)->ref_canonical = true;
1538c0a68be4Smrg }
15391debfc3dSmrg ref = *slot;
15401debfc3dSmrg id = ref->id;
15411debfc3dSmrg }
15421debfc3dSmrg else
15431debfc3dSmrg {
15441debfc3dSmrg id = memory_accesses.refs_list.length ();
1545c0a68be4Smrg ref = mem_ref_alloc (&aor, hash, id);
1546c0a68be4Smrg ref->ref_decomposed = ref_decomposed;
15471debfc3dSmrg memory_accesses.refs_list.safe_push (ref);
15481debfc3dSmrg *slot = ref;
15491debfc3dSmrg
15501debfc3dSmrg if (dump_file && (dump_flags & TDF_DETAILS))
15511debfc3dSmrg {
15521debfc3dSmrg fprintf (dump_file, "Memory reference %u: ", id);
15531debfc3dSmrg print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
15541debfc3dSmrg fprintf (dump_file, "\n");
15551debfc3dSmrg }
15561debfc3dSmrg }
15571debfc3dSmrg
15581debfc3dSmrg record_mem_ref_loc (ref, stmt, mem);
15591debfc3dSmrg }
15601debfc3dSmrg bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
15611debfc3dSmrg if (is_stored)
15621debfc3dSmrg {
15631debfc3dSmrg bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
15641debfc3dSmrg mark_ref_stored (ref, loop);
15651debfc3dSmrg }
1566a2dc1f3fSmrg init_lim_data (stmt)->ref = ref->id;
15671debfc3dSmrg return;
15681debfc3dSmrg }
15691debfc3dSmrg
15701debfc3dSmrg static unsigned *bb_loop_postorder;
15711debfc3dSmrg
15721debfc3dSmrg /* qsort comparison function to sort blocks by their loop fathers' postorder.  */
15731debfc3dSmrg
15741debfc3dSmrg static int
1575*8feb0f0bSmrg sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1576*8feb0f0bSmrg void *bb_loop_postorder_)
15771debfc3dSmrg {
1578*8feb0f0bSmrg unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1579*8feb0f0bSmrg basic_block bb1 = *(const basic_block *)bb1_;
1580*8feb0f0bSmrg basic_block bb2 = *(const basic_block *)bb2_;
1581*8feb0f0bSmrg class loop *loop1 = bb1->loop_father;
1582*8feb0f0bSmrg class loop *loop2 = bb2->loop_father;
15831debfc3dSmrg if (loop1->num == loop2->num)
1584a2dc1f3fSmrg return bb1->index - bb2->index;
15851debfc3dSmrg return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
15861debfc3dSmrg }
15871debfc3dSmrg
15881debfc3dSmrg /* qsort comparison function to sort reference locations by their loop fathers' postorder.  */
15891debfc3dSmrg
15901debfc3dSmrg static int
1591*8feb0f0bSmrg sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1592*8feb0f0bSmrg void *bb_loop_postorder_)
15931debfc3dSmrg {
1594*8feb0f0bSmrg unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1595*8feb0f0bSmrg const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1596*8feb0f0bSmrg const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
1597*8feb0f0bSmrg class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1598*8feb0f0bSmrg class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
15991debfc3dSmrg if (loop1->num == loop2->num)
16001debfc3dSmrg return 0;
16011debfc3dSmrg return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
16021debfc3dSmrg }
16031debfc3dSmrg
16041debfc3dSmrg /* Gathers memory references in loops. */
16051debfc3dSmrg
16061debfc3dSmrg static void
16071debfc3dSmrg analyze_memory_references (void)
16081debfc3dSmrg {
16091debfc3dSmrg gimple_stmt_iterator bsi;
16101debfc3dSmrg basic_block bb, *bbs;
1611*8feb0f0bSmrg class loop *loop, *outer;
16121debfc3dSmrg unsigned i, n;
16131debfc3dSmrg
16141debfc3dSmrg   /* Collect all basic blocks in loops and sort them by their
16151debfc3dSmrg      loops' postorder.  */
16161debfc3dSmrg i = 0;
16171debfc3dSmrg bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
16181debfc3dSmrg FOR_EACH_BB_FN (bb, cfun)
16191debfc3dSmrg if (bb->loop_father != current_loops->tree_root)
16201debfc3dSmrg bbs[i++] = bb;
16211debfc3dSmrg n = i;
1622*8feb0f0bSmrg gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
1623*8feb0f0bSmrg bb_loop_postorder);
16241debfc3dSmrg
16251debfc3dSmrg /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
16261debfc3dSmrg That results in better locality for all the bitmaps. */
16271debfc3dSmrg for (i = 0; i < n; ++i)
16281debfc3dSmrg {
16291debfc3dSmrg basic_block bb = bbs[i];
16301debfc3dSmrg for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
16311debfc3dSmrg gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
16321debfc3dSmrg }
16331debfc3dSmrg
16341debfc3dSmrg   /* Sort the location list of gathered memory references by their
16351debfc3dSmrg loop postorder number. */
16361debfc3dSmrg im_mem_ref *ref;
16371debfc3dSmrg FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1638*8feb0f0bSmrg ref->accesses_in_loop.sort (sort_locs_in_loop_postorder_cmp,
1639*8feb0f0bSmrg bb_loop_postorder);
16401debfc3dSmrg
16411debfc3dSmrg free (bbs);
16421debfc3dSmrg
16431debfc3dSmrg /* Propagate the information about accessed memory references up
16441debfc3dSmrg the loop hierarchy. */
16451debfc3dSmrg FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
16461debfc3dSmrg {
16471debfc3dSmrg /* Finalize the overall touched references (including subloops). */
16481debfc3dSmrg bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
16491debfc3dSmrg &memory_accesses.refs_stored_in_loop[loop->num]);
16501debfc3dSmrg
16511debfc3dSmrg /* Propagate the information about accessed memory references up
16521debfc3dSmrg the loop hierarchy. */
16531debfc3dSmrg outer = loop_outer (loop);
16541debfc3dSmrg if (outer == current_loops->tree_root)
16551debfc3dSmrg continue;
16561debfc3dSmrg
16571debfc3dSmrg bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
16581debfc3dSmrg &memory_accesses.all_refs_stored_in_loop[loop->num]);
16591debfc3dSmrg }
16601debfc3dSmrg }
16611debfc3dSmrg
16621debfc3dSmrg /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
16631debfc3dSmrg tree_to_aff_combination_expand. */
16641debfc3dSmrg
16651debfc3dSmrg static bool
16661debfc3dSmrg mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
16671debfc3dSmrg hash_map<tree, name_expansion *> **ttae_cache)
16681debfc3dSmrg {
16691debfc3dSmrg /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
16701debfc3dSmrg object and their offset differ in such a way that the locations cannot
16711debfc3dSmrg overlap, then they cannot alias. */
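  /* For instance, assuming 4-byte integers, p[i] and p[i + 1] expand to
     affine offsets that always differ by exactly one element size while
     each access covers one element, so the check below can conclude the
     two locations never overlap for any value of i.  */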
1672a2dc1f3fSmrg poly_widest_int size1, size2;
16731debfc3dSmrg aff_tree off1, off2;
16741debfc3dSmrg
16751debfc3dSmrg /* Perform basic offset and type-based disambiguation. */
16761debfc3dSmrg if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
16771debfc3dSmrg return false;
16781debfc3dSmrg
16791debfc3dSmrg /* The expansion of addresses may be a bit expensive, thus we only do
16801debfc3dSmrg the check at -O2 and higher optimization levels. */
16811debfc3dSmrg if (optimize < 2)
16821debfc3dSmrg return true;
16831debfc3dSmrg
16841debfc3dSmrg get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
16851debfc3dSmrg get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
16861debfc3dSmrg aff_combination_expand (&off1, ttae_cache);
16871debfc3dSmrg aff_combination_expand (&off2, ttae_cache);
16881debfc3dSmrg aff_combination_scale (&off1, -1);
16891debfc3dSmrg aff_combination_add (&off2, &off1);
16901debfc3dSmrg
16911debfc3dSmrg if (aff_comb_cannot_overlap_p (&off2, size1, size2))
16921debfc3dSmrg return false;
16931debfc3dSmrg
16941debfc3dSmrg return true;
16951debfc3dSmrg }
16961debfc3dSmrg
16971debfc3dSmrg /* Compare function for bsearch searching for reference locations
16981debfc3dSmrg in a loop. */
16991debfc3dSmrg
17001debfc3dSmrg static int
1701*8feb0f0bSmrg find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1702*8feb0f0bSmrg void *bb_loop_postorder_)
17031debfc3dSmrg {
1704*8feb0f0bSmrg unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1705*8feb0f0bSmrg class loop *loop = (class loop *)const_cast<void *>(loop_);
17061debfc3dSmrg mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1707*8feb0f0bSmrg class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
17081debfc3dSmrg if (loop->num == loc_loop->num
17091debfc3dSmrg || flow_loop_nested_p (loop, loc_loop))
17101debfc3dSmrg return 0;
17111debfc3dSmrg return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
17121debfc3dSmrg ? -1 : 1);
17131debfc3dSmrg }
17141debfc3dSmrg
17151debfc3dSmrg /* Iterates over all locations of REF in LOOP and its subloops calling
17161debfc3dSmrg fn.operator() with the location as argument. When that operator
17171debfc3dSmrg returns true the iteration is stopped and true is returned.
17181debfc3dSmrg Otherwise false is returned. */
17191debfc3dSmrg
17201debfc3dSmrg template <typename FN>
17211debfc3dSmrg static bool
1722*8feb0f0bSmrg for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
17231debfc3dSmrg {
17241debfc3dSmrg unsigned i;
17251debfc3dSmrg mem_ref_loc *loc;
17261debfc3dSmrg
17271debfc3dSmrg /* Search for the cluster of locs in the accesses_in_loop vector
17281debfc3dSmrg      which is sorted by the postorder index of the loop father.  */
1729*8feb0f0bSmrg loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
1730*8feb0f0bSmrg bb_loop_postorder);
17311debfc3dSmrg if (!loc)
17321debfc3dSmrg return false;
17331debfc3dSmrg
17341debfc3dSmrg /* We have found one location inside loop or its sub-loops. Iterate
17351debfc3dSmrg both forward and backward to cover the whole cluster. */
17361debfc3dSmrg i = loc - ref->accesses_in_loop.address ();
17371debfc3dSmrg while (i > 0)
17381debfc3dSmrg {
17391debfc3dSmrg --i;
17401debfc3dSmrg mem_ref_loc *l = &ref->accesses_in_loop[i];
17411debfc3dSmrg if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
17421debfc3dSmrg break;
17431debfc3dSmrg if (fn (l))
17441debfc3dSmrg return true;
17451debfc3dSmrg }
17461debfc3dSmrg for (i = loc - ref->accesses_in_loop.address ();
17471debfc3dSmrg i < ref->accesses_in_loop.length (); ++i)
17481debfc3dSmrg {
17491debfc3dSmrg mem_ref_loc *l = &ref->accesses_in_loop[i];
17501debfc3dSmrg if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
17511debfc3dSmrg break;
17521debfc3dSmrg if (fn (l))
17531debfc3dSmrg return true;
17541debfc3dSmrg }
17551debfc3dSmrg
17561debfc3dSmrg return false;
17571debfc3dSmrg }
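/* Because accesses_in_loop is sorted by the loop postorder of the
   containing blocks, all locations in LOOP and its sub-loops form one
   contiguous run in the vector; the bsearch above lands on an arbitrary
   member of that run and the two scans extend from it to both ends.  */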
17581debfc3dSmrg
17591debfc3dSmrg /* Rewrites location LOC by TMP_VAR. */
17601debfc3dSmrg
1761*8feb0f0bSmrg class rewrite_mem_ref_loc
17621debfc3dSmrg {
1763*8feb0f0bSmrg public:
17641debfc3dSmrg   rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
17651debfc3dSmrg bool operator () (mem_ref_loc *loc);
17661debfc3dSmrg tree tmp_var;
17671debfc3dSmrg };
17681debfc3dSmrg
17691debfc3dSmrg bool
17701debfc3dSmrg rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
17711debfc3dSmrg {
17721debfc3dSmrg *loc->ref = tmp_var;
17731debfc3dSmrg update_stmt (loc->stmt);
17741debfc3dSmrg return false;
17751debfc3dSmrg }
17761debfc3dSmrg
17771debfc3dSmrg /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
17781debfc3dSmrg
17791debfc3dSmrg static void
1780*8feb0f0bSmrg rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
17811debfc3dSmrg {
17821debfc3dSmrg for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
17831debfc3dSmrg }
17841debfc3dSmrg
17851debfc3dSmrg /* Stores the first reference location in LOCP. */
17861debfc3dSmrg
1787*8feb0f0bSmrg class first_mem_ref_loc_1
17881debfc3dSmrg {
1789*8feb0f0bSmrg public:
17901debfc3dSmrg   first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
17911debfc3dSmrg bool operator () (mem_ref_loc *loc);
17921debfc3dSmrg mem_ref_loc **locp;
17931debfc3dSmrg };
17941debfc3dSmrg
17951debfc3dSmrg bool
17961debfc3dSmrg first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
17971debfc3dSmrg {
17981debfc3dSmrg *locp = loc;
17991debfc3dSmrg return true;
18001debfc3dSmrg }
18011debfc3dSmrg
18021debfc3dSmrg /* Returns the first reference location to REF in LOOP. */
18031debfc3dSmrg
18041debfc3dSmrg static mem_ref_loc *
1805*8feb0f0bSmrg first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
18061debfc3dSmrg {
18071debfc3dSmrg mem_ref_loc *locp = NULL;
18081debfc3dSmrg for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
18091debfc3dSmrg return locp;
18101debfc3dSmrg }
18111debfc3dSmrg
18121debfc3dSmrg struct prev_flag_edges {
18131debfc3dSmrg /* Edge to insert new flag comparison code. */
18141debfc3dSmrg edge append_cond_position;
18151debfc3dSmrg
18161debfc3dSmrg /* Edge for fall through from previous flag comparison. */
18171debfc3dSmrg edge last_cond_fallthru;
18181debfc3dSmrg };
18191debfc3dSmrg
18201debfc3dSmrg /* Helper function for execute_sm. Emit code to store TMP_VAR into
18211debfc3dSmrg MEM along edge EX.
18221debfc3dSmrg
18231debfc3dSmrg The store is only done if MEM has changed. We do this so no
18241debfc3dSmrg changes to MEM occur on code paths that did not originally store
18251debfc3dSmrg into it.
18261debfc3dSmrg
18271debfc3dSmrg The common case for execute_sm will transform:
18281debfc3dSmrg
18291debfc3dSmrg for (...) {
18301debfc3dSmrg if (foo)
18311debfc3dSmrg stuff;
18321debfc3dSmrg else
18331debfc3dSmrg MEM = TMP_VAR;
18341debfc3dSmrg }
18351debfc3dSmrg
18361debfc3dSmrg into:
18371debfc3dSmrg
18381debfc3dSmrg lsm = MEM;
18391debfc3dSmrg for (...) {
18401debfc3dSmrg if (foo)
18411debfc3dSmrg stuff;
18421debfc3dSmrg else
18431debfc3dSmrg lsm = TMP_VAR;
18441debfc3dSmrg }
18451debfc3dSmrg MEM = lsm;
18461debfc3dSmrg
18471debfc3dSmrg This function will generate:
18481debfc3dSmrg
18491debfc3dSmrg lsm = MEM;
18501debfc3dSmrg
18511debfc3dSmrg lsm_flag = false;
18521debfc3dSmrg ...
18531debfc3dSmrg for (...) {
18541debfc3dSmrg if (foo)
18551debfc3dSmrg stuff;
18561debfc3dSmrg else {
18571debfc3dSmrg lsm = TMP_VAR;
18581debfc3dSmrg lsm_flag = true;
18591debfc3dSmrg }
18601debfc3dSmrg }
18611debfc3dSmrg if (lsm_flag) <--
18621debfc3dSmrg MEM = lsm; <--
18631debfc3dSmrg */
18641debfc3dSmrg
18651debfc3dSmrg static void
1866a2dc1f3fSmrg execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
1867a2dc1f3fSmrg edge preheader, hash_set <basic_block> *flag_bbs)
18681debfc3dSmrg {
18691debfc3dSmrg basic_block new_bb, then_bb, old_dest;
18701debfc3dSmrg bool loop_has_only_one_exit;
18711debfc3dSmrg edge then_old_edge, orig_ex = ex;
18721debfc3dSmrg gimple_stmt_iterator gsi;
18731debfc3dSmrg gimple *stmt;
18741debfc3dSmrg struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
18751debfc3dSmrg bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
18761debfc3dSmrg
1877a2dc1f3fSmrg profile_count count_sum = profile_count::zero ();
1878a2dc1f3fSmrg int nbbs = 0, ncount = 0;
1879a2dc1f3fSmrg profile_probability flag_probability = profile_probability::uninitialized ();
1880a2dc1f3fSmrg
1881a2dc1f3fSmrg /* Flag is set in FLAG_BBS. Determine probability that flag will be true
1882a2dc1f3fSmrg at loop exit.
1883a2dc1f3fSmrg
1884a2dc1f3fSmrg This code may look fancy, but it cannot update profile very realistically
1885a2dc1f3fSmrg      because we do not know the probability that flag will be true at a given
1886a2dc1f3fSmrg loop exit.
1887a2dc1f3fSmrg
1888a2dc1f3fSmrg We look for two interesting extremes
1889a2dc1f3fSmrg - when exit is dominated by block setting the flag, we know it will
1890a2dc1f3fSmrg always be true. This is a common case.
1891a2dc1f3fSmrg - when all blocks setting the flag have very low frequency we know
1892a2dc1f3fSmrg it will likely be false.
1893a2dc1f3fSmrg In all other cases we default to 2/3 for flag being true. */
1894a2dc1f3fSmrg
1895a2dc1f3fSmrg for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
1896a2dc1f3fSmrg it != flag_bbs->end (); ++it)
1897a2dc1f3fSmrg {
1898a2dc1f3fSmrg if ((*it)->count.initialized_p ())
1899a2dc1f3fSmrg count_sum += (*it)->count, ncount ++;
1900a2dc1f3fSmrg if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
1901a2dc1f3fSmrg flag_probability = profile_probability::always ();
1902a2dc1f3fSmrg nbbs++;
1903a2dc1f3fSmrg }
1904a2dc1f3fSmrg
1905a2dc1f3fSmrg profile_probability cap = profile_probability::always ().apply_scale (2, 3);
1906a2dc1f3fSmrg
1907a2dc1f3fSmrg if (flag_probability.initialized_p ())
1908a2dc1f3fSmrg ;
1909a2dc1f3fSmrg else if (ncount == nbbs
1910a2dc1f3fSmrg && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
1911a2dc1f3fSmrg {
1912a2dc1f3fSmrg flag_probability = count_sum.probability_in (preheader->count ());
1913a2dc1f3fSmrg if (flag_probability > cap)
1914a2dc1f3fSmrg flag_probability = cap;
1915a2dc1f3fSmrg }
1916a2dc1f3fSmrg
1917a2dc1f3fSmrg if (!flag_probability.initialized_p ())
1918a2dc1f3fSmrg flag_probability = cap;
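  /* A worked example of the heuristics above: if the blocks setting the
     flag have a combined profile count of 30 against a preheader count
     of 100, the conditional store gets a 30% probability; had the
     combined count been 90, the 2/3 cap would apply instead.  */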
1919a2dc1f3fSmrg
19201debfc3dSmrg /* ?? Insert store after previous store if applicable. See note
19211debfc3dSmrg below. */
19221debfc3dSmrg if (prev_edges)
19231debfc3dSmrg ex = prev_edges->append_cond_position;
19241debfc3dSmrg
19251debfc3dSmrg loop_has_only_one_exit = single_pred_p (ex->dest);
19261debfc3dSmrg
19271debfc3dSmrg if (loop_has_only_one_exit)
19281debfc3dSmrg ex = split_block_after_labels (ex->dest);
19291debfc3dSmrg else
19301debfc3dSmrg {
19311debfc3dSmrg for (gphi_iterator gpi = gsi_start_phis (ex->dest);
19321debfc3dSmrg !gsi_end_p (gpi); gsi_next (&gpi))
19331debfc3dSmrg {
19341debfc3dSmrg gphi *phi = gpi.phi ();
19351debfc3dSmrg if (virtual_operand_p (gimple_phi_result (phi)))
19361debfc3dSmrg continue;
19371debfc3dSmrg
19381debfc3dSmrg /* When the destination has a non-virtual PHI node with multiple
19391debfc3dSmrg predecessors make sure we preserve the PHI structure by
19401debfc3dSmrg forcing a forwarder block so that hoisting of that PHI will
19411debfc3dSmrg still work. */
19421debfc3dSmrg split_edge (ex);
19431debfc3dSmrg break;
19441debfc3dSmrg }
19451debfc3dSmrg }
19461debfc3dSmrg
19471debfc3dSmrg old_dest = ex->dest;
19481debfc3dSmrg new_bb = split_edge (ex);
19491debfc3dSmrg then_bb = create_empty_bb (new_bb);
1950a2dc1f3fSmrg then_bb->count = new_bb->count.apply_probability (flag_probability);
19511debfc3dSmrg if (irr)
19521debfc3dSmrg then_bb->flags = BB_IRREDUCIBLE_LOOP;
19531debfc3dSmrg add_bb_to_loop (then_bb, new_bb->loop_father);
19541debfc3dSmrg
19551debfc3dSmrg gsi = gsi_start_bb (new_bb);
19561debfc3dSmrg stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
19571debfc3dSmrg NULL_TREE, NULL_TREE);
19581debfc3dSmrg gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
19591debfc3dSmrg
19601debfc3dSmrg gsi = gsi_start_bb (then_bb);
19611debfc3dSmrg /* Insert actual store. */
19621debfc3dSmrg stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
19631debfc3dSmrg gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
19641debfc3dSmrg
1965a2dc1f3fSmrg edge e1 = single_succ_edge (new_bb);
1966a2dc1f3fSmrg edge e2 = make_edge (new_bb, then_bb,
19671debfc3dSmrg EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1968a2dc1f3fSmrg e2->probability = flag_probability;
1969a2dc1f3fSmrg
1970a2dc1f3fSmrg e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
1971a2dc1f3fSmrg e1->flags &= ~EDGE_FALLTHRU;
1972a2dc1f3fSmrg
1973a2dc1f3fSmrg e1->probability = flag_probability.invert ();
1974a2dc1f3fSmrg
1975a2dc1f3fSmrg then_old_edge = make_single_succ_edge (then_bb, old_dest,
19761debfc3dSmrg EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
19771debfc3dSmrg
19781debfc3dSmrg set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
19791debfc3dSmrg
19801debfc3dSmrg if (prev_edges)
19811debfc3dSmrg {
19821debfc3dSmrg basic_block prevbb = prev_edges->last_cond_fallthru->src;
19831debfc3dSmrg redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
19841debfc3dSmrg set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
19851debfc3dSmrg set_immediate_dominator (CDI_DOMINATORS, old_dest,
19861debfc3dSmrg recompute_dominator (CDI_DOMINATORS, old_dest));
19871debfc3dSmrg }
19881debfc3dSmrg
19891debfc3dSmrg /* ?? Because stores may alias, they must happen in the exact
19901debfc3dSmrg sequence they originally happened. Save the position right after
19911debfc3dSmrg the (_lsm) store we just created so we can continue appending after
19921debfc3dSmrg it and maintain the original order. */
19931debfc3dSmrg {
19941debfc3dSmrg struct prev_flag_edges *p;
19951debfc3dSmrg
19961debfc3dSmrg if (orig_ex->aux)
19971debfc3dSmrg orig_ex->aux = NULL;
19981debfc3dSmrg alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
19991debfc3dSmrg p = (struct prev_flag_edges *) orig_ex->aux;
20001debfc3dSmrg p->append_cond_position = then_old_edge;
20011debfc3dSmrg p->last_cond_fallthru = find_edge (new_bb, old_dest);
20021debfc3dSmrg orig_ex->aux = (void *) p;
20031debfc3dSmrg }
20041debfc3dSmrg
20051debfc3dSmrg if (!loop_has_only_one_exit)
20061debfc3dSmrg for (gphi_iterator gpi = gsi_start_phis (old_dest);
20071debfc3dSmrg !gsi_end_p (gpi); gsi_next (&gpi))
20081debfc3dSmrg {
20091debfc3dSmrg gphi *phi = gpi.phi ();
20101debfc3dSmrg unsigned i;
20111debfc3dSmrg
20121debfc3dSmrg for (i = 0; i < gimple_phi_num_args (phi); i++)
20131debfc3dSmrg if (gimple_phi_arg_edge (phi, i)->src == new_bb)
20141debfc3dSmrg {
20151debfc3dSmrg tree arg = gimple_phi_arg_def (phi, i);
20161debfc3dSmrg add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
20171debfc3dSmrg update_stmt (phi);
20181debfc3dSmrg }
20191debfc3dSmrg }
20201debfc3dSmrg }
20211debfc3dSmrg
20221debfc3dSmrg /* When REF is set on the location, set a flag indicating the store.  */
20231debfc3dSmrg
2024*8feb0f0bSmrg class sm_set_flag_if_changed
20251debfc3dSmrg {
2026*8feb0f0bSmrg public:
2027a2dc1f3fSmrg   sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2028a2dc1f3fSmrg : flag (flag_), bbs (bbs_) {}
20291debfc3dSmrg bool operator () (mem_ref_loc *loc);
20301debfc3dSmrg tree flag;
2031a2dc1f3fSmrg hash_set <basic_block> *bbs;
20321debfc3dSmrg };
20331debfc3dSmrg
20341debfc3dSmrg bool
20351debfc3dSmrg sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
20361debfc3dSmrg {
20371debfc3dSmrg /* Only set the flag for writes. */
20381debfc3dSmrg if (is_gimple_assign (loc->stmt)
20391debfc3dSmrg && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
20401debfc3dSmrg {
20411debfc3dSmrg gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
20421debfc3dSmrg gimple *stmt = gimple_build_assign (flag, boolean_true_node);
20431debfc3dSmrg gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2044a2dc1f3fSmrg bbs->add (gimple_bb (stmt));
20451debfc3dSmrg }
20461debfc3dSmrg return false;
20471debfc3dSmrg }
20481debfc3dSmrg
20491debfc3dSmrg /* Helper function for execute_sm. On every location where REF is
20501debfc3dSmrg set, set an appropriate flag indicating the store. */
20511debfc3dSmrg
20521debfc3dSmrg static tree
2053*8feb0f0bSmrg execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
2054a2dc1f3fSmrg hash_set <basic_block> *bbs)
20551debfc3dSmrg {
20561debfc3dSmrg tree flag;
20571debfc3dSmrg char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
20581debfc3dSmrg flag = create_tmp_reg (boolean_type_node, str);
2059a2dc1f3fSmrg for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
20601debfc3dSmrg return flag;
20611debfc3dSmrg }
20621debfc3dSmrg
20631debfc3dSmrg /* Executes store motion of memory reference REF from LOOP.
20641debfc3dSmrg Exits from the LOOP are stored in EXITS. The initialization of the
20651debfc3dSmrg    temporary variable is put in the preheader of the loop, and assignments
20661debfc3dSmrg to the reference from the temporary variable are emitted to exits. */
20671debfc3dSmrg
20681debfc3dSmrg static void
2069*8feb0f0bSmrg execute_sm (class loop *loop, vec<edge> exits, im_mem_ref *ref)
20701debfc3dSmrg {
20711debfc3dSmrg tree tmp_var, store_flag = NULL_TREE;
20721debfc3dSmrg unsigned i;
20731debfc3dSmrg gassign *load;
20741debfc3dSmrg struct fmt_data fmt_data;
20751debfc3dSmrg edge ex;
20761debfc3dSmrg struct lim_aux_data *lim_data;
20771debfc3dSmrg bool multi_threaded_model_p = false;
20781debfc3dSmrg gimple_stmt_iterator gsi;
2079a2dc1f3fSmrg hash_set<basic_block> flag_bbs;
20801debfc3dSmrg
20811debfc3dSmrg if (dump_file && (dump_flags & TDF_DETAILS))
20821debfc3dSmrg {
20831debfc3dSmrg fprintf (dump_file, "Executing store motion of ");
2084a2dc1f3fSmrg print_generic_expr (dump_file, ref->mem.ref);
20851debfc3dSmrg fprintf (dump_file, " from loop %d\n", loop->num);
20861debfc3dSmrg }
20871debfc3dSmrg
20881debfc3dSmrg tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
20891debfc3dSmrg get_lsm_tmp_name (ref->mem.ref, ~0));
20901debfc3dSmrg
20911debfc3dSmrg fmt_data.loop = loop;
20921debfc3dSmrg fmt_data.orig_loop = loop;
20931debfc3dSmrg for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
20941debfc3dSmrg
20951debfc3dSmrg if (bb_in_transaction (loop_preheader_edge (loop)->src)
2096*8feb0f0bSmrg || (! flag_store_data_races
2097a2dc1f3fSmrg && ! ref_always_accessed_p (loop, ref, true)))
20981debfc3dSmrg multi_threaded_model_p = true;
20991debfc3dSmrg
21001debfc3dSmrg if (multi_threaded_model_p)
2101a2dc1f3fSmrg store_flag = execute_sm_if_changed_flag_set (loop, ref, &flag_bbs);
21021debfc3dSmrg
21031debfc3dSmrg rewrite_mem_refs (loop, ref, tmp_var);
21041debfc3dSmrg
21051debfc3dSmrg   /* Emit the load code at the position of the first reference to REF
21061debfc3dSmrg      in the loop, so that we are sure it will be processed
21071debfc3dSmrg      by move_computations after all dependencies.  */
21081debfc3dSmrg gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
21091debfc3dSmrg
21101debfc3dSmrg /* FIXME/TODO: For the multi-threaded variant, we could avoid this
21111debfc3dSmrg load altogether, since the store is predicated by a flag. We
21121debfc3dSmrg      could do the load only if it was originally in the loop.  */
21131debfc3dSmrg load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
21141debfc3dSmrg lim_data = init_lim_data (load);
21151debfc3dSmrg lim_data->max_loop = loop;
21161debfc3dSmrg lim_data->tgt_loop = loop;
21171debfc3dSmrg gsi_insert_before (&gsi, load, GSI_SAME_STMT);
21181debfc3dSmrg
21191debfc3dSmrg if (multi_threaded_model_p)
21201debfc3dSmrg {
21211debfc3dSmrg load = gimple_build_assign (store_flag, boolean_false_node);
21221debfc3dSmrg lim_data = init_lim_data (load);
21231debfc3dSmrg lim_data->max_loop = loop;
21241debfc3dSmrg lim_data->tgt_loop = loop;
21251debfc3dSmrg gsi_insert_before (&gsi, load, GSI_SAME_STMT);
21261debfc3dSmrg }
21271debfc3dSmrg
21281debfc3dSmrg /* Sink the store to every exit from the loop. */
21291debfc3dSmrg FOR_EACH_VEC_ELT (exits, i, ex)
21301debfc3dSmrg if (!multi_threaded_model_p)
21311debfc3dSmrg {
21321debfc3dSmrg gassign *store;
21331debfc3dSmrg store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
21341debfc3dSmrg gsi_insert_on_edge (ex, store);
21351debfc3dSmrg }
21361debfc3dSmrg else
2137a2dc1f3fSmrg execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag,
2138a2dc1f3fSmrg loop_preheader_edge (loop), &flag_bbs);
21391debfc3dSmrg }
21401debfc3dSmrg
21411debfc3dSmrg /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
21421debfc3dSmrg edges of the LOOP. */
21431debfc3dSmrg
21441debfc3dSmrg static void
2145*8feb0f0bSmrg hoist_memory_references (class loop *loop, bitmap mem_refs,
21461debfc3dSmrg vec<edge> exits)
21471debfc3dSmrg {
21481debfc3dSmrg im_mem_ref *ref;
21491debfc3dSmrg unsigned i;
21501debfc3dSmrg bitmap_iterator bi;
21511debfc3dSmrg
21521debfc3dSmrg EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
21531debfc3dSmrg {
21541debfc3dSmrg ref = memory_accesses.refs_list[i];
21551debfc3dSmrg execute_sm (loop, exits, ref);
21561debfc3dSmrg }
21571debfc3dSmrg }
21581debfc3dSmrg
2159*8feb0f0bSmrg class ref_always_accessed
21601debfc3dSmrg {
2161*8feb0f0bSmrg public:
2162*8feb0f0bSmrg   ref_always_accessed (class loop *loop_, bool stored_p_)
21631debfc3dSmrg : loop (loop_), stored_p (stored_p_) {}
21641debfc3dSmrg bool operator () (mem_ref_loc *loc);
2165*8feb0f0bSmrg class loop *loop;
21661debfc3dSmrg bool stored_p;
21671debfc3dSmrg };
21681debfc3dSmrg
21691debfc3dSmrg bool
21701debfc3dSmrg ref_always_accessed::operator () (mem_ref_loc *loc)
21711debfc3dSmrg {
2172*8feb0f0bSmrg class loop *must_exec;
21731debfc3dSmrg
2174*8feb0f0bSmrg struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
2175*8feb0f0bSmrg if (!lim_data)
21761debfc3dSmrg return false;
21771debfc3dSmrg
21781debfc3dSmrg   /* If we require an always executed store, make sure the statement
2179*8feb0f0bSmrg is a store. */
21801debfc3dSmrg if (stored_p)
21811debfc3dSmrg {
21821debfc3dSmrg tree lhs = gimple_get_lhs (loc->stmt);
21831debfc3dSmrg if (!lhs
2184*8feb0f0bSmrg || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
21851debfc3dSmrg return false;
21861debfc3dSmrg }
21871debfc3dSmrg
2188*8feb0f0bSmrg must_exec = lim_data->always_executed_in;
21891debfc3dSmrg if (!must_exec)
21901debfc3dSmrg return false;
21911debfc3dSmrg
21921debfc3dSmrg if (must_exec == loop
21931debfc3dSmrg || flow_loop_nested_p (must_exec, loop))
21941debfc3dSmrg return true;
21951debfc3dSmrg
21961debfc3dSmrg return false;
21971debfc3dSmrg }
21981debfc3dSmrg
21991debfc3dSmrg /* Returns true if REF is always accessed in LOOP. If STORED_P is true
22001debfc3dSmrg make sure REF is always stored to in LOOP. */
22011debfc3dSmrg
22021debfc3dSmrg static bool
2203*8feb0f0bSmrg ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
22041debfc3dSmrg {
22051debfc3dSmrg return for_all_locs_in_loop (loop, ref,
22061debfc3dSmrg ref_always_accessed (loop, stored_p));
22071debfc3dSmrg }
22081debfc3dSmrg
22091debfc3dSmrg /* Returns true if REF1 and REF2 are independent. */
22101debfc3dSmrg
22111debfc3dSmrg static bool
22121debfc3dSmrg refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2)
22131debfc3dSmrg {
22141debfc3dSmrg if (ref1 == ref2)
22151debfc3dSmrg return true;
22161debfc3dSmrg
22171debfc3dSmrg if (dump_file && (dump_flags & TDF_DETAILS))
22181debfc3dSmrg fprintf (dump_file, "Querying dependency of refs %u and %u: ",
22191debfc3dSmrg ref1->id, ref2->id);
22201debfc3dSmrg
22211debfc3dSmrg if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache))
22221debfc3dSmrg {
22231debfc3dSmrg if (dump_file && (dump_flags & TDF_DETAILS))
22241debfc3dSmrg fprintf (dump_file, "dependent.\n");
22251debfc3dSmrg return false;
22261debfc3dSmrg }
22271debfc3dSmrg else
22281debfc3dSmrg {
22291debfc3dSmrg if (dump_file && (dump_flags & TDF_DETAILS))
22301debfc3dSmrg fprintf (dump_file, "independent.\n");
22311debfc3dSmrg return true;
22321debfc3dSmrg }
22331debfc3dSmrg }
22341debfc3dSmrg
22351debfc3dSmrg /* Mark REF dependent on stores or loads (according to STORED_P) in LOOP
22361debfc3dSmrg and its super-loops. */
22371debfc3dSmrg
22381debfc3dSmrg static void
2239*8feb0f0bSmrg record_dep_loop (class loop *loop, im_mem_ref *ref, bool stored_p)
22401debfc3dSmrg {
22411debfc3dSmrg /* We can propagate dependent-in-loop bits up the loop
22421debfc3dSmrg hierarchy to all outer loops. */
22431debfc3dSmrg while (loop != current_loops->tree_root
22441debfc3dSmrg && bitmap_set_bit (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
22451debfc3dSmrg loop = loop_outer (loop);
22461debfc3dSmrg }
22471debfc3dSmrg
22481debfc3dSmrg /* Returns true if REF is independent of all other memory
22491debfc3dSmrg references in LOOP. */
22501debfc3dSmrg
22511debfc3dSmrg static bool
2252*8feb0f0bSmrg ref_indep_loop_p_1 (class loop *loop, im_mem_ref *ref, bool stored_p)
22531debfc3dSmrg {
22541debfc3dSmrg stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
22551debfc3dSmrg
22561debfc3dSmrg bool indep_p = true;
22571debfc3dSmrg bitmap refs_to_check;
22581debfc3dSmrg
22591debfc3dSmrg if (stored_p)
22601debfc3dSmrg refs_to_check = &memory_accesses.refs_in_loop[loop->num];
22611debfc3dSmrg else
22621debfc3dSmrg refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
22631debfc3dSmrg
22641debfc3dSmrg if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
22651debfc3dSmrg indep_p = false;
22661debfc3dSmrg else
22671debfc3dSmrg {
22681debfc3dSmrg if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
22691debfc3dSmrg return true;
22701debfc3dSmrg if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
22711debfc3dSmrg return false;
22721debfc3dSmrg
2273*8feb0f0bSmrg class loop *inner = loop->inner;
22741debfc3dSmrg while (inner)
22751debfc3dSmrg {
22761debfc3dSmrg if (!ref_indep_loop_p_1 (inner, ref, stored_p))
22771debfc3dSmrg {
22781debfc3dSmrg indep_p = false;
22791debfc3dSmrg break;
22801debfc3dSmrg }
22811debfc3dSmrg inner = inner->next;
22821debfc3dSmrg }
22831debfc3dSmrg
22841debfc3dSmrg if (indep_p)
22851debfc3dSmrg {
22861debfc3dSmrg unsigned i;
22871debfc3dSmrg bitmap_iterator bi;
22881debfc3dSmrg EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
22891debfc3dSmrg {
22901debfc3dSmrg im_mem_ref *aref = memory_accesses.refs_list[i];
22911debfc3dSmrg if (!refs_independent_p (ref, aref))
22921debfc3dSmrg {
22931debfc3dSmrg indep_p = false;
22941debfc3dSmrg break;
22951debfc3dSmrg }
22961debfc3dSmrg }
22971debfc3dSmrg }
22981debfc3dSmrg }
22991debfc3dSmrg
23001debfc3dSmrg if (dump_file && (dump_flags & TDF_DETAILS))
23011debfc3dSmrg fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
23021debfc3dSmrg ref->id, loop->num, indep_p ? "independent" : "dependent");
23031debfc3dSmrg
23041debfc3dSmrg /* Record the computed result in the cache. */
23051debfc3dSmrg if (indep_p)
23061debfc3dSmrg {
23071debfc3dSmrg if (bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))
23081debfc3dSmrg && stored_p)
23091debfc3dSmrg {
23101debfc3dSmrg 	  /* If it's independent of all refs then it's independent
23111debfc3dSmrg 	     of stores, too.  */
23121debfc3dSmrg bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, false));
23131debfc3dSmrg }
23141debfc3dSmrg }
23151debfc3dSmrg else
23161debfc3dSmrg {
23171debfc3dSmrg record_dep_loop (loop, ref, stored_p);
23181debfc3dSmrg if (!stored_p)
23191debfc3dSmrg {
23201debfc3dSmrg 	  /* If it's dependent on stores it's dependent on
23211debfc3dSmrg 	     all refs, too.  */
23221debfc3dSmrg record_dep_loop (loop, ref, true);
23231debfc3dSmrg }
23241debfc3dSmrg }
23251debfc3dSmrg
23261debfc3dSmrg return indep_p;
23271debfc3dSmrg }
23281debfc3dSmrg
23291debfc3dSmrg /* Returns true if REF is independent of all other memory references in
23301debfc3dSmrg LOOP. */
23311debfc3dSmrg
23321debfc3dSmrg static bool
2333*8feb0f0bSmrg ref_indep_loop_p (class loop *loop, im_mem_ref *ref)
23341debfc3dSmrg {
23351debfc3dSmrg gcc_checking_assert (MEM_ANALYZABLE (ref));
23361debfc3dSmrg
23371debfc3dSmrg return ref_indep_loop_p_1 (loop, ref, false);
23381debfc3dSmrg }
23391debfc3dSmrg
23401debfc3dSmrg /* Returns true if we can perform store motion of REF from LOOP. */
23411debfc3dSmrg
23421debfc3dSmrg static bool
2343*8feb0f0bSmrg can_sm_ref_p (class loop *loop, im_mem_ref *ref)
23441debfc3dSmrg {
23451debfc3dSmrg tree base;
23461debfc3dSmrg
23471debfc3dSmrg /* Can't hoist unanalyzable refs. */
23481debfc3dSmrg if (!MEM_ANALYZABLE (ref))
23491debfc3dSmrg return false;
23501debfc3dSmrg
23511debfc3dSmrg /* It should be movable. */
23521debfc3dSmrg if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
23531debfc3dSmrg || TREE_THIS_VOLATILE (ref->mem.ref)
23541debfc3dSmrg || !for_each_index (&ref->mem.ref, may_move_till, loop))
23551debfc3dSmrg return false;
23561debfc3dSmrg
23571debfc3dSmrg   /* If it can throw, fail; we do not properly update EH info.  */
23581debfc3dSmrg if (tree_could_throw_p (ref->mem.ref))
23591debfc3dSmrg return false;
23601debfc3dSmrg
23611debfc3dSmrg /* If it can trap, it must be always executed in LOOP.
23621debfc3dSmrg Readonly memory locations may trap when storing to them, but
23631debfc3dSmrg tree_could_trap_p is a predicate for rvalues, so check that
23641debfc3dSmrg explicitly. */
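  /* For example, in a loop like

       for (i = 0; i < n; i++)
	 if (cond)
	   *p = i;

     the store through p may trap when p does not point to valid memory,
     so it may only be moved out if a store through p happens whenever
     the loop is entered.  */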
23651debfc3dSmrg base = get_base_address (ref->mem.ref);
23661debfc3dSmrg if ((tree_could_trap_p (ref->mem.ref)
23671debfc3dSmrg || (DECL_P (base) && TREE_READONLY (base)))
23681debfc3dSmrg && !ref_always_accessed_p (loop, ref, true))
23691debfc3dSmrg return false;
23701debfc3dSmrg
23711debfc3dSmrg   /* And it must be independent of all other memory references
23721debfc3dSmrg in LOOP. */
23731debfc3dSmrg if (!ref_indep_loop_p (loop, ref))
23741debfc3dSmrg return false;
23751debfc3dSmrg
23761debfc3dSmrg return true;
23771debfc3dSmrg }
23781debfc3dSmrg
23791debfc3dSmrg /* Marks in REFS_TO_SM the references in LOOP for which store motion
23801debfc3dSmrg    should be performed.  SM_EXECUTED is the set of references for which
23811debfc3dSmrg    store motion was performed in one of the outer loops.  */
23821debfc3dSmrg
23831debfc3dSmrg static void
2384*8feb0f0bSmrg find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
23851debfc3dSmrg {
23861debfc3dSmrg bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
23871debfc3dSmrg unsigned i;
23881debfc3dSmrg bitmap_iterator bi;
23891debfc3dSmrg im_mem_ref *ref;
23901debfc3dSmrg
23911debfc3dSmrg EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
23921debfc3dSmrg {
23931debfc3dSmrg ref = memory_accesses.refs_list[i];
23941debfc3dSmrg if (can_sm_ref_p (loop, ref))
23951debfc3dSmrg bitmap_set_bit (refs_to_sm, i);
23961debfc3dSmrg }
23971debfc3dSmrg }
23981debfc3dSmrg
23991debfc3dSmrg /* Checks whether LOOP (with exits stored in EXITS array) is suitable
24001debfc3dSmrg    for a store motion optimization (i.e. whether we can insert statements
24011debfc3dSmrg on its exits). */
24021debfc3dSmrg
24031debfc3dSmrg static bool
2404*8feb0f0bSmrg loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
24051debfc3dSmrg vec<edge> exits)
24061debfc3dSmrg {
24071debfc3dSmrg unsigned i;
24081debfc3dSmrg edge ex;
24091debfc3dSmrg
24101debfc3dSmrg FOR_EACH_VEC_ELT (exits, i, ex)
24111debfc3dSmrg if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
24121debfc3dSmrg return false;
24131debfc3dSmrg
24141debfc3dSmrg return true;
24151debfc3dSmrg }
24161debfc3dSmrg
24171debfc3dSmrg /* Try to perform store motion for all memory references modified inside
24181debfc3dSmrg    LOOP.  SM_EXECUTED is the bitmap of the memory references for which
24191debfc3dSmrg store motion was executed in one of the outer loops. */
24201debfc3dSmrg
24211debfc3dSmrg static void
2422*8feb0f0bSmrg store_motion_loop (class loop *loop, bitmap sm_executed)
24231debfc3dSmrg {
24241debfc3dSmrg vec<edge> exits = get_loop_exit_edges (loop);
2425*8feb0f0bSmrg class loop *subloop;
24261debfc3dSmrg bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
24271debfc3dSmrg
24281debfc3dSmrg if (loop_suitable_for_sm (loop, exits))
24291debfc3dSmrg {
24301debfc3dSmrg find_refs_for_sm (loop, sm_executed, sm_in_loop);
24311debfc3dSmrg hoist_memory_references (loop, sm_in_loop, exits);
24321debfc3dSmrg }
24331debfc3dSmrg exits.release ();
24341debfc3dSmrg
24351debfc3dSmrg bitmap_ior_into (sm_executed, sm_in_loop);
24361debfc3dSmrg for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
24371debfc3dSmrg store_motion_loop (subloop, sm_executed);
24381debfc3dSmrg bitmap_and_compl_into (sm_executed, sm_in_loop);
24391debfc3dSmrg BITMAP_FREE (sm_in_loop);
24401debfc3dSmrg }
24411debfc3dSmrg
24421debfc3dSmrg /* Try to perform store motion for all memory references modified inside
24431debfc3dSmrg loops. */
24441debfc3dSmrg
24451debfc3dSmrg static void
24461debfc3dSmrg store_motion (void)
24471debfc3dSmrg {
2448*8feb0f0bSmrg class loop *loop;
24491debfc3dSmrg bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
24501debfc3dSmrg
24511debfc3dSmrg for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
24521debfc3dSmrg store_motion_loop (loop, sm_executed);
24531debfc3dSmrg
24541debfc3dSmrg BITMAP_FREE (sm_executed);
24551debfc3dSmrg gsi_commit_edge_inserts ();
24561debfc3dSmrg }
24571debfc3dSmrg
24581debfc3dSmrg /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
24591debfc3dSmrg    for each such basic block bb records the outermost loop for which execution
24601debfc3dSmrg of its header implies execution of bb. CONTAINS_CALL is the bitmap of
24611debfc3dSmrg blocks that contain a nonpure call. */
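/* For instance, in

     while (1)
       {
	 a;
	 if (cond)
	   b;
	 c;
       }

   every execution of the loop header implies execution of a and c but
   not of b, so a and c (and the header itself) get this loop recorded
   while b does not.  */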
24621debfc3dSmrg
24631debfc3dSmrg static void
2464*8feb0f0bSmrg fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
24651debfc3dSmrg {
24661debfc3dSmrg basic_block bb = NULL, *bbs, last = NULL;
24671debfc3dSmrg unsigned i;
24681debfc3dSmrg edge e;
2469*8feb0f0bSmrg class loop *inn_loop = loop;
24701debfc3dSmrg
24711debfc3dSmrg if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
24721debfc3dSmrg {
24731debfc3dSmrg bbs = get_loop_body_in_dom_order (loop);
24741debfc3dSmrg
24751debfc3dSmrg for (i = 0; i < loop->num_nodes; i++)
24761debfc3dSmrg {
24771debfc3dSmrg edge_iterator ei;
24781debfc3dSmrg bb = bbs[i];
24791debfc3dSmrg
24801debfc3dSmrg if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
24811debfc3dSmrg last = bb;
24821debfc3dSmrg
24831debfc3dSmrg if (bitmap_bit_p (contains_call, bb->index))
24841debfc3dSmrg break;
24851debfc3dSmrg
24861debfc3dSmrg FOR_EACH_EDGE (e, ei, bb->succs)
24871debfc3dSmrg {
24881debfc3dSmrg /* If there is an exit from this BB. */
24891debfc3dSmrg if (!flow_bb_inside_loop_p (loop, e->dest))
24901debfc3dSmrg break;
24911debfc3dSmrg /* Or we enter a possibly non-finite loop. */
24921debfc3dSmrg if (flow_loop_nested_p (bb->loop_father,
24931debfc3dSmrg e->dest->loop_father)
24941debfc3dSmrg && ! finite_loop_p (e->dest->loop_father))
24951debfc3dSmrg break;
24961debfc3dSmrg }
24971debfc3dSmrg if (e)
24981debfc3dSmrg break;
24991debfc3dSmrg
25001debfc3dSmrg /* A loop might be infinite (TODO use simple loop analysis
25011debfc3dSmrg to disprove this if possible). */
25021debfc3dSmrg if (bb->flags & BB_IRREDUCIBLE_LOOP)
25031debfc3dSmrg break;
25041debfc3dSmrg
25051debfc3dSmrg if (!flow_bb_inside_loop_p (inn_loop, bb))
25061debfc3dSmrg break;
25071debfc3dSmrg
25081debfc3dSmrg if (bb->loop_father->header == bb)
25091debfc3dSmrg {
25101debfc3dSmrg if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
25111debfc3dSmrg break;
25121debfc3dSmrg
25131debfc3dSmrg /* In a loop that is always entered we may proceed anyway.
25141debfc3dSmrg But record that we entered it and stop once we leave it. */
25151debfc3dSmrg inn_loop = bb->loop_father;
25161debfc3dSmrg }
25171debfc3dSmrg }
25181debfc3dSmrg
25191debfc3dSmrg while (1)
25201debfc3dSmrg {
25211debfc3dSmrg SET_ALWAYS_EXECUTED_IN (last, loop);
25221debfc3dSmrg if (last == loop->header)
25231debfc3dSmrg break;
25241debfc3dSmrg last = get_immediate_dominator (CDI_DOMINATORS, last);
25251debfc3dSmrg }
25261debfc3dSmrg
25271debfc3dSmrg free (bbs);
25281debfc3dSmrg }
25291debfc3dSmrg
25301debfc3dSmrg for (loop = loop->inner; loop; loop = loop->next)
25311debfc3dSmrg fill_always_executed_in_1 (loop, contains_call);
25321debfc3dSmrg }
25331debfc3dSmrg
25341debfc3dSmrg /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
25351debfc3dSmrg    for each such basic block bb records the outermost loop for which execution
25361debfc3dSmrg of its header implies execution of bb. */
25371debfc3dSmrg
25381debfc3dSmrg static void
25391debfc3dSmrg fill_always_executed_in (void)
25401debfc3dSmrg {
25411debfc3dSmrg basic_block bb;
2542*8feb0f0bSmrg class loop *loop;
25431debfc3dSmrg
25441debfc3dSmrg auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
25451debfc3dSmrg bitmap_clear (contains_call);
25461debfc3dSmrg FOR_EACH_BB_FN (bb, cfun)
25471debfc3dSmrg {
25481debfc3dSmrg gimple_stmt_iterator gsi;
25491debfc3dSmrg for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
25501debfc3dSmrg {
25511debfc3dSmrg if (nonpure_call_p (gsi_stmt (gsi)))
25521debfc3dSmrg break;
25531debfc3dSmrg }
25541debfc3dSmrg
25551debfc3dSmrg if (!gsi_end_p (gsi))
25561debfc3dSmrg bitmap_set_bit (contains_call, bb->index);
25571debfc3dSmrg }
25581debfc3dSmrg
25591debfc3dSmrg for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
25601debfc3dSmrg fill_always_executed_in_1 (loop, contains_call);
25611debfc3dSmrg }
25621debfc3dSmrg
25631debfc3dSmrg
25641debfc3dSmrg /* Compute the global information needed by the loop invariant motion pass. */
25651debfc3dSmrg
25661debfc3dSmrg static void
25671debfc3dSmrg tree_ssa_lim_initialize (void)
25681debfc3dSmrg {
2569*8feb0f0bSmrg class loop *loop;
25701debfc3dSmrg unsigned i;
25711debfc3dSmrg
25721debfc3dSmrg bitmap_obstack_initialize (&lim_bitmap_obstack);
25731debfc3dSmrg gcc_obstack_init (&mem_ref_obstack);
25741debfc3dSmrg lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
25751debfc3dSmrg
25761debfc3dSmrg if (flag_tm)
25771debfc3dSmrg compute_transaction_bits ();
25781debfc3dSmrg
25791debfc3dSmrg alloc_aux_for_edges (0);
25801debfc3dSmrg
25811debfc3dSmrg memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
25821debfc3dSmrg memory_accesses.refs_list.create (100);
25831debfc3dSmrg /* Allocate a special, unanalyzable mem-ref with ID zero. */
25841debfc3dSmrg memory_accesses.refs_list.quick_push
2585c0a68be4Smrg (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
25861debfc3dSmrg
25871debfc3dSmrg memory_accesses.refs_in_loop.create (number_of_loops (cfun));
25881debfc3dSmrg memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun));
25891debfc3dSmrg memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
25901debfc3dSmrg memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
25911debfc3dSmrg memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
25921debfc3dSmrg memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));
25931debfc3dSmrg
25941debfc3dSmrg for (i = 0; i < number_of_loops (cfun); i++)
25951debfc3dSmrg {
25961debfc3dSmrg bitmap_initialize (&memory_accesses.refs_in_loop[i],
25971debfc3dSmrg &lim_bitmap_obstack);
25981debfc3dSmrg bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
25991debfc3dSmrg &lim_bitmap_obstack);
26001debfc3dSmrg bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
26011debfc3dSmrg &lim_bitmap_obstack);
26021debfc3dSmrg }
26031debfc3dSmrg
26041debfc3dSmrg memory_accesses.ttae_cache = NULL;
26051debfc3dSmrg
26061debfc3dSmrg /* Initialize bb_loop_postorder with a mapping from loop->num to
26071debfc3dSmrg its postorder index. */
26081debfc3dSmrg i = 0;
26091debfc3dSmrg bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
26101debfc3dSmrg FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
26111debfc3dSmrg bb_loop_postorder[loop->num] = i++;
26121debfc3dSmrg }
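
/* With LI_FROM_INNERMOST, inner loops are visited before the loops
   containing them, so in a nest like (loop numbers hypothetical)

     loop 1		-> bb_loop_postorder[1] == 2
       loop 2		-> bb_loop_postorder[2] == 1
	 loop 3		-> bb_loop_postorder[3] == 0

   an inner loop always receives a smaller index than any loop that
   encloses it; comparison routines elsewhere in this file rely on that
   ordering.  */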

/* Cleans up after the invariant motion pass.  */

static void
tree_ssa_lim_finalize (void)
{
  basic_block bb;
  unsigned i;
  im_mem_ref *ref;

  free_aux_for_edges ();

  FOR_EACH_BB_FN (bb, cfun)
    SET_ALWAYS_EXECUTED_IN (bb, NULL);

  bitmap_obstack_release (&lim_bitmap_obstack);
  delete lim_aux_data_map;

  delete memory_accesses.refs;
  memory_accesses.refs = NULL;

  FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
    memref_free (ref);
  memory_accesses.refs_list.release ();
  obstack_free (&mem_ref_obstack, NULL);

  memory_accesses.refs_in_loop.release ();
  memory_accesses.refs_stored_in_loop.release ();
  memory_accesses.all_refs_stored_in_loop.release ();

  if (memory_accesses.ttae_cache)
    free_affine_expand_cache (&memory_accesses.ttae_cache);

  free (bb_loop_postorder);
}

/* Moves invariants out of loops.  Only "expensive" invariants are moved,
   i.e. those that are likely to be a win regardless of register
   pressure.  */

static unsigned int
tree_ssa_lim (void)
{
  unsigned int todo;

  tree_ssa_lim_initialize ();

  /* Gather information about memory accesses in the loops.  */
  analyze_memory_references ();

  /* Fill the ALWAYS_EXECUTED_IN information for basic blocks.  */
  fill_always_executed_in ();

  /* For each statement, determine the outermost loop in which it is
     invariant and the cost of computing the invariant.  */
  invariantness_dom_walker (CDI_DOMINATORS)
    .walk (cfun->cfg->x_entry_block_ptr);

  /* Execute store motion.  Force the necessary invariants to be moved
     out of the loops as well.  */
  store_motion ();

  /* Move the expressions that are expensive enough.  */
  todo = move_computations ();

  tree_ssa_lim_finalize ();

  return todo;
}
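
/* As a sketch of the overall effect (hypothetical input, not from the
   sources):

     for (i = 0; i < n; i++)
       a[i] = x / y;	<- x / y is loop-invariant and expensive

   becomes, roughly,

     tmp = x / y;	<- hoisted to the loop preheader
     for (i = 0; i < n; i++)
       a[i] = tmp;

   provided the division is safe to execute on the preheader path.
   Cheap invariants may be left in place when hoisting them would only
   increase register pressure.  */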

/* Loop invariant motion pass.  */

namespace {

const pass_data pass_data_lim =
{
  GIMPLE_PASS, /* type */
  "lim", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LIM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lim : public gimple_opt_pass
{
public:
  pass_lim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lim, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lim (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_loop_im != 0; }
  virtual unsigned int execute (function *);

}; // class pass_lim

unsigned int
pass_lim::execute (function *fun)
{
  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);

  if (number_of_loops (fun) <= 1)
    return 0;
  unsigned int todo = tree_ssa_lim ();

  if (!in_loop_pipeline)
    loop_optimizer_finalize ();
  else
    scev_reset ();
  return todo;
}

} // anon namespace

gimple_opt_pass *
make_pass_lim (gcc::context *ctxt)
{
  return new pass_lim (ctxt);
}
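
/* Note (an aside, not from the sources): the pass can be disabled with
   -fno-tree-loop-im, and its decisions can be inspected with
   -fdump-tree-lim-details, which produces one dump file per pass
   instance (lim1, lim2, ...).  */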