/* Loop unrolling.
   Copyright (C) 2002-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
191debfc3dSmrg
201debfc3dSmrg #include "config.h"
211debfc3dSmrg #include "system.h"
221debfc3dSmrg #include "coretypes.h"
231debfc3dSmrg #include "backend.h"
241debfc3dSmrg #include "target.h"
251debfc3dSmrg #include "rtl.h"
261debfc3dSmrg #include "tree.h"
271debfc3dSmrg #include "cfghooks.h"
281debfc3dSmrg #include "memmodel.h"
291debfc3dSmrg #include "optabs.h"
301debfc3dSmrg #include "emit-rtl.h"
311debfc3dSmrg #include "recog.h"
321debfc3dSmrg #include "profile.h"
331debfc3dSmrg #include "cfgrtl.h"
341debfc3dSmrg #include "cfgloop.h"
351debfc3dSmrg #include "dojump.h"
361debfc3dSmrg #include "expr.h"
371debfc3dSmrg #include "dumpfile.h"
381debfc3dSmrg
/* This pass performs loop unrolling.  We only perform this
   optimization on innermost loops (with single exception) because
   the impact on performance is greatest here, and we want to avoid
   unnecessary code size growth.  The gain is caused by greater sequentiality
   of code, better code to optimize for further passes and in some cases
   by fewer testings of exit conditions.  The main problem is code growth,
   that impacts performance negatively due to effect of caches.

   What we do:

   -- unrolling of loops that roll constant times; this is almost always
      win, as we get rid of exit condition tests.
   -- unrolling of loops that roll number of times that we can compute
      in runtime; we also get rid of exit condition tests here, but there
      is the extra expense for calculating the number of iterations
   -- simple unrolling of remaining loops; this is performed only if we
      are asked to, as the gain is questionable in this case and often
      it may even slow down the code
   For more detailed descriptions of each of those, see comments at
   appropriate function below.

   There is a lot of parameters (defined and described in params.def) that
   control how much we unroll.

   ??? A great problem is that we don't have a good way how to determine
   how many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance in order of several %.
   */
671debfc3dSmrg
681debfc3dSmrg /* Information about induction variables to split. */
691debfc3dSmrg
701debfc3dSmrg struct iv_to_split
711debfc3dSmrg {
721debfc3dSmrg rtx_insn *insn; /* The insn in that the induction variable occurs. */
731debfc3dSmrg rtx orig_var; /* The variable (register) for the IV before split. */
741debfc3dSmrg rtx base_var; /* The variable on that the values in the further
751debfc3dSmrg iterations are based. */
761debfc3dSmrg rtx step; /* Step of the induction variable. */
771debfc3dSmrg struct iv_to_split *next; /* Next entry in walking order. */
781debfc3dSmrg };
791debfc3dSmrg
801debfc3dSmrg /* Information about accumulators to expand. */
811debfc3dSmrg
821debfc3dSmrg struct var_to_expand
831debfc3dSmrg {
841debfc3dSmrg rtx_insn *insn; /* The insn in that the variable expansion occurs. */
851debfc3dSmrg rtx reg; /* The accumulator which is expanded. */
861debfc3dSmrg vec<rtx> var_expansions; /* The copies of the accumulator which is expanded. */
871debfc3dSmrg struct var_to_expand *next; /* Next entry in walking order. */
881debfc3dSmrg enum rtx_code op; /* The type of the accumulation - addition, subtraction
891debfc3dSmrg or multiplication. */
901debfc3dSmrg int expansion_count; /* Count the number of expansions generated so far. */
911debfc3dSmrg int reuse_expansion; /* The expansion we intend to reuse to expand
921debfc3dSmrg the accumulator. If REUSE_EXPANSION is 0 reuse
931debfc3dSmrg the original accumulator. Else use
941debfc3dSmrg var_expansions[REUSE_EXPANSION - 1]. */
951debfc3dSmrg };
961debfc3dSmrg
971debfc3dSmrg /* Hashtable helper for iv_to_split. */
981debfc3dSmrg
991debfc3dSmrg struct iv_split_hasher : free_ptr_hash <iv_to_split>
1001debfc3dSmrg {
1011debfc3dSmrg static inline hashval_t hash (const iv_to_split *);
1021debfc3dSmrg static inline bool equal (const iv_to_split *, const iv_to_split *);
1031debfc3dSmrg };
1041debfc3dSmrg
1051debfc3dSmrg
1061debfc3dSmrg /* A hash function for information about insns to split. */
1071debfc3dSmrg
1081debfc3dSmrg inline hashval_t
hash(const iv_to_split * ivts)1091debfc3dSmrg iv_split_hasher::hash (const iv_to_split *ivts)
1101debfc3dSmrg {
1111debfc3dSmrg return (hashval_t) INSN_UID (ivts->insn);
1121debfc3dSmrg }
1131debfc3dSmrg
1141debfc3dSmrg /* An equality functions for information about insns to split. */
1151debfc3dSmrg
1161debfc3dSmrg inline bool
equal(const iv_to_split * i1,const iv_to_split * i2)1171debfc3dSmrg iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
1181debfc3dSmrg {
1191debfc3dSmrg return i1->insn == i2->insn;
1201debfc3dSmrg }
1211debfc3dSmrg
1221debfc3dSmrg /* Hashtable helper for iv_to_split. */
1231debfc3dSmrg
1241debfc3dSmrg struct var_expand_hasher : free_ptr_hash <var_to_expand>
1251debfc3dSmrg {
1261debfc3dSmrg static inline hashval_t hash (const var_to_expand *);
1271debfc3dSmrg static inline bool equal (const var_to_expand *, const var_to_expand *);
1281debfc3dSmrg };
1291debfc3dSmrg
1301debfc3dSmrg /* Return a hash for VES. */
1311debfc3dSmrg
1321debfc3dSmrg inline hashval_t
hash(const var_to_expand * ves)1331debfc3dSmrg var_expand_hasher::hash (const var_to_expand *ves)
1341debfc3dSmrg {
1351debfc3dSmrg return (hashval_t) INSN_UID (ves->insn);
1361debfc3dSmrg }
1371debfc3dSmrg
1381debfc3dSmrg /* Return true if I1 and I2 refer to the same instruction. */
1391debfc3dSmrg
1401debfc3dSmrg inline bool
equal(const var_to_expand * i1,const var_to_expand * i2)1411debfc3dSmrg var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
1421debfc3dSmrg {
1431debfc3dSmrg return i1->insn == i2->insn;
1441debfc3dSmrg }
1451debfc3dSmrg
1461debfc3dSmrg /* Information about optimization applied in
1471debfc3dSmrg the unrolled loop. */
1481debfc3dSmrg
1491debfc3dSmrg struct opt_info
1501debfc3dSmrg {
1511debfc3dSmrg hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
1521debfc3dSmrg split. */
1531debfc3dSmrg struct iv_to_split *iv_to_split_head; /* The first iv to split. */
1541debfc3dSmrg struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */
1551debfc3dSmrg hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
1561debfc3dSmrg insns with accumulators to expand. */
1571debfc3dSmrg struct var_to_expand *var_to_expand_head; /* The first var to expand. */
1581debfc3dSmrg struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */
1591debfc3dSmrg unsigned first_new_block; /* The first basic block that was
1601debfc3dSmrg duplicated. */
1611debfc3dSmrg basic_block loop_exit; /* The loop exit basic block. */
1621debfc3dSmrg basic_block loop_preheader; /* The loop preheader basic block. */
1631debfc3dSmrg };
1641debfc3dSmrg
165*8feb0f0bSmrg static void decide_unroll_stupid (class loop *, int);
166*8feb0f0bSmrg static void decide_unroll_constant_iterations (class loop *, int);
167*8feb0f0bSmrg static void decide_unroll_runtime_iterations (class loop *, int);
168*8feb0f0bSmrg static void unroll_loop_stupid (class loop *);
1691debfc3dSmrg static void decide_unrolling (int);
170*8feb0f0bSmrg static void unroll_loop_constant_iterations (class loop *);
171*8feb0f0bSmrg static void unroll_loop_runtime_iterations (class loop *);
172*8feb0f0bSmrg static struct opt_info *analyze_insns_in_loop (class loop *);
1731debfc3dSmrg static void opt_info_start_duplication (struct opt_info *);
1741debfc3dSmrg static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
1751debfc3dSmrg static void free_opt_info (struct opt_info *);
176*8feb0f0bSmrg static struct var_to_expand *analyze_insn_to_expand_var (class loop*, rtx_insn *);
177*8feb0f0bSmrg static bool referenced_in_one_insn_in_loop_p (class loop *, rtx, int *);
1781debfc3dSmrg static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
1791debfc3dSmrg static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
1801debfc3dSmrg static void insert_var_expansion_initialization (struct var_to_expand *,
1811debfc3dSmrg basic_block);
1821debfc3dSmrg static void combine_var_copies_in_loop_exit (struct var_to_expand *,
1831debfc3dSmrg basic_block);
1841debfc3dSmrg static rtx get_expansion (struct var_to_expand *);
1851debfc3dSmrg
1861debfc3dSmrg /* Emit a message summarizing the unroll that will be
1871debfc3dSmrg performed for LOOP, along with the loop's location LOCUS, if
1881debfc3dSmrg appropriate given the dump or -fopt-info settings. */
1891debfc3dSmrg
1901debfc3dSmrg static void
report_unroll(class loop * loop,dump_location_t locus)191*8feb0f0bSmrg report_unroll (class loop *loop, dump_location_t locus)
1921debfc3dSmrg {
193a2dc1f3fSmrg dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;
1941debfc3dSmrg
1951debfc3dSmrg if (loop->lpt_decision.decision == LPT_NONE)
1961debfc3dSmrg return;
1971debfc3dSmrg
1981debfc3dSmrg if (!dump_enabled_p ())
1991debfc3dSmrg return;
2001debfc3dSmrg
201c0a68be4Smrg dump_metadata_t metadata (report_flags, locus.get_impl_location ());
202c0a68be4Smrg dump_printf_loc (metadata, locus.get_user_location (),
2031debfc3dSmrg "loop unrolled %d times",
2041debfc3dSmrg loop->lpt_decision.times);
205a2dc1f3fSmrg if (profile_info && loop->header->count.initialized_p ())
206c0a68be4Smrg dump_printf (metadata,
2071debfc3dSmrg " (header execution count %d)",
208a2dc1f3fSmrg (int)loop->header->count.to_gcov_type ());
2091debfc3dSmrg
210c0a68be4Smrg dump_printf (metadata, "\n");
2111debfc3dSmrg }
2121debfc3dSmrg
2131debfc3dSmrg /* Decide whether unroll loops and how much. */
2141debfc3dSmrg static void
decide_unrolling(int flags)2151debfc3dSmrg decide_unrolling (int flags)
2161debfc3dSmrg {
217*8feb0f0bSmrg class loop *loop;
2181debfc3dSmrg
2191debfc3dSmrg /* Scan the loops, inner ones first. */
2201debfc3dSmrg FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2211debfc3dSmrg {
2221debfc3dSmrg loop->lpt_decision.decision = LPT_NONE;
223c0a68be4Smrg dump_user_location_t locus = get_loop_location (loop);
2241debfc3dSmrg
2251debfc3dSmrg if (dump_enabled_p ())
226a2dc1f3fSmrg dump_printf_loc (MSG_NOTE, locus,
227a2dc1f3fSmrg "considering unrolling loop %d at BB %d\n",
2281debfc3dSmrg loop->num, loop->header->index);
2291debfc3dSmrg
230a2dc1f3fSmrg if (loop->unroll == 1)
231a2dc1f3fSmrg {
232a2dc1f3fSmrg if (dump_file)
233a2dc1f3fSmrg fprintf (dump_file,
234a2dc1f3fSmrg ";; Not unrolling loop, user didn't want it unrolled\n");
235a2dc1f3fSmrg continue;
236a2dc1f3fSmrg }
237a2dc1f3fSmrg
2381debfc3dSmrg /* Do not peel cold areas. */
2391debfc3dSmrg if (optimize_loop_for_size_p (loop))
2401debfc3dSmrg {
2411debfc3dSmrg if (dump_file)
2421debfc3dSmrg fprintf (dump_file, ";; Not considering loop, cold area\n");
2431debfc3dSmrg continue;
2441debfc3dSmrg }
2451debfc3dSmrg
2461debfc3dSmrg /* Can the loop be manipulated? */
2471debfc3dSmrg if (!can_duplicate_loop_p (loop))
2481debfc3dSmrg {
2491debfc3dSmrg if (dump_file)
2501debfc3dSmrg fprintf (dump_file,
2511debfc3dSmrg ";; Not considering loop, cannot duplicate\n");
2521debfc3dSmrg continue;
2531debfc3dSmrg }
2541debfc3dSmrg
2551debfc3dSmrg /* Skip non-innermost loops. */
2561debfc3dSmrg if (loop->inner)
2571debfc3dSmrg {
2581debfc3dSmrg if (dump_file)
2591debfc3dSmrg fprintf (dump_file, ";; Not considering loop, is not innermost\n");
2601debfc3dSmrg continue;
2611debfc3dSmrg }
2621debfc3dSmrg
2631debfc3dSmrg loop->ninsns = num_loop_insns (loop);
2641debfc3dSmrg loop->av_ninsns = average_num_loop_insns (loop);
2651debfc3dSmrg
266a2dc1f3fSmrg /* Try transformations one by one in decreasing order of priority. */
2671debfc3dSmrg decide_unroll_constant_iterations (loop, flags);
2681debfc3dSmrg if (loop->lpt_decision.decision == LPT_NONE)
2691debfc3dSmrg decide_unroll_runtime_iterations (loop, flags);
2701debfc3dSmrg if (loop->lpt_decision.decision == LPT_NONE)
2711debfc3dSmrg decide_unroll_stupid (loop, flags);
2721debfc3dSmrg
2731debfc3dSmrg report_unroll (loop, locus);
2741debfc3dSmrg }
2751debfc3dSmrg }
2761debfc3dSmrg
2771debfc3dSmrg /* Unroll LOOPS. */
2781debfc3dSmrg void
unroll_loops(int flags)2791debfc3dSmrg unroll_loops (int flags)
2801debfc3dSmrg {
281*8feb0f0bSmrg class loop *loop;
2821debfc3dSmrg bool changed = false;
2831debfc3dSmrg
2841debfc3dSmrg /* Now decide rest of unrolling. */
2851debfc3dSmrg decide_unrolling (flags);
2861debfc3dSmrg
2871debfc3dSmrg /* Scan the loops, inner ones first. */
2881debfc3dSmrg FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2891debfc3dSmrg {
2901debfc3dSmrg /* And perform the appropriate transformations. */
2911debfc3dSmrg switch (loop->lpt_decision.decision)
2921debfc3dSmrg {
2931debfc3dSmrg case LPT_UNROLL_CONSTANT:
2941debfc3dSmrg unroll_loop_constant_iterations (loop);
2951debfc3dSmrg changed = true;
2961debfc3dSmrg break;
2971debfc3dSmrg case LPT_UNROLL_RUNTIME:
2981debfc3dSmrg unroll_loop_runtime_iterations (loop);
2991debfc3dSmrg changed = true;
3001debfc3dSmrg break;
3011debfc3dSmrg case LPT_UNROLL_STUPID:
3021debfc3dSmrg unroll_loop_stupid (loop);
3031debfc3dSmrg changed = true;
3041debfc3dSmrg break;
3051debfc3dSmrg case LPT_NONE:
3061debfc3dSmrg break;
3071debfc3dSmrg default:
3081debfc3dSmrg gcc_unreachable ();
3091debfc3dSmrg }
3101debfc3dSmrg }
3111debfc3dSmrg
3121debfc3dSmrg if (changed)
3131debfc3dSmrg {
3141debfc3dSmrg calculate_dominance_info (CDI_DOMINATORS);
3151debfc3dSmrg fix_loop_structure (NULL);
3161debfc3dSmrg }
3171debfc3dSmrg
3181debfc3dSmrg iv_analysis_done ();
3191debfc3dSmrg }
3201debfc3dSmrg
3211debfc3dSmrg /* Check whether exit of the LOOP is at the end of loop body. */
3221debfc3dSmrg
3231debfc3dSmrg static bool
loop_exit_at_end_p(class loop * loop)324*8feb0f0bSmrg loop_exit_at_end_p (class loop *loop)
3251debfc3dSmrg {
326*8feb0f0bSmrg class niter_desc *desc = get_simple_loop_desc (loop);
3271debfc3dSmrg rtx_insn *insn;
3281debfc3dSmrg
3291debfc3dSmrg /* We should never have conditional in latch block. */
3301debfc3dSmrg gcc_assert (desc->in_edge->dest != loop->header);
3311debfc3dSmrg
3321debfc3dSmrg if (desc->in_edge->dest != loop->latch)
3331debfc3dSmrg return false;
3341debfc3dSmrg
3351debfc3dSmrg /* Check that the latch is empty. */
3361debfc3dSmrg FOR_BB_INSNS (loop->latch, insn)
3371debfc3dSmrg {
3381debfc3dSmrg if (INSN_P (insn) && active_insn_p (insn))
3391debfc3dSmrg return false;
3401debfc3dSmrg }
3411debfc3dSmrg
3421debfc3dSmrg return true;
3431debfc3dSmrg }
3441debfc3dSmrg
3451debfc3dSmrg /* Decide whether to unroll LOOP iterating constant number of times
3461debfc3dSmrg and how much. */
3471debfc3dSmrg
3481debfc3dSmrg static void
decide_unroll_constant_iterations(class loop * loop,int flags)349*8feb0f0bSmrg decide_unroll_constant_iterations (class loop *loop, int flags)
3501debfc3dSmrg {
3511debfc3dSmrg unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
352*8feb0f0bSmrg class niter_desc *desc;
3531debfc3dSmrg widest_int iterations;
3541debfc3dSmrg
355a2dc1f3fSmrg /* If we were not asked to unroll this loop, just return back silently. */
356a2dc1f3fSmrg if (!(flags & UAP_UNROLL) && !loop->unroll)
3571debfc3dSmrg return;
3581debfc3dSmrg
359a2dc1f3fSmrg if (dump_enabled_p ())
360a2dc1f3fSmrg dump_printf (MSG_NOTE,
361a2dc1f3fSmrg "considering unrolling loop with constant "
3621debfc3dSmrg "number of iterations\n");
3631debfc3dSmrg
3641debfc3dSmrg /* nunroll = total number of copies of the original loop body in
365a2dc1f3fSmrg unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
366*8feb0f0bSmrg nunroll = param_max_unrolled_insns / loop->ninsns;
3671debfc3dSmrg nunroll_by_av
368*8feb0f0bSmrg = param_max_average_unrolled_insns / loop->av_ninsns;
3691debfc3dSmrg if (nunroll > nunroll_by_av)
3701debfc3dSmrg nunroll = nunroll_by_av;
371*8feb0f0bSmrg if (nunroll > (unsigned) param_max_unroll_times)
372*8feb0f0bSmrg nunroll = param_max_unroll_times;
3731debfc3dSmrg
3741debfc3dSmrg if (targetm.loop_unroll_adjust)
3751debfc3dSmrg nunroll = targetm.loop_unroll_adjust (nunroll, loop);
3761debfc3dSmrg
3771debfc3dSmrg /* Skip big loops. */
3781debfc3dSmrg if (nunroll <= 1)
3791debfc3dSmrg {
3801debfc3dSmrg if (dump_file)
3811debfc3dSmrg fprintf (dump_file, ";; Not considering loop, is too big\n");
3821debfc3dSmrg return;
3831debfc3dSmrg }
3841debfc3dSmrg
3851debfc3dSmrg /* Check for simple loops. */
3861debfc3dSmrg desc = get_simple_loop_desc (loop);
3871debfc3dSmrg
3881debfc3dSmrg /* Check number of iterations. */
3891debfc3dSmrg if (!desc->simple_p || !desc->const_iter || desc->assumptions)
3901debfc3dSmrg {
3911debfc3dSmrg if (dump_file)
3921debfc3dSmrg fprintf (dump_file,
3931debfc3dSmrg ";; Unable to prove that the loop iterates constant times\n");
3941debfc3dSmrg return;
3951debfc3dSmrg }
3961debfc3dSmrg
397a2dc1f3fSmrg /* Check for an explicit unrolling factor. */
398a2dc1f3fSmrg if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
399a2dc1f3fSmrg {
400a2dc1f3fSmrg /* However we cannot unroll completely at the RTL level a loop with
401a2dc1f3fSmrg constant number of iterations; it should have been peeled instead. */
402a2dc1f3fSmrg if (desc->niter == 0 || (unsigned) loop->unroll > desc->niter - 1)
403a2dc1f3fSmrg {
404a2dc1f3fSmrg if (dump_file)
405a2dc1f3fSmrg fprintf (dump_file, ";; Loop should have been peeled\n");
406a2dc1f3fSmrg }
407a2dc1f3fSmrg else
408a2dc1f3fSmrg {
409a2dc1f3fSmrg loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
410a2dc1f3fSmrg loop->lpt_decision.times = loop->unroll - 1;
411a2dc1f3fSmrg }
412a2dc1f3fSmrg return;
413a2dc1f3fSmrg }
414a2dc1f3fSmrg
4151debfc3dSmrg /* Check whether the loop rolls enough to consider.
4161debfc3dSmrg Consult also loop bounds and profile; in the case the loop has more
4171debfc3dSmrg than one exit it may well loop less than determined maximal number
4181debfc3dSmrg of iterations. */
4191debfc3dSmrg if (desc->niter < 2 * nunroll
4201debfc3dSmrg || ((get_estimated_loop_iterations (loop, &iterations)
4211debfc3dSmrg || get_likely_max_loop_iterations (loop, &iterations))
4221debfc3dSmrg && wi::ltu_p (iterations, 2 * nunroll)))
4231debfc3dSmrg {
4241debfc3dSmrg if (dump_file)
4251debfc3dSmrg fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
4261debfc3dSmrg return;
4271debfc3dSmrg }
4281debfc3dSmrg
4291debfc3dSmrg /* Success; now compute number of iterations to unroll. We alter
4301debfc3dSmrg nunroll so that as few as possible copies of loop body are
4311debfc3dSmrg necessary, while still not decreasing the number of unrollings
4321debfc3dSmrg too much (at most by 1). */
4331debfc3dSmrg best_copies = 2 * nunroll + 10;
4341debfc3dSmrg
4351debfc3dSmrg i = 2 * nunroll + 2;
436a2dc1f3fSmrg if (i > desc->niter - 2)
4371debfc3dSmrg i = desc->niter - 2;
4381debfc3dSmrg
4391debfc3dSmrg for (; i >= nunroll - 1; i--)
4401debfc3dSmrg {
4411debfc3dSmrg unsigned exit_mod = desc->niter % (i + 1);
4421debfc3dSmrg
4431debfc3dSmrg if (!loop_exit_at_end_p (loop))
4441debfc3dSmrg n_copies = exit_mod + i + 1;
4451debfc3dSmrg else if (exit_mod != (unsigned) i
4461debfc3dSmrg || desc->noloop_assumptions != NULL_RTX)
4471debfc3dSmrg n_copies = exit_mod + i + 2;
4481debfc3dSmrg else
4491debfc3dSmrg n_copies = i + 1;
4501debfc3dSmrg
4511debfc3dSmrg if (n_copies < best_copies)
4521debfc3dSmrg {
4531debfc3dSmrg best_copies = n_copies;
4541debfc3dSmrg best_unroll = i;
4551debfc3dSmrg }
4561debfc3dSmrg }
4571debfc3dSmrg
4581debfc3dSmrg loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
4591debfc3dSmrg loop->lpt_decision.times = best_unroll;
4601debfc3dSmrg }
4611debfc3dSmrg
4621debfc3dSmrg /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
4631debfc3dSmrg The transformation does this:
4641debfc3dSmrg
4651debfc3dSmrg for (i = 0; i < 102; i++)
4661debfc3dSmrg body;
4671debfc3dSmrg
4681debfc3dSmrg ==> (LOOP->LPT_DECISION.TIMES == 3)
4691debfc3dSmrg
4701debfc3dSmrg i = 0;
4711debfc3dSmrg body; i++;
4721debfc3dSmrg body; i++;
4731debfc3dSmrg while (i < 102)
4741debfc3dSmrg {
4751debfc3dSmrg body; i++;
4761debfc3dSmrg body; i++;
4771debfc3dSmrg body; i++;
4781debfc3dSmrg body; i++;
4791debfc3dSmrg }
4801debfc3dSmrg */
4811debfc3dSmrg static void
unroll_loop_constant_iterations(class loop * loop)482*8feb0f0bSmrg unroll_loop_constant_iterations (class loop *loop)
4831debfc3dSmrg {
4841debfc3dSmrg unsigned HOST_WIDE_INT niter;
4851debfc3dSmrg unsigned exit_mod;
4861debfc3dSmrg unsigned i;
4871debfc3dSmrg edge e;
4881debfc3dSmrg unsigned max_unroll = loop->lpt_decision.times;
489*8feb0f0bSmrg class niter_desc *desc = get_simple_loop_desc (loop);
4901debfc3dSmrg bool exit_at_end = loop_exit_at_end_p (loop);
4911debfc3dSmrg struct opt_info *opt_info = NULL;
4921debfc3dSmrg bool ok;
4931debfc3dSmrg
4941debfc3dSmrg niter = desc->niter;
4951debfc3dSmrg
4961debfc3dSmrg /* Should not get here (such loop should be peeled instead). */
4971debfc3dSmrg gcc_assert (niter > max_unroll + 1);
4981debfc3dSmrg
4991debfc3dSmrg exit_mod = niter % (max_unroll + 1);
5001debfc3dSmrg
5011debfc3dSmrg auto_sbitmap wont_exit (max_unroll + 2);
5021debfc3dSmrg bitmap_ones (wont_exit);
5031debfc3dSmrg
5041debfc3dSmrg auto_vec<edge> remove_edges;
5051debfc3dSmrg if (flag_split_ivs_in_unroller
5061debfc3dSmrg || flag_variable_expansion_in_unroller)
5071debfc3dSmrg opt_info = analyze_insns_in_loop (loop);
5081debfc3dSmrg
5091debfc3dSmrg if (!exit_at_end)
5101debfc3dSmrg {
5111debfc3dSmrg /* The exit is not at the end of the loop; leave exit test
5121debfc3dSmrg in the first copy, so that the loops that start with test
5131debfc3dSmrg of exit condition have continuous body after unrolling. */
5141debfc3dSmrg
5151debfc3dSmrg if (dump_file)
5161debfc3dSmrg fprintf (dump_file, ";; Condition at beginning of loop.\n");
5171debfc3dSmrg
5181debfc3dSmrg /* Peel exit_mod iterations. */
5191debfc3dSmrg bitmap_clear_bit (wont_exit, 0);
5201debfc3dSmrg if (desc->noloop_assumptions)
5211debfc3dSmrg bitmap_clear_bit (wont_exit, 1);
5221debfc3dSmrg
5231debfc3dSmrg if (exit_mod)
5241debfc3dSmrg {
5251debfc3dSmrg opt_info_start_duplication (opt_info);
5261debfc3dSmrg ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
5271debfc3dSmrg exit_mod,
5281debfc3dSmrg wont_exit, desc->out_edge,
5291debfc3dSmrg &remove_edges,
5301debfc3dSmrg DLTHE_FLAG_UPDATE_FREQ
5311debfc3dSmrg | (opt_info && exit_mod > 1
5321debfc3dSmrg ? DLTHE_RECORD_COPY_NUMBER
5331debfc3dSmrg : 0));
5341debfc3dSmrg gcc_assert (ok);
5351debfc3dSmrg
5361debfc3dSmrg if (opt_info && exit_mod > 1)
5371debfc3dSmrg apply_opt_in_copies (opt_info, exit_mod, false, false);
5381debfc3dSmrg
5391debfc3dSmrg desc->noloop_assumptions = NULL_RTX;
5401debfc3dSmrg desc->niter -= exit_mod;
5411debfc3dSmrg loop->nb_iterations_upper_bound -= exit_mod;
5421debfc3dSmrg if (loop->any_estimate
5431debfc3dSmrg && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
5441debfc3dSmrg loop->nb_iterations_estimate -= exit_mod;
5451debfc3dSmrg else
5461debfc3dSmrg loop->any_estimate = false;
5471debfc3dSmrg if (loop->any_likely_upper_bound
5481debfc3dSmrg && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound))
5491debfc3dSmrg loop->nb_iterations_likely_upper_bound -= exit_mod;
5501debfc3dSmrg else
5511debfc3dSmrg loop->any_likely_upper_bound = false;
5521debfc3dSmrg }
5531debfc3dSmrg
5541debfc3dSmrg bitmap_set_bit (wont_exit, 1);
5551debfc3dSmrg }
5561debfc3dSmrg else
5571debfc3dSmrg {
5581debfc3dSmrg /* Leave exit test in last copy, for the same reason as above if
5591debfc3dSmrg the loop tests the condition at the end of loop body. */
5601debfc3dSmrg
5611debfc3dSmrg if (dump_file)
5621debfc3dSmrg fprintf (dump_file, ";; Condition at end of loop.\n");
5631debfc3dSmrg
5641debfc3dSmrg /* We know that niter >= max_unroll + 2; so we do not need to care of
5651debfc3dSmrg case when we would exit before reaching the loop. So just peel
5661debfc3dSmrg exit_mod + 1 iterations. */
5671debfc3dSmrg if (exit_mod != max_unroll
5681debfc3dSmrg || desc->noloop_assumptions)
5691debfc3dSmrg {
5701debfc3dSmrg bitmap_clear_bit (wont_exit, 0);
5711debfc3dSmrg if (desc->noloop_assumptions)
5721debfc3dSmrg bitmap_clear_bit (wont_exit, 1);
5731debfc3dSmrg
5741debfc3dSmrg opt_info_start_duplication (opt_info);
5751debfc3dSmrg ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
5761debfc3dSmrg exit_mod + 1,
5771debfc3dSmrg wont_exit, desc->out_edge,
5781debfc3dSmrg &remove_edges,
5791debfc3dSmrg DLTHE_FLAG_UPDATE_FREQ
5801debfc3dSmrg | (opt_info && exit_mod > 0
5811debfc3dSmrg ? DLTHE_RECORD_COPY_NUMBER
5821debfc3dSmrg : 0));
5831debfc3dSmrg gcc_assert (ok);
5841debfc3dSmrg
5851debfc3dSmrg if (opt_info && exit_mod > 0)
5861debfc3dSmrg apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
5871debfc3dSmrg
5881debfc3dSmrg desc->niter -= exit_mod + 1;
5891debfc3dSmrg loop->nb_iterations_upper_bound -= exit_mod + 1;
5901debfc3dSmrg if (loop->any_estimate
5911debfc3dSmrg && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
5921debfc3dSmrg loop->nb_iterations_estimate -= exit_mod + 1;
5931debfc3dSmrg else
5941debfc3dSmrg loop->any_estimate = false;
5951debfc3dSmrg if (loop->any_likely_upper_bound
5961debfc3dSmrg && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound))
5971debfc3dSmrg loop->nb_iterations_likely_upper_bound -= exit_mod + 1;
5981debfc3dSmrg else
5991debfc3dSmrg loop->any_likely_upper_bound = false;
6001debfc3dSmrg desc->noloop_assumptions = NULL_RTX;
6011debfc3dSmrg
6021debfc3dSmrg bitmap_set_bit (wont_exit, 0);
6031debfc3dSmrg bitmap_set_bit (wont_exit, 1);
6041debfc3dSmrg }
6051debfc3dSmrg
6061debfc3dSmrg bitmap_clear_bit (wont_exit, max_unroll);
6071debfc3dSmrg }
6081debfc3dSmrg
6091debfc3dSmrg /* Now unroll the loop. */
6101debfc3dSmrg
6111debfc3dSmrg opt_info_start_duplication (opt_info);
6121debfc3dSmrg ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
6131debfc3dSmrg max_unroll,
6141debfc3dSmrg wont_exit, desc->out_edge,
6151debfc3dSmrg &remove_edges,
6161debfc3dSmrg DLTHE_FLAG_UPDATE_FREQ
6171debfc3dSmrg | (opt_info
6181debfc3dSmrg ? DLTHE_RECORD_COPY_NUMBER
6191debfc3dSmrg : 0));
6201debfc3dSmrg gcc_assert (ok);
6211debfc3dSmrg
6221debfc3dSmrg if (opt_info)
6231debfc3dSmrg {
6241debfc3dSmrg apply_opt_in_copies (opt_info, max_unroll, true, true);
6251debfc3dSmrg free_opt_info (opt_info);
6261debfc3dSmrg }
6271debfc3dSmrg
6281debfc3dSmrg if (exit_at_end)
6291debfc3dSmrg {
6301debfc3dSmrg basic_block exit_block = get_bb_copy (desc->in_edge->src);
6311debfc3dSmrg /* Find a new in and out edge; they are in the last copy we have made. */
6321debfc3dSmrg
6331debfc3dSmrg if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
6341debfc3dSmrg {
6351debfc3dSmrg desc->out_edge = EDGE_SUCC (exit_block, 0);
6361debfc3dSmrg desc->in_edge = EDGE_SUCC (exit_block, 1);
6371debfc3dSmrg }
6381debfc3dSmrg else
6391debfc3dSmrg {
6401debfc3dSmrg desc->out_edge = EDGE_SUCC (exit_block, 1);
6411debfc3dSmrg desc->in_edge = EDGE_SUCC (exit_block, 0);
6421debfc3dSmrg }
6431debfc3dSmrg }
6441debfc3dSmrg
6451debfc3dSmrg desc->niter /= max_unroll + 1;
6461debfc3dSmrg loop->nb_iterations_upper_bound
6471debfc3dSmrg = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
6481debfc3dSmrg if (loop->any_estimate)
6491debfc3dSmrg loop->nb_iterations_estimate
6501debfc3dSmrg = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
6511debfc3dSmrg if (loop->any_likely_upper_bound)
6521debfc3dSmrg loop->nb_iterations_likely_upper_bound
6531debfc3dSmrg = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
654a05ac97eSmrg desc->niter_expr = gen_int_mode (desc->niter, desc->mode);
6551debfc3dSmrg
6561debfc3dSmrg /* Remove the edges. */
6571debfc3dSmrg FOR_EACH_VEC_ELT (remove_edges, i, e)
6581debfc3dSmrg remove_path (e);
6591debfc3dSmrg
6601debfc3dSmrg if (dump_file)
6611debfc3dSmrg fprintf (dump_file,
6621debfc3dSmrg ";; Unrolled loop %d times, constant # of iterations %i insns\n",
6631debfc3dSmrg max_unroll, num_loop_insns (loop));
6641debfc3dSmrg }
6651debfc3dSmrg
6661debfc3dSmrg /* Decide whether to unroll LOOP iterating runtime computable number of times
6671debfc3dSmrg and how much. */
6681debfc3dSmrg static void
decide_unroll_runtime_iterations(class loop * loop,int flags)669*8feb0f0bSmrg decide_unroll_runtime_iterations (class loop *loop, int flags)
6701debfc3dSmrg {
6711debfc3dSmrg unsigned nunroll, nunroll_by_av, i;
672*8feb0f0bSmrg class niter_desc *desc;
6731debfc3dSmrg widest_int iterations;
6741debfc3dSmrg
675a2dc1f3fSmrg /* If we were not asked to unroll this loop, just return back silently. */
676a2dc1f3fSmrg if (!(flags & UAP_UNROLL) && !loop->unroll)
6771debfc3dSmrg return;
6781debfc3dSmrg
679a2dc1f3fSmrg if (dump_enabled_p ())
680a2dc1f3fSmrg dump_printf (MSG_NOTE,
681a2dc1f3fSmrg "considering unrolling loop with runtime-"
6821debfc3dSmrg "computable number of iterations\n");
6831debfc3dSmrg
6841debfc3dSmrg /* nunroll = total number of copies of the original loop body in
6851debfc3dSmrg unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
686*8feb0f0bSmrg nunroll = param_max_unrolled_insns / loop->ninsns;
687*8feb0f0bSmrg nunroll_by_av = param_max_average_unrolled_insns / loop->av_ninsns;
6881debfc3dSmrg if (nunroll > nunroll_by_av)
6891debfc3dSmrg nunroll = nunroll_by_av;
690*8feb0f0bSmrg if (nunroll > (unsigned) param_max_unroll_times)
691*8feb0f0bSmrg nunroll = param_max_unroll_times;
6921debfc3dSmrg
6931debfc3dSmrg if (targetm.loop_unroll_adjust)
6941debfc3dSmrg nunroll = targetm.loop_unroll_adjust (nunroll, loop);
6951debfc3dSmrg
696a2dc1f3fSmrg if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
697a2dc1f3fSmrg nunroll = loop->unroll;
698a2dc1f3fSmrg
6991debfc3dSmrg /* Skip big loops. */
7001debfc3dSmrg if (nunroll <= 1)
7011debfc3dSmrg {
7021debfc3dSmrg if (dump_file)
7031debfc3dSmrg fprintf (dump_file, ";; Not considering loop, is too big\n");
7041debfc3dSmrg return;
7051debfc3dSmrg }
7061debfc3dSmrg
7071debfc3dSmrg /* Check for simple loops. */
7081debfc3dSmrg desc = get_simple_loop_desc (loop);
7091debfc3dSmrg
7101debfc3dSmrg /* Check simpleness. */
7111debfc3dSmrg if (!desc->simple_p || desc->assumptions)
7121debfc3dSmrg {
7131debfc3dSmrg if (dump_file)
7141debfc3dSmrg fprintf (dump_file,
7151debfc3dSmrg ";; Unable to prove that the number of iterations "
7161debfc3dSmrg "can be counted in runtime\n");
7171debfc3dSmrg return;
7181debfc3dSmrg }
7191debfc3dSmrg
7201debfc3dSmrg if (desc->const_iter)
7211debfc3dSmrg {
7221debfc3dSmrg if (dump_file)
7231debfc3dSmrg fprintf (dump_file, ";; Loop iterates constant times\n");
7241debfc3dSmrg return;
7251debfc3dSmrg }
7261debfc3dSmrg
7271debfc3dSmrg /* Check whether the loop rolls. */
7281debfc3dSmrg if ((get_estimated_loop_iterations (loop, &iterations)
7291debfc3dSmrg || get_likely_max_loop_iterations (loop, &iterations))
7301debfc3dSmrg && wi::ltu_p (iterations, 2 * nunroll))
7311debfc3dSmrg {
7321debfc3dSmrg if (dump_file)
7331debfc3dSmrg fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
7341debfc3dSmrg return;
7351debfc3dSmrg }
7361debfc3dSmrg
737a2dc1f3fSmrg /* Success; now force nunroll to be power of 2, as code-gen
738a2dc1f3fSmrg requires it, we are unable to cope with overflows in
739a2dc1f3fSmrg computation of number of iterations. */
7401debfc3dSmrg for (i = 1; 2 * i <= nunroll; i *= 2)
7411debfc3dSmrg continue;
7421debfc3dSmrg
7431debfc3dSmrg loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
7441debfc3dSmrg loop->lpt_decision.times = i - 1;
7451debfc3dSmrg }
7461debfc3dSmrg
7471debfc3dSmrg /* Splits edge E and inserts the sequence of instructions INSNS on it, and
7481debfc3dSmrg returns the newly created block. If INSNS is NULL_RTX, nothing is changed
7491debfc3dSmrg and NULL is returned instead. */
7501debfc3dSmrg
7511debfc3dSmrg basic_block
split_edge_and_insert(edge e,rtx_insn * insns)7521debfc3dSmrg split_edge_and_insert (edge e, rtx_insn *insns)
7531debfc3dSmrg {
7541debfc3dSmrg basic_block bb;
7551debfc3dSmrg
7561debfc3dSmrg if (!insns)
7571debfc3dSmrg return NULL;
7581debfc3dSmrg bb = split_edge (e);
7591debfc3dSmrg emit_insn_after (insns, BB_END (bb));
7601debfc3dSmrg
7611debfc3dSmrg /* ??? We used to assume that INSNS can contain control flow insns, and
7621debfc3dSmrg that we had to try to find sub basic blocks in BB to maintain a valid
7631debfc3dSmrg CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB
7641debfc3dSmrg and call break_superblocks when going out of cfglayout mode. But it
7651debfc3dSmrg turns out that this never happens; and that if it does ever happen,
7661debfc3dSmrg the verify_flow_info at the end of the RTL loop passes would fail.
7671debfc3dSmrg
7681debfc3dSmrg There are two reasons why we expected we could have control flow insns
7691debfc3dSmrg in INSNS. The first is when a comparison has to be done in parts, and
7701debfc3dSmrg the second is when the number of iterations is computed for loops with
7711debfc3dSmrg the number of iterations known at runtime. In both cases, test cases
7721debfc3dSmrg to get control flow in INSNS appear to be impossible to construct:
7731debfc3dSmrg
7741debfc3dSmrg * If do_compare_rtx_and_jump needs several branches to do comparison
7751debfc3dSmrg in a mode that needs comparison by parts, we cannot analyze the
7761debfc3dSmrg number of iterations of the loop, and we never get to unrolling it.
7771debfc3dSmrg
7781debfc3dSmrg * The code in expand_divmod that was suspected to cause creation of
7791debfc3dSmrg branching code seems to be only accessed for signed division. The
7801debfc3dSmrg divisions used by # of iterations analysis are always unsigned.
7811debfc3dSmrg Problems might arise on architectures that emits branching code
7821debfc3dSmrg for some operations that may appear in the unroller (especially
7831debfc3dSmrg for division), but we have no such architectures.
7841debfc3dSmrg
7851debfc3dSmrg Considering all this, it was decided that we should for now assume
7861debfc3dSmrg that INSNS can in theory contain control flow insns, but in practice
7871debfc3dSmrg it never does. So we don't handle the theoretical case, and should
7881debfc3dSmrg a real failure ever show up, we have a pretty good clue for how to
7891debfc3dSmrg fix it. */
7901debfc3dSmrg
7911debfc3dSmrg return bb;
7921debfc3dSmrg }
7931debfc3dSmrg
/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
   true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
   in order to create a jump.  Returns the emitted instruction sequence.  */

static rtx_insn *
compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
		      rtx_code_label *label, profile_probability prob,
		      rtx_insn *cinsn)
{
  rtx_insn *seq;
  rtx_jump_insn *jump;
  rtx cond;
  machine_mode mode;

  /* OP0 may have VOIDmode (e.g. a CONST_INT); fall back to OP1's mode.  */
  mode = GET_MODE (op0);
  if (mode == VOIDmode)
    mode = GET_MODE (op1);

  start_sequence ();
  if (GET_MODE_CLASS (mode) == MODE_CC)
    {
      /* A hack -- there seems to be no easy generic way how to make a
	 conditional jump from a ccmode comparison.  */
      gcc_assert (cinsn);
      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
      /* The template jump CINSN must perform exactly the comparison we
	 were asked for; verify code and both operands.  */
      gcc_assert (GET_CODE (cond) == comp);
      gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
      gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
      /* Duplicate CINSN's pattern, then retarget the copy at LABEL.  */
      emit_jump_insn (copy_insn (PATTERN (cinsn)));
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
      LABEL_NUSES (JUMP_LABEL (jump))++;
      redirect_jump (jump, label, 0);
    }
  else
    {
      /* A template insn only makes sense for MODE_CC comparisons.  */
      gcc_assert (!cinsn);

      op0 = force_operand (op0, NULL_RTX);
      op1 = force_operand (op1, NULL_RTX);
      do_compare_rtx_and_jump (op0, op1, comp, 0,
			       mode, NULL_RTX, NULL, label,
			       profile_probability::uninitialized ());
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      jump->set_jump_target (label);
      LABEL_NUSES (label)++;
    }
  /* Record the branch probability on the jump when the caller knows it.  */
  if (prob.initialized_p ())
    add_reg_br_prob_note (jump, prob);

  seq = get_insns ();
  end_sequence ();

  return seq;
}
8491debfc3dSmrg
/* Unroll LOOP for which we are able to count number of iterations in
   runtime LOOP->LPT_DECISION.TIMES times.  The times value must be a
   power of two.  The transformation does this (with some extra care
   for case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
         body; i++;
       case 2:
         body; i++;
       case 1:
         body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_runtime_iterations (class loop *loop)
{
  rtx old_niter, niter, tmp;
  rtx_insn *init_code, *branch_code;
  unsigned i, j;
  profile_probability p;
  basic_block preheader, *body, swtch, ezc_swtch = NULL;
  int may_exit_copy;
  profile_count iter_count, new_count;
  unsigned n_peel;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  class niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  /* Collect IV-splitting / accumulator-expansion opportunities before
     duplicating the body, if those optimizations are enabled.  */
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  auto_vec<basic_block> dom_bbs;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      vec<basic_block> ldom;
      basic_block bb;

      /* Only blocks outside the loop need fixing; blocks inside are
	 handled by the duplication machinery itself.  */
      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      FOR_EACH_VEC_ELT (ldom, j, bb)
	if (!flow_bb_inside_loop_p (loop, bb))
	  dom_bbs.safe_push (bb);

      ldom.release ();
    }
  free (body);

  if (!exit_at_end)
    {
      /* Leave exit in first copy (for explanation why see comment in
	 unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave exit in last copy (for explanation why see comment in
	 unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get expression for number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* For loops that exit at end and whose number of iterations is reliable,
     add one to niter to account for first pass through loop body before
     reaching exit test.  */
  if (exit_at_end && !desc->noloop_assumptions)
    {
      niter = expand_simple_binop (desc->mode, PLUS,
				   niter, const1_rtx,
				   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      old_niter = niter;
    }

  /* Count modulo by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  */
  niter = expand_simple_binop (desc->mode, AND,
			       niter, gen_int_mode (max_unroll, desc->mode),
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  auto_vec<edge> remove_edges;

  auto_sbitmap wont_exit (max_unroll + 2);

  if (extra_zero_check || desc->noloop_assumptions)
    {
      /* Peel the first copy of loop body.  Leave the exit test if the number
	 of iterations is not reliable.  Also record the place of the extra zero
	 check.  */
      bitmap_clear (wont_exit);
      if (!desc->noloop_assumptions)
	bitmap_set_bit (wont_exit, 1);
      ezc_swtch = loop_preheader_edge (loop)->src;
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);
    }

  /* Record the place where switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  /* Compute count increments for each switch block and initialize
     innermost switch block.  Switch blocks and peeled loop copies are built
     from innermost outward.  */
  iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1);
  swtch->count = new_count;

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  */
      bitmap_clear (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
	bitmap_set_bit (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create item for switch.  J is the modulo value dispatched to this
	 peeled copy; copies are built from innermost outward.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      p = profile_probability::always ().apply_scale (1, i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      /* Add in count of edge from switch block.  */
      preheader->count += iter_count;
      branch_code = compare_and_jump_seq (copy_rtx (niter),
					  gen_int_mode (j, desc->mode), EQ,
					  block_label (preheader), p, NULL);

      /* We rely on the fact that the compare and jump cannot be optimized out,
	 and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = p.invert ();
      new_count += iter_count;
      swtch->count = new_count;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add branch for zero iterations.  */
      p = profile_probability::always ().apply_scale (1, max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      /* Recompute count adjustments since initial peel copy may
	 have exited and reduced those values that were computed above.  */
      iter_count = swtch->count.apply_scale (1, max_unroll + 1);
      /* Add in count of edge from switch block.  */
      preheader->count += iter_count;
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
					  block_label (preheader), p,
					  NULL);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = p.invert ();
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll loop.  All copies are assumed not to exit except the one
     recorded in MAY_EXIT_COPY.  */

  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
			 gen_int_mode (max_unroll + 1, desc->mode));
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (loop->any_likely_upper_bound)
    loop->nb_iterations_likely_upper_bound
      = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
  if (exit_at_end)
    {
      desc->niter_expr =
	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      --loop->nb_iterations_upper_bound;
      /* Drop the estimate entirely rather than let it underflow.  */
      if (loop->any_estimate
	  && loop->nb_iterations_estimate != 0)
	--loop->nb_iterations_estimate;
      else
	loop->any_estimate = false;
      if (loop->any_likely_upper_bound
	  && loop->nb_iterations_likely_upper_bound != 0)
	--loop->nb_iterations_likely_upper_bound;
      else
	loop->any_likely_upper_bound = false;
    }

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, counting # of iterations "
	     "in runtime, %i insns\n",
	     max_unroll, num_loop_insns (loop));
}
11511debfc3dSmrg
11521debfc3dSmrg /* Decide whether to unroll LOOP stupidly and how much. */
11531debfc3dSmrg static void
decide_unroll_stupid(class loop * loop,int flags)1154*8feb0f0bSmrg decide_unroll_stupid (class loop *loop, int flags)
11551debfc3dSmrg {
11561debfc3dSmrg unsigned nunroll, nunroll_by_av, i;
1157*8feb0f0bSmrg class niter_desc *desc;
11581debfc3dSmrg widest_int iterations;
11591debfc3dSmrg
1160a2dc1f3fSmrg /* If we were not asked to unroll this loop, just return back silently. */
1161a2dc1f3fSmrg if (!(flags & UAP_UNROLL_ALL) && !loop->unroll)
11621debfc3dSmrg return;
11631debfc3dSmrg
1164a2dc1f3fSmrg if (dump_enabled_p ())
1165a2dc1f3fSmrg dump_printf (MSG_NOTE, "considering unrolling loop stupidly\n");
11661debfc3dSmrg
11671debfc3dSmrg /* nunroll = total number of copies of the original loop body in
11681debfc3dSmrg unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
1169*8feb0f0bSmrg nunroll = param_max_unrolled_insns / loop->ninsns;
11701debfc3dSmrg nunroll_by_av
1171*8feb0f0bSmrg = param_max_average_unrolled_insns / loop->av_ninsns;
11721debfc3dSmrg if (nunroll > nunroll_by_av)
11731debfc3dSmrg nunroll = nunroll_by_av;
1174*8feb0f0bSmrg if (nunroll > (unsigned) param_max_unroll_times)
1175*8feb0f0bSmrg nunroll = param_max_unroll_times;
11761debfc3dSmrg
11771debfc3dSmrg if (targetm.loop_unroll_adjust)
11781debfc3dSmrg nunroll = targetm.loop_unroll_adjust (nunroll, loop);
11791debfc3dSmrg
1180a2dc1f3fSmrg if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
1181a2dc1f3fSmrg nunroll = loop->unroll;
1182a2dc1f3fSmrg
11831debfc3dSmrg /* Skip big loops. */
11841debfc3dSmrg if (nunroll <= 1)
11851debfc3dSmrg {
11861debfc3dSmrg if (dump_file)
11871debfc3dSmrg fprintf (dump_file, ";; Not considering loop, is too big\n");
11881debfc3dSmrg return;
11891debfc3dSmrg }
11901debfc3dSmrg
11911debfc3dSmrg /* Check for simple loops. */
11921debfc3dSmrg desc = get_simple_loop_desc (loop);
11931debfc3dSmrg
11941debfc3dSmrg /* Check simpleness. */
11951debfc3dSmrg if (desc->simple_p && !desc->assumptions)
11961debfc3dSmrg {
11971debfc3dSmrg if (dump_file)
1198a2dc1f3fSmrg fprintf (dump_file, ";; Loop is simple\n");
11991debfc3dSmrg return;
12001debfc3dSmrg }
12011debfc3dSmrg
12021debfc3dSmrg /* Do not unroll loops with branches inside -- it increases number
12031debfc3dSmrg of mispredicts.
12041debfc3dSmrg TODO: this heuristic needs tunning; call inside the loop body
12051debfc3dSmrg is also relatively good reason to not unroll. */
12061debfc3dSmrg if (num_loop_branches (loop) > 1)
12071debfc3dSmrg {
12081debfc3dSmrg if (dump_file)
12091debfc3dSmrg fprintf (dump_file, ";; Not unrolling, contains branches\n");
12101debfc3dSmrg return;
12111debfc3dSmrg }
12121debfc3dSmrg
12131debfc3dSmrg /* Check whether the loop rolls. */
12141debfc3dSmrg if ((get_estimated_loop_iterations (loop, &iterations)
12151debfc3dSmrg || get_likely_max_loop_iterations (loop, &iterations))
12161debfc3dSmrg && wi::ltu_p (iterations, 2 * nunroll))
12171debfc3dSmrg {
12181debfc3dSmrg if (dump_file)
12191debfc3dSmrg fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
12201debfc3dSmrg return;
12211debfc3dSmrg }
12221debfc3dSmrg
12231debfc3dSmrg /* Success. Now force nunroll to be power of 2, as it seems that this
12241debfc3dSmrg improves results (partially because of better alignments, partially
12251debfc3dSmrg because of some dark magic). */
12261debfc3dSmrg for (i = 1; 2 * i <= nunroll; i *= 2)
12271debfc3dSmrg continue;
12281debfc3dSmrg
12291debfc3dSmrg loop->lpt_decision.decision = LPT_UNROLL_STUPID;
12301debfc3dSmrg loop->lpt_decision.times = i - 1;
12311debfc3dSmrg }
12321debfc3dSmrg
12331debfc3dSmrg /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation does this:
12341debfc3dSmrg
12351debfc3dSmrg while (cond)
12361debfc3dSmrg body;
12371debfc3dSmrg
12381debfc3dSmrg ==> (LOOP->LPT_DECISION.TIMES == 3)
12391debfc3dSmrg
12401debfc3dSmrg while (cond)
12411debfc3dSmrg {
12421debfc3dSmrg body;
12431debfc3dSmrg if (!cond) break;
12441debfc3dSmrg body;
12451debfc3dSmrg if (!cond) break;
12461debfc3dSmrg body;
12471debfc3dSmrg if (!cond) break;
12481debfc3dSmrg body;
12491debfc3dSmrg }
12501debfc3dSmrg */
12511debfc3dSmrg static void
unroll_loop_stupid(class loop * loop)1252*8feb0f0bSmrg unroll_loop_stupid (class loop *loop)
12531debfc3dSmrg {
12541debfc3dSmrg unsigned nunroll = loop->lpt_decision.times;
1255*8feb0f0bSmrg class niter_desc *desc = get_simple_loop_desc (loop);
12561debfc3dSmrg struct opt_info *opt_info = NULL;
12571debfc3dSmrg bool ok;
12581debfc3dSmrg
12591debfc3dSmrg if (flag_split_ivs_in_unroller
12601debfc3dSmrg || flag_variable_expansion_in_unroller)
12611debfc3dSmrg opt_info = analyze_insns_in_loop (loop);
12621debfc3dSmrg
12631debfc3dSmrg auto_sbitmap wont_exit (nunroll + 1);
12641debfc3dSmrg bitmap_clear (wont_exit);
12651debfc3dSmrg opt_info_start_duplication (opt_info);
12661debfc3dSmrg
12671debfc3dSmrg ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
12681debfc3dSmrg nunroll, wont_exit,
12691debfc3dSmrg NULL, NULL,
12701debfc3dSmrg DLTHE_FLAG_UPDATE_FREQ
12711debfc3dSmrg | (opt_info
12721debfc3dSmrg ? DLTHE_RECORD_COPY_NUMBER
12731debfc3dSmrg : 0));
12741debfc3dSmrg gcc_assert (ok);
12751debfc3dSmrg
12761debfc3dSmrg if (opt_info)
12771debfc3dSmrg {
12781debfc3dSmrg apply_opt_in_copies (opt_info, nunroll, true, true);
12791debfc3dSmrg free_opt_info (opt_info);
12801debfc3dSmrg }
12811debfc3dSmrg
12821debfc3dSmrg if (desc->simple_p)
12831debfc3dSmrg {
12841debfc3dSmrg /* We indeed may get here provided that there are nontrivial assumptions
12851debfc3dSmrg for a loop to be really simple. We could update the counts, but the
12861debfc3dSmrg problem is that we are unable to decide which exit will be taken
12871debfc3dSmrg (not really true in case the number of iterations is constant,
12881debfc3dSmrg but no one will do anything with this information, so we do not
12891debfc3dSmrg worry about it). */
12901debfc3dSmrg desc->simple_p = false;
12911debfc3dSmrg }
12921debfc3dSmrg
12931debfc3dSmrg if (dump_file)
12941debfc3dSmrg fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
12951debfc3dSmrg nunroll, num_loop_insns (loop));
12961debfc3dSmrg }
12971debfc3dSmrg
12981debfc3dSmrg /* Returns true if REG is referenced in one nondebug insn in LOOP.
12991debfc3dSmrg Set *DEBUG_USES to the number of debug insns that reference the
13001debfc3dSmrg variable. */
13011debfc3dSmrg
13021debfc3dSmrg static bool
referenced_in_one_insn_in_loop_p(class loop * loop,rtx reg,int * debug_uses)1303*8feb0f0bSmrg referenced_in_one_insn_in_loop_p (class loop *loop, rtx reg,
13041debfc3dSmrg int *debug_uses)
13051debfc3dSmrg {
13061debfc3dSmrg basic_block *body, bb;
13071debfc3dSmrg unsigned i;
13081debfc3dSmrg int count_ref = 0;
13091debfc3dSmrg rtx_insn *insn;
13101debfc3dSmrg
13111debfc3dSmrg body = get_loop_body (loop);
13121debfc3dSmrg for (i = 0; i < loop->num_nodes; i++)
13131debfc3dSmrg {
13141debfc3dSmrg bb = body[i];
13151debfc3dSmrg
13161debfc3dSmrg FOR_BB_INSNS (bb, insn)
13171debfc3dSmrg if (!rtx_referenced_p (reg, insn))
13181debfc3dSmrg continue;
13191debfc3dSmrg else if (DEBUG_INSN_P (insn))
13201debfc3dSmrg ++*debug_uses;
13211debfc3dSmrg else if (++count_ref > 1)
13221debfc3dSmrg break;
13231debfc3dSmrg }
13241debfc3dSmrg free (body);
13251debfc3dSmrg return (count_ref == 1);
13261debfc3dSmrg }
13271debfc3dSmrg
13281debfc3dSmrg /* Reset the DEBUG_USES debug insns in LOOP that reference REG. */
13291debfc3dSmrg
13301debfc3dSmrg static void
reset_debug_uses_in_loop(class loop * loop,rtx reg,int debug_uses)1331*8feb0f0bSmrg reset_debug_uses_in_loop (class loop *loop, rtx reg, int debug_uses)
13321debfc3dSmrg {
13331debfc3dSmrg basic_block *body, bb;
13341debfc3dSmrg unsigned i;
13351debfc3dSmrg rtx_insn *insn;
13361debfc3dSmrg
13371debfc3dSmrg body = get_loop_body (loop);
13381debfc3dSmrg for (i = 0; debug_uses && i < loop->num_nodes; i++)
13391debfc3dSmrg {
13401debfc3dSmrg bb = body[i];
13411debfc3dSmrg
13421debfc3dSmrg FOR_BB_INSNS (bb, insn)
13431debfc3dSmrg if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
13441debfc3dSmrg continue;
13451debfc3dSmrg else
13461debfc3dSmrg {
13471debfc3dSmrg validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
13481debfc3dSmrg gen_rtx_UNKNOWN_VAR_LOC (), 0);
13491debfc3dSmrg if (!--debug_uses)
13501debfc3dSmrg break;
13511debfc3dSmrg }
13521debfc3dSmrg }
13531debfc3dSmrg free (body);
13541debfc3dSmrg }
13551debfc3dSmrg
/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += a[i];

   ==>

   sum += a[i]
   ....
   i = i+1;
   sum1 += a[i]
   ....
   i = i+1
   sum2 += a[i];
   ....

   Return NULL if INSN contains no opportunity for expansion of accumulator.
   Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
   information and return a pointer to it.
*/

static struct var_to_expand *
analyze_insn_to_expand_var (class loop *loop, rtx_insn *insn)
{
  rtx set, dest, src;
  struct var_to_expand *ves;
  unsigned accum_pos;
  enum rtx_code code;
  int debug_uses = 0;

  /* Only a single SET can be treated as an accumulation.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);
  code = GET_CODE (src);

  /* Only these operations are handled as accumulations.  */
  if (code != PLUS && code != MINUS && code != MULT && code != FMA)
    return NULL;

  if (FLOAT_MODE_P (GET_MODE (dest)))
    {
      /* Splitting a float accumulator reassociates the reduction, which
	 is only valid under -fassociative-math.  */
      if (!flag_associative_math)
	return NULL;
      /* In the case of FMA, we're also changing the rounding.  */
      if (code == FMA && !flag_unsafe_math_optimizations)
	return NULL;
    }

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we cannot
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (code, GET_MODE (src)))
    return NULL;

  /* The accumulator must be a register or a subreg of a register.  */
  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
	   && REG_P (SUBREG_REG (dest))))
    return NULL;

  /* Find the accumulator use within the operation.  */
  if (code == FMA)
    {
      /* We only support accumulation via FMA in the ADD position.  */
      if (!rtx_equal_p (dest, XEXP (src, 2)))
	return NULL;
      accum_pos = 2;
    }
  else if (rtx_equal_p (dest, XEXP (src, 0)))
    accum_pos = 0;
  else if (rtx_equal_p (dest, XEXP (src, 1)))
    {
      /* The method of expansion that we are using; which includes the
	 initialization of the expansions with zero and the summation of
	 the expansions at the end of the computation will yield wrong
	 results for (x = something - x) thus avoid using it in that case.  */
      if (code == MINUS)
	return NULL;
      accum_pos = 1;
    }
  else
    return NULL;

  /* It must not otherwise be used.  */
  if (code == FMA)
    {
      if (rtx_referenced_p (dest, XEXP (src, 0))
	  || rtx_referenced_p (dest, XEXP (src, 1)))
	return NULL;
    }
  else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
    return NULL;

  /* It must be used in exactly one insn.  */
  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  if (debug_uses)
    /* Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  */
    reset_debug_uses_in_loop (loop, dest, debug_uses);

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions.create (1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;
  return ves;
}
14911debfc3dSmrg
/* Determine whether there is an induction variable in INSN that
   we would like to split during unrolling.

   I.e. replace

   i = i + 1;
   ...
   i = i + 1;
   ...
   i = i + 1;
   ...

   type chains by

   i0 = i + 1
   ...
   i = i0 + 1
   ...
   i = i0 + 2
   ...

   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
   an IV_TO_SPLIT structure, fill it with the relevant information and return a
   pointer to it.  */

static struct iv_to_split *
analyze_iv_to_split_insn (rtx_insn *insn)
{
  rtx set, dest;
  class rtx_iv iv;
  struct iv_to_split *ivts;
  scalar_int_mode mode;
  bool ok;

  /* For now we just split the basic induction variables.  Later this may be
     extended for example by selecting also addresses of memory references.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  /* Only integer-mode register destinations are candidates.  */
  dest = SET_DEST (set);
  if (!REG_P (dest) || !is_a <scalar_int_mode> (GET_MODE (dest), &mode))
    return NULL;

  if (!biv_p (insn, mode, dest))
    return NULL;

  ok = iv_analyze_result (insn, dest, &iv);

  /* This used to be an assert under the assumption that if biv_p returns
     true that iv_analyze_result must also return true.  However, that
     assumption is not strictly correct as evidenced by pr25569.

     Returning NULL when iv_analyze_result returns false is safe and
     avoids the problems in pr25569 until the iv_analyze_* routines
     can be fixed, which is apparently hard and time consuming
     according to their author.  */
  if (! ok)
    return NULL;

  /* A zero step means the "IV" is really a constant, and a mode that
     differs from the extension mode would require handling the
     extension when rewriting the uses -- neither is worth splitting.  */
  if (iv.step == const0_rtx
      || iv.mode != iv.extend_mode)
    return NULL;

  /* Record the insn to split.  The base variable (i0) is allocated
     later, by allocate_basic_variable.  */
  ivts = XNEW (struct iv_to_split);
  ivts->insn = insn;
  ivts->orig_var = dest;
  ivts->base_var = NULL_RTX;
  ivts->step = iv.step;
  ivts->next = NULL;

  return ivts;
}
15661debfc3dSmrg
/* Determines which of insns in LOOP can be optimized.
   Return a OPT_INFO struct with the relevant hash tables filled
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
   is undefined for the return value.  */

static struct opt_info *
analyze_insns_in_loop (class loop *loop)
{
  basic_block *body, bb;
  unsigned i;
  struct opt_info *opt_info = XCNEW (struct opt_info);
  rtx_insn *insn;
  struct iv_to_split *ivts = NULL;
  struct var_to_expand *ves = NULL;
  iv_to_split **slot1;
  var_to_expand **slot2;
  vec<edge> edges = get_loop_exit_edges (loop);
  edge exit;
  bool can_apply = false;

  iv_analysis_loop_init (loop);

  body = get_loop_body (loop);

  /* IV splitting applies to any loop; set up its hash table and the
     intrusive list (head/tail pointers) that preserves insn order.  */
  if (flag_split_ivs_in_unroller)
    {
      opt_info->insns_to_split
	= new hash_table<iv_split_hasher> (5 * loop->num_nodes);
      opt_info->iv_to_split_head = NULL;
      opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
    }

  /* Record the loop exit bb and loop preheader before the unrolling.  */
  opt_info->loop_preheader = loop_preheader_edge (loop)->src;

  /* Variable expansion needs a unique place to combine the partial
     accumulators, so it is only applicable when the loop has a single
     non-complex exit edge; split that edge to get a dedicated block.  */
  if (edges.length () == 1)
    {
      exit = edges[0];
      if (!(exit->flags & EDGE_COMPLEX))
	{
	  opt_info->loop_exit = split_edge (exit);
	  can_apply = true;
	}
    }

  if (flag_variable_expansion_in_unroller
      && can_apply)
    {
      opt_info->insns_with_var_to_expand
	= new hash_table<var_expand_hasher> (5 * loop->num_nodes);
      opt_info->var_to_expand_head = NULL;
      opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      /* Only consider blocks that execute on every iteration (they
	 dominate the latch); conditional code cannot be split/expanded
	 safely.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	continue;

      FOR_BB_INSNS (bb, insn)
	{
	  if (!INSN_P (insn))
	    continue;

	  if (opt_info->insns_to_split)
	    ivts = analyze_iv_to_split_insn (insn);

	  /* An insn recorded for IV splitting is not also considered
	     for variable expansion.  */
	  if (ivts)
	    {
	      slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
	      gcc_assert (*slot1 == NULL);
	      *slot1 = ivts;
	      *opt_info->iv_to_split_tail = ivts;
	      opt_info->iv_to_split_tail = &ivts->next;
	      continue;
	    }

	  if (opt_info->insns_with_var_to_expand)
	    ves = analyze_insn_to_expand_var (loop, insn);

	  if (ves)
	    {
	      slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
	      gcc_assert (*slot2 == NULL);
	      *slot2 = ves;
	      *opt_info->var_to_expand_tail = ves;
	      opt_info->var_to_expand_tail = &ves->next;
	    }
	}
    }

  edges.release ();
  free (body);
  return opt_info;
}
16631debfc3dSmrg
16641debfc3dSmrg /* Called just before loop duplication. Records start of duplicated area
16651debfc3dSmrg to OPT_INFO. */
16661debfc3dSmrg
16671debfc3dSmrg static void
opt_info_start_duplication(struct opt_info * opt_info)16681debfc3dSmrg opt_info_start_duplication (struct opt_info *opt_info)
16691debfc3dSmrg {
16701debfc3dSmrg if (opt_info)
16711debfc3dSmrg opt_info->first_new_block = last_basic_block_for_fn (cfun);
16721debfc3dSmrg }
16731debfc3dSmrg
/* Determine the number of iterations between initialization of the base
   variable and the current copy (N_COPY).  N_COPIES is the total number
   of newly created copies.  UNROLLING is true if we are unrolling
   (not peeling) the loop.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  /* When unrolling, the initialization is placed in the original loop
     body, which is copy number 0, so the distance is simply N_COPY.  */
  if (unrolling)
    return n_copy;

  /* When peeling, the copy in which the initialization occurs has
     number 1, and the original loop (number 0) comes last.  */
  return n_copy ? n_copy - 1 : n_copies;
}
16981debfc3dSmrg
16991debfc3dSmrg /* Allocate basic variable for the induction variable chain. */
17001debfc3dSmrg
17011debfc3dSmrg static void
allocate_basic_variable(struct iv_to_split * ivts)17021debfc3dSmrg allocate_basic_variable (struct iv_to_split *ivts)
17031debfc3dSmrg {
17041debfc3dSmrg rtx expr = SET_SRC (single_set (ivts->insn));
17051debfc3dSmrg
17061debfc3dSmrg ivts->base_var = gen_reg_rtx (GET_MODE (expr));
17071debfc3dSmrg }
17081debfc3dSmrg
17091debfc3dSmrg /* Insert initialization of basic variable of IVTS before INSN, taking
17101debfc3dSmrg the initial value from INSN. */
17111debfc3dSmrg
17121debfc3dSmrg static void
insert_base_initialization(struct iv_to_split * ivts,rtx_insn * insn)17131debfc3dSmrg insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
17141debfc3dSmrg {
17151debfc3dSmrg rtx expr = copy_rtx (SET_SRC (single_set (insn)));
17161debfc3dSmrg rtx_insn *seq;
17171debfc3dSmrg
17181debfc3dSmrg start_sequence ();
17191debfc3dSmrg expr = force_operand (expr, ivts->base_var);
17201debfc3dSmrg if (expr != ivts->base_var)
17211debfc3dSmrg emit_move_insn (ivts->base_var, expr);
17221debfc3dSmrg seq = get_insns ();
17231debfc3dSmrg end_sequence ();
17241debfc3dSmrg
17251debfc3dSmrg emit_insn_before (seq, insn);
17261debfc3dSmrg }
17271debfc3dSmrg
/* Replace the use of induction variable described in IVTS in INSN
   by base variable + DELTA * step.  */

static void
split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
{
  rtx expr, *loc, incr, var;
  rtx_insn *seq;
  machine_mode mode = GET_MODE (ivts->base_var);
  rtx src, dest, set;

  /* Construct base + DELTA * step.  */
  if (!delta)
    expr = ivts->base_var;
  else
    {
      incr = simplify_gen_binary (MULT, mode,
				  copy_rtx (ivts->step),
				  gen_int_mode (delta, mode));
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
				  ivts->base_var, incr);
    }

  /* Figure out where to do the replacement.  */
  loc = &SET_SRC (single_set (insn));

  /* If we can make the replacement right away, we're done.  */
  if (validate_change (insn, loc, expr, 0))
    return;

  /* Otherwise, force EXPR into a register and try again.  */
  start_sequence ();
  var = gen_reg_rtx (mode);
  expr = force_operand (expr, var);
  if (expr != var)
    emit_move_insn (var, expr);
  seq = get_insns ();
  end_sequence ();
  emit_insn_before (seq, insn);

  if (validate_change (insn, loc, var, 0))
    return;

  /* The last chance.  Try recreating the assignment in insn
     completely from scratch.  */
  set = single_set (insn);
  gcc_assert (set);

  start_sequence ();
  /* Rewrite the source in place (INSN will be deleted below, so the
     in-place modification is harmless) and re-expand it via
     force_operand, which can emit whatever insns the target needs.  */
  *loc = var;
  src = copy_rtx (SET_SRC (set));
  dest = copy_rtx (SET_DEST (set));
  src = force_operand (src, dest);
  if (src != dest)
    emit_move_insn (dest, src);
  seq = get_insns ();
  end_sequence ();

  /* Replace INSN with the freshly generated sequence.  */
  emit_insn_before (seq, insn);
  delete_insn (insn);
}
17891debfc3dSmrg
17901debfc3dSmrg
17911debfc3dSmrg /* Return one expansion of the accumulator recorded in struct VE. */
17921debfc3dSmrg
17931debfc3dSmrg static rtx
get_expansion(struct var_to_expand * ve)17941debfc3dSmrg get_expansion (struct var_to_expand *ve)
17951debfc3dSmrg {
17961debfc3dSmrg rtx reg;
17971debfc3dSmrg
17981debfc3dSmrg if (ve->reuse_expansion == 0)
17991debfc3dSmrg reg = ve->reg;
18001debfc3dSmrg else
18011debfc3dSmrg reg = ve->var_expansions[ve->reuse_expansion - 1];
18021debfc3dSmrg
18031debfc3dSmrg if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
18041debfc3dSmrg ve->reuse_expansion = 0;
18051debfc3dSmrg else
18061debfc3dSmrg ve->reuse_expansion++;
18071debfc3dSmrg
18081debfc3dSmrg return reg;
18091debfc3dSmrg }
18101debfc3dSmrg
18111debfc3dSmrg
18121debfc3dSmrg /* Given INSN replace the uses of the accumulator recorded in VE
18131debfc3dSmrg with a new register. */
18141debfc3dSmrg
18151debfc3dSmrg static void
expand_var_during_unrolling(struct var_to_expand * ve,rtx_insn * insn)18161debfc3dSmrg expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
18171debfc3dSmrg {
18181debfc3dSmrg rtx new_reg, set;
18191debfc3dSmrg bool really_new_expansion = false;
18201debfc3dSmrg
18211debfc3dSmrg set = single_set (insn);
18221debfc3dSmrg gcc_assert (set);
18231debfc3dSmrg
18241debfc3dSmrg /* Generate a new register only if the expansion limit has not been
18251debfc3dSmrg reached. Else reuse an already existing expansion. */
1826*8feb0f0bSmrg if (param_max_variable_expansions > ve->expansion_count)
18271debfc3dSmrg {
18281debfc3dSmrg really_new_expansion = true;
18291debfc3dSmrg new_reg = gen_reg_rtx (GET_MODE (ve->reg));
18301debfc3dSmrg }
18311debfc3dSmrg else
18321debfc3dSmrg new_reg = get_expansion (ve);
18331debfc3dSmrg
18341debfc3dSmrg validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
18351debfc3dSmrg if (apply_change_group ())
18361debfc3dSmrg if (really_new_expansion)
18371debfc3dSmrg {
18381debfc3dSmrg ve->var_expansions.safe_push (new_reg);
18391debfc3dSmrg ve->expansion_count++;
18401debfc3dSmrg }
18411debfc3dSmrg }
18421debfc3dSmrg
/* Initialize the variable expansions in loop preheader.  PLACE is the
   loop-preheader basic block where the initialization of the
   expansions should take place.  The expansions are initialized with
   (-0) when the operation is plus or minus to honor sign zero.  This
   way we can prevent cases where the sign of the final result is
   effected by the sign of the expansion.  Here is an example to
   demonstrate this:

   for (i = 0 ; i < n; i++)
     sum += something;

   ==>

   sum += something
   ....
   i = i+1;
   sum1 += something
   ....
   i = i+1
   sum2 += something;
   ....

   When SUM is initialized with -zero and SOMETHING is also -zero; the
   final result of sum should be -zero thus the expansions sum1 and sum2
   should be initialized with -zero as well (otherwise we will get +zero
   as the final result).  */

static void
insert_var_expansion_initialization (struct var_to_expand *ve,
				     basic_block place)
{
  rtx_insn *seq;
  rtx var, zero_init;
  unsigned i;
  machine_mode mode = GET_MODE (ve->reg);
  bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);

  /* Nothing to initialize if no expansion registers were created.  */
  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
      /* FALLTHRU */
    case PLUS:
    case MINUS:
      /* Additive accumulators start at (signed) zero.  */
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	{
	  if (honor_signed_zero_p)
	    zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
	  else
	    zero_init = CONST0_RTX (mode);
	  emit_move_insn (var, zero_init);
	}
      break;

    case MULT:
      /* Multiplicative accumulators start at one.  */
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	{
	  zero_init = CONST1_RTX (GET_MODE (var));
	  emit_move_insn (var, zero_init);
	}
      break;

    default:
      gcc_unreachable ();
    }

  seq = get_insns ();
  end_sequence ();

  /* Place the initializations at the end of the preheader block.  */
  emit_insn_after (seq, BB_END (place));
}
19171debfc3dSmrg
/* Combine the variable expansions at the loop exit.  PLACE is the
   loop exit basic block where the summation of the expansions should
   take place.  */

static void
combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
{
  rtx sum = ve->reg;
  rtx expr, var;
  rtx_insn *seq, *insn;
  unsigned i;

  /* Nothing to combine if no expansion registers were created.  */
  if (ve->var_expansions.length () == 0)
    return;

  /* ve->reg might be SUBREG or some other non-shareable RTL, and we use
     it both here and as the destination of the assignment.  */
  sum = copy_rtx (sum);
  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
      /* FALLTHRU */
    case PLUS:
    case MINUS:
      /* Fold every expansion into SUM by addition (MINUS accumulators
	 were expanded only in the non-subtracted position, so addition
	 is correct for them too).  */
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
      break;

    default:
      gcc_unreachable ();
    }

  expr = force_operand (sum, ve->reg);
  if (expr != ve->reg)
    emit_move_insn (ve->reg, expr);
  seq = get_insns ();
  end_sequence ();

  /* Insert the combination code right after the NOTE_INSN_BASIC_BLOCK
     note at the start of PLACE, i.e. as the first real insns of the
     exit block.  */
  insn = BB_HEAD (place);
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
    insn = NEXT_INSN (insn);

  emit_insn_after (seq, insn);
}
19681debfc3dSmrg
19691debfc3dSmrg /* Strip away REG_EQUAL notes for IVs we're splitting.
19701debfc3dSmrg
19711debfc3dSmrg Updating REG_EQUAL notes for IVs we split is tricky: We
19721debfc3dSmrg cannot tell until after unrolling, DF-rescanning, and liveness
19731debfc3dSmrg updating, whether an EQ_USE is reached by the split IV while
19741debfc3dSmrg the IV reg is still live. See PR55006.
19751debfc3dSmrg
19761debfc3dSmrg ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
19771debfc3dSmrg because RTL loop-iv requires us to defer rescanning insns and
19781debfc3dSmrg any notes attached to them. So resort to old techniques... */
19791debfc3dSmrg
19801debfc3dSmrg static void
maybe_strip_eq_note_for_split_iv(struct opt_info * opt_info,rtx_insn * insn)19811debfc3dSmrg maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
19821debfc3dSmrg {
19831debfc3dSmrg struct iv_to_split *ivts;
19841debfc3dSmrg rtx note = find_reg_equal_equiv_note (insn);
19851debfc3dSmrg if (! note)
19861debfc3dSmrg return;
19871debfc3dSmrg for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
19881debfc3dSmrg if (reg_mentioned_p (ivts->orig_var, note))
19891debfc3dSmrg {
19901debfc3dSmrg remove_note (insn, note);
19911debfc3dSmrg return;
19921debfc3dSmrg }
19931debfc3dSmrg }
19941debfc3dSmrg
/* Apply loop optimizations in loop copies using the
   data which gathered during the unrolling.  Structure
   OPT_INFO record that data.

   UNROLLING is true if we unrolled (not peeled) the loop.
   REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
   the loop (as it should happen in complete unrolling, but not in ordinary
   peeling of the loop).  */

static void
apply_opt_in_copies (struct opt_info *opt_info,
		     unsigned n_copies, bool unrolling,
		     bool rewrite_original_loop)
{
  unsigned i, delta;
  basic_block bb, orig_bb;
  rtx_insn *insn, *orig_insn, *next;
  struct iv_to_split ivts_templ, *ivts;
  struct var_to_expand ve_templ, *ves;

  /* Sanity check -- we need to put initialization in the original loop
     body.  */
  gcc_assert (!unrolling || rewrite_original_loop);

  /* Allocate the basic variables (i0).  */
  if (opt_info->insns_to_split)
    for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
      allocate_basic_variable (ivts);

  /* First pass: walk all basic blocks created by the duplication (those
     with index >= first_new_block) and rewrite the copied insns.  */
  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);

      /* bb->aux holds position in copy sequence initialized by
	 duplicate_loop_to_header_edge.  */
      delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
					unrolling);
      bb->aux = 0;
      /* Walk BB and ORIG_BB in lockstep; the copied block contains the
	 same insns as the original (modulo skipped debug binds for
	 LABEL_DECLs, which may differ between the two).  */
      orig_insn = BB_HEAD (orig_bb);
      FOR_BB_INSNS_SAFE (bb, insn, next)
	{
	  if (!INSN_P (insn)
	      || (DEBUG_BIND_INSN_P (insn)
		  && INSN_VAR_LOCATION_DECL (insn)
		  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
	    continue;

	  /* Advance ORIG_INSN to the insn corresponding to INSN.  */
	  while (!INSN_P (orig_insn)
		 || (DEBUG_BIND_INSN_P (orig_insn)
		     && INSN_VAR_LOCATION_DECL (orig_insn)
		     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
			 == LABEL_DECL)))
	    orig_insn = NEXT_INSN (orig_insn);

	  /* The hash tables are keyed by the insn in the ORIGINAL loop
	     body.  */
	  ivts_templ.insn = orig_insn;
	  ve_templ.insn = orig_insn;

	  /* Apply splitting iv optimization.  */
	  if (opt_info->insns_to_split)
	    {
	      maybe_strip_eq_note_for_split_iv (opt_info, insn);

	      ivts = opt_info->insns_to_split->find (&ivts_templ);

	      if (ivts)
		{
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));

		  /* DELTA == 0 identifies the copy in which the base
		     variable must be initialized.  */
		  if (!delta)
		    insert_base_initialization (ivts, insn);
		  split_iv (ivts, insn, delta);
		}
	    }
	  /* Apply variable expansion optimization.  */
	  if (unrolling && opt_info->insns_with_var_to_expand)
	    {
	      ves = (struct var_to_expand *)
		opt_info->insns_with_var_to_expand->find (&ve_templ);
	      if (ves)
		{
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));
		  expand_var_during_unrolling (ves, insn);
		}
	    }
	  orig_insn = NEXT_INSN (orig_insn);
	}
    }

  if (!rewrite_original_loop)
    return;

  /* Initialize the variable expansions in the loop preheader
     and take care of combining them at the loop exit.  */
  if (opt_info->insns_with_var_to_expand)
    {
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	insert_var_expansion_initialization (ves, opt_info->loop_preheader);
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
    }

  /* Rewrite also the original loop body.  Find them as originals of the blocks
     in the last copied iteration, i.e. those that have
     get_bb_copy (get_bb_original (bb)) == bb.  */
  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);
      if (get_bb_copy (orig_bb) != bb)
	continue;

      /* The original body is copy number 0.  */
      delta = determine_split_iv_delta (0, n_copies, unrolling);
      for (orig_insn = BB_HEAD (orig_bb);
	   orig_insn != NEXT_INSN (BB_END (bb));
	   orig_insn = next)
	{
	  /* split_iv may delete ORIG_INSN, so fetch its successor
	     first.  */
	  next = NEXT_INSN (orig_insn);

	  if (!INSN_P (orig_insn))
	    continue;

	  ivts_templ.insn = orig_insn;
	  if (opt_info->insns_to_split)
	    {
	      maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);

	      ivts = (struct iv_to_split *)
		opt_info->insns_to_split->find (&ivts_templ);
	      if (ivts)
		{
		  if (!delta)
		    insert_base_initialization (ivts, orig_insn);
		  split_iv (ivts, orig_insn, delta);
		  continue;
		}
	    }

	}
    }
}
21421debfc3dSmrg
21431debfc3dSmrg /* Release OPT_INFO. */
21441debfc3dSmrg
21451debfc3dSmrg static void
free_opt_info(struct opt_info * opt_info)21461debfc3dSmrg free_opt_info (struct opt_info *opt_info)
21471debfc3dSmrg {
21481debfc3dSmrg delete opt_info->insns_to_split;
21491debfc3dSmrg opt_info->insns_to_split = NULL;
21501debfc3dSmrg if (opt_info->insns_with_var_to_expand)
21511debfc3dSmrg {
21521debfc3dSmrg struct var_to_expand *ves;
21531debfc3dSmrg
21541debfc3dSmrg for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
21551debfc3dSmrg ves->var_expansions.release ();
21561debfc3dSmrg delete opt_info->insns_with_var_to_expand;
21571debfc3dSmrg opt_info->insns_with_var_to_expand = NULL;
21581debfc3dSmrg }
21591debfc3dSmrg free (opt_info);
21601debfc3dSmrg }
2161