xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/loop-unroll.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
11debfc3dSmrg /* Loop unrolling.
2*8feb0f0bSmrg    Copyright (C) 2002-2020 Free Software Foundation, Inc.
31debfc3dSmrg 
41debfc3dSmrg This file is part of GCC.
51debfc3dSmrg 
61debfc3dSmrg GCC is free software; you can redistribute it and/or modify it under
71debfc3dSmrg the terms of the GNU General Public License as published by the Free
81debfc3dSmrg Software Foundation; either version 3, or (at your option) any later
91debfc3dSmrg version.
101debfc3dSmrg 
111debfc3dSmrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
121debfc3dSmrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
131debfc3dSmrg FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
141debfc3dSmrg for more details.
151debfc3dSmrg 
161debfc3dSmrg You should have received a copy of the GNU General Public License
171debfc3dSmrg along with GCC; see the file COPYING3.  If not see
181debfc3dSmrg <http://www.gnu.org/licenses/>.  */
191debfc3dSmrg 
201debfc3dSmrg #include "config.h"
211debfc3dSmrg #include "system.h"
221debfc3dSmrg #include "coretypes.h"
231debfc3dSmrg #include "backend.h"
241debfc3dSmrg #include "target.h"
251debfc3dSmrg #include "rtl.h"
261debfc3dSmrg #include "tree.h"
271debfc3dSmrg #include "cfghooks.h"
281debfc3dSmrg #include "memmodel.h"
291debfc3dSmrg #include "optabs.h"
301debfc3dSmrg #include "emit-rtl.h"
311debfc3dSmrg #include "recog.h"
321debfc3dSmrg #include "profile.h"
331debfc3dSmrg #include "cfgrtl.h"
341debfc3dSmrg #include "cfgloop.h"
351debfc3dSmrg #include "dojump.h"
361debfc3dSmrg #include "expr.h"
371debfc3dSmrg #include "dumpfile.h"
381debfc3dSmrg 
391debfc3dSmrg /* This pass performs loop unrolling.  We only perform this
401debfc3dSmrg    optimization on innermost loops (with single exception) because
411debfc3dSmrg    the impact on performance is greatest here, and we want to avoid
421debfc3dSmrg    unnecessary code size growth.  The gain is caused by greater sequentiality
431debfc3dSmrg    of code, better code to optimize for further passes and in some cases
441debfc3dSmrg    by fewer testings of exit conditions.  The main problem is code growth,
451debfc3dSmrg    that impacts performance negatively due to effect of caches.
461debfc3dSmrg 
471debfc3dSmrg    What we do:
481debfc3dSmrg 
   -- unrolling of loops that roll constant times; this is almost always
      a win, as we get rid of exit condition tests.
   -- unrolling of loops that roll a number of times that we can compute
      at runtime; we also get rid of exit condition tests here, but there
      is the extra expense of calculating the number of iterations
541debfc3dSmrg    -- simple unrolling of remaining loops; this is performed only if we
551debfc3dSmrg       are asked to, as the gain is questionable in this case and often
561debfc3dSmrg       it may even slow down the code
571debfc3dSmrg    For more detailed descriptions of each of those, see comments at
581debfc3dSmrg    appropriate function below.
591debfc3dSmrg 
   There are a lot of parameters (defined and described in params.def) that
   control how much we unroll.
621debfc3dSmrg 
   ??? A great problem is that we don't have a good way to determine
   how many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance in order of several %.
661debfc3dSmrg    */
671debfc3dSmrg 
/* Information about induction variables to split.  */

struct iv_to_split
{
  rtx_insn *insn;	/* The insn in which the induction variable occurs.  */
  rtx orig_var;		/* The variable (register) for the IV before split.  */
  rtx base_var;		/* The variable on which the values in the further
			   iterations are based.  */
  rtx step;		/* Step of the induction variable.  */
  struct iv_to_split *next; /* Next entry in walking order.  */
};
791debfc3dSmrg 
/* Information about accumulators to expand.  */

struct var_to_expand
{
  rtx_insn *insn;	           /* The insn in which the variable expansion occurs.  */
  rtx reg;                         /* The accumulator which is expanded.  */
  vec<rtx> var_expansions;   /* The copies of the accumulator which is expanded.  */
  struct var_to_expand *next;	   /* Next entry in walking order.  */
  enum rtx_code op;                /* The type of the accumulation - addition, subtraction
                                      or multiplication.  */
  int expansion_count;             /* Count the number of expansions generated so far.  */
  int reuse_expansion;             /* The expansion we intend to reuse to expand
                                      the accumulator.  If REUSE_EXPANSION is 0 reuse
                                      the original accumulator.  Else use
                                      var_expansions[REUSE_EXPANSION - 1].  */
};
961debfc3dSmrg 
/* Hashtable helper for iv_to_split.  Entries are keyed on the insn
   containing the induction variable (see hash/equal below).  */

struct iv_split_hasher : free_ptr_hash <iv_to_split>
{
  static inline hashval_t hash (const iv_to_split *);
  static inline bool equal (const iv_to_split *, const iv_to_split *);
};
1041debfc3dSmrg 
1051debfc3dSmrg 
1061debfc3dSmrg /* A hash function for information about insns to split.  */
1071debfc3dSmrg 
1081debfc3dSmrg inline hashval_t
hash(const iv_to_split * ivts)1091debfc3dSmrg iv_split_hasher::hash (const iv_to_split *ivts)
1101debfc3dSmrg {
1111debfc3dSmrg   return (hashval_t) INSN_UID (ivts->insn);
1121debfc3dSmrg }
1131debfc3dSmrg 
1141debfc3dSmrg /* An equality functions for information about insns to split.  */
1151debfc3dSmrg 
1161debfc3dSmrg inline bool
equal(const iv_to_split * i1,const iv_to_split * i2)1171debfc3dSmrg iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
1181debfc3dSmrg {
1191debfc3dSmrg   return i1->insn == i2->insn;
1201debfc3dSmrg }
1211debfc3dSmrg 
/* Hashtable helper for var_to_expand.  Entries are keyed on the insn
   performing the accumulation (see hash/equal below).  */

struct var_expand_hasher : free_ptr_hash <var_to_expand>
{
  static inline hashval_t hash (const var_to_expand *);
  static inline bool equal (const var_to_expand *, const var_to_expand *);
};
1291debfc3dSmrg 
1301debfc3dSmrg /* Return a hash for VES.  */
1311debfc3dSmrg 
1321debfc3dSmrg inline hashval_t
hash(const var_to_expand * ves)1331debfc3dSmrg var_expand_hasher::hash (const var_to_expand *ves)
1341debfc3dSmrg {
1351debfc3dSmrg   return (hashval_t) INSN_UID (ves->insn);
1361debfc3dSmrg }
1371debfc3dSmrg 
1381debfc3dSmrg /* Return true if I1 and I2 refer to the same instruction.  */
1391debfc3dSmrg 
1401debfc3dSmrg inline bool
equal(const var_to_expand * i1,const var_to_expand * i2)1411debfc3dSmrg var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
1421debfc3dSmrg {
1431debfc3dSmrg   return i1->insn == i2->insn;
1441debfc3dSmrg }
1451debfc3dSmrg 
/* Information about optimization applied in
   the unrolled loop.  Collected by analyze_insns_in_loop and consumed by
   apply_opt_in_copies; freed with free_opt_info.  */

struct opt_info
{
  hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
						  split.  */
  struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
  struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
  hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
					insns with accumulators to expand.  */
  struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
  struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
  unsigned first_new_block;        /* The first basic block that was
                                      duplicated.  */
  basic_block loop_exit;           /* The loop exit basic block.  */
  basic_block loop_preheader;      /* The loop preheader basic block.  */
};
1641debfc3dSmrg 
/* Forward declarations for the file-local unrolling machinery.  */

static void decide_unroll_stupid (class loop *, int);
static void decide_unroll_constant_iterations (class loop *, int);
static void decide_unroll_runtime_iterations (class loop *, int);
static void unroll_loop_stupid (class loop *);
static void decide_unrolling (int);
static void unroll_loop_constant_iterations (class loop *);
static void unroll_loop_runtime_iterations (class loop *);
static struct opt_info *analyze_insns_in_loop (class loop *);
static void opt_info_start_duplication (struct opt_info *);
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
static void free_opt_info (struct opt_info *);
static struct var_to_expand *analyze_insn_to_expand_var (class loop*, rtx_insn *);
static bool referenced_in_one_insn_in_loop_p (class loop *, rtx, int *);
static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
static void insert_var_expansion_initialization (struct var_to_expand *,
						 basic_block);
static void combine_var_copies_in_loop_exit (struct var_to_expand *,
					     basic_block);
static rtx get_expansion (struct var_to_expand *);
1851debfc3dSmrg 
1861debfc3dSmrg /* Emit a message summarizing the unroll that will be
1871debfc3dSmrg    performed for LOOP, along with the loop's location LOCUS, if
1881debfc3dSmrg    appropriate given the dump or -fopt-info settings.  */
1891debfc3dSmrg 
1901debfc3dSmrg static void
report_unroll(class loop * loop,dump_location_t locus)191*8feb0f0bSmrg report_unroll (class loop *loop, dump_location_t locus)
1921debfc3dSmrg {
193a2dc1f3fSmrg   dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;
1941debfc3dSmrg 
1951debfc3dSmrg   if (loop->lpt_decision.decision == LPT_NONE)
1961debfc3dSmrg     return;
1971debfc3dSmrg 
1981debfc3dSmrg   if (!dump_enabled_p ())
1991debfc3dSmrg     return;
2001debfc3dSmrg 
201c0a68be4Smrg   dump_metadata_t metadata (report_flags, locus.get_impl_location ());
202c0a68be4Smrg   dump_printf_loc (metadata, locus.get_user_location (),
2031debfc3dSmrg                    "loop unrolled %d times",
2041debfc3dSmrg                    loop->lpt_decision.times);
205a2dc1f3fSmrg   if (profile_info && loop->header->count.initialized_p ())
206c0a68be4Smrg     dump_printf (metadata,
2071debfc3dSmrg                  " (header execution count %d)",
208a2dc1f3fSmrg                  (int)loop->header->count.to_gcov_type ());
2091debfc3dSmrg 
210c0a68be4Smrg   dump_printf (metadata, "\n");
2111debfc3dSmrg }
2121debfc3dSmrg 
/* Decide whether to unroll loops and how much.  Records the verdict in
   each loop's lpt_decision; the actual transformation happens later in
   unroll_loops.  FLAGS is a mask of UAP_* unrolling permissions.  */
static void
decide_unrolling (int flags)
{
  class loop *loop;

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;
      dump_user_location_t locus = get_loop_location (loop);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, locus,
			 "considering unrolling loop %d at BB %d\n",
			 loop->num, loop->header->index);

      /* loop->unroll == 1 records an explicit user request not to
	 unroll this loop.  */
      if (loop->unroll == 1)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not unrolling loop, user didn't want it unrolled\n");
	  continue;
	}

      /* Do not peel cold areas.  */
      if (optimize_loop_for_size_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, cold area\n");
	  continue;
	}

      /* Can the loop be manipulated?  */
      if (!can_duplicate_loop_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, cannot duplicate\n");
	  continue;
	}

      /* Skip non-innermost loops.  */
      if (loop->inner)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
	  continue;
	}

      /* Cache the size estimates used by the decide_* routines below.  */
      loop->ninsns = num_loop_insns (loop);
      loop->av_ninsns = average_num_loop_insns (loop);

      /* Try transformations one by one in decreasing order of priority.
	 The first one that sets a decision wins.  */
      decide_unroll_constant_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_runtime_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_stupid (loop, flags);

      report_unroll (loop, locus);
    }
}
2761debfc3dSmrg 
2771debfc3dSmrg /* Unroll LOOPS.  */
2781debfc3dSmrg void
unroll_loops(int flags)2791debfc3dSmrg unroll_loops (int flags)
2801debfc3dSmrg {
281*8feb0f0bSmrg   class loop *loop;
2821debfc3dSmrg   bool changed = false;
2831debfc3dSmrg 
2841debfc3dSmrg   /* Now decide rest of unrolling.  */
2851debfc3dSmrg   decide_unrolling (flags);
2861debfc3dSmrg 
2871debfc3dSmrg   /* Scan the loops, inner ones first.  */
2881debfc3dSmrg   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2891debfc3dSmrg     {
2901debfc3dSmrg       /* And perform the appropriate transformations.  */
2911debfc3dSmrg       switch (loop->lpt_decision.decision)
2921debfc3dSmrg 	{
2931debfc3dSmrg 	case LPT_UNROLL_CONSTANT:
2941debfc3dSmrg 	  unroll_loop_constant_iterations (loop);
2951debfc3dSmrg 	  changed = true;
2961debfc3dSmrg 	  break;
2971debfc3dSmrg 	case LPT_UNROLL_RUNTIME:
2981debfc3dSmrg 	  unroll_loop_runtime_iterations (loop);
2991debfc3dSmrg 	  changed = true;
3001debfc3dSmrg 	  break;
3011debfc3dSmrg 	case LPT_UNROLL_STUPID:
3021debfc3dSmrg 	  unroll_loop_stupid (loop);
3031debfc3dSmrg 	  changed = true;
3041debfc3dSmrg 	  break;
3051debfc3dSmrg 	case LPT_NONE:
3061debfc3dSmrg 	  break;
3071debfc3dSmrg 	default:
3081debfc3dSmrg 	  gcc_unreachable ();
3091debfc3dSmrg 	}
3101debfc3dSmrg     }
3111debfc3dSmrg 
3121debfc3dSmrg     if (changed)
3131debfc3dSmrg       {
3141debfc3dSmrg 	calculate_dominance_info (CDI_DOMINATORS);
3151debfc3dSmrg 	fix_loop_structure (NULL);
3161debfc3dSmrg       }
3171debfc3dSmrg 
3181debfc3dSmrg   iv_analysis_done ();
3191debfc3dSmrg }
3201debfc3dSmrg 
3211debfc3dSmrg /* Check whether exit of the LOOP is at the end of loop body.  */
3221debfc3dSmrg 
3231debfc3dSmrg static bool
loop_exit_at_end_p(class loop * loop)324*8feb0f0bSmrg loop_exit_at_end_p (class loop *loop)
3251debfc3dSmrg {
326*8feb0f0bSmrg   class niter_desc *desc = get_simple_loop_desc (loop);
3271debfc3dSmrg   rtx_insn *insn;
3281debfc3dSmrg 
3291debfc3dSmrg   /* We should never have conditional in latch block.  */
3301debfc3dSmrg   gcc_assert (desc->in_edge->dest != loop->header);
3311debfc3dSmrg 
3321debfc3dSmrg   if (desc->in_edge->dest != loop->latch)
3331debfc3dSmrg     return false;
3341debfc3dSmrg 
3351debfc3dSmrg   /* Check that the latch is empty.  */
3361debfc3dSmrg   FOR_BB_INSNS (loop->latch, insn)
3371debfc3dSmrg     {
3381debfc3dSmrg       if (INSN_P (insn) && active_insn_p (insn))
3391debfc3dSmrg 	return false;
3401debfc3dSmrg     }
3411debfc3dSmrg 
3421debfc3dSmrg   return true;
3431debfc3dSmrg }
3441debfc3dSmrg 
/* Decide whether to unroll LOOP iterating constant number of times
   and how much.  On success sets lpt_decision to LPT_UNROLL_CONSTANT
   with the chosen unroll factor minus one in lpt_decision.times.  */

static void
decide_unroll_constant_iterations (class loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
  class niter_desc *desc;
  widest_int iterations;

  /* If we were not asked to unroll this loop, just return back silently.  */
  if (!(flags & UAP_UNROLL) && !loop->unroll)
    return;

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "considering unrolling loop with constant "
		 "number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).
     Capped by both the total-insn and average-insn size parameters.  */
  nunroll = param_max_unrolled_insns / loop->ninsns;
  nunroll_by_av
    = param_max_average_unrolled_insns / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) param_max_unroll_times)
    nunroll = param_max_unroll_times;

  /* Let the target make the final adjustment, if it cares.  */
  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  /* Check for an explicit unrolling factor.  */
  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
    {
      /* However we cannot unroll completely at the RTL level a loop with
	 constant number of iterations; it should have been peeled instead.  */
      if (desc->niter == 0 || (unsigned) loop->unroll > desc->niter - 1)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Loop should have been peeled\n");
	}
      else
	{
	  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
	  loop->lpt_decision.times = loop->unroll - 1;
	}
      return;
    }

  /* Check whether the loop rolls enough to consider.
     Consult also loop bounds and profile; in the case the loop has more
     than one exit it may well loop less than determined maximal number
     of iterations.  */
  if (desc->niter < 2 * nunroll
      || ((get_estimated_loop_iterations (loop, &iterations)
	   || get_likely_max_loop_iterations (loop, &iterations))
	  && wi::ltu_p (iterations, 2 * nunroll)))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now compute number of iterations to unroll.  We alter
     nunroll so that as few as possible copies of loop body are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  */
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
  if (i > desc->niter - 2)
    i = desc->niter - 2;

  /* For each candidate factor i+1, n_copies counts peeled pre-iterations
     (niter % (i + 1)) plus the copies inside the unrolled body; pick the
     factor minimizing total copies.  */
  for (; i >= nunroll - 1; i--)
    {
      unsigned exit_mod = desc->niter % (i + 1);

      if (!loop_exit_at_end_p (loop))
	n_copies = exit_mod + i + 1;
      else if (exit_mod != (unsigned) i
	       || desc->noloop_assumptions != NULL_RTX)
	n_copies = exit_mod + i + 2;
      else
	n_copies = i + 1;

      if (n_copies < best_copies)
	{
	  best_copies = n_copies;
	  best_unroll = i;
	}
    }

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;
}
4611debfc3dSmrg 
/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
   The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
  */
static void
unroll_loop_constant_iterations (class loop *loop)
{
  unsigned HOST_WIDE_INT niter;
  unsigned exit_mod;
  unsigned i;
  edge e;
  unsigned max_unroll = loop->lpt_decision.times;
  class niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  niter = desc->niter;

  /* Should not get here (such loop should be peeled instead).  */
  gcc_assert (niter > max_unroll + 1);

  /* Iterations left over after the unrolled body runs a whole number
     of times; these must be peeled off before/around the loop.  */
  exit_mod = niter % (max_unroll + 1);

  /* wont_exit[k] set means copy k cannot leave the loop, so its exit
     test can be removed during duplication.  */
  auto_sbitmap wont_exit (max_unroll + 2);
  bitmap_ones (wont_exit);

  auto_vec<edge> remove_edges;
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave exit test
	 in the first copy, so that the loops that start with test
	 of exit condition have continuous body after unrolling.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at beginning of loop.\n");

      /* Peel exit_mod iterations.  */
      bitmap_clear_bit (wont_exit, 0);
      if (desc->noloop_assumptions)
	bitmap_clear_bit (wont_exit, 1);

      if (exit_mod)
	{
	  opt_info_start_duplication (opt_info);
          ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 1
						 ? DLTHE_RECORD_COPY_NUMBER
						   : 0));
	  gcc_assert (ok);

          if (opt_info && exit_mod > 1)
 	    apply_opt_in_copies (opt_info, exit_mod, false, false);

	  /* The peeled iterations are now outside the loop, so shrink
	     every recorded iteration bound accordingly (dropping a bound
	     entirely if it would underflow).  */
	  desc->noloop_assumptions = NULL_RTX;
	  desc->niter -= exit_mod;
	  loop->nb_iterations_upper_bound -= exit_mod;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod;
	  else
	    loop->any_estimate = false;
	  if (loop->any_likely_upper_bound
	      && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound))
	    loop->nb_iterations_likely_upper_bound -= exit_mod;
	  else
	    loop->any_likely_upper_bound = false;
	}

      bitmap_set_bit (wont_exit, 1);
    }
  else
    {
      /* Leave exit test in last copy, for the same reason as above if
	 the loop tests the condition at the end of loop body.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at end of loop.\n");

      /* We know that niter >= max_unroll + 2; so we do not need to care of
	 case when we would exit before reaching the loop.  So just peel
	 exit_mod + 1 iterations.  */
      if (exit_mod != max_unroll
	  || desc->noloop_assumptions)
	{
	  bitmap_clear_bit (wont_exit, 0);
	  if (desc->noloop_assumptions)
	    bitmap_clear_bit (wont_exit, 1);

          opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod + 1,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 0
						 ? DLTHE_RECORD_COPY_NUMBER
						   : 0));
	  gcc_assert (ok);

          if (opt_info && exit_mod > 0)
  	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);

	  /* Adjust the recorded iteration bounds for the exit_mod + 1
	     peeled iterations, as in the !exit_at_end case above.  */
	  desc->niter -= exit_mod + 1;
	  loop->nb_iterations_upper_bound -= exit_mod + 1;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod + 1;
	  else
	    loop->any_estimate = false;
	  if (loop->any_likely_upper_bound
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound))
	    loop->nb_iterations_likely_upper_bound -= exit_mod + 1;
	  else
	    loop->any_likely_upper_bound = false;
	  desc->noloop_assumptions = NULL_RTX;

	  bitmap_set_bit (wont_exit, 0);
	  bitmap_set_bit (wont_exit, 1);
	}

      bitmap_clear_bit (wont_exit, max_unroll);
    }

  /* Now unroll the loop.  */

  opt_info_start_duplication (opt_info);
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* The unrolled body executes niter / (max_unroll + 1) times; rescale
     every iteration-count record to match.  */
  desc->niter /= max_unroll + 1;
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (loop->any_likely_upper_bound)
    loop->nb_iterations_likely_upper_bound
      = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
  desc->niter_expr = gen_int_mode (desc->niter, desc->mode);

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
	     max_unroll, num_loop_insns (loop));
}
6651debfc3dSmrg 
6661debfc3dSmrg /* Decide whether to unroll LOOP iterating runtime computable number of times
6671debfc3dSmrg    and how much.  */
6681debfc3dSmrg static void
decide_unroll_runtime_iterations(class loop * loop,int flags)669*8feb0f0bSmrg decide_unroll_runtime_iterations (class loop *loop, int flags)
6701debfc3dSmrg {
6711debfc3dSmrg   unsigned nunroll, nunroll_by_av, i;
672*8feb0f0bSmrg   class niter_desc *desc;
6731debfc3dSmrg   widest_int iterations;
6741debfc3dSmrg 
675a2dc1f3fSmrg   /* If we were not asked to unroll this loop, just return back silently.  */
676a2dc1f3fSmrg   if (!(flags & UAP_UNROLL) && !loop->unroll)
6771debfc3dSmrg     return;
6781debfc3dSmrg 
679a2dc1f3fSmrg   if (dump_enabled_p ())
680a2dc1f3fSmrg     dump_printf (MSG_NOTE,
681a2dc1f3fSmrg 		 "considering unrolling loop with runtime-"
6821debfc3dSmrg 		 "computable number of iterations\n");
6831debfc3dSmrg 
6841debfc3dSmrg   /* nunroll = total number of copies of the original loop body in
6851debfc3dSmrg      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
686*8feb0f0bSmrg   nunroll = param_max_unrolled_insns / loop->ninsns;
687*8feb0f0bSmrg   nunroll_by_av = param_max_average_unrolled_insns / loop->av_ninsns;
6881debfc3dSmrg   if (nunroll > nunroll_by_av)
6891debfc3dSmrg     nunroll = nunroll_by_av;
690*8feb0f0bSmrg   if (nunroll > (unsigned) param_max_unroll_times)
691*8feb0f0bSmrg     nunroll = param_max_unroll_times;
6921debfc3dSmrg 
6931debfc3dSmrg   if (targetm.loop_unroll_adjust)
6941debfc3dSmrg     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
6951debfc3dSmrg 
696a2dc1f3fSmrg   if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
697a2dc1f3fSmrg     nunroll = loop->unroll;
698a2dc1f3fSmrg 
6991debfc3dSmrg   /* Skip big loops.  */
7001debfc3dSmrg   if (nunroll <= 1)
7011debfc3dSmrg     {
7021debfc3dSmrg       if (dump_file)
7031debfc3dSmrg 	fprintf (dump_file, ";; Not considering loop, is too big\n");
7041debfc3dSmrg       return;
7051debfc3dSmrg     }
7061debfc3dSmrg 
7071debfc3dSmrg   /* Check for simple loops.  */
7081debfc3dSmrg   desc = get_simple_loop_desc (loop);
7091debfc3dSmrg 
7101debfc3dSmrg   /* Check simpleness.  */
7111debfc3dSmrg   if (!desc->simple_p || desc->assumptions)
7121debfc3dSmrg     {
7131debfc3dSmrg       if (dump_file)
7141debfc3dSmrg 	fprintf (dump_file,
7151debfc3dSmrg 		 ";; Unable to prove that the number of iterations "
7161debfc3dSmrg 		 "can be counted in runtime\n");
7171debfc3dSmrg       return;
7181debfc3dSmrg     }
7191debfc3dSmrg 
7201debfc3dSmrg   if (desc->const_iter)
7211debfc3dSmrg     {
7221debfc3dSmrg       if (dump_file)
7231debfc3dSmrg 	fprintf (dump_file, ";; Loop iterates constant times\n");
7241debfc3dSmrg       return;
7251debfc3dSmrg     }
7261debfc3dSmrg 
7271debfc3dSmrg   /* Check whether the loop rolls.  */
7281debfc3dSmrg   if ((get_estimated_loop_iterations (loop, &iterations)
7291debfc3dSmrg        || get_likely_max_loop_iterations (loop, &iterations))
7301debfc3dSmrg       && wi::ltu_p (iterations, 2 * nunroll))
7311debfc3dSmrg     {
7321debfc3dSmrg       if (dump_file)
7331debfc3dSmrg 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
7341debfc3dSmrg       return;
7351debfc3dSmrg     }
7361debfc3dSmrg 
737a2dc1f3fSmrg   /* Success; now force nunroll to be power of 2, as code-gen
738a2dc1f3fSmrg      requires it, we are unable to cope with overflows in
739a2dc1f3fSmrg      computation of number of iterations.  */
7401debfc3dSmrg   for (i = 1; 2 * i <= nunroll; i *= 2)
7411debfc3dSmrg     continue;
7421debfc3dSmrg 
7431debfc3dSmrg   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
7441debfc3dSmrg   loop->lpt_decision.times = i - 1;
7451debfc3dSmrg }
7461debfc3dSmrg 
7471debfc3dSmrg /* Splits edge E and inserts the sequence of instructions INSNS on it, and
7481debfc3dSmrg    returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
7491debfc3dSmrg    and NULL is returned instead.  */
7501debfc3dSmrg 
7511debfc3dSmrg basic_block
split_edge_and_insert(edge e,rtx_insn * insns)7521debfc3dSmrg split_edge_and_insert (edge e, rtx_insn *insns)
7531debfc3dSmrg {
7541debfc3dSmrg   basic_block bb;
7551debfc3dSmrg 
7561debfc3dSmrg   if (!insns)
7571debfc3dSmrg     return NULL;
7581debfc3dSmrg   bb = split_edge (e);
7591debfc3dSmrg   emit_insn_after (insns, BB_END (bb));
7601debfc3dSmrg 
7611debfc3dSmrg   /* ??? We used to assume that INSNS can contain control flow insns, and
7621debfc3dSmrg      that we had to try to find sub basic blocks in BB to maintain a valid
7631debfc3dSmrg      CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
7641debfc3dSmrg      and call break_superblocks when going out of cfglayout mode.  But it
7651debfc3dSmrg      turns out that this never happens; and that if it does ever happen,
7661debfc3dSmrg      the verify_flow_info at the end of the RTL loop passes would fail.
7671debfc3dSmrg 
7681debfc3dSmrg      There are two reasons why we expected we could have control flow insns
7691debfc3dSmrg      in INSNS.  The first is when a comparison has to be done in parts, and
7701debfc3dSmrg      the second is when the number of iterations is computed for loops with
7711debfc3dSmrg      the number of iterations known at runtime.  In both cases, test cases
7721debfc3dSmrg      to get control flow in INSNS appear to be impossible to construct:
7731debfc3dSmrg 
7741debfc3dSmrg       * If do_compare_rtx_and_jump needs several branches to do comparison
7751debfc3dSmrg 	in a mode that needs comparison by parts, we cannot analyze the
7761debfc3dSmrg 	number of iterations of the loop, and we never get to unrolling it.
7771debfc3dSmrg 
7781debfc3dSmrg       * The code in expand_divmod that was suspected to cause creation of
7791debfc3dSmrg 	branching code seems to be only accessed for signed division.  The
7801debfc3dSmrg 	divisions used by # of iterations analysis are always unsigned.
7811debfc3dSmrg 	Problems might arise on architectures that emits branching code
7821debfc3dSmrg 	for some operations that may appear in the unroller (especially
7831debfc3dSmrg 	for division), but we have no such architectures.
7841debfc3dSmrg 
7851debfc3dSmrg      Considering all this, it was decided that we should for now assume
7861debfc3dSmrg      that INSNS can in theory contain control flow insns, but in practice
7871debfc3dSmrg      it never does.  So we don't handle the theoretical case, and should
7881debfc3dSmrg      a real failure ever show up, we have a pretty good clue for how to
7891debfc3dSmrg      fix it.  */
7901debfc3dSmrg 
7911debfc3dSmrg   return bb;
7921debfc3dSmrg }
7931debfc3dSmrg 
/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
   true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
   in order to create a jump.  */

static rtx_insn *
compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
		      rtx_code_label *label, profile_probability prob,
		      rtx_insn *cinsn)
{
  rtx_insn *seq;
  rtx_jump_insn *jump;
  rtx cond;
  machine_mode mode;

  /* Take the comparison mode from OP0; fall back to OP1 when OP0 has
     VOIDmode (e.g. a constant).  */
  mode = GET_MODE (op0);
  if (mode == VOIDmode)
    mode = GET_MODE (op1);

  start_sequence ();
  if (GET_MODE_CLASS (mode) == MODE_CC)
    {
      /* A hack -- there seems to be no easy generic way how to make a
	 conditional jump from a ccmode comparison.  */
      gcc_assert (cinsn);
      /* Verify that CINSN's condition really matches OP0 COMP OP1, then
	 copy its pattern verbatim and retarget the copy at LABEL.  */
      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
      gcc_assert (GET_CODE (cond) == comp);
      gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
      gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
      emit_jump_insn (copy_insn (PATTERN (cinsn)));
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
      LABEL_NUSES (JUMP_LABEL (jump))++;
      redirect_jump (jump, label, 0);
    }
  else
    {
      /* CINSN is only meaningful for the MODE_CC case above.  */
      gcc_assert (!cinsn);

      /* Emit a fresh comparison and jump; the probability is attached
	 below, so pass an uninitialized one here.  */
      op0 = force_operand (op0, NULL_RTX);
      op1 = force_operand (op1, NULL_RTX);
      do_compare_rtx_and_jump (op0, op1, comp, 0,
			       mode, NULL_RTX, NULL, label,
			       profile_probability::uninitialized ());
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      jump->set_jump_target (label);
      LABEL_NUSES (label)++;
    }
  /* Record the branch probability on the new jump when it is known.  */
  if (prob.initialized_p ())
    add_reg_br_prob_note (jump, prob);

  seq = get_insns ();
  end_sequence ();

  return seq;
}
8491debfc3dSmrg 
/* Unroll LOOP for which we are able to count number of iterations in
   runtime LOOP->LPT_DECISION.TIMES times.  The times value must be a
   power of two.  The transformation does this (with some extra care
   for case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
         body; i++;
       case 2:
         body; i++;
       case 1:
         body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_runtime_iterations (class loop *loop)
{
  rtx old_niter, niter, tmp;
  rtx_insn *init_code, *branch_code;
  unsigned i, j;
  profile_probability p;
  basic_block preheader, *body, swtch, ezc_swtch = NULL;
  int may_exit_copy;
  profile_count iter_count, new_count;
  unsigned n_peel;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  class niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  /* Gather IV-splitting / accumulator-expansion opportunities if those
     optimizations are enabled.  */
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  These are
     the blocks outside LOOP that are immediately dominated by some loop
     block; they are fixed up by iterate_fix_dominators below.  */
  auto_vec<basic_block> dom_bbs;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      vec<basic_block> ldom;
      basic_block bb;

      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      FOR_EACH_VEC_ELT (ldom, j, bb)
	if (!flow_bb_inside_loop_p (loop, bb))
	  dom_bbs.safe_push (bb);

      ldom.release ();
    }
  free (body);

  if (!exit_at_end)
    {
      /* Leave exit in first copy (for explanation why see comment in
	 unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave exit in last copy (for explanation why see comment in
	 unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get expression for number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* For loops that exit at end and whose number of iterations is reliable,
     add one to niter to account for first pass through loop body before
     reaching exit test. */
  if (exit_at_end && !desc->noloop_assumptions)
    {
      niter = expand_simple_binop (desc->mode, PLUS,
				   niter, const1_rtx,
				   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      old_niter = niter;
    }

  /* Count modulo by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  */
  niter = expand_simple_binop (desc->mode, AND,
			       niter, gen_int_mode (max_unroll, desc->mode),
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  auto_vec<edge> remove_edges;

  auto_sbitmap wont_exit (max_unroll + 2);

  if (extra_zero_check || desc->noloop_assumptions)
    {
      /* Peel the first copy of loop body.  Leave the exit test if the number
	 of iterations is not reliable.  Also record the place of the extra zero
	 check.  */
      bitmap_clear (wont_exit);
      if (!desc->noloop_assumptions)
	bitmap_set_bit (wont_exit, 1);
      ezc_swtch = loop_preheader_edge (loop)->src;
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);
    }

  /* Record the place where switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  /* Compute count increments for each switch block and initialize
     innermost switch block.  Switch blocks and peeled loop copies are built
     from innermost outward.  */
  iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1);
  swtch->count = new_count;

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  */
      bitmap_clear (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
	bitmap_set_bit (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create item for switch.  NITER was masked with MAX_UNROLL above,
	 so an EQ compare against J selects the peeled copies that handle
	 the residue NITER mod (MAX_UNROLL + 1) == J.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      /* NOTE(review): 1/(i + 2) presumably treats the residue values not
	 yet tested as equally likely — confirm against profile docs.  */
      p = profile_probability::always ().apply_scale (1, i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      /* Add in count of edge from switch block.  */
      preheader->count += iter_count;
      branch_code = compare_and_jump_seq (copy_rtx (niter),
					  gen_int_mode (j, desc->mode), EQ,
					  block_label (preheader), p, NULL);

      /* We rely on the fact that the compare and jump cannot be optimized out,
	 and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = p.invert ();
      new_count += iter_count;
      swtch->count = new_count;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add branch for zero iterations.  */
      p = profile_probability::always ().apply_scale (1, max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      /* Recompute count adjustments since initial peel copy may
	 have exited and reduced those values that were computed above.  */
      iter_count = swtch->count.apply_scale (1, max_unroll + 1);
      /* Add in count of edge from switch block.  */
      preheader->count += iter_count;
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
					  block_label (preheader), p,
					  NULL);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = p.invert ();
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll loop.  Only the copy indexed MAY_EXIT_COPY keeps its
     exit test; the others are marked as never exiting.  */

  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
			 gen_int_mode (max_unroll + 1, desc->mode));
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (loop->any_likely_upper_bound)
    loop->nb_iterations_likely_upper_bound
      = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
  if (exit_at_end)
    {
      /* Undo the +1 applied before the modulo computation above, and drop
	 estimates that would underflow.  */
      desc->niter_expr =
	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      --loop->nb_iterations_upper_bound;
      if (loop->any_estimate
	  && loop->nb_iterations_estimate != 0)
	--loop->nb_iterations_estimate;
      else
	loop->any_estimate = false;
      if (loop->any_likely_upper_bound
	  && loop->nb_iterations_likely_upper_bound != 0)
	--loop->nb_iterations_likely_upper_bound;
      else
	loop->any_likely_upper_bound = false;
    }

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, counting # of iterations "
	     "in runtime, %i insns\n",
	     max_unroll, num_loop_insns (loop));
}
11511debfc3dSmrg 
11521debfc3dSmrg /* Decide whether to unroll LOOP stupidly and how much.  */
11531debfc3dSmrg static void
decide_unroll_stupid(class loop * loop,int flags)1154*8feb0f0bSmrg decide_unroll_stupid (class loop *loop, int flags)
11551debfc3dSmrg {
11561debfc3dSmrg   unsigned nunroll, nunroll_by_av, i;
1157*8feb0f0bSmrg   class niter_desc *desc;
11581debfc3dSmrg   widest_int iterations;
11591debfc3dSmrg 
1160a2dc1f3fSmrg   /* If we were not asked to unroll this loop, just return back silently.  */
1161a2dc1f3fSmrg   if (!(flags & UAP_UNROLL_ALL) && !loop->unroll)
11621debfc3dSmrg     return;
11631debfc3dSmrg 
1164a2dc1f3fSmrg   if (dump_enabled_p ())
1165a2dc1f3fSmrg     dump_printf (MSG_NOTE, "considering unrolling loop stupidly\n");
11661debfc3dSmrg 
11671debfc3dSmrg   /* nunroll = total number of copies of the original loop body in
11681debfc3dSmrg      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1169*8feb0f0bSmrg   nunroll = param_max_unrolled_insns / loop->ninsns;
11701debfc3dSmrg   nunroll_by_av
1171*8feb0f0bSmrg     = param_max_average_unrolled_insns / loop->av_ninsns;
11721debfc3dSmrg   if (nunroll > nunroll_by_av)
11731debfc3dSmrg     nunroll = nunroll_by_av;
1174*8feb0f0bSmrg   if (nunroll > (unsigned) param_max_unroll_times)
1175*8feb0f0bSmrg     nunroll = param_max_unroll_times;
11761debfc3dSmrg 
11771debfc3dSmrg   if (targetm.loop_unroll_adjust)
11781debfc3dSmrg     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
11791debfc3dSmrg 
1180a2dc1f3fSmrg   if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
1181a2dc1f3fSmrg     nunroll = loop->unroll;
1182a2dc1f3fSmrg 
11831debfc3dSmrg   /* Skip big loops.  */
11841debfc3dSmrg   if (nunroll <= 1)
11851debfc3dSmrg     {
11861debfc3dSmrg       if (dump_file)
11871debfc3dSmrg 	fprintf (dump_file, ";; Not considering loop, is too big\n");
11881debfc3dSmrg       return;
11891debfc3dSmrg     }
11901debfc3dSmrg 
11911debfc3dSmrg   /* Check for simple loops.  */
11921debfc3dSmrg   desc = get_simple_loop_desc (loop);
11931debfc3dSmrg 
11941debfc3dSmrg   /* Check simpleness.  */
11951debfc3dSmrg   if (desc->simple_p && !desc->assumptions)
11961debfc3dSmrg     {
11971debfc3dSmrg       if (dump_file)
1198a2dc1f3fSmrg 	fprintf (dump_file, ";; Loop is simple\n");
11991debfc3dSmrg       return;
12001debfc3dSmrg     }
12011debfc3dSmrg 
12021debfc3dSmrg   /* Do not unroll loops with branches inside -- it increases number
12031debfc3dSmrg      of mispredicts.
12041debfc3dSmrg      TODO: this heuristic needs tunning; call inside the loop body
12051debfc3dSmrg      is also relatively good reason to not unroll.  */
12061debfc3dSmrg   if (num_loop_branches (loop) > 1)
12071debfc3dSmrg     {
12081debfc3dSmrg       if (dump_file)
12091debfc3dSmrg 	fprintf (dump_file, ";; Not unrolling, contains branches\n");
12101debfc3dSmrg       return;
12111debfc3dSmrg     }
12121debfc3dSmrg 
12131debfc3dSmrg   /* Check whether the loop rolls.  */
12141debfc3dSmrg   if ((get_estimated_loop_iterations (loop, &iterations)
12151debfc3dSmrg        || get_likely_max_loop_iterations (loop, &iterations))
12161debfc3dSmrg       && wi::ltu_p (iterations, 2 * nunroll))
12171debfc3dSmrg     {
12181debfc3dSmrg       if (dump_file)
12191debfc3dSmrg 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
12201debfc3dSmrg       return;
12211debfc3dSmrg     }
12221debfc3dSmrg 
12231debfc3dSmrg   /* Success.  Now force nunroll to be power of 2, as it seems that this
12241debfc3dSmrg      improves results (partially because of better alignments, partially
12251debfc3dSmrg      because of some dark magic).  */
12261debfc3dSmrg   for (i = 1; 2 * i <= nunroll; i *= 2)
12271debfc3dSmrg     continue;
12281debfc3dSmrg 
12291debfc3dSmrg   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
12301debfc3dSmrg   loop->lpt_decision.times = i - 1;
12311debfc3dSmrg }
12321debfc3dSmrg 
12331debfc3dSmrg /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:
12341debfc3dSmrg 
12351debfc3dSmrg    while (cond)
12361debfc3dSmrg      body;
12371debfc3dSmrg 
12381debfc3dSmrg    ==>  (LOOP->LPT_DECISION.TIMES == 3)
12391debfc3dSmrg 
12401debfc3dSmrg    while (cond)
12411debfc3dSmrg      {
12421debfc3dSmrg        body;
12431debfc3dSmrg        if (!cond) break;
12441debfc3dSmrg        body;
12451debfc3dSmrg        if (!cond) break;
12461debfc3dSmrg        body;
12471debfc3dSmrg        if (!cond) break;
12481debfc3dSmrg        body;
12491debfc3dSmrg      }
12501debfc3dSmrg    */
12511debfc3dSmrg static void
unroll_loop_stupid(class loop * loop)1252*8feb0f0bSmrg unroll_loop_stupid (class loop *loop)
12531debfc3dSmrg {
12541debfc3dSmrg   unsigned nunroll = loop->lpt_decision.times;
1255*8feb0f0bSmrg   class niter_desc *desc = get_simple_loop_desc (loop);
12561debfc3dSmrg   struct opt_info *opt_info = NULL;
12571debfc3dSmrg   bool ok;
12581debfc3dSmrg 
12591debfc3dSmrg   if (flag_split_ivs_in_unroller
12601debfc3dSmrg       || flag_variable_expansion_in_unroller)
12611debfc3dSmrg     opt_info = analyze_insns_in_loop (loop);
12621debfc3dSmrg 
12631debfc3dSmrg   auto_sbitmap wont_exit (nunroll + 1);
12641debfc3dSmrg   bitmap_clear (wont_exit);
12651debfc3dSmrg   opt_info_start_duplication (opt_info);
12661debfc3dSmrg 
12671debfc3dSmrg   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
12681debfc3dSmrg 				      nunroll, wont_exit,
12691debfc3dSmrg 				      NULL, NULL,
12701debfc3dSmrg 				      DLTHE_FLAG_UPDATE_FREQ
12711debfc3dSmrg 				      | (opt_info
12721debfc3dSmrg 					 ? DLTHE_RECORD_COPY_NUMBER
12731debfc3dSmrg 					   : 0));
12741debfc3dSmrg   gcc_assert (ok);
12751debfc3dSmrg 
12761debfc3dSmrg   if (opt_info)
12771debfc3dSmrg     {
12781debfc3dSmrg       apply_opt_in_copies (opt_info, nunroll, true, true);
12791debfc3dSmrg       free_opt_info (opt_info);
12801debfc3dSmrg     }
12811debfc3dSmrg 
12821debfc3dSmrg   if (desc->simple_p)
12831debfc3dSmrg     {
12841debfc3dSmrg       /* We indeed may get here provided that there are nontrivial assumptions
12851debfc3dSmrg 	 for a loop to be really simple.  We could update the counts, but the
12861debfc3dSmrg 	 problem is that we are unable to decide which exit will be taken
12871debfc3dSmrg 	 (not really true in case the number of iterations is constant,
12881debfc3dSmrg 	 but no one will do anything with this information, so we do not
12891debfc3dSmrg 	 worry about it).  */
12901debfc3dSmrg       desc->simple_p = false;
12911debfc3dSmrg     }
12921debfc3dSmrg 
12931debfc3dSmrg   if (dump_file)
12941debfc3dSmrg     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
12951debfc3dSmrg 	     nunroll, num_loop_insns (loop));
12961debfc3dSmrg }
12971debfc3dSmrg 
12981debfc3dSmrg /* Returns true if REG is referenced in one nondebug insn in LOOP.
12991debfc3dSmrg    Set *DEBUG_USES to the number of debug insns that reference the
13001debfc3dSmrg    variable.  */
13011debfc3dSmrg 
13021debfc3dSmrg static bool
referenced_in_one_insn_in_loop_p(class loop * loop,rtx reg,int * debug_uses)1303*8feb0f0bSmrg referenced_in_one_insn_in_loop_p (class loop *loop, rtx reg,
13041debfc3dSmrg 				  int *debug_uses)
13051debfc3dSmrg {
13061debfc3dSmrg   basic_block *body, bb;
13071debfc3dSmrg   unsigned i;
13081debfc3dSmrg   int count_ref = 0;
13091debfc3dSmrg   rtx_insn *insn;
13101debfc3dSmrg 
13111debfc3dSmrg   body = get_loop_body (loop);
13121debfc3dSmrg   for (i = 0; i < loop->num_nodes; i++)
13131debfc3dSmrg     {
13141debfc3dSmrg       bb = body[i];
13151debfc3dSmrg 
13161debfc3dSmrg       FOR_BB_INSNS (bb, insn)
13171debfc3dSmrg 	if (!rtx_referenced_p (reg, insn))
13181debfc3dSmrg 	  continue;
13191debfc3dSmrg 	else if (DEBUG_INSN_P (insn))
13201debfc3dSmrg 	  ++*debug_uses;
13211debfc3dSmrg 	else if (++count_ref > 1)
13221debfc3dSmrg 	  break;
13231debfc3dSmrg     }
13241debfc3dSmrg   free (body);
13251debfc3dSmrg   return (count_ref  == 1);
13261debfc3dSmrg }
13271debfc3dSmrg 
13281debfc3dSmrg /* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */
13291debfc3dSmrg 
13301debfc3dSmrg static void
reset_debug_uses_in_loop(class loop * loop,rtx reg,int debug_uses)1331*8feb0f0bSmrg reset_debug_uses_in_loop (class loop *loop, rtx reg, int debug_uses)
13321debfc3dSmrg {
13331debfc3dSmrg   basic_block *body, bb;
13341debfc3dSmrg   unsigned i;
13351debfc3dSmrg   rtx_insn *insn;
13361debfc3dSmrg 
13371debfc3dSmrg   body = get_loop_body (loop);
13381debfc3dSmrg   for (i = 0; debug_uses && i < loop->num_nodes; i++)
13391debfc3dSmrg     {
13401debfc3dSmrg       bb = body[i];
13411debfc3dSmrg 
13421debfc3dSmrg       FOR_BB_INSNS (bb, insn)
13431debfc3dSmrg 	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
13441debfc3dSmrg 	  continue;
13451debfc3dSmrg 	else
13461debfc3dSmrg 	  {
13471debfc3dSmrg 	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
13481debfc3dSmrg 			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
13491debfc3dSmrg 	    if (!--debug_uses)
13501debfc3dSmrg 	      break;
13511debfc3dSmrg 	  }
13521debfc3dSmrg     }
13531debfc3dSmrg   free (body);
13541debfc3dSmrg }
13551debfc3dSmrg 
13561debfc3dSmrg /* Determine whether INSN contains an accumulator
13571debfc3dSmrg    which can be expanded into separate copies,
13581debfc3dSmrg    one for each copy of the LOOP body.
13591debfc3dSmrg 
13601debfc3dSmrg    for (i = 0 ; i < n; i++)
13611debfc3dSmrg      sum += a[i];
13621debfc3dSmrg 
13631debfc3dSmrg    ==>
13641debfc3dSmrg 
13651debfc3dSmrg    sum += a[i]
13661debfc3dSmrg    ....
13671debfc3dSmrg    i = i+1;
13681debfc3dSmrg    sum1 += a[i]
13691debfc3dSmrg    ....
13701debfc3dSmrg    i = i+1
13711debfc3dSmrg    sum2 += a[i];
13721debfc3dSmrg    ....
13731debfc3dSmrg 
13741debfc3dSmrg    Return NULL if INSN contains no opportunity for expansion of accumulator.
13751debfc3dSmrg    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
13761debfc3dSmrg    information and return a pointer to it.
13771debfc3dSmrg */
13781debfc3dSmrg 
static struct var_to_expand *
analyze_insn_to_expand_var (class loop *loop, rtx_insn *insn)
{
  rtx set, dest, src;
  struct var_to_expand *ves;
  unsigned accum_pos;
  enum rtx_code code;
  /* Number of debug insns in LOOP that reference the accumulator;
     filled in by referenced_in_one_insn_in_loop_p below.  */
  int debug_uses = 0;

  /* Only a single-set insn can be an accumulator update.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);
  code = GET_CODE (src);

  if (code != PLUS && code != MINUS && code != MULT && code != FMA)
    return NULL;

  /* Splitting a floating-point accumulation reassociates it, which is
     only valid under -fassociative-math.  */
  if (FLOAT_MODE_P (GET_MODE (dest)))
    {
      if (!flag_associative_math)
        return NULL;
      /* In the case of FMA, we're also changing the rounding.  */
      if (code == FMA && !flag_unsafe_math_optimizations)
	return NULL;
    }

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we cannot
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (code, GET_MODE (src)))
    return NULL;

  /* The accumulator must be a (possibly SUBREG'd) register.  */
  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
           && REG_P (SUBREG_REG (dest))))
    return NULL;

  /* Find the accumulator use within the operation.  */
  if (code == FMA)
    {
      /* We only support accumulation via FMA in the ADD position.  */
      if (!rtx_equal_p  (dest, XEXP (src, 2)))
	return NULL;
      accum_pos = 2;
    }
  else if (rtx_equal_p (dest, XEXP (src, 0)))
    accum_pos = 0;
  else if (rtx_equal_p (dest, XEXP (src, 1)))
    {
      /* The method of expansion that we are using; which includes the
	 initialization of the expansions with zero and the summation of
         the expansions at the end of the computation will yield wrong
	 results for (x = something - x) thus avoid using it in that case.  */
      if (code == MINUS)
	return NULL;
      accum_pos = 1;
    }
  else
    return NULL;

  /* It must not otherwise be used.  */
  if (code == FMA)
    {
      if (rtx_referenced_p (dest, XEXP (src, 0))
	  || rtx_referenced_p (dest, XEXP (src, 1)))
	return NULL;
    }
  else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
    return NULL;

  /* It must be used in exactly one insn.  */
  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  if (debug_uses)
    /* Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  */
    reset_debug_uses_in_loop (loop, dest, debug_uses);

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions.create (1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;
  return ves;
}
14911debfc3dSmrg 
14921debfc3dSmrg /* Determine whether there is an induction variable in INSN that
14931debfc3dSmrg    we would like to split during unrolling.
14941debfc3dSmrg 
14951debfc3dSmrg    I.e. replace
14961debfc3dSmrg 
14971debfc3dSmrg    i = i + 1;
14981debfc3dSmrg    ...
14991debfc3dSmrg    i = i + 1;
15001debfc3dSmrg    ...
15011debfc3dSmrg    i = i + 1;
15021debfc3dSmrg    ...
15031debfc3dSmrg 
15041debfc3dSmrg    type chains by
15051debfc3dSmrg 
15061debfc3dSmrg    i0 = i + 1
15071debfc3dSmrg    ...
15081debfc3dSmrg    i = i0 + 1
15091debfc3dSmrg    ...
15101debfc3dSmrg    i = i0 + 2
15111debfc3dSmrg    ...
15121debfc3dSmrg 
15131debfc3dSmrg    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
15141debfc3dSmrg    an IV_TO_SPLIT structure, fill it with the relevant information and return a
15151debfc3dSmrg    pointer to it.  */
15161debfc3dSmrg 
15171debfc3dSmrg static struct iv_to_split *
analyze_iv_to_split_insn(rtx_insn * insn)15181debfc3dSmrg analyze_iv_to_split_insn (rtx_insn *insn)
15191debfc3dSmrg {
15201debfc3dSmrg   rtx set, dest;
1521*8feb0f0bSmrg   class rtx_iv iv;
15221debfc3dSmrg   struct iv_to_split *ivts;
1523a2dc1f3fSmrg   scalar_int_mode mode;
15241debfc3dSmrg   bool ok;
15251debfc3dSmrg 
15261debfc3dSmrg   /* For now we just split the basic induction variables.  Later this may be
15271debfc3dSmrg      extended for example by selecting also addresses of memory references.  */
15281debfc3dSmrg   set = single_set (insn);
15291debfc3dSmrg   if (!set)
15301debfc3dSmrg     return NULL;
15311debfc3dSmrg 
15321debfc3dSmrg   dest = SET_DEST (set);
1533a2dc1f3fSmrg   if (!REG_P (dest) || !is_a <scalar_int_mode> (GET_MODE (dest), &mode))
15341debfc3dSmrg     return NULL;
15351debfc3dSmrg 
1536a2dc1f3fSmrg   if (!biv_p (insn, mode, dest))
15371debfc3dSmrg     return NULL;
15381debfc3dSmrg 
15391debfc3dSmrg   ok = iv_analyze_result (insn, dest, &iv);
15401debfc3dSmrg 
15411debfc3dSmrg   /* This used to be an assert under the assumption that if biv_p returns
15421debfc3dSmrg      true that iv_analyze_result must also return true.  However, that
15431debfc3dSmrg      assumption is not strictly correct as evidenced by pr25569.
15441debfc3dSmrg 
15451debfc3dSmrg      Returning NULL when iv_analyze_result returns false is safe and
15461debfc3dSmrg      avoids the problems in pr25569 until the iv_analyze_* routines
15471debfc3dSmrg      can be fixed, which is apparently hard and time consuming
15481debfc3dSmrg      according to their author.  */
15491debfc3dSmrg   if (! ok)
15501debfc3dSmrg     return NULL;
15511debfc3dSmrg 
15521debfc3dSmrg   if (iv.step == const0_rtx
15531debfc3dSmrg       || iv.mode != iv.extend_mode)
15541debfc3dSmrg     return NULL;
15551debfc3dSmrg 
15561debfc3dSmrg   /* Record the insn to split.  */
15571debfc3dSmrg   ivts = XNEW (struct iv_to_split);
15581debfc3dSmrg   ivts->insn = insn;
15591debfc3dSmrg   ivts->orig_var = dest;
15601debfc3dSmrg   ivts->base_var = NULL_RTX;
15611debfc3dSmrg   ivts->step = iv.step;
15621debfc3dSmrg   ivts->next = NULL;
15631debfc3dSmrg 
15641debfc3dSmrg   return ivts;
15651debfc3dSmrg }
15661debfc3dSmrg 
15671debfc3dSmrg /* Determines which of insns in LOOP can be optimized.
15681debfc3dSmrg    Return a OPT_INFO struct with the relevant hash tables filled
15691debfc3dSmrg    with all insns to be optimized.  The FIRST_NEW_BLOCK field
15701debfc3dSmrg    is undefined for the return value.  */
15711debfc3dSmrg 
static struct opt_info *
analyze_insns_in_loop (class loop *loop)
{
  basic_block *body, bb;
  unsigned i;
  struct opt_info *opt_info = XCNEW (struct opt_info);
  rtx_insn *insn;
  struct iv_to_split *ivts = NULL;
  struct var_to_expand *ves = NULL;
  iv_to_split **slot1;
  var_to_expand **slot2;
  vec<edge> edges = get_loop_exit_edges (loop);
  edge exit;
  /* Whether variable expansion may be applied; requires a single
     non-complex exit edge (established below).  */
  bool can_apply = false;

  iv_analysis_loop_init (loop);

  body = get_loop_body (loop);

  if (flag_split_ivs_in_unroller)
    {
      opt_info->insns_to_split
       	= new hash_table<iv_split_hasher> (5 * loop->num_nodes);
      opt_info->iv_to_split_head = NULL;
      opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
    }

  /* Record the loop exit bb and loop preheader before the unrolling.  */
  opt_info->loop_preheader = loop_preheader_edge (loop)->src;

  /* Variable expansion needs a place to combine the partial
     accumulators after the loop: split the single exit edge so the
     combining code gets a block of its own.  */
  if (edges.length () == 1)
    {
      exit = edges[0];
      if (!(exit->flags & EDGE_COMPLEX))
	{
	  opt_info->loop_exit = split_edge (exit);
	  can_apply = true;
	}
    }

  if (flag_variable_expansion_in_unroller
      && can_apply)
    {
      opt_info->insns_with_var_to_expand
       	= new hash_table<var_expand_hasher> (5 * loop->num_nodes);
      opt_info->var_to_expand_head = NULL;
      opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      /* Only consider blocks that dominate the latch, i.e. blocks
	 executed on every iteration.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	continue;

      FOR_BB_INSNS (bb, insn)
      {
        if (!INSN_P (insn))
          continue;

        if (opt_info->insns_to_split)
          ivts = analyze_iv_to_split_insn (insn);

	/* An insn that is a split candidate is recorded in both the
	   hash table (for lookup by insn) and the linked list (for
	   ordered traversal).  */
        if (ivts)
          {
            slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
	    gcc_assert (*slot1 == NULL);
            *slot1 = ivts;
	    *opt_info->iv_to_split_tail = ivts;
	    opt_info->iv_to_split_tail = &ivts->next;
            continue;
          }

        if (opt_info->insns_with_var_to_expand)
          ves = analyze_insn_to_expand_var (loop, insn);

        if (ves)
          {
            slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
	    gcc_assert (*slot2 == NULL);
            *slot2 = ves;
	    *opt_info->var_to_expand_tail = ves;
	    opt_info->var_to_expand_tail = &ves->next;
          }
      }
    }

  edges.release ();
  free (body);
  return opt_info;
}
16631debfc3dSmrg 
16641debfc3dSmrg /* Called just before loop duplication.  Records start of duplicated area
16651debfc3dSmrg    to OPT_INFO.  */
16661debfc3dSmrg 
16671debfc3dSmrg static void
opt_info_start_duplication(struct opt_info * opt_info)16681debfc3dSmrg opt_info_start_duplication (struct opt_info *opt_info)
16691debfc3dSmrg {
16701debfc3dSmrg   if (opt_info)
16711debfc3dSmrg     opt_info->first_new_block = last_basic_block_for_fn (cfun);
16721debfc3dSmrg }
16731debfc3dSmrg 
16741debfc3dSmrg /* Determine the number of iterations between initialization of the base
16751debfc3dSmrg    variable and the current copy (N_COPY).  N_COPIES is the total number
16761debfc3dSmrg    of newly created copies.  UNROLLING is true if we are unrolling
16771debfc3dSmrg    (not peeling) the loop.  */
16781debfc3dSmrg 
static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  /* If we are unrolling, initialization is done in the original loop
     body (number 0), so the copy index is the iteration delta.  */
  if (unrolling)
    return n_copy;

  /* If we are peeling, the copy in that the initialization occurs has
     number 1.  The original loop (number 0) is the last.  */
  return n_copy ? n_copy - 1 : n_copies;
}
16981debfc3dSmrg 
16991debfc3dSmrg /* Allocate basic variable for the induction variable chain.  */
17001debfc3dSmrg 
17011debfc3dSmrg static void
allocate_basic_variable(struct iv_to_split * ivts)17021debfc3dSmrg allocate_basic_variable (struct iv_to_split *ivts)
17031debfc3dSmrg {
17041debfc3dSmrg   rtx expr = SET_SRC (single_set (ivts->insn));
17051debfc3dSmrg 
17061debfc3dSmrg   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
17071debfc3dSmrg }
17081debfc3dSmrg 
17091debfc3dSmrg /* Insert initialization of basic variable of IVTS before INSN, taking
17101debfc3dSmrg    the initial value from INSN.  */
17111debfc3dSmrg 
17121debfc3dSmrg static void
insert_base_initialization(struct iv_to_split * ivts,rtx_insn * insn)17131debfc3dSmrg insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
17141debfc3dSmrg {
17151debfc3dSmrg   rtx expr = copy_rtx (SET_SRC (single_set (insn)));
17161debfc3dSmrg   rtx_insn *seq;
17171debfc3dSmrg 
17181debfc3dSmrg   start_sequence ();
17191debfc3dSmrg   expr = force_operand (expr, ivts->base_var);
17201debfc3dSmrg   if (expr != ivts->base_var)
17211debfc3dSmrg     emit_move_insn (ivts->base_var, expr);
17221debfc3dSmrg   seq = get_insns ();
17231debfc3dSmrg   end_sequence ();
17241debfc3dSmrg 
17251debfc3dSmrg   emit_insn_before (seq, insn);
17261debfc3dSmrg }
17271debfc3dSmrg 
17281debfc3dSmrg /* Replace the use of induction variable described in IVTS in INSN
17291debfc3dSmrg    by base variable + DELTA * step.  */
17301debfc3dSmrg 
static void
split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
{
  rtx expr, *loc, incr, var;
  rtx_insn *seq;
  machine_mode mode = GET_MODE (ivts->base_var);
  rtx src, dest, set;

  /* Construct base + DELTA * step.  */
  if (!delta)
    expr = ivts->base_var;
  else
    {
      incr = simplify_gen_binary (MULT, mode,
				  copy_rtx (ivts->step),
				  gen_int_mode (delta, mode));
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
				  ivts->base_var, incr);
    }

  /* Figure out where to do the replacement.  */
  loc = &SET_SRC (single_set (insn));

  /* If we can make the replacement right away, we're done.  */
  if (validate_change (insn, loc, expr, 0))
    return;

  /* Otherwise, force EXPR into a register and try again.  */
  start_sequence ();
  var = gen_reg_rtx (mode);
  expr = force_operand (expr, var);
  if (expr != var)
    emit_move_insn (var, expr);
  seq = get_insns ();
  end_sequence ();
  emit_insn_before (seq, insn);

  if (validate_change (insn, loc, var, 0))
    return;

  /* The last chance.  Try recreating the assignment in insn
     completely from scratch.  */
  set = single_set (insn);
  gcc_assert (set);

  start_sequence ();
  /* Overwrite the source in place first, so that the copy below picks
     up VAR; the replacement insn is then re-expanded with force_operand
     and the unmodifiable original insn is deleted.  */
  *loc = var;
  src = copy_rtx (SET_SRC (set));
  dest = copy_rtx (SET_DEST (set));
  src = force_operand (src, dest);
  if (src != dest)
    emit_move_insn (dest, src);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
  delete_insn (insn);
}
17891debfc3dSmrg 
17901debfc3dSmrg 
17911debfc3dSmrg /* Return one expansion of the accumulator recorded in struct VE.  */
17921debfc3dSmrg 
17931debfc3dSmrg static rtx
get_expansion(struct var_to_expand * ve)17941debfc3dSmrg get_expansion (struct var_to_expand *ve)
17951debfc3dSmrg {
17961debfc3dSmrg   rtx reg;
17971debfc3dSmrg 
17981debfc3dSmrg   if (ve->reuse_expansion == 0)
17991debfc3dSmrg     reg = ve->reg;
18001debfc3dSmrg   else
18011debfc3dSmrg     reg = ve->var_expansions[ve->reuse_expansion - 1];
18021debfc3dSmrg 
18031debfc3dSmrg   if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
18041debfc3dSmrg     ve->reuse_expansion = 0;
18051debfc3dSmrg   else
18061debfc3dSmrg     ve->reuse_expansion++;
18071debfc3dSmrg 
18081debfc3dSmrg   return reg;
18091debfc3dSmrg }
18101debfc3dSmrg 
18111debfc3dSmrg 
18121debfc3dSmrg /* Given INSN replace the uses of the accumulator recorded in VE
18131debfc3dSmrg    with a new register.  */
18141debfc3dSmrg 
18151debfc3dSmrg static void
expand_var_during_unrolling(struct var_to_expand * ve,rtx_insn * insn)18161debfc3dSmrg expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
18171debfc3dSmrg {
18181debfc3dSmrg   rtx new_reg, set;
18191debfc3dSmrg   bool really_new_expansion = false;
18201debfc3dSmrg 
18211debfc3dSmrg   set = single_set (insn);
18221debfc3dSmrg   gcc_assert (set);
18231debfc3dSmrg 
18241debfc3dSmrg   /* Generate a new register only if the expansion limit has not been
18251debfc3dSmrg      reached.  Else reuse an already existing expansion.  */
1826*8feb0f0bSmrg   if (param_max_variable_expansions > ve->expansion_count)
18271debfc3dSmrg     {
18281debfc3dSmrg       really_new_expansion = true;
18291debfc3dSmrg       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
18301debfc3dSmrg     }
18311debfc3dSmrg   else
18321debfc3dSmrg     new_reg = get_expansion (ve);
18331debfc3dSmrg 
18341debfc3dSmrg   validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
18351debfc3dSmrg   if (apply_change_group ())
18361debfc3dSmrg     if (really_new_expansion)
18371debfc3dSmrg       {
18381debfc3dSmrg         ve->var_expansions.safe_push (new_reg);
18391debfc3dSmrg         ve->expansion_count++;
18401debfc3dSmrg       }
18411debfc3dSmrg }
18421debfc3dSmrg 
18431debfc3dSmrg /* Initialize the variable expansions in loop preheader.  PLACE is the
18441debfc3dSmrg    loop-preheader basic block where the initialization of the
18451debfc3dSmrg    expansions should take place.  The expansions are initialized with
18461debfc3dSmrg    (-0) when the operation is plus or minus to honor sign zero.  This
18471debfc3dSmrg    way we can prevent cases where the sign of the final result is
18481debfc3dSmrg    effected by the sign of the expansion.  Here is an example to
18491debfc3dSmrg    demonstrate this:
18501debfc3dSmrg 
18511debfc3dSmrg    for (i = 0 ; i < n; i++)
18521debfc3dSmrg      sum += something;
18531debfc3dSmrg 
18541debfc3dSmrg    ==>
18551debfc3dSmrg 
18561debfc3dSmrg    sum += something
18571debfc3dSmrg    ....
18581debfc3dSmrg    i = i+1;
18591debfc3dSmrg    sum1 += something
18601debfc3dSmrg    ....
18611debfc3dSmrg    i = i+1
18621debfc3dSmrg    sum2 += something;
18631debfc3dSmrg    ....
18641debfc3dSmrg 
18651debfc3dSmrg    When SUM is initialized with -zero and SOMETHING is also -zero; the
18661debfc3dSmrg    final result of sum should be -zero thus the expansions sum1 and sum2
18671debfc3dSmrg    should be initialized with -zero as well (otherwise we will get +zero
18681debfc3dSmrg    as the final result).  */
18691debfc3dSmrg 
18701debfc3dSmrg static void
insert_var_expansion_initialization(struct var_to_expand * ve,basic_block place)18711debfc3dSmrg insert_var_expansion_initialization (struct var_to_expand *ve,
18721debfc3dSmrg 				     basic_block place)
18731debfc3dSmrg {
18741debfc3dSmrg   rtx_insn *seq;
18751debfc3dSmrg   rtx var, zero_init;
18761debfc3dSmrg   unsigned i;
18771debfc3dSmrg   machine_mode mode = GET_MODE (ve->reg);
18781debfc3dSmrg   bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
18791debfc3dSmrg 
18801debfc3dSmrg   if (ve->var_expansions.length () == 0)
18811debfc3dSmrg     return;
18821debfc3dSmrg 
18831debfc3dSmrg   start_sequence ();
18841debfc3dSmrg   switch (ve->op)
18851debfc3dSmrg     {
18861debfc3dSmrg     case FMA:
18871debfc3dSmrg       /* Note that we only accumulate FMA via the ADD operand.  */
18881debfc3dSmrg     case PLUS:
18891debfc3dSmrg     case MINUS:
18901debfc3dSmrg       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
18911debfc3dSmrg         {
18921debfc3dSmrg 	  if (honor_signed_zero_p)
18931debfc3dSmrg 	    zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
18941debfc3dSmrg 	  else
18951debfc3dSmrg 	    zero_init = CONST0_RTX (mode);
18961debfc3dSmrg           emit_move_insn (var, zero_init);
18971debfc3dSmrg         }
18981debfc3dSmrg       break;
18991debfc3dSmrg 
19001debfc3dSmrg     case MULT:
19011debfc3dSmrg       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
19021debfc3dSmrg         {
19031debfc3dSmrg           zero_init = CONST1_RTX (GET_MODE (var));
19041debfc3dSmrg           emit_move_insn (var, zero_init);
19051debfc3dSmrg         }
19061debfc3dSmrg       break;
19071debfc3dSmrg 
19081debfc3dSmrg     default:
19091debfc3dSmrg       gcc_unreachable ();
19101debfc3dSmrg     }
19111debfc3dSmrg 
19121debfc3dSmrg   seq = get_insns ();
19131debfc3dSmrg   end_sequence ();
19141debfc3dSmrg 
19151debfc3dSmrg   emit_insn_after (seq, BB_END (place));
19161debfc3dSmrg }
19171debfc3dSmrg 
19181debfc3dSmrg /* Combine the variable expansions at the loop exit.  PLACE is the
19191debfc3dSmrg    loop exit basic block where the summation of the expansions should
19201debfc3dSmrg    take place.  */
19211debfc3dSmrg 
19221debfc3dSmrg static void
combine_var_copies_in_loop_exit(struct var_to_expand * ve,basic_block place)19231debfc3dSmrg combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
19241debfc3dSmrg {
19251debfc3dSmrg   rtx sum = ve->reg;
19261debfc3dSmrg   rtx expr, var;
19271debfc3dSmrg   rtx_insn *seq, *insn;
19281debfc3dSmrg   unsigned i;
19291debfc3dSmrg 
19301debfc3dSmrg   if (ve->var_expansions.length () == 0)
19311debfc3dSmrg     return;
19321debfc3dSmrg 
19331debfc3dSmrg   /* ve->reg might be SUBREG or some other non-shareable RTL, and we use
19341debfc3dSmrg      it both here and as the destination of the assignment.  */
19351debfc3dSmrg   sum = copy_rtx (sum);
19361debfc3dSmrg   start_sequence ();
19371debfc3dSmrg   switch (ve->op)
19381debfc3dSmrg     {
19391debfc3dSmrg     case FMA:
19401debfc3dSmrg       /* Note that we only accumulate FMA via the ADD operand.  */
19411debfc3dSmrg     case PLUS:
19421debfc3dSmrg     case MINUS:
19431debfc3dSmrg       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
19441debfc3dSmrg 	sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
19451debfc3dSmrg       break;
19461debfc3dSmrg 
19471debfc3dSmrg     case MULT:
19481debfc3dSmrg       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
19491debfc3dSmrg 	sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
19501debfc3dSmrg       break;
19511debfc3dSmrg 
19521debfc3dSmrg     default:
19531debfc3dSmrg       gcc_unreachable ();
19541debfc3dSmrg     }
19551debfc3dSmrg 
19561debfc3dSmrg   expr = force_operand (sum, ve->reg);
19571debfc3dSmrg   if (expr != ve->reg)
19581debfc3dSmrg     emit_move_insn (ve->reg, expr);
19591debfc3dSmrg   seq = get_insns ();
19601debfc3dSmrg   end_sequence ();
19611debfc3dSmrg 
19621debfc3dSmrg   insn = BB_HEAD (place);
19631debfc3dSmrg   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
19641debfc3dSmrg     insn = NEXT_INSN (insn);
19651debfc3dSmrg 
19661debfc3dSmrg   emit_insn_after (seq, insn);
19671debfc3dSmrg }
19681debfc3dSmrg 
19691debfc3dSmrg /* Strip away REG_EQUAL notes for IVs we're splitting.
19701debfc3dSmrg 
19711debfc3dSmrg    Updating REG_EQUAL notes for IVs we split is tricky: We
19721debfc3dSmrg    cannot tell until after unrolling, DF-rescanning, and liveness
19731debfc3dSmrg    updating, whether an EQ_USE is reached by the split IV while
19741debfc3dSmrg    the IV reg is still live.  See PR55006.
19751debfc3dSmrg 
19761debfc3dSmrg    ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
19771debfc3dSmrg    because RTL loop-iv requires us to defer rescanning insns and
19781debfc3dSmrg    any notes attached to them.  So resort to old techniques...  */
19791debfc3dSmrg 
19801debfc3dSmrg static void
maybe_strip_eq_note_for_split_iv(struct opt_info * opt_info,rtx_insn * insn)19811debfc3dSmrg maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
19821debfc3dSmrg {
19831debfc3dSmrg   struct iv_to_split *ivts;
19841debfc3dSmrg   rtx note = find_reg_equal_equiv_note (insn);
19851debfc3dSmrg   if (! note)
19861debfc3dSmrg     return;
19871debfc3dSmrg   for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
19881debfc3dSmrg     if (reg_mentioned_p (ivts->orig_var, note))
19891debfc3dSmrg       {
19901debfc3dSmrg 	remove_note (insn, note);
19911debfc3dSmrg 	return;
19921debfc3dSmrg       }
19931debfc3dSmrg }
19941debfc3dSmrg 
19951debfc3dSmrg /* Apply loop optimizations in loop copies using the
19961debfc3dSmrg    data which gathered during the unrolling.  Structure
19971debfc3dSmrg    OPT_INFO record that data.
19981debfc3dSmrg 
19991debfc3dSmrg    UNROLLING is true if we unrolled (not peeled) the loop.
20001debfc3dSmrg    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
20011debfc3dSmrg    the loop (as it should happen in complete unrolling, but not in ordinary
20021debfc3dSmrg    peeling of the loop).  */
20031debfc3dSmrg 
20041debfc3dSmrg static void
apply_opt_in_copies(struct opt_info * opt_info,unsigned n_copies,bool unrolling,bool rewrite_original_loop)20051debfc3dSmrg apply_opt_in_copies (struct opt_info *opt_info,
20061debfc3dSmrg                      unsigned n_copies, bool unrolling,
20071debfc3dSmrg                      bool rewrite_original_loop)
20081debfc3dSmrg {
20091debfc3dSmrg   unsigned i, delta;
20101debfc3dSmrg   basic_block bb, orig_bb;
20111debfc3dSmrg   rtx_insn *insn, *orig_insn, *next;
20121debfc3dSmrg   struct iv_to_split ivts_templ, *ivts;
20131debfc3dSmrg   struct var_to_expand ve_templ, *ves;
20141debfc3dSmrg 
20151debfc3dSmrg   /* Sanity check -- we need to put initialization in the original loop
20161debfc3dSmrg      body.  */
20171debfc3dSmrg   gcc_assert (!unrolling || rewrite_original_loop);
20181debfc3dSmrg 
20191debfc3dSmrg   /* Allocate the basic variables (i0).  */
20201debfc3dSmrg   if (opt_info->insns_to_split)
20211debfc3dSmrg     for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
20221debfc3dSmrg       allocate_basic_variable (ivts);
20231debfc3dSmrg 
20241debfc3dSmrg   for (i = opt_info->first_new_block;
20251debfc3dSmrg        i < (unsigned) last_basic_block_for_fn (cfun);
20261debfc3dSmrg        i++)
20271debfc3dSmrg     {
20281debfc3dSmrg       bb = BASIC_BLOCK_FOR_FN (cfun, i);
20291debfc3dSmrg       orig_bb = get_bb_original (bb);
20301debfc3dSmrg 
20311debfc3dSmrg       /* bb->aux holds position in copy sequence initialized by
20321debfc3dSmrg 	 duplicate_loop_to_header_edge.  */
20331debfc3dSmrg       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
20341debfc3dSmrg 					unrolling);
20351debfc3dSmrg       bb->aux = 0;
20361debfc3dSmrg       orig_insn = BB_HEAD (orig_bb);
20371debfc3dSmrg       FOR_BB_INSNS_SAFE (bb, insn, next)
20381debfc3dSmrg         {
20391debfc3dSmrg 	  if (!INSN_P (insn)
2040a2dc1f3fSmrg 	      || (DEBUG_BIND_INSN_P (insn)
2041a2dc1f3fSmrg 		  && INSN_VAR_LOCATION_DECL (insn)
20421debfc3dSmrg 		  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
20431debfc3dSmrg             continue;
20441debfc3dSmrg 
20451debfc3dSmrg 	  while (!INSN_P (orig_insn)
2046a2dc1f3fSmrg 		 || (DEBUG_BIND_INSN_P (orig_insn)
2047a2dc1f3fSmrg 		     && INSN_VAR_LOCATION_DECL (orig_insn)
20481debfc3dSmrg 		     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
20491debfc3dSmrg 			 == LABEL_DECL)))
20501debfc3dSmrg             orig_insn = NEXT_INSN (orig_insn);
20511debfc3dSmrg 
20521debfc3dSmrg           ivts_templ.insn = orig_insn;
20531debfc3dSmrg           ve_templ.insn = orig_insn;
20541debfc3dSmrg 
20551debfc3dSmrg           /* Apply splitting iv optimization.  */
20561debfc3dSmrg           if (opt_info->insns_to_split)
20571debfc3dSmrg             {
20581debfc3dSmrg 	      maybe_strip_eq_note_for_split_iv (opt_info, insn);
20591debfc3dSmrg 
20601debfc3dSmrg               ivts = opt_info->insns_to_split->find (&ivts_templ);
20611debfc3dSmrg 
20621debfc3dSmrg               if (ivts)
20631debfc3dSmrg                 {
20641debfc3dSmrg 		  gcc_assert (GET_CODE (PATTERN (insn))
20651debfc3dSmrg 			      == GET_CODE (PATTERN (orig_insn)));
20661debfc3dSmrg 
20671debfc3dSmrg                   if (!delta)
20681debfc3dSmrg                     insert_base_initialization (ivts, insn);
20691debfc3dSmrg                   split_iv (ivts, insn, delta);
20701debfc3dSmrg                 }
20711debfc3dSmrg             }
20721debfc3dSmrg           /* Apply variable expansion optimization.  */
20731debfc3dSmrg           if (unrolling && opt_info->insns_with_var_to_expand)
20741debfc3dSmrg             {
20751debfc3dSmrg               ves = (struct var_to_expand *)
20761debfc3dSmrg 		opt_info->insns_with_var_to_expand->find (&ve_templ);
20771debfc3dSmrg               if (ves)
20781debfc3dSmrg                 {
20791debfc3dSmrg 		  gcc_assert (GET_CODE (PATTERN (insn))
20801debfc3dSmrg 			      == GET_CODE (PATTERN (orig_insn)));
20811debfc3dSmrg                   expand_var_during_unrolling (ves, insn);
20821debfc3dSmrg                 }
20831debfc3dSmrg             }
20841debfc3dSmrg           orig_insn = NEXT_INSN (orig_insn);
20851debfc3dSmrg         }
20861debfc3dSmrg     }
20871debfc3dSmrg 
20881debfc3dSmrg   if (!rewrite_original_loop)
20891debfc3dSmrg     return;
20901debfc3dSmrg 
20911debfc3dSmrg   /* Initialize the variable expansions in the loop preheader
20921debfc3dSmrg      and take care of combining them at the loop exit.  */
20931debfc3dSmrg   if (opt_info->insns_with_var_to_expand)
20941debfc3dSmrg     {
20951debfc3dSmrg       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
20961debfc3dSmrg 	insert_var_expansion_initialization (ves, opt_info->loop_preheader);
20971debfc3dSmrg       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
20981debfc3dSmrg 	combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
20991debfc3dSmrg     }
21001debfc3dSmrg 
21011debfc3dSmrg   /* Rewrite also the original loop body.  Find them as originals of the blocks
21021debfc3dSmrg      in the last copied iteration, i.e. those that have
21031debfc3dSmrg      get_bb_copy (get_bb_original (bb)) == bb.  */
21041debfc3dSmrg   for (i = opt_info->first_new_block;
21051debfc3dSmrg        i < (unsigned) last_basic_block_for_fn (cfun);
21061debfc3dSmrg        i++)
21071debfc3dSmrg     {
21081debfc3dSmrg       bb = BASIC_BLOCK_FOR_FN (cfun, i);
21091debfc3dSmrg       orig_bb = get_bb_original (bb);
21101debfc3dSmrg       if (get_bb_copy (orig_bb) != bb)
21111debfc3dSmrg 	continue;
21121debfc3dSmrg 
21131debfc3dSmrg       delta = determine_split_iv_delta (0, n_copies, unrolling);
21141debfc3dSmrg       for (orig_insn = BB_HEAD (orig_bb);
21151debfc3dSmrg            orig_insn != NEXT_INSN (BB_END (bb));
21161debfc3dSmrg            orig_insn = next)
21171debfc3dSmrg         {
21181debfc3dSmrg           next = NEXT_INSN (orig_insn);
21191debfc3dSmrg 
21201debfc3dSmrg           if (!INSN_P (orig_insn))
21211debfc3dSmrg  	    continue;
21221debfc3dSmrg 
21231debfc3dSmrg           ivts_templ.insn = orig_insn;
21241debfc3dSmrg           if (opt_info->insns_to_split)
21251debfc3dSmrg             {
21261debfc3dSmrg 	      maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
21271debfc3dSmrg 
21281debfc3dSmrg               ivts = (struct iv_to_split *)
21291debfc3dSmrg 		opt_info->insns_to_split->find (&ivts_templ);
21301debfc3dSmrg               if (ivts)
21311debfc3dSmrg                 {
21321debfc3dSmrg                   if (!delta)
21331debfc3dSmrg                     insert_base_initialization (ivts, orig_insn);
21341debfc3dSmrg                   split_iv (ivts, orig_insn, delta);
21351debfc3dSmrg                   continue;
21361debfc3dSmrg                 }
21371debfc3dSmrg             }
21381debfc3dSmrg 
21391debfc3dSmrg         }
21401debfc3dSmrg     }
21411debfc3dSmrg }
21421debfc3dSmrg 
21431debfc3dSmrg /* Release OPT_INFO.  */
21441debfc3dSmrg 
21451debfc3dSmrg static void
free_opt_info(struct opt_info * opt_info)21461debfc3dSmrg free_opt_info (struct opt_info *opt_info)
21471debfc3dSmrg {
21481debfc3dSmrg   delete opt_info->insns_to_split;
21491debfc3dSmrg   opt_info->insns_to_split = NULL;
21501debfc3dSmrg   if (opt_info->insns_with_var_to_expand)
21511debfc3dSmrg     {
21521debfc3dSmrg       struct var_to_expand *ves;
21531debfc3dSmrg 
21541debfc3dSmrg       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
21551debfc3dSmrg 	ves->var_expansions.release ();
21561debfc3dSmrg       delete opt_info->insns_with_var_to_expand;
21571debfc3dSmrg       opt_info->insns_with_var_to_expand = NULL;
21581debfc3dSmrg     }
21591debfc3dSmrg   free (opt_info);
21601debfc3dSmrg }
2161