xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/loop-doloop.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
11debfc3dSmrg /* Perform doloop optimizations
2*8feb0f0bSmrg    Copyright (C) 2004-2020 Free Software Foundation, Inc.
31debfc3dSmrg    Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
41debfc3dSmrg 
51debfc3dSmrg This file is part of GCC.
61debfc3dSmrg 
71debfc3dSmrg GCC is free software; you can redistribute it and/or modify it under
81debfc3dSmrg the terms of the GNU General Public License as published by the Free
91debfc3dSmrg Software Foundation; either version 3, or (at your option) any later
101debfc3dSmrg version.
111debfc3dSmrg 
121debfc3dSmrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
131debfc3dSmrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
141debfc3dSmrg FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
151debfc3dSmrg for more details.
161debfc3dSmrg 
171debfc3dSmrg You should have received a copy of the GNU General Public License
181debfc3dSmrg along with GCC; see the file COPYING3.  If not see
191debfc3dSmrg <http://www.gnu.org/licenses/>.  */
201debfc3dSmrg 
211debfc3dSmrg #include "config.h"
221debfc3dSmrg #include "system.h"
231debfc3dSmrg #include "coretypes.h"
241debfc3dSmrg #include "backend.h"
251debfc3dSmrg #include "target.h"
261debfc3dSmrg #include "rtl.h"
271debfc3dSmrg #include "tree.h"
281debfc3dSmrg #include "cfghooks.h"
291debfc3dSmrg #include "memmodel.h"
301debfc3dSmrg #include "emit-rtl.h"
311debfc3dSmrg #include "dojump.h"
321debfc3dSmrg #include "expr.h"
331debfc3dSmrg #include "cfgloop.h"
341debfc3dSmrg #include "cfgrtl.h"
351debfc3dSmrg #include "dumpfile.h"
361debfc3dSmrg #include "loop-unroll.h"
371debfc3dSmrg #include "regs.h"
381debfc3dSmrg #include "df.h"
391debfc3dSmrg 
401debfc3dSmrg /* This module is used to modify loops with a determinable number of
411debfc3dSmrg    iterations to use special low-overhead looping instructions.
421debfc3dSmrg 
431debfc3dSmrg    It first validates whether the loop is well behaved and has a
441debfc3dSmrg    determinable number of iterations (either at compile or run-time).
451debfc3dSmrg    It then modifies the loop to use a low-overhead looping pattern as
461debfc3dSmrg    follows:
471debfc3dSmrg 
481debfc3dSmrg    1. A pseudo register is allocated as the loop iteration counter.
491debfc3dSmrg 
501debfc3dSmrg    2. The number of loop iterations is calculated and is stored
511debfc3dSmrg       in the loop counter.
521debfc3dSmrg 
531debfc3dSmrg    3. At the end of the loop, the jump insn is replaced by the
541debfc3dSmrg       doloop_end pattern.  The compare must remain because it might be
551debfc3dSmrg       used elsewhere.  If the loop-variable or condition register are
561debfc3dSmrg       used elsewhere, they will be eliminated by flow.
571debfc3dSmrg 
581debfc3dSmrg    4. An optional doloop_begin pattern is inserted at the top of the
591debfc3dSmrg       loop.
601debfc3dSmrg 
611debfc3dSmrg    TODO The optimization should only performed when either the biv used for exit
621debfc3dSmrg    condition is unused at all except for the exit test, or if we do not have to
631debfc3dSmrg    change its value, since otherwise we have to add a new induction variable,
641debfc3dSmrg    which usually will not pay up (unless the cost of the doloop pattern is
651debfc3dSmrg    somehow extremely lower than the cost of compare & jump, or unless the bct
661debfc3dSmrg    register cannot be used for anything else but doloop -- ??? detect these
671debfc3dSmrg    cases).  */
681debfc3dSmrg 
691debfc3dSmrg /* Return the loop termination condition for PATTERN or zero
701debfc3dSmrg    if it is not a decrement and branch jump insn.  */
711debfc3dSmrg 
721debfc3dSmrg rtx
doloop_condition_get(rtx_insn * doloop_pat)731debfc3dSmrg doloop_condition_get (rtx_insn *doloop_pat)
741debfc3dSmrg {
751debfc3dSmrg   rtx cmp;
761debfc3dSmrg   rtx inc;
771debfc3dSmrg   rtx reg;
781debfc3dSmrg   rtx inc_src;
791debfc3dSmrg   rtx condition;
801debfc3dSmrg   rtx pattern;
811debfc3dSmrg   rtx cc_reg = NULL_RTX;
821debfc3dSmrg   rtx reg_orig = NULL_RTX;
831debfc3dSmrg 
841debfc3dSmrg   /* The canonical doloop pattern we expect has one of the following
851debfc3dSmrg      forms:
861debfc3dSmrg 
871debfc3dSmrg      1)  (parallel [(set (pc) (if_then_else (condition)
881debfc3dSmrg 	  			            (label_ref (label))
891debfc3dSmrg 				            (pc)))
901debfc3dSmrg 	             (set (reg) (plus (reg) (const_int -1)))
911debfc3dSmrg 	             (additional clobbers and uses)])
921debfc3dSmrg 
931debfc3dSmrg      The branch must be the first entry of the parallel (also required
941debfc3dSmrg      by jump.c), and the second entry of the parallel must be a set of
951debfc3dSmrg      the loop counter register.  Some targets (IA-64) wrap the set of
961debfc3dSmrg      the loop counter in an if_then_else too.
971debfc3dSmrg 
981debfc3dSmrg      2)  (set (reg) (plus (reg) (const_int -1))
991debfc3dSmrg          (set (pc) (if_then_else (reg != 0)
1001debfc3dSmrg 	                         (label_ref (label))
1011debfc3dSmrg 			         (pc))).
1021debfc3dSmrg 
1031debfc3dSmrg      Some targets (ARM) do the comparison before the branch, as in the
1041debfc3dSmrg      following form:
1051debfc3dSmrg 
1061debfc3dSmrg      3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
1071debfc3dSmrg                    (set (reg) (plus (reg) (const_int -1)))])
1081debfc3dSmrg         (set (pc) (if_then_else (cc == NE)
1091debfc3dSmrg                                 (label_ref (label))
1101debfc3dSmrg                                 (pc))) */
1111debfc3dSmrg 
1121debfc3dSmrg   pattern = PATTERN (doloop_pat);
1131debfc3dSmrg 
1141debfc3dSmrg   if (GET_CODE (pattern) != PARALLEL)
1151debfc3dSmrg     {
1161debfc3dSmrg       rtx cond;
1171debfc3dSmrg       rtx_insn *prev_insn = prev_nondebug_insn (doloop_pat);
1181debfc3dSmrg       rtx cmp_arg1, cmp_arg2;
1191debfc3dSmrg       rtx cmp_orig;
1201debfc3dSmrg 
1211debfc3dSmrg       /* In case the pattern is not PARALLEL we expect two forms
1221debfc3dSmrg 	 of doloop which are cases 2) and 3) above: in case 2) the
1231debfc3dSmrg 	 decrement immediately precedes the branch, while in case 3)
1241debfc3dSmrg 	 the compare and decrement instructions immediately precede
1251debfc3dSmrg 	 the branch.  */
1261debfc3dSmrg 
1271debfc3dSmrg       if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
1281debfc3dSmrg         return 0;
1291debfc3dSmrg 
1301debfc3dSmrg       cmp = pattern;
1311debfc3dSmrg       if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
1321debfc3dSmrg         {
1331debfc3dSmrg 	  /* The third case: the compare and decrement instructions
1341debfc3dSmrg 	     immediately precede the branch.  */
1351debfc3dSmrg 	  cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
1361debfc3dSmrg 	  if (GET_CODE (cmp_orig) != SET)
1371debfc3dSmrg 	    return 0;
1381debfc3dSmrg 	  if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
1391debfc3dSmrg 	    return 0;
1401debfc3dSmrg 	  cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
1411debfc3dSmrg           cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
1421debfc3dSmrg 	  if (cmp_arg2 != const0_rtx
1431debfc3dSmrg 	      || GET_CODE (cmp_arg1) != PLUS)
1441debfc3dSmrg 	    return 0;
1451debfc3dSmrg 	  reg_orig = XEXP (cmp_arg1, 0);
1461debfc3dSmrg 	  if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
1471debfc3dSmrg 	      || !REG_P (reg_orig))
1481debfc3dSmrg 	    return 0;
1491debfc3dSmrg 	  cc_reg = SET_DEST (cmp_orig);
1501debfc3dSmrg 
1511debfc3dSmrg 	  inc = XVECEXP (PATTERN (prev_insn), 0, 1);
1521debfc3dSmrg 	}
1531debfc3dSmrg       else
1541debfc3dSmrg         inc = PATTERN (prev_insn);
1551debfc3dSmrg       if (GET_CODE (cmp) == SET && GET_CODE (SET_SRC (cmp)) == IF_THEN_ELSE)
1561debfc3dSmrg 	{
1571debfc3dSmrg 	  /* We expect the condition to be of the form (reg != 0)  */
1581debfc3dSmrg 	  cond = XEXP (SET_SRC (cmp), 0);
1591debfc3dSmrg 	  if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
1601debfc3dSmrg 	    return 0;
1611debfc3dSmrg 	}
1621debfc3dSmrg     }
1631debfc3dSmrg   else
1641debfc3dSmrg     {
1651debfc3dSmrg       cmp = XVECEXP (pattern, 0, 0);
1661debfc3dSmrg       inc = XVECEXP (pattern, 0, 1);
1671debfc3dSmrg     }
1681debfc3dSmrg 
1691debfc3dSmrg   /* Check for (set (reg) (something)).  */
1701debfc3dSmrg   if (GET_CODE (inc) != SET)
1711debfc3dSmrg     return 0;
1721debfc3dSmrg   reg = SET_DEST (inc);
1731debfc3dSmrg   if (! REG_P (reg))
1741debfc3dSmrg     return 0;
1751debfc3dSmrg 
1761debfc3dSmrg   /* Check if something = (plus (reg) (const_int -1)).
1771debfc3dSmrg      On IA-64, this decrement is wrapped in an if_then_else.  */
1781debfc3dSmrg   inc_src = SET_SRC (inc);
1791debfc3dSmrg   if (GET_CODE (inc_src) == IF_THEN_ELSE)
1801debfc3dSmrg     inc_src = XEXP (inc_src, 1);
1811debfc3dSmrg   if (GET_CODE (inc_src) != PLUS
1821debfc3dSmrg       || XEXP (inc_src, 0) != reg
1831debfc3dSmrg       || XEXP (inc_src, 1) != constm1_rtx)
1841debfc3dSmrg     return 0;
1851debfc3dSmrg 
1861debfc3dSmrg   /* Check for (set (pc) (if_then_else (condition)
1871debfc3dSmrg                                        (label_ref (label))
1881debfc3dSmrg                                        (pc))).  */
1891debfc3dSmrg   if (GET_CODE (cmp) != SET
1901debfc3dSmrg       || SET_DEST (cmp) != pc_rtx
1911debfc3dSmrg       || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
1921debfc3dSmrg       || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
1931debfc3dSmrg       || XEXP (SET_SRC (cmp), 2) != pc_rtx)
1941debfc3dSmrg     return 0;
1951debfc3dSmrg 
1961debfc3dSmrg   /* Extract loop termination condition.  */
1971debfc3dSmrg   condition = XEXP (SET_SRC (cmp), 0);
1981debfc3dSmrg 
1991debfc3dSmrg   /* We expect a GE or NE comparison with 0 or 1.  */
2001debfc3dSmrg   if ((GET_CODE (condition) != GE
2011debfc3dSmrg        && GET_CODE (condition) != NE)
2021debfc3dSmrg       || (XEXP (condition, 1) != const0_rtx
2031debfc3dSmrg           && XEXP (condition, 1) != const1_rtx))
2041debfc3dSmrg     return 0;
2051debfc3dSmrg 
2061debfc3dSmrg   if ((XEXP (condition, 0) == reg)
2071debfc3dSmrg       /* For the third case:  */
2081debfc3dSmrg       || ((cc_reg != NULL_RTX)
2091debfc3dSmrg 	  && (XEXP (condition, 0) == cc_reg)
2101debfc3dSmrg 	  && (reg_orig == reg))
2111debfc3dSmrg       || (GET_CODE (XEXP (condition, 0)) == PLUS
2121debfc3dSmrg 	  && XEXP (XEXP (condition, 0), 0) == reg))
2131debfc3dSmrg    {
2141debfc3dSmrg      if (GET_CODE (pattern) != PARALLEL)
2151debfc3dSmrg      /*  For the second form we expect:
2161debfc3dSmrg 
2171debfc3dSmrg          (set (reg) (plus (reg) (const_int -1))
2181debfc3dSmrg          (set (pc) (if_then_else (reg != 0)
2191debfc3dSmrg                                  (label_ref (label))
2201debfc3dSmrg                                  (pc))).
2211debfc3dSmrg 
2221debfc3dSmrg          is equivalent to the following:
2231debfc3dSmrg 
2241debfc3dSmrg          (parallel [(set (pc) (if_then_else (reg != 1)
2251debfc3dSmrg                                             (label_ref (label))
2261debfc3dSmrg                                             (pc)))
2271debfc3dSmrg                      (set (reg) (plus (reg) (const_int -1)))
2281debfc3dSmrg                      (additional clobbers and uses)])
2291debfc3dSmrg 
2301debfc3dSmrg         For the third form we expect:
2311debfc3dSmrg 
2321debfc3dSmrg         (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
2331debfc3dSmrg                    (set (reg) (plus (reg) (const_int -1)))])
2341debfc3dSmrg         (set (pc) (if_then_else (cc == NE)
2351debfc3dSmrg                                 (label_ref (label))
2361debfc3dSmrg                                 (pc)))
2371debfc3dSmrg 
2381debfc3dSmrg         which is equivalent to the following:
2391debfc3dSmrg 
2401debfc3dSmrg         (parallel [(set (cc) (compare (reg,  1))
2411debfc3dSmrg                    (set (reg) (plus (reg) (const_int -1)))
2421debfc3dSmrg                    (set (pc) (if_then_else (NE == cc)
2431debfc3dSmrg                                            (label_ref (label))
2441debfc3dSmrg                                            (pc))))])
2451debfc3dSmrg 
2461debfc3dSmrg         So we return the second form instead for the two cases.
2471debfc3dSmrg 
2481debfc3dSmrg      */
2491debfc3dSmrg         condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
2501debfc3dSmrg 
2511debfc3dSmrg     return condition;
2521debfc3dSmrg    }
2531debfc3dSmrg 
2541debfc3dSmrg   /* ??? If a machine uses a funny comparison, we could return a
2551debfc3dSmrg      canonicalized form here.  */
2561debfc3dSmrg 
2571debfc3dSmrg   return 0;
2581debfc3dSmrg }
2591debfc3dSmrg 
2601debfc3dSmrg /* Return nonzero if the loop specified by LOOP is suitable for
2611debfc3dSmrg    the use of special low-overhead looping instructions.  DESC
2621debfc3dSmrg    describes the number of iterations of the loop.  */
2631debfc3dSmrg 
2641debfc3dSmrg static bool
doloop_valid_p(class loop * loop,class niter_desc * desc)265*8feb0f0bSmrg doloop_valid_p (class loop *loop, class niter_desc *desc)
2661debfc3dSmrg {
2671debfc3dSmrg   basic_block *body = get_loop_body (loop), bb;
2681debfc3dSmrg   rtx_insn *insn;
2691debfc3dSmrg   unsigned i;
2701debfc3dSmrg   bool result = true;
2711debfc3dSmrg 
2721debfc3dSmrg   /* Check for loops that may not terminate under special conditions.  */
2731debfc3dSmrg   if (!desc->simple_p
2741debfc3dSmrg       || desc->assumptions
2751debfc3dSmrg       || desc->infinite)
2761debfc3dSmrg     {
2771debfc3dSmrg       /* There are some cases that would require a special attention.
2781debfc3dSmrg 	 For example if the comparison is LEU and the comparison value
2791debfc3dSmrg 	 is UINT_MAX then the loop will not terminate.  Similarly, if the
2801debfc3dSmrg 	 comparison code is GEU and the comparison value is 0, the
2811debfc3dSmrg 	 loop will not terminate.
2821debfc3dSmrg 
2831debfc3dSmrg 	 If the absolute increment is not 1, the loop can be infinite
2841debfc3dSmrg 	 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
2851debfc3dSmrg 
2861debfc3dSmrg 	 ??? We could compute these conditions at run-time and have a
2871debfc3dSmrg 	 additional jump around the loop to ensure an infinite loop.
2881debfc3dSmrg 	 However, it is very unlikely that this is the intended
2891debfc3dSmrg 	 behavior of the loop and checking for these rare boundary
2901debfc3dSmrg 	 conditions would pessimize all other code.
2911debfc3dSmrg 
2921debfc3dSmrg 	 If the loop is executed only a few times an extra check to
2931debfc3dSmrg 	 restart the loop could use up most of the benefits of using a
2941debfc3dSmrg 	 count register loop.  Note however, that normally, this
2951debfc3dSmrg 	 restart branch would never execute, so it could be predicted
2961debfc3dSmrg 	 well by the CPU.  We should generate the pessimistic code by
2971debfc3dSmrg 	 default, and have an option, e.g. -funsafe-loops that would
2981debfc3dSmrg 	 enable count-register loops in this case.  */
2991debfc3dSmrg       if (dump_file)
3001debfc3dSmrg 	fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
3011debfc3dSmrg       result = false;
3021debfc3dSmrg       goto cleanup;
3031debfc3dSmrg     }
3041debfc3dSmrg 
3051debfc3dSmrg   for (i = 0; i < loop->num_nodes; i++)
3061debfc3dSmrg     {
3071debfc3dSmrg       bb = body[i];
3081debfc3dSmrg 
3091debfc3dSmrg       for (insn = BB_HEAD (bb);
3101debfc3dSmrg 	   insn != NEXT_INSN (BB_END (bb));
3111debfc3dSmrg 	   insn = NEXT_INSN (insn))
3121debfc3dSmrg 	{
3131debfc3dSmrg 	  /* Different targets have different necessities for low-overhead
3141debfc3dSmrg 	     looping.  Call the back end for each instruction within the loop
3151debfc3dSmrg 	     to let it decide whether the insn prohibits a low-overhead loop.
3161debfc3dSmrg 	     It will then return the cause for it to emit to the dump file.  */
3171debfc3dSmrg 	  const char * invalid = targetm.invalid_within_doloop (insn);
3181debfc3dSmrg 	  if (invalid)
3191debfc3dSmrg 	    {
3201debfc3dSmrg 	      if (dump_file)
3211debfc3dSmrg 		fprintf (dump_file, "Doloop: %s\n", invalid);
3221debfc3dSmrg 	      result = false;
3231debfc3dSmrg 	      goto cleanup;
3241debfc3dSmrg 	    }
3251debfc3dSmrg 	}
3261debfc3dSmrg     }
3271debfc3dSmrg   result = true;
3281debfc3dSmrg 
3291debfc3dSmrg cleanup:
3301debfc3dSmrg   free (body);
3311debfc3dSmrg 
3321debfc3dSmrg   return result;
3331debfc3dSmrg }
3341debfc3dSmrg 
3351debfc3dSmrg /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
3361debfc3dSmrg    edge.  If the condition is always false, do not do anything.  If it is always
3371debfc3dSmrg    true, redirect E to DEST and return false.  In all other cases, true is
3381debfc3dSmrg    returned.  */
3391debfc3dSmrg 
3401debfc3dSmrg static bool
add_test(rtx cond,edge * e,basic_block dest)3411debfc3dSmrg add_test (rtx cond, edge *e, basic_block dest)
3421debfc3dSmrg {
3431debfc3dSmrg   rtx_insn *seq, *jump;
3441debfc3dSmrg   rtx_code_label *label;
3451debfc3dSmrg   machine_mode mode;
3461debfc3dSmrg   rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
3471debfc3dSmrg   enum rtx_code code = GET_CODE (cond);
3481debfc3dSmrg   basic_block bb;
349a2dc1f3fSmrg   /* The jump is supposed to handle an unlikely special case.  */
350a2dc1f3fSmrg   profile_probability prob = profile_probability::guessed_never ();
3511debfc3dSmrg 
3521debfc3dSmrg   mode = GET_MODE (XEXP (cond, 0));
3531debfc3dSmrg   if (mode == VOIDmode)
3541debfc3dSmrg     mode = GET_MODE (XEXP (cond, 1));
3551debfc3dSmrg 
3561debfc3dSmrg   start_sequence ();
3571debfc3dSmrg   op0 = force_operand (op0, NULL_RTX);
3581debfc3dSmrg   op1 = force_operand (op1, NULL_RTX);
3591debfc3dSmrg   label = block_label (dest);
360a2dc1f3fSmrg   do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL, label,
361a2dc1f3fSmrg 			   prob);
3621debfc3dSmrg 
3631debfc3dSmrg   jump = get_last_insn ();
3641debfc3dSmrg   if (!jump || !JUMP_P (jump))
3651debfc3dSmrg     {
3661debfc3dSmrg       /* The condition is always false and the jump was optimized out.  */
3671debfc3dSmrg       end_sequence ();
3681debfc3dSmrg       return true;
3691debfc3dSmrg     }
3701debfc3dSmrg 
3711debfc3dSmrg   seq = get_insns ();
3721debfc3dSmrg   unshare_all_rtl_in_chain (seq);
3731debfc3dSmrg   end_sequence ();
3741debfc3dSmrg 
3751debfc3dSmrg   /* There always is at least the jump insn in the sequence.  */
3761debfc3dSmrg   gcc_assert (seq != NULL_RTX);
3771debfc3dSmrg 
3781debfc3dSmrg   bb = split_edge_and_insert (*e, seq);
3791debfc3dSmrg   *e = single_succ_edge (bb);
3801debfc3dSmrg 
3811debfc3dSmrg   if (any_uncondjump_p (jump))
3821debfc3dSmrg     {
3831debfc3dSmrg       /* The condition is always true.  */
3841debfc3dSmrg       delete_insn (jump);
3851debfc3dSmrg       redirect_edge_and_branch_force (*e, dest);
3861debfc3dSmrg       return false;
3871debfc3dSmrg     }
3881debfc3dSmrg 
3891debfc3dSmrg   JUMP_LABEL (jump) = label;
3901debfc3dSmrg 
3911debfc3dSmrg   LABEL_NUSES (label)++;
3921debfc3dSmrg 
393a2dc1f3fSmrg   edge e2 = make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
394a2dc1f3fSmrg   e2->probability = prob;
395a2dc1f3fSmrg   (*e)->probability = prob.invert ();
396a2dc1f3fSmrg   update_br_prob_note (e2->src);
3971debfc3dSmrg   return true;
3981debfc3dSmrg }
3991debfc3dSmrg 
4001debfc3dSmrg /* Modify the loop to use the low-overhead looping insn where LOOP
4011debfc3dSmrg    describes the loop, DESC describes the number of iterations of the
4021debfc3dSmrg    loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
4031debfc3dSmrg    end of the loop.  CONDITION is the condition separated from the
4041debfc3dSmrg    DOLOOP_SEQ.  COUNT is the number of iterations of the LOOP.  */
4051debfc3dSmrg 
4061debfc3dSmrg static void
doloop_modify(class loop * loop,class niter_desc * desc,rtx_insn * doloop_seq,rtx condition,rtx count)407*8feb0f0bSmrg doloop_modify (class loop *loop, class niter_desc *desc,
4081debfc3dSmrg 	       rtx_insn *doloop_seq, rtx condition, rtx count)
4091debfc3dSmrg {
4101debfc3dSmrg   rtx counter_reg;
4111debfc3dSmrg   rtx tmp, noloop = NULL_RTX;
4121debfc3dSmrg   rtx_insn *sequence;
4131debfc3dSmrg   rtx_insn *jump_insn;
4141debfc3dSmrg   rtx_code_label *jump_label;
4151debfc3dSmrg   int nonneg = 0;
4161debfc3dSmrg   bool increment_count;
4171debfc3dSmrg   basic_block loop_end = desc->out_edge->src;
418a2dc1f3fSmrg   scalar_int_mode mode;
4191debfc3dSmrg   widest_int iterations;
4201debfc3dSmrg 
4211debfc3dSmrg   jump_insn = BB_END (loop_end);
4221debfc3dSmrg 
4231debfc3dSmrg   if (dump_file)
4241debfc3dSmrg     {
4251debfc3dSmrg       fprintf (dump_file, "Doloop: Inserting doloop pattern (");
4261debfc3dSmrg       if (desc->const_iter)
4271debfc3dSmrg 	fprintf (dump_file, "%" PRId64, desc->niter);
4281debfc3dSmrg       else
4291debfc3dSmrg 	fputs ("runtime", dump_file);
4301debfc3dSmrg       fputs (" iterations).\n", dump_file);
4311debfc3dSmrg     }
4321debfc3dSmrg 
4331debfc3dSmrg   /* Discard original jump to continue loop.  The original compare
4341debfc3dSmrg      result may still be live, so it cannot be discarded explicitly.  */
4351debfc3dSmrg   delete_insn (jump_insn);
4361debfc3dSmrg 
4371debfc3dSmrg   counter_reg = XEXP (condition, 0);
4381debfc3dSmrg   if (GET_CODE (counter_reg) == PLUS)
4391debfc3dSmrg     counter_reg = XEXP (counter_reg, 0);
440a2dc1f3fSmrg   /* These patterns must operate on integer counters.  */
441a2dc1f3fSmrg   mode = as_a <scalar_int_mode> (GET_MODE (counter_reg));
4421debfc3dSmrg 
4431debfc3dSmrg   increment_count = false;
4441debfc3dSmrg   switch (GET_CODE (condition))
4451debfc3dSmrg     {
4461debfc3dSmrg     case NE:
4471debfc3dSmrg       /* Currently only NE tests against zero and one are supported.  */
4481debfc3dSmrg       noloop = XEXP (condition, 1);
4491debfc3dSmrg       if (noloop != const0_rtx)
4501debfc3dSmrg 	{
4511debfc3dSmrg 	  gcc_assert (noloop == const1_rtx);
4521debfc3dSmrg 	  increment_count = true;
4531debfc3dSmrg 	}
4541debfc3dSmrg       break;
4551debfc3dSmrg 
4561debfc3dSmrg     case GE:
4571debfc3dSmrg       /* Currently only GE tests against zero are supported.  */
4581debfc3dSmrg       gcc_assert (XEXP (condition, 1) == const0_rtx);
4591debfc3dSmrg 
4601debfc3dSmrg       noloop = constm1_rtx;
4611debfc3dSmrg 
4621debfc3dSmrg       /* The iteration count does not need incrementing for a GE test.  */
4631debfc3dSmrg       increment_count = false;
4641debfc3dSmrg 
4651debfc3dSmrg       /* Determine if the iteration counter will be non-negative.
4661debfc3dSmrg 	 Note that the maximum value loaded is iterations_max - 1.  */
4671debfc3dSmrg       if (get_max_loop_iterations (loop, &iterations)
4681debfc3dSmrg 	  && wi::leu_p (iterations,
4691debfc3dSmrg 			wi::set_bit_in_zero <widest_int>
4701debfc3dSmrg 			(GET_MODE_PRECISION (mode) - 1)))
4711debfc3dSmrg 	nonneg = 1;
4721debfc3dSmrg       break;
4731debfc3dSmrg 
4741debfc3dSmrg       /* Abort if an invalid doloop pattern has been generated.  */
4751debfc3dSmrg     default:
4761debfc3dSmrg       gcc_unreachable ();
4771debfc3dSmrg     }
4781debfc3dSmrg 
4791debfc3dSmrg   if (increment_count)
4801debfc3dSmrg     count = simplify_gen_binary (PLUS, mode, count, const1_rtx);
4811debfc3dSmrg 
4821debfc3dSmrg   /* Insert initialization of the count register into the loop header.  */
4831debfc3dSmrg   start_sequence ();
4841debfc3dSmrg   /* count has been already copied through copy_rtx.  */
4851debfc3dSmrg   reset_used_flags (count);
4861debfc3dSmrg   set_used_flags (condition);
4871debfc3dSmrg   tmp = force_operand (count, counter_reg);
4881debfc3dSmrg   convert_move (counter_reg, tmp, 1);
4891debfc3dSmrg   sequence = get_insns ();
4901debfc3dSmrg   unshare_all_rtl_in_chain (sequence);
4911debfc3dSmrg   end_sequence ();
4921debfc3dSmrg   emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
4931debfc3dSmrg 
4941debfc3dSmrg   if (desc->noloop_assumptions)
4951debfc3dSmrg     {
4961debfc3dSmrg       rtx ass = copy_rtx (desc->noloop_assumptions);
4971debfc3dSmrg       basic_block preheader = loop_preheader_edge (loop)->src;
4981debfc3dSmrg       basic_block set_zero = split_edge (loop_preheader_edge (loop));
4991debfc3dSmrg       basic_block new_preheader = split_edge (loop_preheader_edge (loop));
5001debfc3dSmrg       edge te;
5011debfc3dSmrg 
5021debfc3dSmrg       /* Expand the condition testing the assumptions and if it does not pass,
5031debfc3dSmrg 	 reset the count register to 0.  */
5041debfc3dSmrg       redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
5051debfc3dSmrg       set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
5061debfc3dSmrg 
507a2dc1f3fSmrg       set_zero->count = profile_count::uninitialized ();
5081debfc3dSmrg 
5091debfc3dSmrg       te = single_succ_edge (preheader);
5101debfc3dSmrg       for (; ass; ass = XEXP (ass, 1))
5111debfc3dSmrg 	if (!add_test (XEXP (ass, 0), &te, set_zero))
5121debfc3dSmrg 	  break;
5131debfc3dSmrg 
5141debfc3dSmrg       if (ass)
5151debfc3dSmrg 	{
5161debfc3dSmrg 	  /* We reached a condition that is always true.  This is very hard to
5171debfc3dSmrg 	     reproduce (such a loop does not roll, and thus it would most
5181debfc3dSmrg 	     likely get optimized out by some of the preceding optimizations).
5191debfc3dSmrg 	     In fact, I do not have any testcase for it.  However, it would
5201debfc3dSmrg 	     also be very hard to show that it is impossible, so we must
5211debfc3dSmrg 	     handle this case.  */
5221debfc3dSmrg 	  set_zero->count = preheader->count;
5231debfc3dSmrg 	}
5241debfc3dSmrg 
5251debfc3dSmrg       if (EDGE_COUNT (set_zero->preds) == 0)
5261debfc3dSmrg 	{
5271debfc3dSmrg 	  /* All the conditions were simplified to false, remove the
5281debfc3dSmrg 	     unreachable set_zero block.  */
5291debfc3dSmrg 	  delete_basic_block (set_zero);
5301debfc3dSmrg 	}
5311debfc3dSmrg       else
5321debfc3dSmrg 	{
5331debfc3dSmrg 	  /* Reset the counter to zero in the set_zero block.  */
5341debfc3dSmrg 	  start_sequence ();
5351debfc3dSmrg 	  convert_move (counter_reg, noloop, 0);
5361debfc3dSmrg 	  sequence = get_insns ();
5371debfc3dSmrg 	  end_sequence ();
5381debfc3dSmrg 	  emit_insn_after (sequence, BB_END (set_zero));
5391debfc3dSmrg 
5401debfc3dSmrg 	  set_immediate_dominator (CDI_DOMINATORS, set_zero,
5411debfc3dSmrg 				   recompute_dominator (CDI_DOMINATORS,
5421debfc3dSmrg 							set_zero));
5431debfc3dSmrg 	}
5441debfc3dSmrg 
5451debfc3dSmrg       set_immediate_dominator (CDI_DOMINATORS, new_preheader,
5461debfc3dSmrg 			       recompute_dominator (CDI_DOMINATORS,
5471debfc3dSmrg 						    new_preheader));
5481debfc3dSmrg     }
5491debfc3dSmrg 
5501debfc3dSmrg   /* Some targets (eg, C4x) need to initialize special looping
5511debfc3dSmrg      registers.  */
5521debfc3dSmrg   if (targetm.have_doloop_begin ())
5531debfc3dSmrg     if (rtx_insn *seq = targetm.gen_doloop_begin (counter_reg, doloop_seq))
5541debfc3dSmrg       emit_insn_after (seq, BB_END (loop_preheader_edge (loop)->src));
5551debfc3dSmrg 
5561debfc3dSmrg   /* Insert the new low-overhead looping insn.  */
5571debfc3dSmrg   emit_jump_insn_after (doloop_seq, BB_END (loop_end));
5581debfc3dSmrg   jump_insn = BB_END (loop_end);
5591debfc3dSmrg   jump_label = block_label (desc->in_edge->dest);
5601debfc3dSmrg   JUMP_LABEL (jump_insn) = jump_label;
5611debfc3dSmrg   LABEL_NUSES (jump_label)++;
5621debfc3dSmrg 
5631debfc3dSmrg   /* Ensure the right fallthru edge is marked, for case we have reversed
5641debfc3dSmrg      the condition.  */
5651debfc3dSmrg   desc->in_edge->flags &= ~EDGE_FALLTHRU;
5661debfc3dSmrg   desc->out_edge->flags |= EDGE_FALLTHRU;
5671debfc3dSmrg 
5681debfc3dSmrg   /* Add a REG_NONNEG note if the actual or estimated maximum number
5691debfc3dSmrg      of iterations is non-negative.  */
5701debfc3dSmrg   if (nonneg)
5711debfc3dSmrg     add_reg_note (jump_insn, REG_NONNEG, NULL_RTX);
5721debfc3dSmrg 
5731debfc3dSmrg   /* Update the REG_BR_PROB note.  */
574a2dc1f3fSmrg   if (desc->in_edge->probability.initialized_p ())
575a2dc1f3fSmrg     add_reg_br_prob_note (jump_insn, desc->in_edge->probability);
5761debfc3dSmrg }
5771debfc3dSmrg 
5781debfc3dSmrg /* Called through note_stores.  */
5791debfc3dSmrg 
5801debfc3dSmrg static void
record_reg_sets(rtx x,const_rtx pat ATTRIBUTE_UNUSED,void * data)5811debfc3dSmrg record_reg_sets (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
5821debfc3dSmrg {
5831debfc3dSmrg   bitmap mod = (bitmap)data;
5841debfc3dSmrg   if (REG_P (x))
5851debfc3dSmrg     {
5861debfc3dSmrg       unsigned int regno = REGNO (x);
5871debfc3dSmrg       if (HARD_REGISTER_P (x))
5881debfc3dSmrg 	{
5891debfc3dSmrg 	  unsigned int end_regno = end_hard_regno (GET_MODE (x), regno);
5901debfc3dSmrg 	  do
5911debfc3dSmrg 	    bitmap_set_bit (mod, regno);
5921debfc3dSmrg 	  while (++regno < end_regno);
5931debfc3dSmrg 	}
5941debfc3dSmrg       else
5951debfc3dSmrg 	bitmap_set_bit (mod, regno);
5961debfc3dSmrg     }
5971debfc3dSmrg }
5981debfc3dSmrg 
5991debfc3dSmrg /* Process loop described by LOOP validating that the loop is suitable for
6001debfc3dSmrg    conversion to use a low overhead looping instruction, replacing the jump
6011debfc3dSmrg    insn where suitable.  Returns true if the loop was successfully
6021debfc3dSmrg    modified.  */
6031debfc3dSmrg 
6041debfc3dSmrg static bool
doloop_optimize(class loop * loop)605*8feb0f0bSmrg doloop_optimize (class loop *loop)
6061debfc3dSmrg {
607a2dc1f3fSmrg   scalar_int_mode mode;
6081debfc3dSmrg   rtx doloop_reg;
6091debfc3dSmrg   rtx count;
6101debfc3dSmrg   widest_int iterations, iterations_max;
6111debfc3dSmrg   rtx_code_label *start_label;
6121debfc3dSmrg   rtx condition;
6131debfc3dSmrg   unsigned level;
6141debfc3dSmrg   HOST_WIDE_INT est_niter;
6151debfc3dSmrg   int max_cost;
616*8feb0f0bSmrg   class niter_desc *desc;
6171debfc3dSmrg   unsigned word_mode_size;
6181debfc3dSmrg   unsigned HOST_WIDE_INT word_mode_max;
6191debfc3dSmrg   int entered_at_top;
6201debfc3dSmrg 
6211debfc3dSmrg   if (dump_file)
6221debfc3dSmrg     fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
6231debfc3dSmrg 
6241debfc3dSmrg   iv_analysis_loop_init (loop);
6251debfc3dSmrg 
6261debfc3dSmrg   /* Find the simple exit of a LOOP.  */
6271debfc3dSmrg   desc = get_simple_loop_desc (loop);
6281debfc3dSmrg 
6291debfc3dSmrg   /* Check that loop is a candidate for a low-overhead looping insn.  */
6301debfc3dSmrg   if (!doloop_valid_p (loop, desc))
6311debfc3dSmrg     {
6321debfc3dSmrg       if (dump_file)
6331debfc3dSmrg 	fprintf (dump_file,
6341debfc3dSmrg 		 "Doloop: The loop is not suitable.\n");
6351debfc3dSmrg       return false;
6361debfc3dSmrg     }
6371debfc3dSmrg   mode = desc->mode;
6381debfc3dSmrg 
6391debfc3dSmrg   est_niter = get_estimated_loop_iterations_int (loop);
6401debfc3dSmrg   if (est_niter == -1)
6411debfc3dSmrg     est_niter = get_likely_max_loop_iterations_int (loop);
6421debfc3dSmrg 
6431debfc3dSmrg   if (est_niter >= 0 && est_niter < 3)
6441debfc3dSmrg     {
6451debfc3dSmrg       if (dump_file)
6461debfc3dSmrg 	fprintf (dump_file,
6471debfc3dSmrg 		 "Doloop: Too few iterations (%u) to be profitable.\n",
6481debfc3dSmrg 		 (unsigned int)est_niter);
6491debfc3dSmrg       return false;
6501debfc3dSmrg     }
6511debfc3dSmrg 
6521debfc3dSmrg   max_cost
653*8feb0f0bSmrg     = COSTS_N_INSNS (param_max_iterations_computation_cost);
6541debfc3dSmrg   if (set_src_cost (desc->niter_expr, mode, optimize_loop_for_speed_p (loop))
6551debfc3dSmrg       > max_cost)
6561debfc3dSmrg     {
6571debfc3dSmrg       if (dump_file)
6581debfc3dSmrg 	fprintf (dump_file,
6591debfc3dSmrg 		 "Doloop: number of iterations too costly to compute.\n");
6601debfc3dSmrg       return false;
6611debfc3dSmrg     }
6621debfc3dSmrg 
6631debfc3dSmrg   if (desc->const_iter)
6641debfc3dSmrg     iterations = widest_int::from (rtx_mode_t (desc->niter_expr, mode),
6651debfc3dSmrg 				   UNSIGNED);
6661debfc3dSmrg   else
6671debfc3dSmrg     iterations = 0;
6681debfc3dSmrg   if (!get_max_loop_iterations (loop, &iterations_max))
6691debfc3dSmrg     iterations_max = 0;
6701debfc3dSmrg   level = get_loop_level (loop) + 1;
6711debfc3dSmrg   entered_at_top = (loop->latch == desc->in_edge->dest
6721debfc3dSmrg 		    && contains_no_active_insn_p (loop->latch));
6731debfc3dSmrg   if (!targetm.can_use_doloop_p (iterations, iterations_max, level,
6741debfc3dSmrg 				 entered_at_top))
6751debfc3dSmrg     {
6761debfc3dSmrg       if (dump_file)
6771debfc3dSmrg 	fprintf (dump_file, "Loop rejected by can_use_doloop_p.\n");
6781debfc3dSmrg       return false;
6791debfc3dSmrg     }
6801debfc3dSmrg 
6811debfc3dSmrg   /* Generate looping insn.  If the pattern FAILs then give up trying
6821debfc3dSmrg      to modify the loop since there is some aspect the back-end does
6831debfc3dSmrg      not like.  */
6841debfc3dSmrg   count = copy_rtx (desc->niter_expr);
6851debfc3dSmrg   start_label = block_label (desc->in_edge->dest);
6861debfc3dSmrg   doloop_reg = gen_reg_rtx (mode);
6871debfc3dSmrg   rtx_insn *doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label);
6881debfc3dSmrg 
6891debfc3dSmrg   word_mode_size = GET_MODE_PRECISION (word_mode);
6901debfc3dSmrg   word_mode_max = (HOST_WIDE_INT_1U << (word_mode_size - 1) << 1) - 1;
6911debfc3dSmrg   if (! doloop_seq
6921debfc3dSmrg       && mode != word_mode
6931debfc3dSmrg       /* Before trying mode different from the one in that # of iterations is
6941debfc3dSmrg 	 computed, we must be sure that the number of iterations fits into
6951debfc3dSmrg 	 the new mode.  */
6961debfc3dSmrg       && (word_mode_size >= GET_MODE_PRECISION (mode)
6971debfc3dSmrg  	  || wi::leu_p (iterations_max, word_mode_max)))
6981debfc3dSmrg     {
6991debfc3dSmrg       if (word_mode_size > GET_MODE_PRECISION (mode))
7001debfc3dSmrg 	count = simplify_gen_unary (ZERO_EXTEND, word_mode, count, mode);
7011debfc3dSmrg       else
7021debfc3dSmrg 	count = lowpart_subreg (word_mode, count, mode);
7031debfc3dSmrg       PUT_MODE (doloop_reg, word_mode);
7041debfc3dSmrg       doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label);
7051debfc3dSmrg     }
7061debfc3dSmrg   if (! doloop_seq)
7071debfc3dSmrg     {
7081debfc3dSmrg       if (dump_file)
7091debfc3dSmrg 	fprintf (dump_file,
7101debfc3dSmrg 		 "Doloop: Target unwilling to use doloop pattern!\n");
7111debfc3dSmrg       return false;
7121debfc3dSmrg     }
7131debfc3dSmrg 
7141debfc3dSmrg   /* If multiple instructions were created, the last must be the
7151debfc3dSmrg      jump instruction.  */
7161debfc3dSmrg   rtx_insn *doloop_insn = doloop_seq;
7171debfc3dSmrg   while (NEXT_INSN (doloop_insn) != NULL_RTX)
7181debfc3dSmrg     doloop_insn = NEXT_INSN (doloop_insn);
7191debfc3dSmrg   if (!JUMP_P (doloop_insn)
7201debfc3dSmrg       || !(condition = doloop_condition_get (doloop_insn)))
7211debfc3dSmrg     {
7221debfc3dSmrg       if (dump_file)
7231debfc3dSmrg 	fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
7241debfc3dSmrg       return false;
7251debfc3dSmrg     }
7261debfc3dSmrg 
7271debfc3dSmrg   /* Ensure that the new sequence doesn't clobber a register that
7281debfc3dSmrg      is live at the end of the block.  */
7291debfc3dSmrg   {
7301debfc3dSmrg     bitmap modified = BITMAP_ALLOC (NULL);
7311debfc3dSmrg 
7321debfc3dSmrg     for (rtx_insn *i = doloop_seq; i != NULL; i = NEXT_INSN (i))
733*8feb0f0bSmrg       note_stores (i, record_reg_sets, modified);
7341debfc3dSmrg 
7351debfc3dSmrg     basic_block loop_end = desc->out_edge->src;
7361debfc3dSmrg     bool fail = bitmap_intersect_p (df_get_live_out (loop_end), modified);
7371debfc3dSmrg     BITMAP_FREE (modified);
7381debfc3dSmrg 
7391debfc3dSmrg     if (fail)
7401debfc3dSmrg       {
7411debfc3dSmrg 	if (dump_file)
7421debfc3dSmrg 	  fprintf (dump_file, "Doloop: doloop pattern clobbers live out\n");
7431debfc3dSmrg 	return false;
7441debfc3dSmrg       }
7451debfc3dSmrg   }
7461debfc3dSmrg 
7471debfc3dSmrg   doloop_modify (loop, desc, doloop_seq, condition, count);
7481debfc3dSmrg   return true;
7491debfc3dSmrg }
7501debfc3dSmrg 
7511debfc3dSmrg /* This is the main entry point.  Process all loops using doloop_optimize.  */
7521debfc3dSmrg 
7531debfc3dSmrg void
doloop_optimize_loops(void)7541debfc3dSmrg doloop_optimize_loops (void)
7551debfc3dSmrg {
756*8feb0f0bSmrg   class loop *loop;
7571debfc3dSmrg 
7581debfc3dSmrg   if (optimize == 1)
7591debfc3dSmrg     {
7601debfc3dSmrg       df_live_add_problem ();
7611debfc3dSmrg       df_live_set_all_dirty ();
7621debfc3dSmrg     }
7631debfc3dSmrg 
7641debfc3dSmrg   FOR_EACH_LOOP (loop, 0)
7651debfc3dSmrg     {
7661debfc3dSmrg       doloop_optimize (loop);
7671debfc3dSmrg     }
7681debfc3dSmrg 
7691debfc3dSmrg   if (optimize == 1)
7701debfc3dSmrg     df_remove_problem (df_live);
7711debfc3dSmrg 
7721debfc3dSmrg   iv_analysis_done ();
7731debfc3dSmrg 
7741debfc3dSmrg   checking_verify_loop_structure ();
7751debfc3dSmrg }
776