11debfc3dSmrg /* Perform doloop optimizations
2*8feb0f0bSmrg Copyright (C) 2004-2020 Free Software Foundation, Inc.
31debfc3dSmrg Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
41debfc3dSmrg
51debfc3dSmrg This file is part of GCC.
61debfc3dSmrg
71debfc3dSmrg GCC is free software; you can redistribute it and/or modify it under
81debfc3dSmrg the terms of the GNU General Public License as published by the Free
91debfc3dSmrg Software Foundation; either version 3, or (at your option) any later
101debfc3dSmrg version.
111debfc3dSmrg
121debfc3dSmrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
131debfc3dSmrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
141debfc3dSmrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
151debfc3dSmrg for more details.
161debfc3dSmrg
171debfc3dSmrg You should have received a copy of the GNU General Public License
181debfc3dSmrg along with GCC; see the file COPYING3. If not see
191debfc3dSmrg <http://www.gnu.org/licenses/>. */
201debfc3dSmrg
211debfc3dSmrg #include "config.h"
221debfc3dSmrg #include "system.h"
231debfc3dSmrg #include "coretypes.h"
241debfc3dSmrg #include "backend.h"
251debfc3dSmrg #include "target.h"
261debfc3dSmrg #include "rtl.h"
271debfc3dSmrg #include "tree.h"
281debfc3dSmrg #include "cfghooks.h"
291debfc3dSmrg #include "memmodel.h"
301debfc3dSmrg #include "emit-rtl.h"
311debfc3dSmrg #include "dojump.h"
321debfc3dSmrg #include "expr.h"
331debfc3dSmrg #include "cfgloop.h"
341debfc3dSmrg #include "cfgrtl.h"
351debfc3dSmrg #include "dumpfile.h"
361debfc3dSmrg #include "loop-unroll.h"
371debfc3dSmrg #include "regs.h"
381debfc3dSmrg #include "df.h"
391debfc3dSmrg
401debfc3dSmrg /* This module is used to modify loops with a determinable number of
411debfc3dSmrg iterations to use special low-overhead looping instructions.
421debfc3dSmrg
431debfc3dSmrg It first validates whether the loop is well behaved and has a
441debfc3dSmrg determinable number of iterations (either at compile or run-time).
451debfc3dSmrg It then modifies the loop to use a low-overhead looping pattern as
461debfc3dSmrg follows:
471debfc3dSmrg
481debfc3dSmrg 1. A pseudo register is allocated as the loop iteration counter.
491debfc3dSmrg
501debfc3dSmrg 2. The number of loop iterations is calculated and is stored
511debfc3dSmrg in the loop counter.
521debfc3dSmrg
531debfc3dSmrg 3. At the end of the loop, the jump insn is replaced by the
541debfc3dSmrg doloop_end pattern. The compare must remain because it might be
551debfc3dSmrg used elsewhere. If the loop-variable or condition register are
561debfc3dSmrg used elsewhere, they will be eliminated by flow.
571debfc3dSmrg
581debfc3dSmrg 4. An optional doloop_begin pattern is inserted at the top of the
591debfc3dSmrg loop.
601debfc3dSmrg
611debfc3dSmrg TODO The optimization should only performed when either the biv used for exit
621debfc3dSmrg condition is unused at all except for the exit test, or if we do not have to
631debfc3dSmrg change its value, since otherwise we have to add a new induction variable,
641debfc3dSmrg which usually will not pay up (unless the cost of the doloop pattern is
651debfc3dSmrg somehow extremely lower than the cost of compare & jump, or unless the bct
661debfc3dSmrg register cannot be used for anything else but doloop -- ??? detect these
671debfc3dSmrg cases). */
681debfc3dSmrg
691debfc3dSmrg /* Return the loop termination condition for PATTERN or zero
701debfc3dSmrg if it is not a decrement and branch jump insn. */
711debfc3dSmrg
721debfc3dSmrg rtx
doloop_condition_get(rtx_insn * doloop_pat)731debfc3dSmrg doloop_condition_get (rtx_insn *doloop_pat)
741debfc3dSmrg {
751debfc3dSmrg rtx cmp;
761debfc3dSmrg rtx inc;
771debfc3dSmrg rtx reg;
781debfc3dSmrg rtx inc_src;
791debfc3dSmrg rtx condition;
801debfc3dSmrg rtx pattern;
811debfc3dSmrg rtx cc_reg = NULL_RTX;
821debfc3dSmrg rtx reg_orig = NULL_RTX;
831debfc3dSmrg
841debfc3dSmrg /* The canonical doloop pattern we expect has one of the following
851debfc3dSmrg forms:
861debfc3dSmrg
871debfc3dSmrg 1) (parallel [(set (pc) (if_then_else (condition)
881debfc3dSmrg (label_ref (label))
891debfc3dSmrg (pc)))
901debfc3dSmrg (set (reg) (plus (reg) (const_int -1)))
911debfc3dSmrg (additional clobbers and uses)])
921debfc3dSmrg
931debfc3dSmrg The branch must be the first entry of the parallel (also required
941debfc3dSmrg by jump.c), and the second entry of the parallel must be a set of
951debfc3dSmrg the loop counter register. Some targets (IA-64) wrap the set of
961debfc3dSmrg the loop counter in an if_then_else too.
971debfc3dSmrg
981debfc3dSmrg 2) (set (reg) (plus (reg) (const_int -1))
991debfc3dSmrg (set (pc) (if_then_else (reg != 0)
1001debfc3dSmrg (label_ref (label))
1011debfc3dSmrg (pc))).
1021debfc3dSmrg
1031debfc3dSmrg Some targets (ARM) do the comparison before the branch, as in the
1041debfc3dSmrg following form:
1051debfc3dSmrg
1061debfc3dSmrg 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
1071debfc3dSmrg (set (reg) (plus (reg) (const_int -1)))])
1081debfc3dSmrg (set (pc) (if_then_else (cc == NE)
1091debfc3dSmrg (label_ref (label))
1101debfc3dSmrg (pc))) */
1111debfc3dSmrg
1121debfc3dSmrg pattern = PATTERN (doloop_pat);
1131debfc3dSmrg
1141debfc3dSmrg if (GET_CODE (pattern) != PARALLEL)
1151debfc3dSmrg {
1161debfc3dSmrg rtx cond;
1171debfc3dSmrg rtx_insn *prev_insn = prev_nondebug_insn (doloop_pat);
1181debfc3dSmrg rtx cmp_arg1, cmp_arg2;
1191debfc3dSmrg rtx cmp_orig;
1201debfc3dSmrg
1211debfc3dSmrg /* In case the pattern is not PARALLEL we expect two forms
1221debfc3dSmrg of doloop which are cases 2) and 3) above: in case 2) the
1231debfc3dSmrg decrement immediately precedes the branch, while in case 3)
1241debfc3dSmrg the compare and decrement instructions immediately precede
1251debfc3dSmrg the branch. */
1261debfc3dSmrg
1271debfc3dSmrg if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
1281debfc3dSmrg return 0;
1291debfc3dSmrg
1301debfc3dSmrg cmp = pattern;
1311debfc3dSmrg if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
1321debfc3dSmrg {
1331debfc3dSmrg /* The third case: the compare and decrement instructions
1341debfc3dSmrg immediately precede the branch. */
1351debfc3dSmrg cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
1361debfc3dSmrg if (GET_CODE (cmp_orig) != SET)
1371debfc3dSmrg return 0;
1381debfc3dSmrg if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
1391debfc3dSmrg return 0;
1401debfc3dSmrg cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
1411debfc3dSmrg cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
1421debfc3dSmrg if (cmp_arg2 != const0_rtx
1431debfc3dSmrg || GET_CODE (cmp_arg1) != PLUS)
1441debfc3dSmrg return 0;
1451debfc3dSmrg reg_orig = XEXP (cmp_arg1, 0);
1461debfc3dSmrg if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
1471debfc3dSmrg || !REG_P (reg_orig))
1481debfc3dSmrg return 0;
1491debfc3dSmrg cc_reg = SET_DEST (cmp_orig);
1501debfc3dSmrg
1511debfc3dSmrg inc = XVECEXP (PATTERN (prev_insn), 0, 1);
1521debfc3dSmrg }
1531debfc3dSmrg else
1541debfc3dSmrg inc = PATTERN (prev_insn);
1551debfc3dSmrg if (GET_CODE (cmp) == SET && GET_CODE (SET_SRC (cmp)) == IF_THEN_ELSE)
1561debfc3dSmrg {
1571debfc3dSmrg /* We expect the condition to be of the form (reg != 0) */
1581debfc3dSmrg cond = XEXP (SET_SRC (cmp), 0);
1591debfc3dSmrg if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
1601debfc3dSmrg return 0;
1611debfc3dSmrg }
1621debfc3dSmrg }
1631debfc3dSmrg else
1641debfc3dSmrg {
1651debfc3dSmrg cmp = XVECEXP (pattern, 0, 0);
1661debfc3dSmrg inc = XVECEXP (pattern, 0, 1);
1671debfc3dSmrg }
1681debfc3dSmrg
1691debfc3dSmrg /* Check for (set (reg) (something)). */
1701debfc3dSmrg if (GET_CODE (inc) != SET)
1711debfc3dSmrg return 0;
1721debfc3dSmrg reg = SET_DEST (inc);
1731debfc3dSmrg if (! REG_P (reg))
1741debfc3dSmrg return 0;
1751debfc3dSmrg
1761debfc3dSmrg /* Check if something = (plus (reg) (const_int -1)).
1771debfc3dSmrg On IA-64, this decrement is wrapped in an if_then_else. */
1781debfc3dSmrg inc_src = SET_SRC (inc);
1791debfc3dSmrg if (GET_CODE (inc_src) == IF_THEN_ELSE)
1801debfc3dSmrg inc_src = XEXP (inc_src, 1);
1811debfc3dSmrg if (GET_CODE (inc_src) != PLUS
1821debfc3dSmrg || XEXP (inc_src, 0) != reg
1831debfc3dSmrg || XEXP (inc_src, 1) != constm1_rtx)
1841debfc3dSmrg return 0;
1851debfc3dSmrg
1861debfc3dSmrg /* Check for (set (pc) (if_then_else (condition)
1871debfc3dSmrg (label_ref (label))
1881debfc3dSmrg (pc))). */
1891debfc3dSmrg if (GET_CODE (cmp) != SET
1901debfc3dSmrg || SET_DEST (cmp) != pc_rtx
1911debfc3dSmrg || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
1921debfc3dSmrg || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
1931debfc3dSmrg || XEXP (SET_SRC (cmp), 2) != pc_rtx)
1941debfc3dSmrg return 0;
1951debfc3dSmrg
1961debfc3dSmrg /* Extract loop termination condition. */
1971debfc3dSmrg condition = XEXP (SET_SRC (cmp), 0);
1981debfc3dSmrg
1991debfc3dSmrg /* We expect a GE or NE comparison with 0 or 1. */
2001debfc3dSmrg if ((GET_CODE (condition) != GE
2011debfc3dSmrg && GET_CODE (condition) != NE)
2021debfc3dSmrg || (XEXP (condition, 1) != const0_rtx
2031debfc3dSmrg && XEXP (condition, 1) != const1_rtx))
2041debfc3dSmrg return 0;
2051debfc3dSmrg
2061debfc3dSmrg if ((XEXP (condition, 0) == reg)
2071debfc3dSmrg /* For the third case: */
2081debfc3dSmrg || ((cc_reg != NULL_RTX)
2091debfc3dSmrg && (XEXP (condition, 0) == cc_reg)
2101debfc3dSmrg && (reg_orig == reg))
2111debfc3dSmrg || (GET_CODE (XEXP (condition, 0)) == PLUS
2121debfc3dSmrg && XEXP (XEXP (condition, 0), 0) == reg))
2131debfc3dSmrg {
2141debfc3dSmrg if (GET_CODE (pattern) != PARALLEL)
2151debfc3dSmrg /* For the second form we expect:
2161debfc3dSmrg
2171debfc3dSmrg (set (reg) (plus (reg) (const_int -1))
2181debfc3dSmrg (set (pc) (if_then_else (reg != 0)
2191debfc3dSmrg (label_ref (label))
2201debfc3dSmrg (pc))).
2211debfc3dSmrg
2221debfc3dSmrg is equivalent to the following:
2231debfc3dSmrg
2241debfc3dSmrg (parallel [(set (pc) (if_then_else (reg != 1)
2251debfc3dSmrg (label_ref (label))
2261debfc3dSmrg (pc)))
2271debfc3dSmrg (set (reg) (plus (reg) (const_int -1)))
2281debfc3dSmrg (additional clobbers and uses)])
2291debfc3dSmrg
2301debfc3dSmrg For the third form we expect:
2311debfc3dSmrg
2321debfc3dSmrg (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
2331debfc3dSmrg (set (reg) (plus (reg) (const_int -1)))])
2341debfc3dSmrg (set (pc) (if_then_else (cc == NE)
2351debfc3dSmrg (label_ref (label))
2361debfc3dSmrg (pc)))
2371debfc3dSmrg
2381debfc3dSmrg which is equivalent to the following:
2391debfc3dSmrg
2401debfc3dSmrg (parallel [(set (cc) (compare (reg, 1))
2411debfc3dSmrg (set (reg) (plus (reg) (const_int -1)))
2421debfc3dSmrg (set (pc) (if_then_else (NE == cc)
2431debfc3dSmrg (label_ref (label))
2441debfc3dSmrg (pc))))])
2451debfc3dSmrg
2461debfc3dSmrg So we return the second form instead for the two cases.
2471debfc3dSmrg
2481debfc3dSmrg */
2491debfc3dSmrg condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
2501debfc3dSmrg
2511debfc3dSmrg return condition;
2521debfc3dSmrg }
2531debfc3dSmrg
2541debfc3dSmrg /* ??? If a machine uses a funny comparison, we could return a
2551debfc3dSmrg canonicalized form here. */
2561debfc3dSmrg
2571debfc3dSmrg return 0;
2581debfc3dSmrg }
2591debfc3dSmrg
2601debfc3dSmrg /* Return nonzero if the loop specified by LOOP is suitable for
2611debfc3dSmrg the use of special low-overhead looping instructions. DESC
2621debfc3dSmrg describes the number of iterations of the loop. */
2631debfc3dSmrg
2641debfc3dSmrg static bool
doloop_valid_p(class loop * loop,class niter_desc * desc)265*8feb0f0bSmrg doloop_valid_p (class loop *loop, class niter_desc *desc)
2661debfc3dSmrg {
2671debfc3dSmrg basic_block *body = get_loop_body (loop), bb;
2681debfc3dSmrg rtx_insn *insn;
2691debfc3dSmrg unsigned i;
2701debfc3dSmrg bool result = true;
2711debfc3dSmrg
2721debfc3dSmrg /* Check for loops that may not terminate under special conditions. */
2731debfc3dSmrg if (!desc->simple_p
2741debfc3dSmrg || desc->assumptions
2751debfc3dSmrg || desc->infinite)
2761debfc3dSmrg {
2771debfc3dSmrg /* There are some cases that would require a special attention.
2781debfc3dSmrg For example if the comparison is LEU and the comparison value
2791debfc3dSmrg is UINT_MAX then the loop will not terminate. Similarly, if the
2801debfc3dSmrg comparison code is GEU and the comparison value is 0, the
2811debfc3dSmrg loop will not terminate.
2821debfc3dSmrg
2831debfc3dSmrg If the absolute increment is not 1, the loop can be infinite
2841debfc3dSmrg even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
2851debfc3dSmrg
2861debfc3dSmrg ??? We could compute these conditions at run-time and have a
2871debfc3dSmrg additional jump around the loop to ensure an infinite loop.
2881debfc3dSmrg However, it is very unlikely that this is the intended
2891debfc3dSmrg behavior of the loop and checking for these rare boundary
2901debfc3dSmrg conditions would pessimize all other code.
2911debfc3dSmrg
2921debfc3dSmrg If the loop is executed only a few times an extra check to
2931debfc3dSmrg restart the loop could use up most of the benefits of using a
2941debfc3dSmrg count register loop. Note however, that normally, this
2951debfc3dSmrg restart branch would never execute, so it could be predicted
2961debfc3dSmrg well by the CPU. We should generate the pessimistic code by
2971debfc3dSmrg default, and have an option, e.g. -funsafe-loops that would
2981debfc3dSmrg enable count-register loops in this case. */
2991debfc3dSmrg if (dump_file)
3001debfc3dSmrg fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
3011debfc3dSmrg result = false;
3021debfc3dSmrg goto cleanup;
3031debfc3dSmrg }
3041debfc3dSmrg
3051debfc3dSmrg for (i = 0; i < loop->num_nodes; i++)
3061debfc3dSmrg {
3071debfc3dSmrg bb = body[i];
3081debfc3dSmrg
3091debfc3dSmrg for (insn = BB_HEAD (bb);
3101debfc3dSmrg insn != NEXT_INSN (BB_END (bb));
3111debfc3dSmrg insn = NEXT_INSN (insn))
3121debfc3dSmrg {
3131debfc3dSmrg /* Different targets have different necessities for low-overhead
3141debfc3dSmrg looping. Call the back end for each instruction within the loop
3151debfc3dSmrg to let it decide whether the insn prohibits a low-overhead loop.
3161debfc3dSmrg It will then return the cause for it to emit to the dump file. */
3171debfc3dSmrg const char * invalid = targetm.invalid_within_doloop (insn);
3181debfc3dSmrg if (invalid)
3191debfc3dSmrg {
3201debfc3dSmrg if (dump_file)
3211debfc3dSmrg fprintf (dump_file, "Doloop: %s\n", invalid);
3221debfc3dSmrg result = false;
3231debfc3dSmrg goto cleanup;
3241debfc3dSmrg }
3251debfc3dSmrg }
3261debfc3dSmrg }
3271debfc3dSmrg result = true;
3281debfc3dSmrg
3291debfc3dSmrg cleanup:
3301debfc3dSmrg free (body);
3311debfc3dSmrg
3321debfc3dSmrg return result;
3331debfc3dSmrg }
3341debfc3dSmrg
3351debfc3dSmrg /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
3361debfc3dSmrg edge. If the condition is always false, do not do anything. If it is always
3371debfc3dSmrg true, redirect E to DEST and return false. In all other cases, true is
3381debfc3dSmrg returned. */
3391debfc3dSmrg
3401debfc3dSmrg static bool
add_test(rtx cond,edge * e,basic_block dest)3411debfc3dSmrg add_test (rtx cond, edge *e, basic_block dest)
3421debfc3dSmrg {
3431debfc3dSmrg rtx_insn *seq, *jump;
3441debfc3dSmrg rtx_code_label *label;
3451debfc3dSmrg machine_mode mode;
3461debfc3dSmrg rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
3471debfc3dSmrg enum rtx_code code = GET_CODE (cond);
3481debfc3dSmrg basic_block bb;
349a2dc1f3fSmrg /* The jump is supposed to handle an unlikely special case. */
350a2dc1f3fSmrg profile_probability prob = profile_probability::guessed_never ();
3511debfc3dSmrg
3521debfc3dSmrg mode = GET_MODE (XEXP (cond, 0));
3531debfc3dSmrg if (mode == VOIDmode)
3541debfc3dSmrg mode = GET_MODE (XEXP (cond, 1));
3551debfc3dSmrg
3561debfc3dSmrg start_sequence ();
3571debfc3dSmrg op0 = force_operand (op0, NULL_RTX);
3581debfc3dSmrg op1 = force_operand (op1, NULL_RTX);
3591debfc3dSmrg label = block_label (dest);
360a2dc1f3fSmrg do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL, label,
361a2dc1f3fSmrg prob);
3621debfc3dSmrg
3631debfc3dSmrg jump = get_last_insn ();
3641debfc3dSmrg if (!jump || !JUMP_P (jump))
3651debfc3dSmrg {
3661debfc3dSmrg /* The condition is always false and the jump was optimized out. */
3671debfc3dSmrg end_sequence ();
3681debfc3dSmrg return true;
3691debfc3dSmrg }
3701debfc3dSmrg
3711debfc3dSmrg seq = get_insns ();
3721debfc3dSmrg unshare_all_rtl_in_chain (seq);
3731debfc3dSmrg end_sequence ();
3741debfc3dSmrg
3751debfc3dSmrg /* There always is at least the jump insn in the sequence. */
3761debfc3dSmrg gcc_assert (seq != NULL_RTX);
3771debfc3dSmrg
3781debfc3dSmrg bb = split_edge_and_insert (*e, seq);
3791debfc3dSmrg *e = single_succ_edge (bb);
3801debfc3dSmrg
3811debfc3dSmrg if (any_uncondjump_p (jump))
3821debfc3dSmrg {
3831debfc3dSmrg /* The condition is always true. */
3841debfc3dSmrg delete_insn (jump);
3851debfc3dSmrg redirect_edge_and_branch_force (*e, dest);
3861debfc3dSmrg return false;
3871debfc3dSmrg }
3881debfc3dSmrg
3891debfc3dSmrg JUMP_LABEL (jump) = label;
3901debfc3dSmrg
3911debfc3dSmrg LABEL_NUSES (label)++;
3921debfc3dSmrg
393a2dc1f3fSmrg edge e2 = make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
394a2dc1f3fSmrg e2->probability = prob;
395a2dc1f3fSmrg (*e)->probability = prob.invert ();
396a2dc1f3fSmrg update_br_prob_note (e2->src);
3971debfc3dSmrg return true;
3981debfc3dSmrg }
3991debfc3dSmrg
4001debfc3dSmrg /* Modify the loop to use the low-overhead looping insn where LOOP
4011debfc3dSmrg describes the loop, DESC describes the number of iterations of the
4021debfc3dSmrg loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
4031debfc3dSmrg end of the loop. CONDITION is the condition separated from the
4041debfc3dSmrg DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. */
4051debfc3dSmrg
4061debfc3dSmrg static void
doloop_modify(class loop * loop,class niter_desc * desc,rtx_insn * doloop_seq,rtx condition,rtx count)407*8feb0f0bSmrg doloop_modify (class loop *loop, class niter_desc *desc,
4081debfc3dSmrg rtx_insn *doloop_seq, rtx condition, rtx count)
4091debfc3dSmrg {
4101debfc3dSmrg rtx counter_reg;
4111debfc3dSmrg rtx tmp, noloop = NULL_RTX;
4121debfc3dSmrg rtx_insn *sequence;
4131debfc3dSmrg rtx_insn *jump_insn;
4141debfc3dSmrg rtx_code_label *jump_label;
4151debfc3dSmrg int nonneg = 0;
4161debfc3dSmrg bool increment_count;
4171debfc3dSmrg basic_block loop_end = desc->out_edge->src;
418a2dc1f3fSmrg scalar_int_mode mode;
4191debfc3dSmrg widest_int iterations;
4201debfc3dSmrg
4211debfc3dSmrg jump_insn = BB_END (loop_end);
4221debfc3dSmrg
4231debfc3dSmrg if (dump_file)
4241debfc3dSmrg {
4251debfc3dSmrg fprintf (dump_file, "Doloop: Inserting doloop pattern (");
4261debfc3dSmrg if (desc->const_iter)
4271debfc3dSmrg fprintf (dump_file, "%" PRId64, desc->niter);
4281debfc3dSmrg else
4291debfc3dSmrg fputs ("runtime", dump_file);
4301debfc3dSmrg fputs (" iterations).\n", dump_file);
4311debfc3dSmrg }
4321debfc3dSmrg
4331debfc3dSmrg /* Discard original jump to continue loop. The original compare
4341debfc3dSmrg result may still be live, so it cannot be discarded explicitly. */
4351debfc3dSmrg delete_insn (jump_insn);
4361debfc3dSmrg
4371debfc3dSmrg counter_reg = XEXP (condition, 0);
4381debfc3dSmrg if (GET_CODE (counter_reg) == PLUS)
4391debfc3dSmrg counter_reg = XEXP (counter_reg, 0);
440a2dc1f3fSmrg /* These patterns must operate on integer counters. */
441a2dc1f3fSmrg mode = as_a <scalar_int_mode> (GET_MODE (counter_reg));
4421debfc3dSmrg
4431debfc3dSmrg increment_count = false;
4441debfc3dSmrg switch (GET_CODE (condition))
4451debfc3dSmrg {
4461debfc3dSmrg case NE:
4471debfc3dSmrg /* Currently only NE tests against zero and one are supported. */
4481debfc3dSmrg noloop = XEXP (condition, 1);
4491debfc3dSmrg if (noloop != const0_rtx)
4501debfc3dSmrg {
4511debfc3dSmrg gcc_assert (noloop == const1_rtx);
4521debfc3dSmrg increment_count = true;
4531debfc3dSmrg }
4541debfc3dSmrg break;
4551debfc3dSmrg
4561debfc3dSmrg case GE:
4571debfc3dSmrg /* Currently only GE tests against zero are supported. */
4581debfc3dSmrg gcc_assert (XEXP (condition, 1) == const0_rtx);
4591debfc3dSmrg
4601debfc3dSmrg noloop = constm1_rtx;
4611debfc3dSmrg
4621debfc3dSmrg /* The iteration count does not need incrementing for a GE test. */
4631debfc3dSmrg increment_count = false;
4641debfc3dSmrg
4651debfc3dSmrg /* Determine if the iteration counter will be non-negative.
4661debfc3dSmrg Note that the maximum value loaded is iterations_max - 1. */
4671debfc3dSmrg if (get_max_loop_iterations (loop, &iterations)
4681debfc3dSmrg && wi::leu_p (iterations,
4691debfc3dSmrg wi::set_bit_in_zero <widest_int>
4701debfc3dSmrg (GET_MODE_PRECISION (mode) - 1)))
4711debfc3dSmrg nonneg = 1;
4721debfc3dSmrg break;
4731debfc3dSmrg
4741debfc3dSmrg /* Abort if an invalid doloop pattern has been generated. */
4751debfc3dSmrg default:
4761debfc3dSmrg gcc_unreachable ();
4771debfc3dSmrg }
4781debfc3dSmrg
4791debfc3dSmrg if (increment_count)
4801debfc3dSmrg count = simplify_gen_binary (PLUS, mode, count, const1_rtx);
4811debfc3dSmrg
4821debfc3dSmrg /* Insert initialization of the count register into the loop header. */
4831debfc3dSmrg start_sequence ();
4841debfc3dSmrg /* count has been already copied through copy_rtx. */
4851debfc3dSmrg reset_used_flags (count);
4861debfc3dSmrg set_used_flags (condition);
4871debfc3dSmrg tmp = force_operand (count, counter_reg);
4881debfc3dSmrg convert_move (counter_reg, tmp, 1);
4891debfc3dSmrg sequence = get_insns ();
4901debfc3dSmrg unshare_all_rtl_in_chain (sequence);
4911debfc3dSmrg end_sequence ();
4921debfc3dSmrg emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
4931debfc3dSmrg
4941debfc3dSmrg if (desc->noloop_assumptions)
4951debfc3dSmrg {
4961debfc3dSmrg rtx ass = copy_rtx (desc->noloop_assumptions);
4971debfc3dSmrg basic_block preheader = loop_preheader_edge (loop)->src;
4981debfc3dSmrg basic_block set_zero = split_edge (loop_preheader_edge (loop));
4991debfc3dSmrg basic_block new_preheader = split_edge (loop_preheader_edge (loop));
5001debfc3dSmrg edge te;
5011debfc3dSmrg
5021debfc3dSmrg /* Expand the condition testing the assumptions and if it does not pass,
5031debfc3dSmrg reset the count register to 0. */
5041debfc3dSmrg redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
5051debfc3dSmrg set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
5061debfc3dSmrg
507a2dc1f3fSmrg set_zero->count = profile_count::uninitialized ();
5081debfc3dSmrg
5091debfc3dSmrg te = single_succ_edge (preheader);
5101debfc3dSmrg for (; ass; ass = XEXP (ass, 1))
5111debfc3dSmrg if (!add_test (XEXP (ass, 0), &te, set_zero))
5121debfc3dSmrg break;
5131debfc3dSmrg
5141debfc3dSmrg if (ass)
5151debfc3dSmrg {
5161debfc3dSmrg /* We reached a condition that is always true. This is very hard to
5171debfc3dSmrg reproduce (such a loop does not roll, and thus it would most
5181debfc3dSmrg likely get optimized out by some of the preceding optimizations).
5191debfc3dSmrg In fact, I do not have any testcase for it. However, it would
5201debfc3dSmrg also be very hard to show that it is impossible, so we must
5211debfc3dSmrg handle this case. */
5221debfc3dSmrg set_zero->count = preheader->count;
5231debfc3dSmrg }
5241debfc3dSmrg
5251debfc3dSmrg if (EDGE_COUNT (set_zero->preds) == 0)
5261debfc3dSmrg {
5271debfc3dSmrg /* All the conditions were simplified to false, remove the
5281debfc3dSmrg unreachable set_zero block. */
5291debfc3dSmrg delete_basic_block (set_zero);
5301debfc3dSmrg }
5311debfc3dSmrg else
5321debfc3dSmrg {
5331debfc3dSmrg /* Reset the counter to zero in the set_zero block. */
5341debfc3dSmrg start_sequence ();
5351debfc3dSmrg convert_move (counter_reg, noloop, 0);
5361debfc3dSmrg sequence = get_insns ();
5371debfc3dSmrg end_sequence ();
5381debfc3dSmrg emit_insn_after (sequence, BB_END (set_zero));
5391debfc3dSmrg
5401debfc3dSmrg set_immediate_dominator (CDI_DOMINATORS, set_zero,
5411debfc3dSmrg recompute_dominator (CDI_DOMINATORS,
5421debfc3dSmrg set_zero));
5431debfc3dSmrg }
5441debfc3dSmrg
5451debfc3dSmrg set_immediate_dominator (CDI_DOMINATORS, new_preheader,
5461debfc3dSmrg recompute_dominator (CDI_DOMINATORS,
5471debfc3dSmrg new_preheader));
5481debfc3dSmrg }
5491debfc3dSmrg
5501debfc3dSmrg /* Some targets (eg, C4x) need to initialize special looping
5511debfc3dSmrg registers. */
5521debfc3dSmrg if (targetm.have_doloop_begin ())
5531debfc3dSmrg if (rtx_insn *seq = targetm.gen_doloop_begin (counter_reg, doloop_seq))
5541debfc3dSmrg emit_insn_after (seq, BB_END (loop_preheader_edge (loop)->src));
5551debfc3dSmrg
5561debfc3dSmrg /* Insert the new low-overhead looping insn. */
5571debfc3dSmrg emit_jump_insn_after (doloop_seq, BB_END (loop_end));
5581debfc3dSmrg jump_insn = BB_END (loop_end);
5591debfc3dSmrg jump_label = block_label (desc->in_edge->dest);
5601debfc3dSmrg JUMP_LABEL (jump_insn) = jump_label;
5611debfc3dSmrg LABEL_NUSES (jump_label)++;
5621debfc3dSmrg
5631debfc3dSmrg /* Ensure the right fallthru edge is marked, for case we have reversed
5641debfc3dSmrg the condition. */
5651debfc3dSmrg desc->in_edge->flags &= ~EDGE_FALLTHRU;
5661debfc3dSmrg desc->out_edge->flags |= EDGE_FALLTHRU;
5671debfc3dSmrg
5681debfc3dSmrg /* Add a REG_NONNEG note if the actual or estimated maximum number
5691debfc3dSmrg of iterations is non-negative. */
5701debfc3dSmrg if (nonneg)
5711debfc3dSmrg add_reg_note (jump_insn, REG_NONNEG, NULL_RTX);
5721debfc3dSmrg
5731debfc3dSmrg /* Update the REG_BR_PROB note. */
574a2dc1f3fSmrg if (desc->in_edge->probability.initialized_p ())
575a2dc1f3fSmrg add_reg_br_prob_note (jump_insn, desc->in_edge->probability);
5761debfc3dSmrg }
5771debfc3dSmrg
5781debfc3dSmrg /* Called through note_stores. */
5791debfc3dSmrg
5801debfc3dSmrg static void
record_reg_sets(rtx x,const_rtx pat ATTRIBUTE_UNUSED,void * data)5811debfc3dSmrg record_reg_sets (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
5821debfc3dSmrg {
5831debfc3dSmrg bitmap mod = (bitmap)data;
5841debfc3dSmrg if (REG_P (x))
5851debfc3dSmrg {
5861debfc3dSmrg unsigned int regno = REGNO (x);
5871debfc3dSmrg if (HARD_REGISTER_P (x))
5881debfc3dSmrg {
5891debfc3dSmrg unsigned int end_regno = end_hard_regno (GET_MODE (x), regno);
5901debfc3dSmrg do
5911debfc3dSmrg bitmap_set_bit (mod, regno);
5921debfc3dSmrg while (++regno < end_regno);
5931debfc3dSmrg }
5941debfc3dSmrg else
5951debfc3dSmrg bitmap_set_bit (mod, regno);
5961debfc3dSmrg }
5971debfc3dSmrg }
5981debfc3dSmrg
5991debfc3dSmrg /* Process loop described by LOOP validating that the loop is suitable for
6001debfc3dSmrg conversion to use a low overhead looping instruction, replacing the jump
6011debfc3dSmrg insn where suitable. Returns true if the loop was successfully
6021debfc3dSmrg modified. */
6031debfc3dSmrg
6041debfc3dSmrg static bool
doloop_optimize(class loop * loop)605*8feb0f0bSmrg doloop_optimize (class loop *loop)
6061debfc3dSmrg {
607a2dc1f3fSmrg scalar_int_mode mode;
6081debfc3dSmrg rtx doloop_reg;
6091debfc3dSmrg rtx count;
6101debfc3dSmrg widest_int iterations, iterations_max;
6111debfc3dSmrg rtx_code_label *start_label;
6121debfc3dSmrg rtx condition;
6131debfc3dSmrg unsigned level;
6141debfc3dSmrg HOST_WIDE_INT est_niter;
6151debfc3dSmrg int max_cost;
616*8feb0f0bSmrg class niter_desc *desc;
6171debfc3dSmrg unsigned word_mode_size;
6181debfc3dSmrg unsigned HOST_WIDE_INT word_mode_max;
6191debfc3dSmrg int entered_at_top;
6201debfc3dSmrg
6211debfc3dSmrg if (dump_file)
6221debfc3dSmrg fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
6231debfc3dSmrg
6241debfc3dSmrg iv_analysis_loop_init (loop);
6251debfc3dSmrg
6261debfc3dSmrg /* Find the simple exit of a LOOP. */
6271debfc3dSmrg desc = get_simple_loop_desc (loop);
6281debfc3dSmrg
6291debfc3dSmrg /* Check that loop is a candidate for a low-overhead looping insn. */
6301debfc3dSmrg if (!doloop_valid_p (loop, desc))
6311debfc3dSmrg {
6321debfc3dSmrg if (dump_file)
6331debfc3dSmrg fprintf (dump_file,
6341debfc3dSmrg "Doloop: The loop is not suitable.\n");
6351debfc3dSmrg return false;
6361debfc3dSmrg }
6371debfc3dSmrg mode = desc->mode;
6381debfc3dSmrg
6391debfc3dSmrg est_niter = get_estimated_loop_iterations_int (loop);
6401debfc3dSmrg if (est_niter == -1)
6411debfc3dSmrg est_niter = get_likely_max_loop_iterations_int (loop);
6421debfc3dSmrg
6431debfc3dSmrg if (est_niter >= 0 && est_niter < 3)
6441debfc3dSmrg {
6451debfc3dSmrg if (dump_file)
6461debfc3dSmrg fprintf (dump_file,
6471debfc3dSmrg "Doloop: Too few iterations (%u) to be profitable.\n",
6481debfc3dSmrg (unsigned int)est_niter);
6491debfc3dSmrg return false;
6501debfc3dSmrg }
6511debfc3dSmrg
6521debfc3dSmrg max_cost
653*8feb0f0bSmrg = COSTS_N_INSNS (param_max_iterations_computation_cost);
6541debfc3dSmrg if (set_src_cost (desc->niter_expr, mode, optimize_loop_for_speed_p (loop))
6551debfc3dSmrg > max_cost)
6561debfc3dSmrg {
6571debfc3dSmrg if (dump_file)
6581debfc3dSmrg fprintf (dump_file,
6591debfc3dSmrg "Doloop: number of iterations too costly to compute.\n");
6601debfc3dSmrg return false;
6611debfc3dSmrg }
6621debfc3dSmrg
6631debfc3dSmrg if (desc->const_iter)
6641debfc3dSmrg iterations = widest_int::from (rtx_mode_t (desc->niter_expr, mode),
6651debfc3dSmrg UNSIGNED);
6661debfc3dSmrg else
6671debfc3dSmrg iterations = 0;
6681debfc3dSmrg if (!get_max_loop_iterations (loop, &iterations_max))
6691debfc3dSmrg iterations_max = 0;
6701debfc3dSmrg level = get_loop_level (loop) + 1;
6711debfc3dSmrg entered_at_top = (loop->latch == desc->in_edge->dest
6721debfc3dSmrg && contains_no_active_insn_p (loop->latch));
6731debfc3dSmrg if (!targetm.can_use_doloop_p (iterations, iterations_max, level,
6741debfc3dSmrg entered_at_top))
6751debfc3dSmrg {
6761debfc3dSmrg if (dump_file)
6771debfc3dSmrg fprintf (dump_file, "Loop rejected by can_use_doloop_p.\n");
6781debfc3dSmrg return false;
6791debfc3dSmrg }
6801debfc3dSmrg
6811debfc3dSmrg /* Generate looping insn. If the pattern FAILs then give up trying
6821debfc3dSmrg to modify the loop since there is some aspect the back-end does
6831debfc3dSmrg not like. */
6841debfc3dSmrg count = copy_rtx (desc->niter_expr);
6851debfc3dSmrg start_label = block_label (desc->in_edge->dest);
6861debfc3dSmrg doloop_reg = gen_reg_rtx (mode);
6871debfc3dSmrg rtx_insn *doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label);
6881debfc3dSmrg
6891debfc3dSmrg word_mode_size = GET_MODE_PRECISION (word_mode);
6901debfc3dSmrg word_mode_max = (HOST_WIDE_INT_1U << (word_mode_size - 1) << 1) - 1;
6911debfc3dSmrg if (! doloop_seq
6921debfc3dSmrg && mode != word_mode
6931debfc3dSmrg /* Before trying mode different from the one in that # of iterations is
6941debfc3dSmrg computed, we must be sure that the number of iterations fits into
6951debfc3dSmrg the new mode. */
6961debfc3dSmrg && (word_mode_size >= GET_MODE_PRECISION (mode)
6971debfc3dSmrg || wi::leu_p (iterations_max, word_mode_max)))
6981debfc3dSmrg {
6991debfc3dSmrg if (word_mode_size > GET_MODE_PRECISION (mode))
7001debfc3dSmrg count = simplify_gen_unary (ZERO_EXTEND, word_mode, count, mode);
7011debfc3dSmrg else
7021debfc3dSmrg count = lowpart_subreg (word_mode, count, mode);
7031debfc3dSmrg PUT_MODE (doloop_reg, word_mode);
7041debfc3dSmrg doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label);
7051debfc3dSmrg }
7061debfc3dSmrg if (! doloop_seq)
7071debfc3dSmrg {
7081debfc3dSmrg if (dump_file)
7091debfc3dSmrg fprintf (dump_file,
7101debfc3dSmrg "Doloop: Target unwilling to use doloop pattern!\n");
7111debfc3dSmrg return false;
7121debfc3dSmrg }
7131debfc3dSmrg
7141debfc3dSmrg /* If multiple instructions were created, the last must be the
7151debfc3dSmrg jump instruction. */
7161debfc3dSmrg rtx_insn *doloop_insn = doloop_seq;
7171debfc3dSmrg while (NEXT_INSN (doloop_insn) != NULL_RTX)
7181debfc3dSmrg doloop_insn = NEXT_INSN (doloop_insn);
7191debfc3dSmrg if (!JUMP_P (doloop_insn)
7201debfc3dSmrg || !(condition = doloop_condition_get (doloop_insn)))
7211debfc3dSmrg {
7221debfc3dSmrg if (dump_file)
7231debfc3dSmrg fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
7241debfc3dSmrg return false;
7251debfc3dSmrg }
7261debfc3dSmrg
7271debfc3dSmrg /* Ensure that the new sequence doesn't clobber a register that
7281debfc3dSmrg is live at the end of the block. */
7291debfc3dSmrg {
7301debfc3dSmrg bitmap modified = BITMAP_ALLOC (NULL);
7311debfc3dSmrg
7321debfc3dSmrg for (rtx_insn *i = doloop_seq; i != NULL; i = NEXT_INSN (i))
733*8feb0f0bSmrg note_stores (i, record_reg_sets, modified);
7341debfc3dSmrg
7351debfc3dSmrg basic_block loop_end = desc->out_edge->src;
7361debfc3dSmrg bool fail = bitmap_intersect_p (df_get_live_out (loop_end), modified);
7371debfc3dSmrg BITMAP_FREE (modified);
7381debfc3dSmrg
7391debfc3dSmrg if (fail)
7401debfc3dSmrg {
7411debfc3dSmrg if (dump_file)
7421debfc3dSmrg fprintf (dump_file, "Doloop: doloop pattern clobbers live out\n");
7431debfc3dSmrg return false;
7441debfc3dSmrg }
7451debfc3dSmrg }
7461debfc3dSmrg
7471debfc3dSmrg doloop_modify (loop, desc, doloop_seq, condition, count);
7481debfc3dSmrg return true;
7491debfc3dSmrg }
7501debfc3dSmrg
7511debfc3dSmrg /* This is the main entry point. Process all loops using doloop_optimize. */
7521debfc3dSmrg
7531debfc3dSmrg void
doloop_optimize_loops(void)7541debfc3dSmrg doloop_optimize_loops (void)
7551debfc3dSmrg {
756*8feb0f0bSmrg class loop *loop;
7571debfc3dSmrg
7581debfc3dSmrg if (optimize == 1)
7591debfc3dSmrg {
7601debfc3dSmrg df_live_add_problem ();
7611debfc3dSmrg df_live_set_all_dirty ();
7621debfc3dSmrg }
7631debfc3dSmrg
7641debfc3dSmrg FOR_EACH_LOOP (loop, 0)
7651debfc3dSmrg {
7661debfc3dSmrg doloop_optimize (loop);
7671debfc3dSmrg }
7681debfc3dSmrg
7691debfc3dSmrg if (optimize == 1)
7701debfc3dSmrg df_remove_problem (df_live);
7711debfc3dSmrg
7721debfc3dSmrg iv_analysis_done ();
7731debfc3dSmrg
7741debfc3dSmrg checking_verify_loop_structure ();
7751debfc3dSmrg }
776