138fd1498Szrj /* Induction variable canonicalization and loop peeling.
238fd1498Szrj Copyright (C) 2004-2018 Free Software Foundation, Inc.
338fd1498Szrj
438fd1498Szrj This file is part of GCC.
538fd1498Szrj
638fd1498Szrj GCC is free software; you can redistribute it and/or modify it
738fd1498Szrj under the terms of the GNU General Public License as published by the
838fd1498Szrj Free Software Foundation; either version 3, or (at your option) any
938fd1498Szrj later version.
1038fd1498Szrj
1138fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT
1238fd1498Szrj ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1338fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1438fd1498Szrj for more details.
1538fd1498Szrj
1638fd1498Szrj You should have received a copy of the GNU General Public License
1738fd1498Szrj along with GCC; see the file COPYING3. If not see
1838fd1498Szrj <http://www.gnu.org/licenses/>. */
1938fd1498Szrj
2038fd1498Szrj /* This pass detects the loops that iterate a constant number of times,
2138fd1498Szrj adds a canonical induction variable (step -1, tested against 0)
2238fd1498Szrj and replaces the exit test. This enables the less powerful rtl
2338fd1498Szrj level analysis to use this information.
2438fd1498Szrj
2538fd1498Szrj This might spoil the code in some cases (by increasing register pressure).
2638fd1498Szrj Note that in the case the new variable is not needed, ivopts will get rid
2738fd1498Szrj of it, so it might only be a problem when there are no other linear induction
2838fd1498Szrj variables. In that case the created optimization possibilities are likely
2938fd1498Szrj to pay up.
3038fd1498Szrj
   We also perform
     - complete unrolling (or peeling) when the loop rolls few enough
       times
     - simple peeling (i.e. copying few initial iterations prior to the loop)
       when the number of iterations estimate is known (typically by the
       profile info).  */
3738fd1498Szrj
3838fd1498Szrj #include "config.h"
3938fd1498Szrj #include "system.h"
4038fd1498Szrj #include "coretypes.h"
4138fd1498Szrj #include "backend.h"
4238fd1498Szrj #include "tree.h"
4338fd1498Szrj #include "gimple.h"
4438fd1498Szrj #include "cfghooks.h"
4538fd1498Szrj #include "tree-pass.h"
4638fd1498Szrj #include "ssa.h"
4738fd1498Szrj #include "cgraph.h"
4838fd1498Szrj #include "gimple-pretty-print.h"
4938fd1498Szrj #include "fold-const.h"
5038fd1498Szrj #include "profile.h"
5138fd1498Szrj #include "gimple-fold.h"
5238fd1498Szrj #include "tree-eh.h"
5338fd1498Szrj #include "gimple-iterator.h"
5438fd1498Szrj #include "tree-cfg.h"
5538fd1498Szrj #include "tree-ssa-loop-manip.h"
5638fd1498Szrj #include "tree-ssa-loop-niter.h"
5738fd1498Szrj #include "tree-ssa-loop.h"
5838fd1498Szrj #include "tree-into-ssa.h"
5938fd1498Szrj #include "cfgloop.h"
6038fd1498Szrj #include "tree-chrec.h"
6138fd1498Szrj #include "tree-scalar-evolution.h"
6238fd1498Szrj #include "params.h"
6338fd1498Szrj #include "tree-inline.h"
6438fd1498Szrj #include "tree-cfgcleanup.h"
6538fd1498Szrj #include "builtins.h"
6638fd1498Szrj
/* Specifies types of loops that may be unrolled, from most to least
   restrictive.  */

enum unroll_level
{
  UL_SINGLE_ITER,	/* Only loops that exit immediately in the first
			   iteration.  */
  UL_NO_GROWTH,		/* Only loops whose unrolling will not cause increase
			   of code size.  */
  UL_ALL		/* All suitable loops.  */
};
7738fd1498Szrj
7838fd1498Szrj /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT
7938fd1498Szrj is the exit edge whose condition is replaced. The ssa versions of the new
8038fd1498Szrj IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
8138fd1498Szrj if they are not NULL. */
8238fd1498Szrj
8338fd1498Szrj void
8438fd1498Szrj create_canonical_iv (struct loop *loop, edge exit, tree niter,
8538fd1498Szrj tree *var_before = NULL, tree *var_after = NULL)
8638fd1498Szrj {
8738fd1498Szrj edge in;
8838fd1498Szrj tree type, var;
8938fd1498Szrj gcond *cond;
9038fd1498Szrj gimple_stmt_iterator incr_at;
9138fd1498Szrj enum tree_code cmp;
9238fd1498Szrj
9338fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
9438fd1498Szrj {
9538fd1498Szrj fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
9638fd1498Szrj print_generic_expr (dump_file, niter, TDF_SLIM);
9738fd1498Szrj fprintf (dump_file, " iterations.\n");
9838fd1498Szrj }
9938fd1498Szrj
10038fd1498Szrj cond = as_a <gcond *> (last_stmt (exit->src));
10138fd1498Szrj in = EDGE_SUCC (exit->src, 0);
10238fd1498Szrj if (in == exit)
10338fd1498Szrj in = EDGE_SUCC (exit->src, 1);
10438fd1498Szrj
10538fd1498Szrj /* Note that we do not need to worry about overflows, since
10638fd1498Szrj type of niter is always unsigned and all comparisons are
10738fd1498Szrj just for equality/nonequality -- i.e. everything works
10838fd1498Szrj with a modulo arithmetics. */
10938fd1498Szrj
11038fd1498Szrj type = TREE_TYPE (niter);
11138fd1498Szrj niter = fold_build2 (PLUS_EXPR, type,
11238fd1498Szrj niter,
11338fd1498Szrj build_int_cst (type, 1));
11438fd1498Szrj incr_at = gsi_last_bb (in->src);
11538fd1498Szrj create_iv (niter,
11638fd1498Szrj build_int_cst (type, -1),
11738fd1498Szrj NULL_TREE, loop,
11838fd1498Szrj &incr_at, false, var_before, &var);
11938fd1498Szrj if (var_after)
12038fd1498Szrj *var_after = var;
12138fd1498Szrj
12238fd1498Szrj cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
12338fd1498Szrj gimple_cond_set_code (cond, cmp);
12438fd1498Szrj gimple_cond_set_lhs (cond, var);
12538fd1498Szrj gimple_cond_set_rhs (cond, build_int_cst (type, 0));
12638fd1498Szrj update_stmt (cond);
12738fd1498Szrj }
12838fd1498Szrj
/* Describe size of loop as detected by tree_estimate_loop_size.  */
struct loop_size
{
  /* Number of instructions in the loop.  */
  int overall;

  /* Number of instructions that will be likely optimized out in
     peeled iterations of loop (i.e. computation based on induction
     variable where induction variable starts at known constant.)  */
  int eliminated_by_peeling;

  /* Same statistics for last iteration of loop: it is smaller because
     instructions after exit are not executed.  */
  int last_iteration;
  int last_iteration_eliminated_by_peeling;

  /* If some IV computation will become constant.  */
  bool constant_iv;

  /* Number of call stmts that are not a builtin and are pure or const
     present on the hot path.  */
  int num_pure_calls_on_hot_path;
  /* Number of call stmts that are not a builtin and are not pure nor const
     present on the hot path.  */
  int num_non_pure_calls_on_hot_path;
  /* Number of statements other than calls seen on the hot path.  */
  int non_call_stmts_on_hot_path;
  /* Number of branches seen on the hot path.  */
  int num_branches_on_hot_path;
};
15938fd1498Szrj
16038fd1498Szrj /* Return true if OP in STMT will be constant after peeling LOOP. */
16138fd1498Szrj
16238fd1498Szrj static bool
constant_after_peeling(tree op,gimple * stmt,struct loop * loop)16338fd1498Szrj constant_after_peeling (tree op, gimple *stmt, struct loop *loop)
16438fd1498Szrj {
16538fd1498Szrj if (is_gimple_min_invariant (op))
16638fd1498Szrj return true;
16738fd1498Szrj
16838fd1498Szrj /* We can still fold accesses to constant arrays when index is known. */
16938fd1498Szrj if (TREE_CODE (op) != SSA_NAME)
17038fd1498Szrj {
17138fd1498Szrj tree base = op;
17238fd1498Szrj
17338fd1498Szrj /* First make fast look if we see constant array inside. */
17438fd1498Szrj while (handled_component_p (base))
17538fd1498Szrj base = TREE_OPERAND (base, 0);
17638fd1498Szrj if ((DECL_P (base)
17738fd1498Szrj && ctor_for_folding (base) != error_mark_node)
17838fd1498Szrj || CONSTANT_CLASS_P (base))
17938fd1498Szrj {
18038fd1498Szrj /* If so, see if we understand all the indices. */
18138fd1498Szrj base = op;
18238fd1498Szrj while (handled_component_p (base))
18338fd1498Szrj {
18438fd1498Szrj if (TREE_CODE (base) == ARRAY_REF
18538fd1498Szrj && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
18638fd1498Szrj return false;
18738fd1498Szrj base = TREE_OPERAND (base, 0);
18838fd1498Szrj }
18938fd1498Szrj return true;
19038fd1498Szrj }
19138fd1498Szrj return false;
19238fd1498Szrj }
19338fd1498Szrj
19438fd1498Szrj /* Induction variables are constants when defined in loop. */
19538fd1498Szrj if (loop_containing_stmt (stmt) != loop)
19638fd1498Szrj return false;
19738fd1498Szrj tree ev = analyze_scalar_evolution (loop, op);
19838fd1498Szrj if (chrec_contains_undetermined (ev)
19938fd1498Szrj || chrec_contains_symbols (ev))
20038fd1498Szrj return false;
20138fd1498Szrj return true;
20238fd1498Szrj }
20338fd1498Szrj
/* Computes an estimated number of insns in LOOP.
   EXIT (if non-NULL) is an exit edge that will be eliminated in all but last
   iteration of the loop.
   EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last
   iteration of loop.
   Return results in SIZE, estimate benefits for complete unrolling exiting
   by EXIT.  Stop estimating after UPPER_BOUND is met.  Return true in this
   case.  */

static bool
tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel,
			 struct loop_size *size, int upper_bound)
{
  basic_block *body = get_loop_body (loop);
  gimple_stmt_iterator gsi;
  unsigned int i;
  bool after_exit;
  vec<basic_block> path = get_loop_hot_path (loop);

  /* Reset all accumulated statistics.  */
  size->overall = 0;
  size->eliminated_by_peeling = 0;
  size->last_iteration = 0;
  size->last_iteration_eliminated_by_peeling = 0;
  size->num_pure_calls_on_hot_path = 0;
  size->num_non_pure_calls_on_hot_path = 0;
  size->non_call_stmts_on_hot_path = 0;
  size->num_branches_on_hot_path = 0;
  size->constant_iv = 0;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);

  /* First pass: walk every block of the loop body and classify each
     statement's size contribution.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      /* Blocks dominated by EDGE_TO_CANCEL's source (other than the
	 source itself) are not executed in the last iteration, so they
	 do not count into last_iteration sizes.  */
      if (edge_to_cancel && body[i] != edge_to_cancel->src
	  && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
	after_exit = true;
      else
	after_exit = false;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
		 after_exit);

      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  int num = estimate_num_insns (stmt, &eni_size_weights);
	  bool likely_eliminated = false;
	  bool likely_eliminated_last = false;
	  bool likely_eliminated_peeled = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " size: %3i ", num);
	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
	    }

	  /* Look for reasons why we might optimize this stmt away.  */

	  if (!gimple_has_side_effects (stmt))
	    {
	      /* Exit conditional: gone in all peeled copies (the exit is
		 known not to be taken there).  */
	      if (exit && body[i] == exit->src
		  && stmt == last_stmt (exit->src))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " Exit condition will be eliminated "
			     "in peeled copies.\n");
		  likely_eliminated_peeled = true;
		}
	      /* The conditional feeding EDGE_TO_CANCEL: gone in the last
		 copy only.  */
	      if (edge_to_cancel && body[i] == edge_to_cancel->src
		  && stmt == last_stmt (edge_to_cancel->src))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " Exit condition will be eliminated "
			     "in last copy.\n");
		  likely_eliminated_last = true;
		}
	      /* Sets of IV variables.  */
	      if (gimple_code (stmt) == GIMPLE_ASSIGN
		  && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " Induction variable computation will"
			     " be folded away.\n");
		  likely_eliminated = true;
		}
	      /* Assignments of IV variables.  */
	      else if (gimple_code (stmt) == GIMPLE_ASSIGN
		       && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
		       && constant_after_peeling (gimple_assign_rhs1 (stmt),
						  stmt, loop)
		       && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
			   || constant_after_peeling (gimple_assign_rhs2 (stmt),
						      stmt, loop)))
		{
		  size->constant_iv = true;
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     " Constant expression will be folded away.\n");
		  likely_eliminated = true;
		}
	      /* Conditionals.  */
	      else if ((gimple_code (stmt) == GIMPLE_COND
			&& constant_after_peeling (gimple_cond_lhs (stmt), stmt,
						   loop)
			&& constant_after_peeling (gimple_cond_rhs (stmt), stmt,
						   loop)
			/* We don't simplify all constant compares so make sure
			   they are not both constant already.  See PR70288.  */
			&& (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
			    || ! is_gimple_min_invariant
				 (gimple_cond_rhs (stmt))))
		       || (gimple_code (stmt) == GIMPLE_SWITCH
			   && constant_after_peeling (gimple_switch_index (
							as_a <gswitch *>
							  (stmt)),
						      stmt, loop)
			   && ! is_gimple_min_invariant
				   (gimple_switch_index
				      (as_a <gswitch *> (stmt)))))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " Constant conditional.\n");
		  likely_eliminated = true;
		}
	    }

	  size->overall += num;
	  if (likely_eliminated || likely_eliminated_peeled)
	    size->eliminated_by_peeling += num;
	  if (!after_exit)
	    {
	      size->last_iteration += num;
	      if (likely_eliminated || likely_eliminated_last)
		size->last_iteration_eliminated_by_peeling += num;
	    }
	  /* Bail out early once the estimate provably exceeds UPPER_BOUND;
	     the 3/2 factor mirrors the 2/3 shrink applied by
	     estimated_unrolled_size.  */
	  if ((size->overall * 3 / 2 - size->eliminated_by_peeling
	       - size->last_iteration_eliminated_by_peeling) > upper_bound)
	    {
	      free (body);
	      path.release ();
	      return true;
	    }
	}
    }
  /* Second pass: collect call/branch statistics along the hot path.  */
  while (path.length ())
    {
      basic_block bb = path.pop ();
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (gimple_code (stmt) == GIMPLE_CALL
	      && !gimple_inexpensive_call_p (as_a <gcall *> (stmt)))
	    {
	      int flags = gimple_call_flags (stmt);
	      if (flags & (ECF_PURE | ECF_CONST))
		size->num_pure_calls_on_hot_path++;
	      else
		size->num_non_pure_calls_on_hot_path++;
	      size->num_branches_on_hot_path ++;
	    }
	  /* Count inexpensive calls as non-calls, because they will likely
	     expand inline.  */
	  else if (gimple_code (stmt) != GIMPLE_DEBUG)
	    size->non_call_stmts_on_hot_path++;
	  /* Conditionals/switches that do not fold after peeling count as
	     surviving branches — except the exit test itself.  */
	  if (((gimple_code (stmt) == GIMPLE_COND
		&& (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
		    || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
						loop)))
	       || (gimple_code (stmt) == GIMPLE_SWITCH
		   && !constant_after_peeling (gimple_switch_index (
						 as_a <gswitch *> (stmt)),
					       stmt, loop)))
	      && (!exit || bb != exit->src))
	    size->num_branches_on_hot_path++;
	}
    }
  path.release ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
	     size->eliminated_by_peeling, size->last_iteration,
	     size->last_iteration_eliminated_by_peeling);

  free (body);
  return false;
}
38938fd1498Szrj
39038fd1498Szrj /* Estimate number of insns of completely unrolled loop.
39138fd1498Szrj It is (NUNROLL + 1) * size of loop body with taking into account
39238fd1498Szrj the fact that in last copy everything after exit conditional
39338fd1498Szrj is dead and that some instructions will be eliminated after
39438fd1498Szrj peeling.
39538fd1498Szrj
39638fd1498Szrj Loop body is likely going to simplify further, this is difficult
39738fd1498Szrj to guess, we just decrease the result by 1/3. */
39838fd1498Szrj
39938fd1498Szrj static unsigned HOST_WIDE_INT
estimated_unrolled_size(struct loop_size * size,unsigned HOST_WIDE_INT nunroll)40038fd1498Szrj estimated_unrolled_size (struct loop_size *size,
40138fd1498Szrj unsigned HOST_WIDE_INT nunroll)
40238fd1498Szrj {
40338fd1498Szrj HOST_WIDE_INT unr_insns = ((nunroll)
40438fd1498Szrj * (HOST_WIDE_INT) (size->overall
40538fd1498Szrj - size->eliminated_by_peeling));
40638fd1498Szrj if (!nunroll)
40738fd1498Szrj unr_insns = 0;
40838fd1498Szrj unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
40938fd1498Szrj
41038fd1498Szrj unr_insns = unr_insns * 2 / 3;
41138fd1498Szrj if (unr_insns <= 0)
41238fd1498Szrj unr_insns = 1;
41338fd1498Szrj
41438fd1498Szrj return unr_insns;
41538fd1498Szrj }
41638fd1498Szrj
/* Loop LOOP is known to not loop.  See if there is an edge in the loop
   body that can be removed to make the loop always exit, while at
   the same time not making any code potentially executed
   during the last iteration dead.

   After complete unrolling we still may get rid of the conditional
   on the exit in the last copy even if we have no idea what it does.
   This is quite common case for loops of form

     int a[5];
     for (i=0;i<b;i++)
       a[i]=0;

   Here we prove the loop to iterate 5 times but we do not know
   it from induction variable.

   For now we handle only simple case where there is exit condition
   just before the latch block and the latch block contains no statements
   with side effect that may otherwise terminate the execution of loop
   (such as by EH or by terminating the program or longjmp).

   In the general case we may want to cancel the paths leading to statements
   loop-niter identified as having undefined effect in the last iteration.
   The other cases are hopefully rare and will be cleaned up later.  */

static edge
loop_edge_to_cancel (struct loop *loop)
{
  vec<edge> exits;
  unsigned i;
  edge edge_to_cancel;
  gimple_stmt_iterator gsi;

  /* We want only one predecessor of the loop latch.  */
  if (EDGE_COUNT (loop->latch->preds) > 1)
    return NULL;

  exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
    {
      /* Find the other edge than the loop exit
	 leaving the conditional.  */
      if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
	continue;
      if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
      else
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);

      /* We only can handle conditionals.  */
      if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      /* We should never have conditionals in the loop latch.  */
      gcc_assert (edge_to_cancel->dest != loop->header);

      /* Check that it leads to loop latch.  */
      if (edge_to_cancel->dest != loop->latch)
	continue;

      /* A candidate was found; release the exit vector before the early
	 returns below.  */
      exits.release ();

      /* Verify that the code in loop latch does nothing that may end program
	 execution without really reaching the exit.  This may include
	 non-pure/const function calls, EH statements, volatile ASMs etc.  */
      for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_has_side_effects (gsi_stmt (gsi)))
	  return NULL;
      return edge_to_cancel;
    }
  exits.release ();
  return NULL;
}
49138fd1498Szrj
/* Remove all tests for exits that are known to be taken after LOOP was
   peeled NPEELED times.  Put gcc_unreachable before every statement
   known to not be executed.  Return true if the CFG was changed.  */

static bool
remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
{
  struct nb_iter_bound *elt;
  bool changed = false;

  /* Walk the recorded iteration bounds of the loop.  */
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* If statement is known to be undefined after peeling, turn it
	 into unreachable (or trap when debugging experience is supposed
	 to be good).  */
      if (!elt->is_exit
	  && wi::ltu_p (elt->bound, npeeled))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
	  gcall *stmt = gimple_build_call
	      (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
	  gimple_set_location (stmt, gimple_location (elt->stmt));
	  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
	  /* Split so everything after the __builtin_unreachable call lands
	     in a separate (now unreachable) block.  */
	  split_block (gimple_bb (stmt), stmt);
	  changed = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced statement unreachable: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	}
      /* If we know the exit will be taken after peeling, update.  */
      else if (elt->is_exit
	       && wi::leu_p (elt->bound, npeeled))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced exit to be taken: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	  /* The exit may be either successor of the conditional; pick the
	     one that really leaves the loop.  */
	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);
	  exit_edge->probability = profile_probability::always ();
	  gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
	  /* Fold the exit condition to the constant that takes the exit.  */
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (cond_stmt);
	  else
	    gimple_cond_make_false (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}
55038fd1498Szrj
/* Remove all exits that are known to be never taken because of the loop
   bound discovered.  Return true if any exit condition was folded.  */

static bool
remove_redundant_iv_tests (struct loop *loop)
{
  struct nb_iter_bound *elt;
  bool changed = false;

  if (!loop->any_upper_bound)
    return false;
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* Exit is pointless if it won't be taken before loop reaches
	 upper bound.  */
      if (elt->is_exit && loop->any_upper_bound
	  && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);
	  struct tree_niter_desc niter;

	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);

	  /* Only when we know the actual number of iterations, not
	     just a bound, we can remove the exit.  */
	  if (!number_of_iterations_exit (loop, exit_edge,
					  &niter, false, false)
	      || !integer_onep (niter.assumptions)
	      || !integer_zerop (niter.may_be_zero)
	      || !niter.niter
	      || TREE_CODE (niter.niter) != INTEGER_CST
	      || !wi::ltu_p (loop->nb_iterations_upper_bound,
			     wi::to_widest (niter.niter)))
	    continue;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Removed pointless exit: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	  /* Fold the condition so the exit edge is never taken.  */
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_false (cond_stmt);
	  else
	    gimple_cond_make_true (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}
60438fd1498Szrj
/* Stores loops that will be unlooped and edges that will be removed
   after we process whole loop tree.  */
static vec<loop_p> loops_to_unloop;
static vec<int> loops_to_unloop_nunroll;
static vec<edge> edges_to_remove;
/* Stores loops that have been peeled.  */
static bitmap peeled_loops;
61238fd1498Szrj
/* Cancel all fully unrolled loops by putting __builtin_unreachable
   on the latch edge.
   We do it after all unrolling since unlooping moves basic blocks
   across loop boundaries trashing loop closed SSA form as well
   as SCEV info needed to be intact during unrolling.

   IRRED_INVALIDATED is used to bookkeep if information about
   irreducible regions may become invalid as a result
   of the transformation.
   LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case
   when we need to go into loop closed SSA form.  */

static void
unloop_loops (bitmap loop_closed_ssa_invalidated,
	      bool *irred_invalidated)
{
  while (loops_to_unloop.length ())
    {
      struct loop *loop = loops_to_unloop.pop ();
      int n_unroll = loops_to_unloop_nunroll.pop ();
      /* Save the latch and its edge properties; unloop destroys them.  */
      basic_block latch = loop->latch;
      edge latch_edge = loop_latch_edge (loop);
      int flags = latch_edge->flags;
      location_t locus = latch_edge->goto_locus;
      gcall *stmt;
      gimple_stmt_iterator gsi;

      remove_exits_and_undefined_stmts (loop, n_unroll);

      /* Unloop destroys the latch edge.  */
      unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);

      /* Create new basic block for the latch edge destination and wire
	 it in.  */
      stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
      latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
      latch_edge->probability = profile_probability::never ();
      latch_edge->flags |= flags;
      latch_edge->goto_locus = locus;

      /* The new block is outside any loop and never executed.  */
      add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
      latch_edge->dest->count = profile_count::zero ();
      set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);

      gsi = gsi_start_bb (latch_edge->dest);
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    }
  loops_to_unloop.release ();
  loops_to_unloop_nunroll.release ();

  /* Remove edges in peeled copies.  Given remove_path removes dominated
     regions we need to cope with removal of already removed paths.  */
  unsigned i;
  edge e;
  auto_vec<int, 20> src_bbs;
  src_bbs.reserve_exact (edges_to_remove.length ());
  /* Record source block indices first; an earlier remove_path call may
     delete blocks (and thus edges) later entries point into.  */
  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
    src_bbs.quick_push (e->src->index);
  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
    if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
      {
	bool ok = remove_path (e, irred_invalidated,
			       loop_closed_ssa_invalidated);
	gcc_assert (ok);
      }
  edges_to_remove.release ();
}
68038fd1498Szrj
68138fd1498Szrj /* Tries to unroll LOOP completely, i.e. NITER times.
68238fd1498Szrj UL determines which loops we are allowed to unroll.
68338fd1498Szrj EXIT is the exit of the loop that should be eliminated.
68438fd1498Szrj MAXITER specfy bound on number of iterations, -1 if it is
68538fd1498Szrj not known or too large for HOST_WIDE_INT. The location
68638fd1498Szrj LOCUS corresponding to the loop is used when emitting
68738fd1498Szrj a summary of the unroll to the dump file. */
68838fd1498Szrj
68938fd1498Szrj static bool
try_unroll_loop_completely(struct loop * loop,edge exit,tree niter,bool may_be_zero,enum unroll_level ul,HOST_WIDE_INT maxiter,location_t locus,bool allow_peel)69038fd1498Szrj try_unroll_loop_completely (struct loop *loop,
69138fd1498Szrj edge exit, tree niter, bool may_be_zero,
69238fd1498Szrj enum unroll_level ul,
69338fd1498Szrj HOST_WIDE_INT maxiter,
69438fd1498Szrj location_t locus, bool allow_peel)
69538fd1498Szrj {
69638fd1498Szrj unsigned HOST_WIDE_INT n_unroll = 0;
69738fd1498Szrj bool n_unroll_found = false;
69838fd1498Szrj edge edge_to_cancel = NULL;
69938fd1498Szrj
70038fd1498Szrj /* See if we proved number of iterations to be low constant.
70138fd1498Szrj
70238fd1498Szrj EXIT is an edge that will be removed in all but last iteration of
70338fd1498Szrj the loop.
70438fd1498Szrj
70538fd1498Szrj EDGE_TO_CACNEL is an edge that will be removed from the last iteration
70638fd1498Szrj of the unrolled sequence and is expected to make the final loop not
70738fd1498Szrj rolling.
70838fd1498Szrj
70938fd1498Szrj If the number of execution of loop is determined by standard induction
71038fd1498Szrj variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
71138fd1498Szrj from the iv test. */
71238fd1498Szrj if (tree_fits_uhwi_p (niter))
71338fd1498Szrj {
71438fd1498Szrj n_unroll = tree_to_uhwi (niter);
71538fd1498Szrj n_unroll_found = true;
71638fd1498Szrj edge_to_cancel = EDGE_SUCC (exit->src, 0);
71738fd1498Szrj if (edge_to_cancel == exit)
71838fd1498Szrj edge_to_cancel = EDGE_SUCC (exit->src, 1);
71938fd1498Szrj }
72038fd1498Szrj /* We do not know the number of iterations and thus we can not eliminate
72138fd1498Szrj the EXIT edge. */
72238fd1498Szrj else
72338fd1498Szrj exit = NULL;
72438fd1498Szrj
72538fd1498Szrj /* See if we can improve our estimate by using recorded loop bounds. */
72638fd1498Szrj if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH)
72738fd1498Szrj && maxiter >= 0
72838fd1498Szrj && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
72938fd1498Szrj {
73038fd1498Szrj n_unroll = maxiter;
73138fd1498Szrj n_unroll_found = true;
73238fd1498Szrj /* Loop terminates before the IV variable test, so we can not
73338fd1498Szrj remove it in the last iteration. */
73438fd1498Szrj edge_to_cancel = NULL;
73538fd1498Szrj }
73638fd1498Szrj
73738fd1498Szrj if (!n_unroll_found)
73838fd1498Szrj return false;
73938fd1498Szrj
74038fd1498Szrj if (!loop->unroll
74138fd1498Szrj && n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
74238fd1498Szrj {
74338fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
74438fd1498Szrj fprintf (dump_file, "Not unrolling loop %d "
74538fd1498Szrj "(--param max-completely-peel-times limit reached).\n",
74638fd1498Szrj loop->num);
74738fd1498Szrj return false;
74838fd1498Szrj }
74938fd1498Szrj
75038fd1498Szrj if (!edge_to_cancel)
75138fd1498Szrj edge_to_cancel = loop_edge_to_cancel (loop);
75238fd1498Szrj
75338fd1498Szrj if (n_unroll)
75438fd1498Szrj {
75538fd1498Szrj if (ul == UL_SINGLE_ITER)
75638fd1498Szrj return false;
75738fd1498Szrj
75838fd1498Szrj if (loop->unroll)
75938fd1498Szrj {
76038fd1498Szrj /* If the unrolling factor is too large, bail out. */
76138fd1498Szrj if (n_unroll > (unsigned)loop->unroll)
76238fd1498Szrj {
76338fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
76438fd1498Szrj fprintf (dump_file,
76538fd1498Szrj "Not unrolling loop %d: "
76638fd1498Szrj "user didn't want it unrolled completely.\n",
76738fd1498Szrj loop->num);
76838fd1498Szrj return false;
76938fd1498Szrj }
77038fd1498Szrj }
77138fd1498Szrj else
77238fd1498Szrj {
77338fd1498Szrj struct loop_size size;
77438fd1498Szrj /* EXIT can be removed only if we are sure it passes first N_UNROLL
77538fd1498Szrj iterations. */
77638fd1498Szrj bool remove_exit = (exit && niter
77738fd1498Szrj && TREE_CODE (niter) == INTEGER_CST
77838fd1498Szrj && wi::leu_p (n_unroll, wi::to_widest (niter)));
77938fd1498Szrj bool large
78038fd1498Szrj = tree_estimate_loop_size
78138fd1498Szrj (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
78238fd1498Szrj PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
78338fd1498Szrj if (large)
78438fd1498Szrj {
78538fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
78638fd1498Szrj fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
78738fd1498Szrj loop->num);
78838fd1498Szrj return false;
78938fd1498Szrj }
79038fd1498Szrj
79138fd1498Szrj unsigned HOST_WIDE_INT ninsns = size.overall;
79238fd1498Szrj unsigned HOST_WIDE_INT unr_insns
79338fd1498Szrj = estimated_unrolled_size (&size, n_unroll);
79438fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
79538fd1498Szrj {
79638fd1498Szrj fprintf (dump_file, " Loop size: %d\n", (int) ninsns);
79738fd1498Szrj fprintf (dump_file, " Estimated size after unrolling: %d\n",
79838fd1498Szrj (int) unr_insns);
79938fd1498Szrj }
80038fd1498Szrj
80138fd1498Szrj /* If the code is going to shrink, we don't need to be extra
80238fd1498Szrj cautious on guessing if the unrolling is going to be
80338fd1498Szrj profitable. */
80438fd1498Szrj if (unr_insns
80538fd1498Szrj /* If there is IV variable that will become constant, we
80638fd1498Szrj save one instruction in the loop prologue we do not
80738fd1498Szrj account otherwise. */
80838fd1498Szrj <= ninsns + (size.constant_iv != false))
80938fd1498Szrj ;
81038fd1498Szrj /* We unroll only inner loops, because we do not consider it
81138fd1498Szrj profitable otheriwse. We still can cancel loopback edge
81238fd1498Szrj of not rolling loop; this is always a good idea. */
81338fd1498Szrj else if (ul == UL_NO_GROWTH)
81438fd1498Szrj {
81538fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
81638fd1498Szrj fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
81738fd1498Szrj loop->num);
81838fd1498Szrj return false;
81938fd1498Szrj }
82038fd1498Szrj /* Outer loops tend to be less interesting candidates for
82138fd1498Szrj complete unrolling unless we can do a lot of propagation
82238fd1498Szrj into the inner loop body. For now we disable outer loop
82338fd1498Szrj unrolling when the code would grow. */
82438fd1498Szrj else if (loop->inner)
82538fd1498Szrj {
82638fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
82738fd1498Szrj fprintf (dump_file, "Not unrolling loop %d: "
82838fd1498Szrj "it is not innermost and code would grow.\n",
82938fd1498Szrj loop->num);
83038fd1498Szrj return false;
83138fd1498Szrj }
83238fd1498Szrj /* If there is call on a hot path through the loop, then
83338fd1498Szrj there is most probably not much to optimize. */
83438fd1498Szrj else if (size.num_non_pure_calls_on_hot_path)
83538fd1498Szrj {
83638fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
83738fd1498Szrj fprintf (dump_file, "Not unrolling loop %d: "
83838fd1498Szrj "contains call and code would grow.\n",
83938fd1498Szrj loop->num);
84038fd1498Szrj return false;
84138fd1498Szrj }
84238fd1498Szrj /* If there is pure/const call in the function, then we can
84338fd1498Szrj still optimize the unrolled loop body if it contains some
84438fd1498Szrj other interesting code than the calls and code storing or
84538fd1498Szrj cumulating the return value. */
84638fd1498Szrj else if (size.num_pure_calls_on_hot_path
84738fd1498Szrj /* One IV increment, one test, one ivtmp store and
84838fd1498Szrj one useful stmt. That is about minimal loop
84938fd1498Szrj doing pure call. */
85038fd1498Szrj && (size.non_call_stmts_on_hot_path
85138fd1498Szrj <= 3 + size.num_pure_calls_on_hot_path))
85238fd1498Szrj {
85338fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
85438fd1498Szrj fprintf (dump_file, "Not unrolling loop %d: "
85538fd1498Szrj "contains just pure calls and code would grow.\n",
85638fd1498Szrj loop->num);
85738fd1498Szrj return false;
85838fd1498Szrj }
85938fd1498Szrj /* Complete unrolling is major win when control flow is
86038fd1498Szrj removed and one big basic block is created. If the loop
86138fd1498Szrj contains control flow the optimization may still be a win
86238fd1498Szrj because of eliminating the loop overhead but it also may
86338fd1498Szrj blow the branch predictor tables. Limit number of
86438fd1498Szrj branches on the hot path through the peeled sequence. */
86538fd1498Szrj else if (size.num_branches_on_hot_path * (int)n_unroll
86638fd1498Szrj > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
86738fd1498Szrj {
86838fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
86938fd1498Szrj fprintf (dump_file, "Not unrolling loop %d: "
87038fd1498Szrj "number of branches on hot path in the unrolled "
87138fd1498Szrj "sequence reaches --param max-peel-branches limit.\n",
87238fd1498Szrj loop->num);
87338fd1498Szrj return false;
87438fd1498Szrj }
87538fd1498Szrj else if (unr_insns
87638fd1498Szrj > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
87738fd1498Szrj {
87838fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
87938fd1498Szrj fprintf (dump_file, "Not unrolling loop %d: "
88038fd1498Szrj "number of insns in the unrolled sequence reaches "
88138fd1498Szrj "--param max-completely-peeled-insns limit.\n",
88238fd1498Szrj loop->num);
88338fd1498Szrj return false;
88438fd1498Szrj }
88538fd1498Szrj }
88638fd1498Szrj
88738fd1498Szrj initialize_original_copy_tables ();
88838fd1498Szrj auto_sbitmap wont_exit (n_unroll + 1);
88938fd1498Szrj if (exit && niter
89038fd1498Szrj && TREE_CODE (niter) == INTEGER_CST
89138fd1498Szrj && wi::leu_p (n_unroll, wi::to_widest (niter)))
89238fd1498Szrj {
89338fd1498Szrj bitmap_ones (wont_exit);
89438fd1498Szrj if (wi::eq_p (wi::to_widest (niter), n_unroll)
89538fd1498Szrj || edge_to_cancel)
89638fd1498Szrj bitmap_clear_bit (wont_exit, 0);
89738fd1498Szrj }
89838fd1498Szrj else
89938fd1498Szrj {
90038fd1498Szrj exit = NULL;
90138fd1498Szrj bitmap_clear (wont_exit);
90238fd1498Szrj }
90338fd1498Szrj if (may_be_zero)
90438fd1498Szrj bitmap_clear_bit (wont_exit, 1);
90538fd1498Szrj
90638fd1498Szrj if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
90738fd1498Szrj n_unroll, wont_exit,
90838fd1498Szrj exit, &edges_to_remove,
90938fd1498Szrj DLTHE_FLAG_UPDATE_FREQ
91038fd1498Szrj | DLTHE_FLAG_COMPLETTE_PEEL))
91138fd1498Szrj {
91238fd1498Szrj free_original_copy_tables ();
91338fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
91438fd1498Szrj fprintf (dump_file, "Failed to duplicate the loop\n");
91538fd1498Szrj return false;
91638fd1498Szrj }
91738fd1498Szrj
91838fd1498Szrj free_original_copy_tables ();
91938fd1498Szrj }
92038fd1498Szrj
92138fd1498Szrj /* Remove the conditional from the last copy of the loop. */
92238fd1498Szrj if (edge_to_cancel)
92338fd1498Szrj {
92438fd1498Szrj gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
92538fd1498Szrj force_edge_cold (edge_to_cancel, true);
92638fd1498Szrj if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
92738fd1498Szrj gimple_cond_make_false (cond);
92838fd1498Szrj else
92938fd1498Szrj gimple_cond_make_true (cond);
93038fd1498Szrj update_stmt (cond);
93138fd1498Szrj /* Do not remove the path, as doing so may remove outer loop and
93238fd1498Szrj confuse bookkeeping code in tree_unroll_loops_completely. */
93338fd1498Szrj }
93438fd1498Szrj
93538fd1498Szrj /* Store the loop for later unlooping and exit removal. */
93638fd1498Szrj loops_to_unloop.safe_push (loop);
93738fd1498Szrj loops_to_unloop_nunroll.safe_push (n_unroll);
93838fd1498Szrj
93938fd1498Szrj if (dump_enabled_p ())
94038fd1498Szrj {
94138fd1498Szrj if (!n_unroll)
94238fd1498Szrj dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
94338fd1498Szrj "loop turned into non-loop; it never loops\n");
94438fd1498Szrj else
94538fd1498Szrj {
94638fd1498Szrj dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
94738fd1498Szrj "loop with %d iterations completely unrolled",
94838fd1498Szrj (int) n_unroll);
94938fd1498Szrj if (loop->header->count.initialized_p ())
95038fd1498Szrj dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
95138fd1498Szrj " (header execution count %d)",
95238fd1498Szrj (int)loop->header->count.to_gcov_type ());
95338fd1498Szrj dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
95438fd1498Szrj }
95538fd1498Szrj }
95638fd1498Szrj
95738fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
95838fd1498Szrj {
95938fd1498Szrj if (exit)
96038fd1498Szrj fprintf (dump_file, "Exit condition of peeled iterations was "
96138fd1498Szrj "eliminated.\n");
96238fd1498Szrj if (edge_to_cancel)
96338fd1498Szrj fprintf (dump_file, "Last iteration exit edge was proved true.\n");
96438fd1498Szrj else
96538fd1498Szrj fprintf (dump_file, "Latch of last iteration was marked by "
96638fd1498Szrj "__builtin_unreachable ().\n");
96738fd1498Szrj }
96838fd1498Szrj
96938fd1498Szrj return true;
97038fd1498Szrj }
97138fd1498Szrj
97238fd1498Szrj /* Return number of instructions after peeling. */
97338fd1498Szrj static unsigned HOST_WIDE_INT
estimated_peeled_sequence_size(struct loop_size * size,unsigned HOST_WIDE_INT npeel)97438fd1498Szrj estimated_peeled_sequence_size (struct loop_size *size,
97538fd1498Szrj unsigned HOST_WIDE_INT npeel)
97638fd1498Szrj {
97738fd1498Szrj return MAX (npeel * (HOST_WIDE_INT) (size->overall
97838fd1498Szrj - size->eliminated_by_peeling), 1);
97938fd1498Szrj }
98038fd1498Szrj
98138fd1498Szrj /* If the loop is expected to iterate N times and is
98238fd1498Szrj small enough, duplicate the loop body N+1 times before
98338fd1498Szrj the loop itself. This way the hot path will never
98438fd1498Szrj enter the loop.
98538fd1498Szrj Parameters are the same as for try_unroll_loops_completely */
98638fd1498Szrj
98738fd1498Szrj static bool
try_peel_loop(struct loop * loop,edge exit,tree niter,bool may_be_zero,HOST_WIDE_INT maxiter)98838fd1498Szrj try_peel_loop (struct loop *loop,
98938fd1498Szrj edge exit, tree niter, bool may_be_zero,
99038fd1498Szrj HOST_WIDE_INT maxiter)
99138fd1498Szrj {
99238fd1498Szrj HOST_WIDE_INT npeel;
99338fd1498Szrj struct loop_size size;
99438fd1498Szrj int peeled_size;
99538fd1498Szrj
99638fd1498Szrj if (!flag_peel_loops
99738fd1498Szrj || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
99838fd1498Szrj || !peeled_loops)
99938fd1498Szrj return false;
100038fd1498Szrj
100138fd1498Szrj if (bitmap_bit_p (peeled_loops, loop->num))
100238fd1498Szrj {
100338fd1498Szrj if (dump_file)
100438fd1498Szrj fprintf (dump_file, "Not peeling: loop is already peeled\n");
100538fd1498Szrj return false;
100638fd1498Szrj }
100738fd1498Szrj
100838fd1498Szrj /* We don't peel loops that will be unrolled as this can duplicate a
100938fd1498Szrj loop more times than the user requested. */
101038fd1498Szrj if (loop->unroll)
101138fd1498Szrj {
101238fd1498Szrj if (dump_file)
101338fd1498Szrj fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
101438fd1498Szrj return false;
101538fd1498Szrj }
101638fd1498Szrj
101738fd1498Szrj /* Peel only innermost loops.
101838fd1498Szrj While the code is perfectly capable of peeling non-innermost loops,
101938fd1498Szrj the heuristics would probably need some improvements. */
102038fd1498Szrj if (loop->inner)
102138fd1498Szrj {
102238fd1498Szrj if (dump_file)
102338fd1498Szrj fprintf (dump_file, "Not peeling: outer loop\n");
102438fd1498Szrj return false;
102538fd1498Szrj }
102638fd1498Szrj
102738fd1498Szrj if (!optimize_loop_for_speed_p (loop))
102838fd1498Szrj {
102938fd1498Szrj if (dump_file)
103038fd1498Szrj fprintf (dump_file, "Not peeling: cold loop\n");
103138fd1498Szrj return false;
103238fd1498Szrj }
103338fd1498Szrj
103438fd1498Szrj /* Check if there is an estimate on the number of iterations. */
103538fd1498Szrj npeel = estimated_loop_iterations_int (loop);
103638fd1498Szrj if (npeel < 0)
103738fd1498Szrj npeel = likely_max_loop_iterations_int (loop);
103838fd1498Szrj if (npeel < 0)
103938fd1498Szrj {
104038fd1498Szrj if (dump_file)
104138fd1498Szrj fprintf (dump_file, "Not peeling: number of iterations is not "
104238fd1498Szrj "estimated\n");
104338fd1498Szrj return false;
104438fd1498Szrj }
104538fd1498Szrj if (maxiter >= 0 && maxiter <= npeel)
104638fd1498Szrj {
104738fd1498Szrj if (dump_file)
104838fd1498Szrj fprintf (dump_file, "Not peeling: upper bound is known so can "
104938fd1498Szrj "unroll completely\n");
105038fd1498Szrj return false;
105138fd1498Szrj }
105238fd1498Szrj
105338fd1498Szrj /* We want to peel estimated number of iterations + 1 (so we never
105438fd1498Szrj enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES
105538fd1498Szrj and be sure to avoid overflows. */
105638fd1498Szrj if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
105738fd1498Szrj {
105838fd1498Szrj if (dump_file)
105938fd1498Szrj fprintf (dump_file, "Not peeling: rolls too much "
106038fd1498Szrj "(%i + 1 > --param max-peel-times)\n", (int) npeel);
106138fd1498Szrj return false;
106238fd1498Szrj }
106338fd1498Szrj npeel++;
106438fd1498Szrj
106538fd1498Szrj /* Check peeled loops size. */
106638fd1498Szrj tree_estimate_loop_size (loop, exit, NULL, &size,
106738fd1498Szrj PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
106838fd1498Szrj if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
106938fd1498Szrj > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
107038fd1498Szrj {
107138fd1498Szrj if (dump_file)
107238fd1498Szrj fprintf (dump_file, "Not peeling: peeled sequence size is too large "
107338fd1498Szrj "(%i insns > --param max-peel-insns)", peeled_size);
107438fd1498Szrj return false;
107538fd1498Szrj }
107638fd1498Szrj
107738fd1498Szrj /* Duplicate possibly eliminating the exits. */
107838fd1498Szrj initialize_original_copy_tables ();
107938fd1498Szrj auto_sbitmap wont_exit (npeel + 1);
108038fd1498Szrj if (exit && niter
108138fd1498Szrj && TREE_CODE (niter) == INTEGER_CST
108238fd1498Szrj && wi::leu_p (npeel, wi::to_widest (niter)))
108338fd1498Szrj {
108438fd1498Szrj bitmap_ones (wont_exit);
108538fd1498Szrj bitmap_clear_bit (wont_exit, 0);
108638fd1498Szrj }
108738fd1498Szrj else
108838fd1498Szrj {
108938fd1498Szrj exit = NULL;
109038fd1498Szrj bitmap_clear (wont_exit);
109138fd1498Szrj }
109238fd1498Szrj if (may_be_zero)
109338fd1498Szrj bitmap_clear_bit (wont_exit, 1);
109438fd1498Szrj if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
109538fd1498Szrj npeel, wont_exit,
109638fd1498Szrj exit, &edges_to_remove,
109738fd1498Szrj DLTHE_FLAG_UPDATE_FREQ))
109838fd1498Szrj {
109938fd1498Szrj free_original_copy_tables ();
110038fd1498Szrj return false;
110138fd1498Szrj }
110238fd1498Szrj free_original_copy_tables ();
110338fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
110438fd1498Szrj {
110538fd1498Szrj fprintf (dump_file, "Peeled loop %d, %i times.\n",
110638fd1498Szrj loop->num, (int) npeel);
110738fd1498Szrj }
110838fd1498Szrj if (loop->any_estimate)
110938fd1498Szrj {
111038fd1498Szrj if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
111138fd1498Szrj loop->nb_iterations_estimate -= npeel;
111238fd1498Szrj else
111338fd1498Szrj loop->nb_iterations_estimate = 0;
111438fd1498Szrj }
111538fd1498Szrj if (loop->any_upper_bound)
111638fd1498Szrj {
111738fd1498Szrj if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound))
111838fd1498Szrj loop->nb_iterations_upper_bound -= npeel;
111938fd1498Szrj else
112038fd1498Szrj loop->nb_iterations_upper_bound = 0;
112138fd1498Szrj }
112238fd1498Szrj if (loop->any_likely_upper_bound)
112338fd1498Szrj {
112438fd1498Szrj if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound))
112538fd1498Szrj loop->nb_iterations_likely_upper_bound -= npeel;
112638fd1498Szrj else
112738fd1498Szrj {
112838fd1498Szrj loop->any_estimate = true;
112938fd1498Szrj loop->nb_iterations_estimate = 0;
113038fd1498Szrj loop->nb_iterations_likely_upper_bound = 0;
113138fd1498Szrj }
113238fd1498Szrj }
113338fd1498Szrj profile_count entry_count = profile_count::zero ();
113438fd1498Szrj
113538fd1498Szrj edge e;
113638fd1498Szrj edge_iterator ei;
113738fd1498Szrj FOR_EACH_EDGE (e, ei, loop->header->preds)
113838fd1498Szrj if (e->src != loop->latch)
113938fd1498Szrj {
114038fd1498Szrj if (e->src->count.initialized_p ())
114138fd1498Szrj entry_count = e->src->count + e->src->count;
114238fd1498Szrj gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
114338fd1498Szrj }
114438fd1498Szrj profile_probability p = profile_probability::very_unlikely ();
114538fd1498Szrj p = entry_count.probability_in (loop->header->count);
114638fd1498Szrj scale_loop_profile (loop, p, 0);
114738fd1498Szrj bitmap_set_bit (peeled_loops, loop->num);
114838fd1498Szrj return true;
114938fd1498Szrj }
115038fd1498Szrj /* Adds a canonical induction variable to LOOP if suitable.
115138fd1498Szrj CREATE_IV is true if we may create a new iv. UL determines
115238fd1498Szrj which loops we are allowed to completely unroll. If TRY_EVAL is true, we try
115338fd1498Szrj to determine the number of iterations of a loop by direct evaluation.
115438fd1498Szrj Returns true if cfg is changed. */
115538fd1498Szrj
115638fd1498Szrj static bool
canonicalize_loop_induction_variables(struct loop * loop,bool create_iv,enum unroll_level ul,bool try_eval,bool allow_peel)115738fd1498Szrj canonicalize_loop_induction_variables (struct loop *loop,
115838fd1498Szrj bool create_iv, enum unroll_level ul,
115938fd1498Szrj bool try_eval, bool allow_peel)
116038fd1498Szrj {
116138fd1498Szrj edge exit = NULL;
116238fd1498Szrj tree niter;
116338fd1498Szrj HOST_WIDE_INT maxiter;
116438fd1498Szrj bool modified = false;
116538fd1498Szrj location_t locus = UNKNOWN_LOCATION;
116638fd1498Szrj struct tree_niter_desc niter_desc;
116738fd1498Szrj bool may_be_zero = false;
116838fd1498Szrj
116938fd1498Szrj /* For unrolling allow conditional constant or zero iterations, thus
117038fd1498Szrj perform loop-header copying on-the-fly. */
117138fd1498Szrj exit = single_exit (loop);
117238fd1498Szrj niter = chrec_dont_know;
117338fd1498Szrj if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
117438fd1498Szrj {
117538fd1498Szrj niter = niter_desc.niter;
117638fd1498Szrj may_be_zero
117738fd1498Szrj = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
117838fd1498Szrj }
117938fd1498Szrj if (TREE_CODE (niter) == INTEGER_CST)
1180*e215fc28Szrj locus = gimple_location_safe (last_stmt (exit->src));
118138fd1498Szrj else
118238fd1498Szrj {
118338fd1498Szrj /* For non-constant niter fold may_be_zero into niter again. */
118438fd1498Szrj if (may_be_zero)
118538fd1498Szrj {
118638fd1498Szrj if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
118738fd1498Szrj niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
118838fd1498Szrj niter_desc.may_be_zero,
118938fd1498Szrj build_int_cst (TREE_TYPE (niter), 0), niter);
119038fd1498Szrj else
119138fd1498Szrj niter = chrec_dont_know;
119238fd1498Szrj may_be_zero = false;
119338fd1498Szrj }
119438fd1498Szrj
119538fd1498Szrj /* If the loop has more than one exit, try checking all of them
119638fd1498Szrj for # of iterations determinable through scev. */
119738fd1498Szrj if (!exit)
119838fd1498Szrj niter = find_loop_niter (loop, &exit);
119938fd1498Szrj
120038fd1498Szrj /* Finally if everything else fails, try brute force evaluation. */
120138fd1498Szrj if (try_eval
120238fd1498Szrj && (chrec_contains_undetermined (niter)
120338fd1498Szrj || TREE_CODE (niter) != INTEGER_CST))
120438fd1498Szrj niter = find_loop_niter_by_eval (loop, &exit);
120538fd1498Szrj
120638fd1498Szrj if (exit)
1207*e215fc28Szrj locus = gimple_location_safe (last_stmt (exit->src));
120838fd1498Szrj
120938fd1498Szrj if (TREE_CODE (niter) != INTEGER_CST)
121038fd1498Szrj exit = NULL;
121138fd1498Szrj }
121238fd1498Szrj
121338fd1498Szrj /* We work exceptionally hard here to estimate the bound
121438fd1498Szrj by find_loop_niter_by_eval. Be sure to keep it for future. */
121538fd1498Szrj if (niter && TREE_CODE (niter) == INTEGER_CST)
121638fd1498Szrj {
121738fd1498Szrj record_niter_bound (loop, wi::to_widest (niter),
121838fd1498Szrj exit == single_likely_exit (loop), true);
121938fd1498Szrj }
122038fd1498Szrj
122138fd1498Szrj /* Force re-computation of loop bounds so we can remove redundant exits. */
122238fd1498Szrj maxiter = max_loop_iterations_int (loop);
122338fd1498Szrj
122438fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS)
122538fd1498Szrj && TREE_CODE (niter) == INTEGER_CST)
122638fd1498Szrj {
122738fd1498Szrj fprintf (dump_file, "Loop %d iterates ", loop->num);
122838fd1498Szrj print_generic_expr (dump_file, niter, TDF_SLIM);
122938fd1498Szrj fprintf (dump_file, " times.\n");
123038fd1498Szrj }
123138fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS)
123238fd1498Szrj && maxiter >= 0)
123338fd1498Szrj {
123438fd1498Szrj fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
123538fd1498Szrj (int)maxiter);
123638fd1498Szrj }
123738fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS)
123838fd1498Szrj && likely_max_loop_iterations_int (loop) >= 0)
123938fd1498Szrj {
124038fd1498Szrj fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
124138fd1498Szrj loop->num, (int)likely_max_loop_iterations_int (loop));
124238fd1498Szrj }
124338fd1498Szrj
124438fd1498Szrj /* Remove exits that are known to be never taken based on loop bound.
124538fd1498Szrj Needs to be called after compilation of max_loop_iterations_int that
124638fd1498Szrj populates the loop bounds. */
124738fd1498Szrj modified |= remove_redundant_iv_tests (loop);
124838fd1498Szrj
124938fd1498Szrj if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
125038fd1498Szrj maxiter, locus, allow_peel))
125138fd1498Szrj return true;
125238fd1498Szrj
125338fd1498Szrj if (create_iv
125438fd1498Szrj && niter && !chrec_contains_undetermined (niter)
125538fd1498Szrj && exit && just_once_each_iteration_p (loop, exit->src))
125638fd1498Szrj {
125738fd1498Szrj tree iv_niter = niter;
125838fd1498Szrj if (may_be_zero)
125938fd1498Szrj {
126038fd1498Szrj if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
126138fd1498Szrj iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
126238fd1498Szrj niter_desc.may_be_zero,
126338fd1498Szrj build_int_cst (TREE_TYPE (iv_niter), 0),
126438fd1498Szrj iv_niter);
126538fd1498Szrj else
126638fd1498Szrj iv_niter = NULL_TREE;
126738fd1498Szrj }
126838fd1498Szrj if (iv_niter)
126938fd1498Szrj create_canonical_iv (loop, exit, iv_niter);
127038fd1498Szrj }
127138fd1498Szrj
127238fd1498Szrj if (ul == UL_ALL)
127338fd1498Szrj modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
127438fd1498Szrj
127538fd1498Szrj return modified;
127638fd1498Szrj }
127738fd1498Szrj
127838fd1498Szrj /* The main entry point of the pass. Adds canonical induction variables
127938fd1498Szrj to the suitable loops. */
128038fd1498Szrj
128138fd1498Szrj unsigned int
canonicalize_induction_variables(void)128238fd1498Szrj canonicalize_induction_variables (void)
128338fd1498Szrj {
128438fd1498Szrj struct loop *loop;
128538fd1498Szrj bool changed = false;
128638fd1498Szrj bool irred_invalidated = false;
128738fd1498Szrj bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
128838fd1498Szrj
128938fd1498Szrj estimate_numbers_of_iterations (cfun);
129038fd1498Szrj
129138fd1498Szrj FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
129238fd1498Szrj {
129338fd1498Szrj changed |= canonicalize_loop_induction_variables (loop,
129438fd1498Szrj true, UL_SINGLE_ITER,
129538fd1498Szrj true, false);
129638fd1498Szrj }
129738fd1498Szrj gcc_assert (!need_ssa_update_p (cfun));
129838fd1498Szrj
129938fd1498Szrj unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
130038fd1498Szrj if (irred_invalidated
130138fd1498Szrj && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
130238fd1498Szrj mark_irreducible_loops ();
130338fd1498Szrj
130438fd1498Szrj /* Clean up the information about numbers of iterations, since brute force
130538fd1498Szrj evaluation could reveal new information. */
130638fd1498Szrj free_numbers_of_iterations_estimates (cfun);
130738fd1498Szrj scev_reset ();
130838fd1498Szrj
130938fd1498Szrj if (!bitmap_empty_p (loop_closed_ssa_invalidated))
131038fd1498Szrj {
131138fd1498Szrj gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
131238fd1498Szrj rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
131338fd1498Szrj }
131438fd1498Szrj BITMAP_FREE (loop_closed_ssa_invalidated);
131538fd1498Szrj
131638fd1498Szrj if (changed)
131738fd1498Szrj return TODO_cleanup_cfg;
131838fd1498Szrj return 0;
131938fd1498Szrj }
132038fd1498Szrj
132138fd1498Szrj /* Propagate constant SSA_NAMEs defined in basic block BB. */
132238fd1498Szrj
132338fd1498Szrj static void
propagate_constants_for_unrolling(basic_block bb)132438fd1498Szrj propagate_constants_for_unrolling (basic_block bb)
132538fd1498Szrj {
132638fd1498Szrj /* Look for degenerate PHI nodes with constant argument. */
132738fd1498Szrj for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
132838fd1498Szrj {
132938fd1498Szrj gphi *phi = gsi.phi ();
133038fd1498Szrj tree result = gimple_phi_result (phi);
133138fd1498Szrj tree arg = gimple_phi_arg_def (phi, 0);
133238fd1498Szrj
133338fd1498Szrj if (! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (result)
133438fd1498Szrj && gimple_phi_num_args (phi) == 1
133538fd1498Szrj && CONSTANT_CLASS_P (arg))
133638fd1498Szrj {
133738fd1498Szrj replace_uses_by (result, arg);
133838fd1498Szrj gsi_remove (&gsi, true);
133938fd1498Szrj release_ssa_name (result);
134038fd1498Szrj }
134138fd1498Szrj else
134238fd1498Szrj gsi_next (&gsi);
134338fd1498Szrj }
134438fd1498Szrj
134538fd1498Szrj /* Look for assignments to SSA names with constant RHS. */
134638fd1498Szrj for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
134738fd1498Szrj {
134838fd1498Szrj gimple *stmt = gsi_stmt (gsi);
134938fd1498Szrj tree lhs;
135038fd1498Szrj
135138fd1498Szrj if (is_gimple_assign (stmt)
135238fd1498Szrj && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_constant
135338fd1498Szrj && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
135438fd1498Szrj && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
135538fd1498Szrj {
135638fd1498Szrj replace_uses_by (lhs, gimple_assign_rhs1 (stmt));
135738fd1498Szrj gsi_remove (&gsi, true);
135838fd1498Szrj release_ssa_name (lhs);
135938fd1498Szrj }
136038fd1498Szrj else
136138fd1498Szrj gsi_next (&gsi);
136238fd1498Szrj }
136338fd1498Szrj }
136438fd1498Szrj
136538fd1498Szrj /* Process loops from innermost to outer, stopping at the innermost
136638fd1498Szrj loop we unrolled. */
136738fd1498Szrj
136838fd1498Szrj static bool
tree_unroll_loops_completely_1(bool may_increase_size,bool unroll_outer,bitmap father_bbs,struct loop * loop)136938fd1498Szrj tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
137038fd1498Szrj bitmap father_bbs, struct loop *loop)
137138fd1498Szrj {
137238fd1498Szrj struct loop *loop_father;
137338fd1498Szrj bool changed = false;
137438fd1498Szrj struct loop *inner;
137538fd1498Szrj enum unroll_level ul;
137638fd1498Szrj unsigned num = number_of_loops (cfun);
137738fd1498Szrj
137838fd1498Szrj /* Process inner loops first. Don't walk loops added by the recursive
137938fd1498Szrj calls because SSA form is not up-to-date. They can be handled in the
138038fd1498Szrj next iteration. */
138138fd1498Szrj for (inner = loop->inner; inner != NULL; inner = inner->next)
138238fd1498Szrj if ((unsigned) inner->num < num)
138338fd1498Szrj changed |= tree_unroll_loops_completely_1 (may_increase_size,
138438fd1498Szrj unroll_outer, father_bbs,
138538fd1498Szrj inner);
138638fd1498Szrj
138738fd1498Szrj /* If we changed an inner loop we cannot process outer loops in this
138838fd1498Szrj iteration because SSA form is not up-to-date. Continue with
138938fd1498Szrj siblings of outer loops instead. */
139038fd1498Szrj if (changed)
139138fd1498Szrj return true;
139238fd1498Szrj
139338fd1498Szrj /* Don't unroll #pragma omp simd loops until the vectorizer
139438fd1498Szrj attempts to vectorize those. */
139538fd1498Szrj if (loop->force_vectorize)
139638fd1498Szrj return false;
139738fd1498Szrj
139838fd1498Szrj /* Try to unroll this loop. */
139938fd1498Szrj loop_father = loop_outer (loop);
140038fd1498Szrj if (!loop_father)
140138fd1498Szrj return false;
140238fd1498Szrj
140338fd1498Szrj if (loop->unroll > 1)
140438fd1498Szrj ul = UL_ALL;
140538fd1498Szrj else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
140638fd1498Szrj /* Unroll outermost loops only if asked to do so or they do
140738fd1498Szrj not cause code growth. */
140838fd1498Szrj && (unroll_outer || loop_outer (loop_father)))
140938fd1498Szrj ul = UL_ALL;
141038fd1498Szrj else
141138fd1498Szrj ul = UL_NO_GROWTH;
141238fd1498Szrj
141338fd1498Szrj if (canonicalize_loop_induction_variables
141438fd1498Szrj (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
141538fd1498Szrj {
141638fd1498Szrj /* If we'll continue unrolling, we need to propagate constants
141738fd1498Szrj within the new basic blocks to fold away induction variable
141838fd1498Szrj computations; otherwise, the size might blow up before the
141938fd1498Szrj iteration is complete and the IR eventually cleaned up. */
142038fd1498Szrj if (loop_outer (loop_father))
142138fd1498Szrj bitmap_set_bit (father_bbs, loop_father->header->index);
142238fd1498Szrj
142338fd1498Szrj return true;
142438fd1498Szrj }
142538fd1498Szrj
142638fd1498Szrj return false;
142738fd1498Szrj }
142838fd1498Szrj
142938fd1498Szrj /* Unroll LOOPS completely if they iterate just few times. Unless
143038fd1498Szrj MAY_INCREASE_SIZE is true, perform the unrolling only if the
143138fd1498Szrj size of the code does not increase. */
143238fd1498Szrj
143338fd1498Szrj static unsigned int
tree_unroll_loops_completely(bool may_increase_size,bool unroll_outer)143438fd1498Szrj tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
143538fd1498Szrj {
143638fd1498Szrj bitmap father_bbs = BITMAP_ALLOC (NULL);
143738fd1498Szrj bool changed;
143838fd1498Szrj int iteration = 0;
143938fd1498Szrj bool irred_invalidated = false;
144038fd1498Szrj
144138fd1498Szrj estimate_numbers_of_iterations (cfun);
144238fd1498Szrj
144338fd1498Szrj do
144438fd1498Szrj {
144538fd1498Szrj changed = false;
144638fd1498Szrj bitmap loop_closed_ssa_invalidated = NULL;
144738fd1498Szrj
144838fd1498Szrj if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
144938fd1498Szrj loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
145038fd1498Szrj
145138fd1498Szrj free_numbers_of_iterations_estimates (cfun);
145238fd1498Szrj estimate_numbers_of_iterations (cfun);
145338fd1498Szrj
145438fd1498Szrj changed = tree_unroll_loops_completely_1 (may_increase_size,
145538fd1498Szrj unroll_outer, father_bbs,
145638fd1498Szrj current_loops->tree_root);
145738fd1498Szrj if (changed)
145838fd1498Szrj {
145938fd1498Szrj unsigned i;
146038fd1498Szrj
146138fd1498Szrj unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
146238fd1498Szrj
146338fd1498Szrj /* We can not use TODO_update_ssa_no_phi because VOPS gets confused. */
146438fd1498Szrj if (loop_closed_ssa_invalidated
146538fd1498Szrj && !bitmap_empty_p (loop_closed_ssa_invalidated))
146638fd1498Szrj rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
146738fd1498Szrj TODO_update_ssa);
146838fd1498Szrj else
146938fd1498Szrj update_ssa (TODO_update_ssa);
147038fd1498Szrj
147138fd1498Szrj /* father_bbs is a bitmap of loop father header BB indices.
147238fd1498Szrj Translate that to what non-root loops these BBs belong to now. */
147338fd1498Szrj bitmap_iterator bi;
147438fd1498Szrj bitmap fathers = BITMAP_ALLOC (NULL);
147538fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
147638fd1498Szrj {
147738fd1498Szrj basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
147838fd1498Szrj if (! unrolled_loop_bb)
147938fd1498Szrj continue;
148038fd1498Szrj if (loop_outer (unrolled_loop_bb->loop_father))
148138fd1498Szrj bitmap_set_bit (fathers,
148238fd1498Szrj unrolled_loop_bb->loop_father->num);
148338fd1498Szrj }
148438fd1498Szrj bitmap_clear (father_bbs);
148538fd1498Szrj /* Propagate the constants within the new basic blocks. */
148638fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
148738fd1498Szrj {
148838fd1498Szrj loop_p father = get_loop (cfun, i);
148938fd1498Szrj basic_block *body = get_loop_body_in_dom_order (father);
149038fd1498Szrj for (unsigned j = 0; j < father->num_nodes; j++)
149138fd1498Szrj propagate_constants_for_unrolling (body[j]);
149238fd1498Szrj free (body);
149338fd1498Szrj }
149438fd1498Szrj BITMAP_FREE (fathers);
149538fd1498Szrj
149638fd1498Szrj /* This will take care of removing completely unrolled loops
149738fd1498Szrj from the loop structures so we can continue unrolling now
149838fd1498Szrj innermost loops. */
149938fd1498Szrj if (cleanup_tree_cfg ())
150038fd1498Szrj update_ssa (TODO_update_ssa_only_virtuals);
150138fd1498Szrj
150238fd1498Szrj /* Clean up the information about numbers of iterations, since
150338fd1498Szrj complete unrolling might have invalidated it. */
150438fd1498Szrj scev_reset ();
150538fd1498Szrj if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
150638fd1498Szrj verify_loop_closed_ssa (true);
150738fd1498Szrj }
150838fd1498Szrj if (loop_closed_ssa_invalidated)
150938fd1498Szrj BITMAP_FREE (loop_closed_ssa_invalidated);
151038fd1498Szrj }
151138fd1498Szrj while (changed
151238fd1498Szrj && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
151338fd1498Szrj
151438fd1498Szrj BITMAP_FREE (father_bbs);
151538fd1498Szrj
151638fd1498Szrj if (irred_invalidated
151738fd1498Szrj && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
151838fd1498Szrj mark_irreducible_loops ();
151938fd1498Szrj
152038fd1498Szrj return 0;
152138fd1498Szrj }
152238fd1498Szrj
152338fd1498Szrj /* Canonical induction variable creation pass. */
152438fd1498Szrj
152538fd1498Szrj namespace {
152638fd1498Szrj
152738fd1498Szrj const pass_data pass_data_iv_canon =
152838fd1498Szrj {
152938fd1498Szrj GIMPLE_PASS, /* type */
153038fd1498Szrj "ivcanon", /* name */
153138fd1498Szrj OPTGROUP_LOOP, /* optinfo_flags */
153238fd1498Szrj TV_TREE_LOOP_IVCANON, /* tv_id */
153338fd1498Szrj ( PROP_cfg | PROP_ssa ), /* properties_required */
153438fd1498Szrj 0, /* properties_provided */
153538fd1498Szrj 0, /* properties_destroyed */
153638fd1498Szrj 0, /* todo_flags_start */
153738fd1498Szrj 0, /* todo_flags_finish */
153838fd1498Szrj };
153938fd1498Szrj
154038fd1498Szrj class pass_iv_canon : public gimple_opt_pass
154138fd1498Szrj {
154238fd1498Szrj public:
pass_iv_canon(gcc::context * ctxt)154338fd1498Szrj pass_iv_canon (gcc::context *ctxt)
154438fd1498Szrj : gimple_opt_pass (pass_data_iv_canon, ctxt)
154538fd1498Szrj {}
154638fd1498Szrj
154738fd1498Szrj /* opt_pass methods: */
gate(function *)154838fd1498Szrj virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
154938fd1498Szrj virtual unsigned int execute (function *fun);
155038fd1498Szrj
155138fd1498Szrj }; // class pass_iv_canon
155238fd1498Szrj
155338fd1498Szrj unsigned int
execute(function * fun)155438fd1498Szrj pass_iv_canon::execute (function *fun)
155538fd1498Szrj {
155638fd1498Szrj if (number_of_loops (fun) <= 1)
155738fd1498Szrj return 0;
155838fd1498Szrj
155938fd1498Szrj return canonicalize_induction_variables ();
156038fd1498Szrj }
156138fd1498Szrj
156238fd1498Szrj } // anon namespace
156338fd1498Szrj
156438fd1498Szrj gimple_opt_pass *
make_pass_iv_canon(gcc::context * ctxt)156538fd1498Szrj make_pass_iv_canon (gcc::context *ctxt)
156638fd1498Szrj {
156738fd1498Szrj return new pass_iv_canon (ctxt);
156838fd1498Szrj }
156938fd1498Szrj
157038fd1498Szrj /* Complete unrolling of loops. */
157138fd1498Szrj
157238fd1498Szrj namespace {
157338fd1498Szrj
157438fd1498Szrj const pass_data pass_data_complete_unroll =
157538fd1498Szrj {
157638fd1498Szrj GIMPLE_PASS, /* type */
157738fd1498Szrj "cunroll", /* name */
157838fd1498Szrj OPTGROUP_LOOP, /* optinfo_flags */
157938fd1498Szrj TV_COMPLETE_UNROLL, /* tv_id */
158038fd1498Szrj ( PROP_cfg | PROP_ssa ), /* properties_required */
158138fd1498Szrj 0, /* properties_provided */
158238fd1498Szrj 0, /* properties_destroyed */
158338fd1498Szrj 0, /* todo_flags_start */
158438fd1498Szrj 0, /* todo_flags_finish */
158538fd1498Szrj };
158638fd1498Szrj
158738fd1498Szrj class pass_complete_unroll : public gimple_opt_pass
158838fd1498Szrj {
158938fd1498Szrj public:
pass_complete_unroll(gcc::context * ctxt)159038fd1498Szrj pass_complete_unroll (gcc::context *ctxt)
159138fd1498Szrj : gimple_opt_pass (pass_data_complete_unroll, ctxt)
159238fd1498Szrj {}
159338fd1498Szrj
159438fd1498Szrj /* opt_pass methods: */
159538fd1498Szrj virtual unsigned int execute (function *);
159638fd1498Szrj
159738fd1498Szrj }; // class pass_complete_unroll
159838fd1498Szrj
159938fd1498Szrj unsigned int
execute(function * fun)160038fd1498Szrj pass_complete_unroll::execute (function *fun)
160138fd1498Szrj {
160238fd1498Szrj if (number_of_loops (fun) <= 1)
160338fd1498Szrj return 0;
160438fd1498Szrj
160538fd1498Szrj /* If we ever decide to run loop peeling more than once, we will need to
160638fd1498Szrj track loops already peeled in loop structures themselves to avoid
160738fd1498Szrj re-peeling the same loop multiple times. */
160838fd1498Szrj if (flag_peel_loops)
160938fd1498Szrj peeled_loops = BITMAP_ALLOC (NULL);
161038fd1498Szrj unsigned int val = tree_unroll_loops_completely (flag_unroll_loops
161138fd1498Szrj || flag_peel_loops
161238fd1498Szrj || optimize >= 3, true);
161338fd1498Szrj if (peeled_loops)
161438fd1498Szrj {
161538fd1498Szrj BITMAP_FREE (peeled_loops);
161638fd1498Szrj peeled_loops = NULL;
161738fd1498Szrj }
161838fd1498Szrj return val;
161938fd1498Szrj }
162038fd1498Szrj
162138fd1498Szrj } // anon namespace
162238fd1498Szrj
162338fd1498Szrj gimple_opt_pass *
make_pass_complete_unroll(gcc::context * ctxt)162438fd1498Szrj make_pass_complete_unroll (gcc::context *ctxt)
162538fd1498Szrj {
162638fd1498Szrj return new pass_complete_unroll (ctxt);
162738fd1498Szrj }
162838fd1498Szrj
162938fd1498Szrj /* Complete unrolling of inner loops. */
163038fd1498Szrj
163138fd1498Szrj namespace {
163238fd1498Szrj
163338fd1498Szrj const pass_data pass_data_complete_unrolli =
163438fd1498Szrj {
163538fd1498Szrj GIMPLE_PASS, /* type */
163638fd1498Szrj "cunrolli", /* name */
163738fd1498Szrj OPTGROUP_LOOP, /* optinfo_flags */
163838fd1498Szrj TV_COMPLETE_UNROLL, /* tv_id */
163938fd1498Szrj ( PROP_cfg | PROP_ssa ), /* properties_required */
164038fd1498Szrj 0, /* properties_provided */
164138fd1498Szrj 0, /* properties_destroyed */
164238fd1498Szrj 0, /* todo_flags_start */
164338fd1498Szrj 0, /* todo_flags_finish */
164438fd1498Szrj };
164538fd1498Szrj
164638fd1498Szrj class pass_complete_unrolli : public gimple_opt_pass
164738fd1498Szrj {
164838fd1498Szrj public:
pass_complete_unrolli(gcc::context * ctxt)164938fd1498Szrj pass_complete_unrolli (gcc::context *ctxt)
165038fd1498Szrj : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
165138fd1498Szrj {}
165238fd1498Szrj
165338fd1498Szrj /* opt_pass methods: */
gate(function *)165438fd1498Szrj virtual bool gate (function *) { return optimize >= 2; }
165538fd1498Szrj virtual unsigned int execute (function *);
165638fd1498Szrj
165738fd1498Szrj }; // class pass_complete_unrolli
165838fd1498Szrj
165938fd1498Szrj unsigned int
execute(function * fun)166038fd1498Szrj pass_complete_unrolli::execute (function *fun)
166138fd1498Szrj {
166238fd1498Szrj unsigned ret = 0;
166338fd1498Szrj
166438fd1498Szrj loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
166538fd1498Szrj if (number_of_loops (fun) > 1)
166638fd1498Szrj {
166738fd1498Szrj scev_initialize ();
166838fd1498Szrj ret = tree_unroll_loops_completely (optimize >= 3, false);
166938fd1498Szrj scev_finalize ();
167038fd1498Szrj }
167138fd1498Szrj loop_optimizer_finalize ();
167238fd1498Szrj
167338fd1498Szrj return ret;
167438fd1498Szrj }
167538fd1498Szrj
167638fd1498Szrj } // anon namespace
167738fd1498Szrj
167838fd1498Szrj gimple_opt_pass *
make_pass_complete_unrolli(gcc::context * ctxt)167938fd1498Szrj make_pass_complete_unrolli (gcc::context *ctxt)
168038fd1498Szrj {
168138fd1498Szrj return new pass_complete_unrolli (ctxt);
168238fd1498Szrj }
168338fd1498Szrj
168438fd1498Szrj
1685