xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-ssa-loop-split.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
13ad841b2Smrg /* Loop splitting.
2*8feb0f0bSmrg    Copyright (C) 2015-2020 Free Software Foundation, Inc.
33ad841b2Smrg 
43ad841b2Smrg This file is part of GCC.
53ad841b2Smrg 
63ad841b2Smrg GCC is free software; you can redistribute it and/or modify it
73ad841b2Smrg under the terms of the GNU General Public License as published by the
83ad841b2Smrg Free Software Foundation; either version 3, or (at your option) any
93ad841b2Smrg later version.
103ad841b2Smrg 
113ad841b2Smrg GCC is distributed in the hope that it will be useful, but WITHOUT
123ad841b2Smrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
133ad841b2Smrg FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
143ad841b2Smrg for more details.
153ad841b2Smrg 
163ad841b2Smrg You should have received a copy of the GNU General Public License
173ad841b2Smrg along with GCC; see the file COPYING3.  If not see
183ad841b2Smrg <http://www.gnu.org/licenses/>.  */
193ad841b2Smrg 
203ad841b2Smrg #include "config.h"
213ad841b2Smrg #include "system.h"
223ad841b2Smrg #include "coretypes.h"
233ad841b2Smrg #include "backend.h"
243ad841b2Smrg #include "tree.h"
253ad841b2Smrg #include "gimple.h"
263ad841b2Smrg #include "tree-pass.h"
273ad841b2Smrg #include "ssa.h"
283ad841b2Smrg #include "fold-const.h"
293ad841b2Smrg #include "tree-cfg.h"
303ad841b2Smrg #include "tree-ssa.h"
313ad841b2Smrg #include "tree-ssa-loop-niter.h"
323ad841b2Smrg #include "tree-ssa-loop.h"
333ad841b2Smrg #include "tree-ssa-loop-manip.h"
343ad841b2Smrg #include "tree-into-ssa.h"
35*8feb0f0bSmrg #include "tree-inline.h"
36*8feb0f0bSmrg #include "tree-cfgcleanup.h"
373ad841b2Smrg #include "cfgloop.h"
383ad841b2Smrg #include "tree-scalar-evolution.h"
393ad841b2Smrg #include "gimple-iterator.h"
403ad841b2Smrg #include "gimple-pretty-print.h"
413ad841b2Smrg #include "cfghooks.h"
423ad841b2Smrg #include "gimple-fold.h"
433ad841b2Smrg #include "gimplify-me.h"
443ad841b2Smrg 
/* This file implements two kinds of loop splitting.

   One transformation of loops like:

   for (i = 0; i < 100; i++)
     {
       if (i < 50)
         A;
       else
         B;
     }

   into:

   for (i = 0; i < 50; i++)
     {
       A;
     }
   for (; i < 100; i++)
     {
       B;
     }

   */
693ad841b2Smrg 
703ad841b2Smrg /* Return true when BB inside LOOP is a potential iteration space
713ad841b2Smrg    split point, i.e. ends with a condition like "IV < comp", which
723ad841b2Smrg    is true on one side of the iteration space and false on the other,
733ad841b2Smrg    and the split point can be computed.  If so, also return the border
743ad841b2Smrg    point in *BORDER and the comparison induction variable in IV.  */
753ad841b2Smrg 
763ad841b2Smrg static tree
split_at_bb_p(class loop * loop,basic_block bb,tree * border,affine_iv * iv)77*8feb0f0bSmrg split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
783ad841b2Smrg {
793ad841b2Smrg   gimple *last;
803ad841b2Smrg   gcond *stmt;
813ad841b2Smrg   affine_iv iv2;
823ad841b2Smrg 
833ad841b2Smrg   /* BB must end in a simple conditional jump.  */
843ad841b2Smrg   last = last_stmt (bb);
853ad841b2Smrg   if (!last || gimple_code (last) != GIMPLE_COND)
863ad841b2Smrg     return NULL_TREE;
873ad841b2Smrg   stmt = as_a <gcond *> (last);
883ad841b2Smrg 
893ad841b2Smrg   enum tree_code code = gimple_cond_code (stmt);
903ad841b2Smrg 
913ad841b2Smrg   /* Only handle relational comparisons, for equality and non-equality
923ad841b2Smrg      we'd have to split the loop into two loops and a middle statement.  */
933ad841b2Smrg   switch (code)
943ad841b2Smrg     {
953ad841b2Smrg       case LT_EXPR:
963ad841b2Smrg       case LE_EXPR:
973ad841b2Smrg       case GT_EXPR:
983ad841b2Smrg       case GE_EXPR:
993ad841b2Smrg 	break;
1003ad841b2Smrg       default:
1013ad841b2Smrg 	return NULL_TREE;
1023ad841b2Smrg     }
1033ad841b2Smrg 
1043ad841b2Smrg   if (loop_exits_from_bb_p (loop, bb))
1053ad841b2Smrg     return NULL_TREE;
1063ad841b2Smrg 
1073ad841b2Smrg   tree op0 = gimple_cond_lhs (stmt);
1083ad841b2Smrg   tree op1 = gimple_cond_rhs (stmt);
109*8feb0f0bSmrg   class loop *useloop = loop_containing_stmt (stmt);
1103ad841b2Smrg 
1113ad841b2Smrg   if (!simple_iv (loop, useloop, op0, iv, false))
1123ad841b2Smrg     return NULL_TREE;
1133ad841b2Smrg   if (!simple_iv (loop, useloop, op1, &iv2, false))
1143ad841b2Smrg     return NULL_TREE;
1153ad841b2Smrg 
1163ad841b2Smrg   /* Make it so that the first argument of the condition is
1173ad841b2Smrg      the looping one.  */
1183ad841b2Smrg   if (!integer_zerop (iv2.step))
1193ad841b2Smrg     {
1203ad841b2Smrg       std::swap (op0, op1);
1213ad841b2Smrg       std::swap (*iv, iv2);
1223ad841b2Smrg       code = swap_tree_comparison (code);
1233ad841b2Smrg       gimple_cond_set_condition (stmt, code, op0, op1);
1243ad841b2Smrg       update_stmt (stmt);
1253ad841b2Smrg     }
1263ad841b2Smrg   else if (integer_zerop (iv->step))
1273ad841b2Smrg     return NULL_TREE;
1283ad841b2Smrg   if (!integer_zerop (iv2.step))
1293ad841b2Smrg     return NULL_TREE;
1303ad841b2Smrg   if (!iv->no_overflow)
1313ad841b2Smrg     return NULL_TREE;
1323ad841b2Smrg 
1333ad841b2Smrg   if (dump_file && (dump_flags & TDF_DETAILS))
1343ad841b2Smrg     {
1353ad841b2Smrg       fprintf (dump_file, "Found potential split point: ");
1363ad841b2Smrg       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1373ad841b2Smrg       fprintf (dump_file, " { ");
1383ad841b2Smrg       print_generic_expr (dump_file, iv->base, TDF_SLIM);
1393ad841b2Smrg       fprintf (dump_file, " + I*");
1403ad841b2Smrg       print_generic_expr (dump_file, iv->step, TDF_SLIM);
1413ad841b2Smrg       fprintf (dump_file, " } %s ", get_tree_code_name (code));
1423ad841b2Smrg       print_generic_expr (dump_file, iv2.base, TDF_SLIM);
1433ad841b2Smrg       fprintf (dump_file, "\n");
1443ad841b2Smrg     }
1453ad841b2Smrg 
1463ad841b2Smrg   *border = iv2.base;
1473ad841b2Smrg   return op0;
1483ad841b2Smrg }
1493ad841b2Smrg 
1503ad841b2Smrg /* Given a GUARD conditional stmt inside LOOP, which we want to make always
1513ad841b2Smrg    true or false depending on INITIAL_TRUE, and adjusted values NEXTVAL
1523ad841b2Smrg    (a post-increment IV) and NEWBOUND (the comparator) adjust the loop
1533ad841b2Smrg    exit test statement to loop back only if the GUARD statement will
1543ad841b2Smrg    also be true/false in the next iteration.  */
1553ad841b2Smrg 
1563ad841b2Smrg static void
patch_loop_exit(class loop * loop,gcond * guard,tree nextval,tree newbound,bool initial_true)157*8feb0f0bSmrg patch_loop_exit (class loop *loop, gcond *guard, tree nextval, tree newbound,
1583ad841b2Smrg 		 bool initial_true)
1593ad841b2Smrg {
1603ad841b2Smrg   edge exit = single_exit (loop);
1613ad841b2Smrg   gcond *stmt = as_a <gcond *> (last_stmt (exit->src));
1623ad841b2Smrg   gimple_cond_set_condition (stmt, gimple_cond_code (guard),
1633ad841b2Smrg 			     nextval, newbound);
1643ad841b2Smrg   update_stmt (stmt);
1653ad841b2Smrg 
1663ad841b2Smrg   edge stay = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
1673ad841b2Smrg 
1683ad841b2Smrg   exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
1693ad841b2Smrg   stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
1703ad841b2Smrg 
1713ad841b2Smrg   if (initial_true)
1723ad841b2Smrg     {
1733ad841b2Smrg       exit->flags |= EDGE_FALSE_VALUE;
1743ad841b2Smrg       stay->flags |= EDGE_TRUE_VALUE;
1753ad841b2Smrg     }
1763ad841b2Smrg   else
1773ad841b2Smrg     {
1783ad841b2Smrg       exit->flags |= EDGE_TRUE_VALUE;
1793ad841b2Smrg       stay->flags |= EDGE_FALSE_VALUE;
1803ad841b2Smrg     }
1813ad841b2Smrg }
1823ad841b2Smrg 
1833ad841b2Smrg /* Give an induction variable GUARD_IV, and its affine descriptor IV,
1843ad841b2Smrg    find the loop phi node in LOOP defining it directly, or create
1853ad841b2Smrg    such phi node.  Return that phi node.  */
1863ad841b2Smrg 
1873ad841b2Smrg static gphi *
find_or_create_guard_phi(class loop * loop,tree guard_iv,affine_iv *)188*8feb0f0bSmrg find_or_create_guard_phi (class loop *loop, tree guard_iv, affine_iv * /*iv*/)
1893ad841b2Smrg {
1903ad841b2Smrg   gimple *def = SSA_NAME_DEF_STMT (guard_iv);
1913ad841b2Smrg   gphi *phi;
1923ad841b2Smrg   if ((phi = dyn_cast <gphi *> (def))
1933ad841b2Smrg       && gimple_bb (phi) == loop->header)
1943ad841b2Smrg     return phi;
1953ad841b2Smrg 
1963ad841b2Smrg   /* XXX Create the PHI instead.  */
1973ad841b2Smrg   return NULL;
1983ad841b2Smrg }
1993ad841b2Smrg 
2003ad841b2Smrg /* Returns true if the exit values of all loop phi nodes can be
2013ad841b2Smrg    determined easily (i.e. that connect_loop_phis can determine them).  */
2023ad841b2Smrg 
2033ad841b2Smrg static bool
easy_exit_values(class loop * loop)204*8feb0f0bSmrg easy_exit_values (class loop *loop)
2053ad841b2Smrg {
2063ad841b2Smrg   edge exit = single_exit (loop);
2073ad841b2Smrg   edge latch = loop_latch_edge (loop);
2083ad841b2Smrg   gphi_iterator psi;
2093ad841b2Smrg 
2103ad841b2Smrg   /* Currently we regard the exit values as easy if they are the same
2113ad841b2Smrg      as the value over the backedge.  Which is the case if the definition
2123ad841b2Smrg      of the backedge value dominates the exit edge.  */
2133ad841b2Smrg   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
2143ad841b2Smrg     {
2153ad841b2Smrg       gphi *phi = psi.phi ();
2163ad841b2Smrg       tree next = PHI_ARG_DEF_FROM_EDGE (phi, latch);
2173ad841b2Smrg       basic_block bb;
2183ad841b2Smrg       if (TREE_CODE (next) == SSA_NAME
2193ad841b2Smrg 	  && (bb = gimple_bb (SSA_NAME_DEF_STMT (next)))
2203ad841b2Smrg 	  && !dominated_by_p (CDI_DOMINATORS, exit->src, bb))
2213ad841b2Smrg 	return false;
2223ad841b2Smrg     }
2233ad841b2Smrg 
2243ad841b2Smrg   return true;
2253ad841b2Smrg }
2263ad841b2Smrg 
2273ad841b2Smrg /* This function updates the SSA form after connect_loops made a new
2283ad841b2Smrg    edge NEW_E leading from LOOP1 exit to LOOP2 (via in intermediate
2293ad841b2Smrg    conditional).  I.e. the second loop can now be entered either
2303ad841b2Smrg    via the original entry or via NEW_E, so the entry values of LOOP2
2313ad841b2Smrg    phi nodes are either the original ones or those at the exit
2323ad841b2Smrg    of LOOP1.  Insert new phi nodes in LOOP2 pre-header reflecting
2333ad841b2Smrg    this.  The loops need to fulfill easy_exit_values().  */
2343ad841b2Smrg 
2353ad841b2Smrg static void
connect_loop_phis(class loop * loop1,class loop * loop2,edge new_e)236*8feb0f0bSmrg connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
2373ad841b2Smrg {
2383ad841b2Smrg   basic_block rest = loop_preheader_edge (loop2)->src;
2393ad841b2Smrg   gcc_assert (new_e->dest == rest);
2403ad841b2Smrg   edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e);
2413ad841b2Smrg 
2423ad841b2Smrg   edge firste = loop_preheader_edge (loop1);
2433ad841b2Smrg   edge seconde = loop_preheader_edge (loop2);
2443ad841b2Smrg   edge firstn = loop_latch_edge (loop1);
2453ad841b2Smrg   gphi_iterator psi_first, psi_second;
2463ad841b2Smrg   for (psi_first = gsi_start_phis (loop1->header),
2473ad841b2Smrg        psi_second = gsi_start_phis (loop2->header);
2483ad841b2Smrg        !gsi_end_p (psi_first);
2493ad841b2Smrg        gsi_next (&psi_first), gsi_next (&psi_second))
2503ad841b2Smrg     {
2513ad841b2Smrg       tree init, next, new_init;
2523ad841b2Smrg       use_operand_p op;
2533ad841b2Smrg       gphi *phi_first = psi_first.phi ();
2543ad841b2Smrg       gphi *phi_second = psi_second.phi ();
2553ad841b2Smrg 
2563ad841b2Smrg       init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
2573ad841b2Smrg       next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
2583ad841b2Smrg       op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
2593ad841b2Smrg       gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
2603ad841b2Smrg 
2613ad841b2Smrg       /* Prefer using original variable as a base for the new ssa name.
2623ad841b2Smrg 	 This is necessary for virtual ops, and useful in order to avoid
2633ad841b2Smrg 	 losing debug info for real ops.  */
2643ad841b2Smrg       if (TREE_CODE (next) == SSA_NAME
2653ad841b2Smrg 	  && useless_type_conversion_p (TREE_TYPE (next),
2663ad841b2Smrg 					TREE_TYPE (init)))
2673ad841b2Smrg 	new_init = copy_ssa_name (next);
2683ad841b2Smrg       else if (TREE_CODE (init) == SSA_NAME
2693ad841b2Smrg 	       && useless_type_conversion_p (TREE_TYPE (init),
2703ad841b2Smrg 					     TREE_TYPE (next)))
2713ad841b2Smrg 	new_init = copy_ssa_name (init);
2723ad841b2Smrg       else if (useless_type_conversion_p (TREE_TYPE (next),
2733ad841b2Smrg 					  TREE_TYPE (init)))
2743ad841b2Smrg 	new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
2753ad841b2Smrg 				       "unrinittmp");
2763ad841b2Smrg       else
2773ad841b2Smrg 	new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
2783ad841b2Smrg 				       "unrinittmp");
2793ad841b2Smrg 
2803ad841b2Smrg       gphi * newphi = create_phi_node (new_init, rest);
2813ad841b2Smrg       add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
2823ad841b2Smrg       add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
2833ad841b2Smrg       SET_USE (op, new_init);
2843ad841b2Smrg     }
2853ad841b2Smrg }
2863ad841b2Smrg 
2873ad841b2Smrg /* The two loops LOOP1 and LOOP2 were just created by loop versioning,
2883ad841b2Smrg    they are still equivalent and placed in two arms of a diamond, like so:
2893ad841b2Smrg 
2903ad841b2Smrg                .------if (cond)------.
2913ad841b2Smrg                v                     v
2923ad841b2Smrg              pre1                   pre2
2933ad841b2Smrg               |                      |
2943ad841b2Smrg         .--->h1                     h2<----.
2953ad841b2Smrg         |     |                      |     |
2963ad841b2Smrg         |    ex1---.            .---ex2    |
2973ad841b2Smrg         |    /     |            |     \    |
2983ad841b2Smrg         '---l1     X            |     l2---'
2993ad841b2Smrg                    |            |
3003ad841b2Smrg                    |            |
3013ad841b2Smrg                    '--->join<---'
3023ad841b2Smrg 
3033ad841b2Smrg    This function transforms the program such that LOOP1 is conditionally
3043ad841b2Smrg    falling through to LOOP2, or skipping it.  This is done by splitting
3053ad841b2Smrg    the ex1->join edge at X in the diagram above, and inserting a condition
3063ad841b2Smrg    whose one arm goes to pre2, resulting in this situation:
3073ad841b2Smrg 
3083ad841b2Smrg                .------if (cond)------.
3093ad841b2Smrg                v                     v
3103ad841b2Smrg              pre1       .---------->pre2
3113ad841b2Smrg               |         |            |
3123ad841b2Smrg         .--->h1         |           h2<----.
3133ad841b2Smrg         |     |         |            |     |
3143ad841b2Smrg         |    ex1---.    |       .---ex2    |
3153ad841b2Smrg         |    /     v    |       |     \    |
3163ad841b2Smrg         '---l1   skip---'       |     l2---'
3173ad841b2Smrg                    |            |
3183ad841b2Smrg                    |            |
3193ad841b2Smrg                    '--->join<---'
3203ad841b2Smrg 
3213ad841b2Smrg 
3223ad841b2Smrg    The condition used is the exit condition of LOOP1, which effectively means
3233ad841b2Smrg    that when the first loop exits (for whatever reason) but the real original
3243ad841b2Smrg    exit expression is still false the second loop will be entered.
3253ad841b2Smrg    The function returns the new edge cond->pre2.
3263ad841b2Smrg 
3273ad841b2Smrg    This doesn't update the SSA form, see connect_loop_phis for that.  */
3283ad841b2Smrg 
3293ad841b2Smrg static edge
connect_loops(class loop * loop1,class loop * loop2)330*8feb0f0bSmrg connect_loops (class loop *loop1, class loop *loop2)
3313ad841b2Smrg {
3323ad841b2Smrg   edge exit = single_exit (loop1);
3333ad841b2Smrg   basic_block skip_bb = split_edge (exit);
3343ad841b2Smrg   gcond *skip_stmt;
3353ad841b2Smrg   gimple_stmt_iterator gsi;
3363ad841b2Smrg   edge new_e, skip_e;
3373ad841b2Smrg 
3383ad841b2Smrg   gimple *stmt = last_stmt (exit->src);
3393ad841b2Smrg   skip_stmt = gimple_build_cond (gimple_cond_code (stmt),
3403ad841b2Smrg 				 gimple_cond_lhs (stmt),
3413ad841b2Smrg 				 gimple_cond_rhs (stmt),
3423ad841b2Smrg 				 NULL_TREE, NULL_TREE);
3433ad841b2Smrg   gsi = gsi_last_bb (skip_bb);
3443ad841b2Smrg   gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT);
3453ad841b2Smrg 
3463ad841b2Smrg   skip_e = EDGE_SUCC (skip_bb, 0);
3473ad841b2Smrg   skip_e->flags &= ~EDGE_FALLTHRU;
3483ad841b2Smrg   new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0);
3493ad841b2Smrg   if (exit->flags & EDGE_TRUE_VALUE)
3503ad841b2Smrg     {
3513ad841b2Smrg       skip_e->flags |= EDGE_TRUE_VALUE;
3523ad841b2Smrg       new_e->flags |= EDGE_FALSE_VALUE;
3533ad841b2Smrg     }
3543ad841b2Smrg   else
3553ad841b2Smrg     {
3563ad841b2Smrg       skip_e->flags |= EDGE_FALSE_VALUE;
3573ad841b2Smrg       new_e->flags |= EDGE_TRUE_VALUE;
3583ad841b2Smrg     }
3593ad841b2Smrg 
360a2dc1f3fSmrg   new_e->probability = profile_probability::likely ();
361a2dc1f3fSmrg   skip_e->probability = new_e->probability.invert ();
3623ad841b2Smrg 
3633ad841b2Smrg   return new_e;
3643ad841b2Smrg }
3653ad841b2Smrg 
3663ad841b2Smrg /* This returns the new bound for iterations given the original iteration
3673ad841b2Smrg    space in NITER, an arbitrary new bound BORDER, assumed to be some
3683ad841b2Smrg    comparison value with a different IV, the initial value GUARD_INIT of
3693ad841b2Smrg    that other IV, and the comparison code GUARD_CODE that compares
3703ad841b2Smrg    that other IV with BORDER.  We return an SSA name, and place any
3713ad841b2Smrg    necessary statements for that computation into *STMTS.
3723ad841b2Smrg 
3733ad841b2Smrg    For example for such a loop:
3743ad841b2Smrg 
3753ad841b2Smrg      for (i = beg, j = guard_init; i < end; i++, j++)
3763ad841b2Smrg        if (j < border)  // this is supposed to be true/false
3773ad841b2Smrg          ...
3783ad841b2Smrg 
3793ad841b2Smrg    we want to return a new bound (on j) that makes the loop iterate
3803ad841b2Smrg    as long as the condition j < border stays true.  We also don't want
3813ad841b2Smrg    to iterate more often than the original loop, so we have to introduce
3823ad841b2Smrg    some cut-off as well (via min/max), effectively resulting in:
3833ad841b2Smrg 
3843ad841b2Smrg      newend = min (end+guard_init-beg, border)
3853ad841b2Smrg      for (i = beg; j = guard_init; j < newend; i++, j++)
3863ad841b2Smrg        if (j < c)
3873ad841b2Smrg          ...
3883ad841b2Smrg 
3893ad841b2Smrg    Depending on the direction of the IVs and if the exit tests
3903ad841b2Smrg    are strict or non-strict we need to use MIN or MAX,
3913ad841b2Smrg    and add or subtract 1.  This routine computes newend above.  */
3923ad841b2Smrg 
3933ad841b2Smrg static tree
compute_new_first_bound(gimple_seq * stmts,class tree_niter_desc * niter,tree border,enum tree_code guard_code,tree guard_init)394*8feb0f0bSmrg compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
3953ad841b2Smrg 			 tree border,
3963ad841b2Smrg 			 enum tree_code guard_code, tree guard_init)
3973ad841b2Smrg {
3983ad841b2Smrg   /* The niter structure contains the after-increment IV, we need
3993ad841b2Smrg      the loop-enter base, so subtract STEP once.  */
4003ad841b2Smrg   tree controlbase = force_gimple_operand (niter->control.base,
4013ad841b2Smrg 					   stmts, true, NULL_TREE);
4023ad841b2Smrg   tree controlstep = niter->control.step;
4033ad841b2Smrg   tree enddiff;
4043ad841b2Smrg   if (POINTER_TYPE_P (TREE_TYPE (controlbase)))
4053ad841b2Smrg     {
4063ad841b2Smrg       controlstep = gimple_build (stmts, NEGATE_EXPR,
4073ad841b2Smrg 				  TREE_TYPE (controlstep), controlstep);
4083ad841b2Smrg       enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
4093ad841b2Smrg 			      TREE_TYPE (controlbase),
4103ad841b2Smrg 			      controlbase, controlstep);
4113ad841b2Smrg     }
4123ad841b2Smrg   else
4133ad841b2Smrg     enddiff = gimple_build (stmts, MINUS_EXPR,
4143ad841b2Smrg 			    TREE_TYPE (controlbase),
4153ad841b2Smrg 			    controlbase, controlstep);
4163ad841b2Smrg 
4173ad841b2Smrg   /* Compute end-beg.  */
4183ad841b2Smrg   gimple_seq stmts2;
4193ad841b2Smrg   tree end = force_gimple_operand (niter->bound, &stmts2,
4203ad841b2Smrg 					true, NULL_TREE);
4213ad841b2Smrg   gimple_seq_add_seq_without_update (stmts, stmts2);
4223ad841b2Smrg   if (POINTER_TYPE_P (TREE_TYPE (enddiff)))
4233ad841b2Smrg     {
4243ad841b2Smrg       tree tem = gimple_convert (stmts, sizetype, enddiff);
4253ad841b2Smrg       tem = gimple_build (stmts, NEGATE_EXPR, sizetype, tem);
4263ad841b2Smrg       enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
4273ad841b2Smrg 			      TREE_TYPE (enddiff),
4283ad841b2Smrg 			      end, tem);
4293ad841b2Smrg     }
4303ad841b2Smrg   else
4313ad841b2Smrg     enddiff = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
4323ad841b2Smrg 			    end, enddiff);
4333ad841b2Smrg 
4343ad841b2Smrg   /* Compute guard_init + (end-beg).  */
4353ad841b2Smrg   tree newbound;
4363ad841b2Smrg   enddiff = gimple_convert (stmts, TREE_TYPE (guard_init), enddiff);
4373ad841b2Smrg   if (POINTER_TYPE_P (TREE_TYPE (guard_init)))
4383ad841b2Smrg     {
4393ad841b2Smrg       enddiff = gimple_convert (stmts, sizetype, enddiff);
4403ad841b2Smrg       newbound = gimple_build (stmts, POINTER_PLUS_EXPR,
4413ad841b2Smrg 			       TREE_TYPE (guard_init),
4423ad841b2Smrg 			       guard_init, enddiff);
4433ad841b2Smrg     }
4443ad841b2Smrg   else
4453ad841b2Smrg     newbound = gimple_build (stmts, PLUS_EXPR, TREE_TYPE (guard_init),
4463ad841b2Smrg 			     guard_init, enddiff);
4473ad841b2Smrg 
4483ad841b2Smrg   /* Depending on the direction of the IVs the new bound for the first
4493ad841b2Smrg      loop is the minimum or maximum of old bound and border.
4503ad841b2Smrg      Also, if the guard condition isn't strictly less or greater,
4513ad841b2Smrg      we need to adjust the bound.  */
4523ad841b2Smrg   int addbound = 0;
4533ad841b2Smrg   enum tree_code minmax;
4543ad841b2Smrg   if (niter->cmp == LT_EXPR)
4553ad841b2Smrg     {
4563ad841b2Smrg       /* GT and LE are the same, inverted.  */
4573ad841b2Smrg       if (guard_code == GT_EXPR || guard_code == LE_EXPR)
4583ad841b2Smrg 	addbound = -1;
4593ad841b2Smrg       minmax = MIN_EXPR;
4603ad841b2Smrg     }
4613ad841b2Smrg   else
4623ad841b2Smrg     {
4633ad841b2Smrg       gcc_assert (niter->cmp == GT_EXPR);
4643ad841b2Smrg       if (guard_code == GE_EXPR || guard_code == LT_EXPR)
4653ad841b2Smrg 	addbound = 1;
4663ad841b2Smrg       minmax = MAX_EXPR;
4673ad841b2Smrg     }
4683ad841b2Smrg 
4693ad841b2Smrg   if (addbound)
4703ad841b2Smrg     {
4713ad841b2Smrg       tree type2 = TREE_TYPE (newbound);
4723ad841b2Smrg       if (POINTER_TYPE_P (type2))
4733ad841b2Smrg 	type2 = sizetype;
4743ad841b2Smrg       newbound = gimple_build (stmts,
4753ad841b2Smrg 			       POINTER_TYPE_P (TREE_TYPE (newbound))
4763ad841b2Smrg 			       ? POINTER_PLUS_EXPR : PLUS_EXPR,
4773ad841b2Smrg 			       TREE_TYPE (newbound),
4783ad841b2Smrg 			       newbound,
4793ad841b2Smrg 			       build_int_cst (type2, addbound));
4803ad841b2Smrg     }
4813ad841b2Smrg 
4823ad841b2Smrg   tree newend = gimple_build (stmts, minmax, TREE_TYPE (border),
4833ad841b2Smrg 			      border, newbound);
4843ad841b2Smrg   return newend;
4853ad841b2Smrg }
4863ad841b2Smrg 
4873ad841b2Smrg /* Checks if LOOP contains an conditional block whose condition
4883ad841b2Smrg    depends on which side in the iteration space it is, and if so
4893ad841b2Smrg    splits the iteration space into two loops.  Returns true if the
4903ad841b2Smrg    loop was split.  NITER must contain the iteration descriptor for the
4913ad841b2Smrg    single exit of LOOP.  */
4923ad841b2Smrg 
4933ad841b2Smrg static bool
split_loop(class loop * loop1)494*8feb0f0bSmrg split_loop (class loop *loop1)
4953ad841b2Smrg {
496*8feb0f0bSmrg   class tree_niter_desc niter;
4973ad841b2Smrg   basic_block *bbs;
4983ad841b2Smrg   unsigned i;
4993ad841b2Smrg   bool changed = false;
5003ad841b2Smrg   tree guard_iv;
5013ad841b2Smrg   tree border = NULL_TREE;
5023ad841b2Smrg   affine_iv iv;
5033ad841b2Smrg 
504*8feb0f0bSmrg   if (!single_exit (loop1)
505*8feb0f0bSmrg       /* ??? We could handle non-empty latches when we split the latch edge
506*8feb0f0bSmrg 	 (not the exit edge), and put the new exit condition in the new block.
507*8feb0f0bSmrg 	 OTOH this executes some code unconditionally that might have been
508*8feb0f0bSmrg 	 skipped by the original exit before.  */
509*8feb0f0bSmrg       || !empty_block_p (loop1->latch)
510*8feb0f0bSmrg       || !easy_exit_values (loop1)
511*8feb0f0bSmrg       || !number_of_iterations_exit (loop1, single_exit (loop1), &niter,
512*8feb0f0bSmrg 				     false, true)
513*8feb0f0bSmrg       || niter.cmp == ERROR_MARK
514*8feb0f0bSmrg       /* We can't yet handle loops controlled by a != predicate.  */
515*8feb0f0bSmrg       || niter.cmp == NE_EXPR)
516*8feb0f0bSmrg     return false;
517*8feb0f0bSmrg 
5183ad841b2Smrg   bbs = get_loop_body (loop1);
5193ad841b2Smrg 
520*8feb0f0bSmrg   if (!can_copy_bbs_p (bbs, loop1->num_nodes))
521*8feb0f0bSmrg     {
522*8feb0f0bSmrg       free (bbs);
523*8feb0f0bSmrg       return false;
524*8feb0f0bSmrg     }
525*8feb0f0bSmrg 
5263ad841b2Smrg   /* Find a splitting opportunity.  */
5273ad841b2Smrg   for (i = 0; i < loop1->num_nodes; i++)
5283ad841b2Smrg     if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv)))
5293ad841b2Smrg       {
5303ad841b2Smrg 	/* Handling opposite steps is not implemented yet.  Neither
5313ad841b2Smrg 	   is handling different step sizes.  */
5323ad841b2Smrg 	if ((tree_int_cst_sign_bit (iv.step)
533*8feb0f0bSmrg 	     != tree_int_cst_sign_bit (niter.control.step))
534*8feb0f0bSmrg 	    || !tree_int_cst_equal (iv.step, niter.control.step))
5353ad841b2Smrg 	  continue;
5363ad841b2Smrg 
5373ad841b2Smrg 	/* Find a loop PHI node that defines guard_iv directly,
5383ad841b2Smrg 	   or create one doing that.  */
5393ad841b2Smrg 	gphi *phi = find_or_create_guard_phi (loop1, guard_iv, &iv);
5403ad841b2Smrg 	if (!phi)
5413ad841b2Smrg 	  continue;
5423ad841b2Smrg 	gcond *guard_stmt = as_a<gcond *> (last_stmt (bbs[i]));
5433ad841b2Smrg 	tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
5443ad841b2Smrg 						 loop_preheader_edge (loop1));
5453ad841b2Smrg 	enum tree_code guard_code = gimple_cond_code (guard_stmt);
5463ad841b2Smrg 
5473ad841b2Smrg 	/* Loop splitting is implemented by versioning the loop, placing
5483ad841b2Smrg 	   the new loop after the old loop, make the first loop iterate
5493ad841b2Smrg 	   as long as the conditional stays true (or false) and let the
5503ad841b2Smrg 	   second (new) loop handle the rest of the iterations.
5513ad841b2Smrg 
5523ad841b2Smrg 	   First we need to determine if the condition will start being true
5533ad841b2Smrg 	   or false in the first loop.  */
5543ad841b2Smrg 	bool initial_true;
5553ad841b2Smrg 	switch (guard_code)
5563ad841b2Smrg 	  {
5573ad841b2Smrg 	    case LT_EXPR:
5583ad841b2Smrg 	    case LE_EXPR:
5593ad841b2Smrg 	      initial_true = !tree_int_cst_sign_bit (iv.step);
5603ad841b2Smrg 	      break;
5613ad841b2Smrg 	    case GT_EXPR:
5623ad841b2Smrg 	    case GE_EXPR:
5633ad841b2Smrg 	      initial_true = tree_int_cst_sign_bit (iv.step);
5643ad841b2Smrg 	      break;
5653ad841b2Smrg 	    default:
5663ad841b2Smrg 	      gcc_unreachable ();
5673ad841b2Smrg 	  }
5683ad841b2Smrg 
5693ad841b2Smrg 	/* Build a condition that will skip the first loop when the
5703ad841b2Smrg 	   guard condition won't ever be true (or false).  */
5713ad841b2Smrg 	gimple_seq stmts2;
5723ad841b2Smrg 	border = force_gimple_operand (border, &stmts2, true, NULL_TREE);
5733ad841b2Smrg 	if (stmts2)
5743ad841b2Smrg 	  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
5753ad841b2Smrg 					    stmts2);
5763ad841b2Smrg 	tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
5773ad841b2Smrg 	if (!initial_true)
5783ad841b2Smrg 	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
5793ad841b2Smrg 
5803ad841b2Smrg 	/* Now version the loop, placing loop2 after loop1 connecting
5813ad841b2Smrg 	   them, and fix up SSA form for that.  */
5823ad841b2Smrg 	initialize_original_copy_tables ();
5833ad841b2Smrg 	basic_block cond_bb;
584a2dc1f3fSmrg 
585*8feb0f0bSmrg 	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
586a2dc1f3fSmrg 					   profile_probability::always (),
587a2dc1f3fSmrg 					   profile_probability::always (),
588a2dc1f3fSmrg 					   profile_probability::always (),
589a2dc1f3fSmrg 					   profile_probability::always (),
5903ad841b2Smrg 					   true);
5913ad841b2Smrg 	gcc_assert (loop2);
5923ad841b2Smrg 	update_ssa (TODO_update_ssa);
5933ad841b2Smrg 
5943ad841b2Smrg 	edge new_e = connect_loops (loop1, loop2);
5953ad841b2Smrg 	connect_loop_phis (loop1, loop2, new_e);
5963ad841b2Smrg 
5973ad841b2Smrg 	/* The iterations of the second loop is now already
5983ad841b2Smrg 	   exactly those that the first loop didn't do, but the
5993ad841b2Smrg 	   iteration space of the first loop is still the original one.
6003ad841b2Smrg 	   Compute the new bound for the guarding IV and patch the
6013ad841b2Smrg 	   loop exit to use it instead of original IV and bound.  */
6023ad841b2Smrg 	gimple_seq stmts = NULL;
603*8feb0f0bSmrg 	tree newend = compute_new_first_bound (&stmts, &niter, border,
6043ad841b2Smrg 					       guard_code, guard_init);
6053ad841b2Smrg 	if (stmts)
6063ad841b2Smrg 	  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
6073ad841b2Smrg 					    stmts);
6083ad841b2Smrg 	tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
6093ad841b2Smrg 	patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true);
6103ad841b2Smrg 
6113ad841b2Smrg 	/* Finally patch out the two copies of the condition to be always
6123ad841b2Smrg 	   true/false (or opposite).  */
6133ad841b2Smrg 	gcond *force_true = as_a<gcond *> (last_stmt (bbs[i]));
6143ad841b2Smrg 	gcond *force_false = as_a<gcond *> (last_stmt (get_bb_copy (bbs[i])));
6153ad841b2Smrg 	if (!initial_true)
6163ad841b2Smrg 	  std::swap (force_true, force_false);
6173ad841b2Smrg 	gimple_cond_make_true (force_true);
6183ad841b2Smrg 	gimple_cond_make_false (force_false);
6193ad841b2Smrg 	update_stmt (force_true);
6203ad841b2Smrg 	update_stmt (force_false);
6213ad841b2Smrg 
6223ad841b2Smrg 	free_original_copy_tables ();
6233ad841b2Smrg 
6243ad841b2Smrg 	/* We destroyed LCSSA form above.  Eventually we might be able
6253ad841b2Smrg 	   to fix it on the fly, for now simply punt and use the helper.  */
6263ad841b2Smrg 	rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop1);
6273ad841b2Smrg 
6283ad841b2Smrg 	changed = true;
6293ad841b2Smrg 	if (dump_file && (dump_flags & TDF_DETAILS))
6303ad841b2Smrg 	  fprintf (dump_file, ";; Loop split.\n");
6313ad841b2Smrg 
6323ad841b2Smrg 	/* Only deal with the first opportunity.  */
6333ad841b2Smrg 	break;
6343ad841b2Smrg       }
6353ad841b2Smrg 
6363ad841b2Smrg   free (bbs);
6373ad841b2Smrg   return changed;
6383ad841b2Smrg }
6393ad841b2Smrg 
/* Another transformation of loops like:

   for (i = INIT (); CHECK (i); i = NEXT ())
     {
       if (expr (a_1, a_2, ..., a_n))  // expr is pure
         a_j = ...;  // change at least one a_j
       else
         S;          // not change any a_j
     }

   into:

   for (i = INIT (); CHECK (i); i = NEXT ())
     {
       if (expr (a_1, a_2, ..., a_n))
         a_j = ...;
       else
         {
           S;
           i = NEXT ();
           break;
         }
     }

   for (; CHECK (i); i = NEXT ())
     {
       S;
     }

   */
670*8feb0f0bSmrg 
671*8feb0f0bSmrg /* Data structure to hold temporary information during loop split upon
672*8feb0f0bSmrg    semi-invariant conditional statement.  */
673*8feb0f0bSmrg class split_info {
674*8feb0f0bSmrg public:
675*8feb0f0bSmrg   /* Array of all basic blocks in a loop, returned by get_loop_body().  */
676*8feb0f0bSmrg   basic_block *bbs;
677*8feb0f0bSmrg 
678*8feb0f0bSmrg   /* All memory store/clobber statements in a loop.  */
679*8feb0f0bSmrg   auto_vec<gimple *> memory_stores;
680*8feb0f0bSmrg 
681*8feb0f0bSmrg   /* Whether above memory stores vector has been filled.  */
682*8feb0f0bSmrg   int need_init;
683*8feb0f0bSmrg 
684*8feb0f0bSmrg   /* Control dependencies of basic blocks in a loop.  */
685*8feb0f0bSmrg   auto_vec<hash_set<basic_block> *> control_deps;
686*8feb0f0bSmrg 
split_info()687*8feb0f0bSmrg   split_info () : bbs (NULL),  need_init (true) { }
688*8feb0f0bSmrg 
~split_info()689*8feb0f0bSmrg   ~split_info ()
690*8feb0f0bSmrg     {
691*8feb0f0bSmrg       if (bbs)
692*8feb0f0bSmrg 	free (bbs);
693*8feb0f0bSmrg 
694*8feb0f0bSmrg       for (unsigned i = 0; i < control_deps.length (); i++)
695*8feb0f0bSmrg 	delete control_deps[i];
696*8feb0f0bSmrg     }
697*8feb0f0bSmrg };
698*8feb0f0bSmrg 
/* Find all statements with memory-write effect in LOOP, including memory
   store and non-pure function call, and keep those in a vector.  This work
   is only done one time, for the vector should be constant during analysis
   stage of semi-invariant condition.  */

static void
find_vdef_in_loop (struct loop *loop)
{
  split_info *info = (split_info *) loop->aux;
  gphi *vphi = get_virtual_phi (loop->header);

  /* Indicate memory store vector has been filled.  */
  info->need_init = false;

  /* If loop contains memory operation, there must be a virtual PHI node in
     loop header basic block.  */
  if (vphi == NULL)
    return;

  /* All virtual SSA names inside the loop are connected to be a cyclic
     graph via virtual PHI nodes.  The virtual PHI node in loop header just
     links the first and the last virtual SSA names, by using the last as
     PHI operand to define the first.  */
  const edge latch = loop_latch_edge (loop);
  const tree first = gimple_phi_result (vphi);
  const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);

  /* The virtual SSA cyclic graph might consist of only one SSA name, who
     is defined by itself.

       .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>

     This means the loop contains only memory loads, so we can skip it.  */
  if (first == last)
    return;

  /* Non-call stores are buffered separately so that, at the end, calls end
     up ahead of them in info->memory_stores (see comment below).  */
  auto_vec<gimple *> other_stores;
  auto_vec<tree> worklist;
  auto_bitmap visited;

  bitmap_set_bit (visited, SSA_NAME_VERSION (first));
  bitmap_set_bit (visited, SSA_NAME_VERSION (last));
  worklist.safe_push (last);

  do
    {
      tree vuse = worklist.pop ();
      gimple *stmt = SSA_NAME_DEF_STMT (vuse);

      /* We mark the first and last SSA names as visited at the beginning,
	 and reversely start the process from the last SSA name towards the
	 first, which ensures that this do-while will not touch SSA names
	 defined outside the loop.  */
      gcc_assert (gimple_bb (stmt)
		  && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));

      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  gphi *phi = as_a <gphi *> (stmt);

	  /* A virtual PHI merges several incoming virtual operands; keep
	     walking backwards through each not-yet-visited one.  */
	  for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
	    {
	      tree arg = gimple_phi_arg_def (stmt, i);

	      if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
		worklist.safe_push (arg);
	    }
	}
      else
	{
	  tree prev = gimple_vuse (stmt);

	  /* Non-pure call statement is conservatively assumed to impact all
	     memory locations.  So place call statements ahead of other memory
	     stores in the vector with an idea of using them as shortcut
	     terminators to memory alias analysis.  */
	  if (gimple_code (stmt) == GIMPLE_CALL)
	    info->memory_stores.safe_push (stmt);
	  else
	    other_stores.safe_push (stmt);

	  /* Continue backwards via the statement's own virtual use.  */
	  if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
	    worklist.safe_push (prev);
	}
    } while (!worklist.is_empty ());

    /* Append the plain stores after the (already pushed) call statements.  */
    info->memory_stores.safe_splice (other_stores);
}
787*8feb0f0bSmrg 
788*8feb0f0bSmrg /* Two basic blocks have equivalent control dependency if one dominates to
789*8feb0f0bSmrg    the other, and it is post-dominated by the latter.  Given a basic block
790*8feb0f0bSmrg    BB in LOOP, find farest equivalent dominating basic block.  For BB, there
791*8feb0f0bSmrg    is a constraint that BB does not post-dominate loop header of LOOP, this
792*8feb0f0bSmrg    means BB is control-dependent on at least one basic block in LOOP.  */
793*8feb0f0bSmrg 
794*8feb0f0bSmrg static basic_block
get_control_equiv_head_block(struct loop * loop,basic_block bb)795*8feb0f0bSmrg get_control_equiv_head_block (struct loop *loop, basic_block bb)
796*8feb0f0bSmrg {
797*8feb0f0bSmrg   while (!bb->aux)
798*8feb0f0bSmrg     {
799*8feb0f0bSmrg       basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
800*8feb0f0bSmrg 
801*8feb0f0bSmrg       gcc_checking_assert (dom_bb && flow_bb_inside_loop_p (loop, dom_bb));
802*8feb0f0bSmrg 
803*8feb0f0bSmrg       if (!dominated_by_p (CDI_POST_DOMINATORS, dom_bb, bb))
804*8feb0f0bSmrg 	break;
805*8feb0f0bSmrg 
806*8feb0f0bSmrg       bb = dom_bb;
807*8feb0f0bSmrg     }
808*8feb0f0bSmrg   return bb;
809*8feb0f0bSmrg }
810*8feb0f0bSmrg 
/* Given a BB in LOOP, find out all basic blocks in LOOP that BB is control-
   dependent on.  Returns NULL if BB is control-dependent on nothing inside
   LOOP.  The returned set is cached in BB->aux (and in the aux field of all
   control-equivalent blocks), and ownership is recorded in LOOP's
   split_info so it is reclaimed when the analysis finishes; the caller must
   not free it.  */

static hash_set<basic_block> *
find_control_dep_blocks (struct loop *loop, basic_block bb)
{
  /* BB has same control dependency as loop header, then it is not control-
     dependent on any basic block in LOOP.  */
  if (dominated_by_p (CDI_POST_DOMINATORS, loop->header, bb))
    return NULL;

  basic_block equiv_head = get_control_equiv_head_block (loop, bb);

  if (equiv_head->aux)
    {
      /* There is a basic block containing control dependency equivalent
	 to BB.  No need to recompute that, and also set this information
	 to other equivalent basic blocks.  */
      for (; bb != equiv_head;
	   bb = get_immediate_dominator (CDI_DOMINATORS, bb))
	bb->aux = equiv_head->aux;
      return (hash_set<basic_block> *) equiv_head->aux;
    }

  /* A basic block X is control-dependent on another Y iff there exists
     a path from X to Y, in which every basic block other than X and Y
     is post-dominated by Y, but X is not post-dominated by Y.

     According to this rule, traverse basic blocks in the loop backwards
     starting from BB, if a basic block is post-dominated by BB, extend
     current post-dominating path to this block, otherwise it is another
     one that BB is control-dependent on.  */

  auto_vec<basic_block> pdom_worklist;
  hash_set<basic_block> pdom_visited;
  hash_set<basic_block> *dep_bbs = new hash_set<basic_block>;

  pdom_worklist.safe_push (equiv_head);

  do
    {
      basic_block pdom_bb = pdom_worklist.pop ();
      edge_iterator ei;
      edge e;

      if (pdom_visited.add (pdom_bb))
	continue;

      FOR_EACH_EDGE (e, ei, pdom_bb->preds)
	{
	  basic_block pred_bb = e->src;

	  /* A predecessor not post-dominated by BB terminates the current
	     post-dominating path: it is one of the blocks BB is control-
	     dependent on.  */
	  if (!dominated_by_p (CDI_POST_DOMINATORS, pred_bb, bb))
	    {
	      dep_bbs->add (pred_bb);
	      continue;
	    }

	  pred_bb = get_control_equiv_head_block (loop, pred_bb);

	  if (pdom_visited.contains (pred_bb))
	    continue;

	  if (!pred_bb->aux)
	    {
	      pdom_worklist.safe_push (pred_bb);
	      continue;
	    }

	  /* If control dependency of basic block is available, fast extend
	     post-dominating path using the information instead of advancing
	     forward step-by-step.  */
	  hash_set<basic_block> *pred_dep_bbs
			= (hash_set<basic_block> *) pred_bb->aux;

	  for (hash_set<basic_block>::iterator iter = pred_dep_bbs->begin ();
	       iter != pred_dep_bbs->end (); ++iter)
	    {
	      basic_block pred_dep_bb = *iter;

	      /* Basic blocks can either be in control dependency of BB, or
		 must be post-dominated by BB, if so, extend the path from
		 these basic blocks.  */
	      if (!dominated_by_p (CDI_POST_DOMINATORS, pred_dep_bb, bb))
		dep_bbs->add (pred_dep_bb);
	      else if (!pdom_visited.contains (pred_dep_bb))
		pdom_worklist.safe_push (pred_dep_bb);
	    }
	}
    } while (!pdom_worklist.is_empty ());

  /* Record computed control dependencies in loop so that we can reach them
     when reclaiming resources.  */
  ((split_info *) loop->aux)->control_deps.safe_push (dep_bbs);

  /* Associate control dependence with related equivalent basic blocks.  */
  for (equiv_head->aux = dep_bbs; bb != equiv_head;
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    bb->aux = dep_bbs;

  return dep_bbs;
}
913*8feb0f0bSmrg 
914*8feb0f0bSmrg /* Forward declaration */
915*8feb0f0bSmrg 
916*8feb0f0bSmrg static bool
917*8feb0f0bSmrg stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
918*8feb0f0bSmrg 			 const_basic_block skip_head,
919*8feb0f0bSmrg 			 hash_map<gimple *, bool> &stmt_stat);
920*8feb0f0bSmrg 
921*8feb0f0bSmrg /* Given STMT, memory load or pure call statement, check whether it is impacted
922*8feb0f0bSmrg    by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the
923*8feb0f0bSmrg    trace is composed of SKIP_HEAD and those basic block dominated by it, always
924*8feb0f0bSmrg    corresponds to one branch of a conditional statement).  If SKIP_HEAD is
925*8feb0f0bSmrg    NULL, all basic blocks of LOOP are checked.  */
926*8feb0f0bSmrg 
927*8feb0f0bSmrg static bool
vuse_semi_invariant_p(struct loop * loop,gimple * stmt,const_basic_block skip_head)928*8feb0f0bSmrg vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
929*8feb0f0bSmrg 		       const_basic_block skip_head)
930*8feb0f0bSmrg {
931*8feb0f0bSmrg   split_info *info = (split_info *) loop->aux;
932*8feb0f0bSmrg   tree rhs = NULL_TREE;
933*8feb0f0bSmrg   ao_ref ref;
934*8feb0f0bSmrg   gimple *store;
935*8feb0f0bSmrg   unsigned i;
936*8feb0f0bSmrg 
937*8feb0f0bSmrg   /* Collect memory store/clobber statements if haven't done that.  */
938*8feb0f0bSmrg   if (info->need_init)
939*8feb0f0bSmrg     find_vdef_in_loop (loop);
940*8feb0f0bSmrg 
941*8feb0f0bSmrg   if (is_gimple_assign (stmt))
942*8feb0f0bSmrg     rhs = gimple_assign_rhs1 (stmt);
943*8feb0f0bSmrg 
944*8feb0f0bSmrg   ao_ref_init (&ref, rhs);
945*8feb0f0bSmrg 
946*8feb0f0bSmrg   FOR_EACH_VEC_ELT (info->memory_stores, i, store)
947*8feb0f0bSmrg     {
948*8feb0f0bSmrg       /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL.  */
949*8feb0f0bSmrg       if (skip_head
950*8feb0f0bSmrg 	  && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
951*8feb0f0bSmrg 	continue;
952*8feb0f0bSmrg 
953*8feb0f0bSmrg       if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
954*8feb0f0bSmrg 	return false;
955*8feb0f0bSmrg     }
956*8feb0f0bSmrg 
957*8feb0f0bSmrg   return true;
958*8feb0f0bSmrg }
959*8feb0f0bSmrg 
960*8feb0f0bSmrg /* Suppose one condition branch, led by SKIP_HEAD, is not executed since
961*8feb0f0bSmrg    certain iteration of LOOP, check whether an SSA name (NAME) remains
962*8feb0f0bSmrg    unchanged in next iteration.  We call this characteristic semi-
963*8feb0f0bSmrg    invariantness.  SKIP_HEAD might be NULL, if so, nothing excluded, all basic
964*8feb0f0bSmrg    blocks and control flows in the loop will be considered.  Semi-invariant
965*8feb0f0bSmrg    state of checked statement is cached in hash map STMT_STAT to avoid
966*8feb0f0bSmrg    redundant computation in possible following re-check.  */
967*8feb0f0bSmrg 
968*8feb0f0bSmrg static inline bool
ssa_semi_invariant_p(struct loop * loop,tree name,const_basic_block skip_head,hash_map<gimple *,bool> & stmt_stat)969*8feb0f0bSmrg ssa_semi_invariant_p (struct loop *loop, tree name,
970*8feb0f0bSmrg 		      const_basic_block skip_head,
971*8feb0f0bSmrg 		      hash_map<gimple *, bool> &stmt_stat)
972*8feb0f0bSmrg {
973*8feb0f0bSmrg   gimple *def = SSA_NAME_DEF_STMT (name);
974*8feb0f0bSmrg   const_basic_block def_bb = gimple_bb (def);
975*8feb0f0bSmrg 
976*8feb0f0bSmrg   /* An SSA name defined outside loop is definitely semi-invariant.  */
977*8feb0f0bSmrg   if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
978*8feb0f0bSmrg     return true;
979*8feb0f0bSmrg 
980*8feb0f0bSmrg   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
981*8feb0f0bSmrg     return false;
982*8feb0f0bSmrg 
983*8feb0f0bSmrg   return stmt_semi_invariant_p_1 (loop, def, skip_head, stmt_stat);
984*8feb0f0bSmrg }
985*8feb0f0bSmrg 
/* Check whether a loop iteration PHI node (LOOP_PHI) defines a value that is
   semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if non-NULL),
   are excluded from LOOP.  */

static bool
loop_iter_phi_semi_invariant_p (struct loop *loop, gphi *loop_phi,
				const_basic_block skip_head)
{
  const_edge latch = loop_latch_edge (loop);
  tree name = gimple_phi_result (loop_phi);
  tree from = PHI_ARG_DEF_FROM_EDGE (loop_phi, latch);

  gcc_checking_assert (from);

  /* Loop iteration PHI node locates in loop header, and it has two source
     operands, one is an initial value coming from outside the loop, the other
     is a value through latch of the loop, which is derived in last iteration,
     we call the latter latch value.  From the PHI node to definition of latch
     value, if excluding branch trace starting from SKIP_HEAD, except copy-
     assignment or likewise, there is no other kind of value redefinition, SSA
     name defined by the PHI node is semi-invariant.

                         loop entry
                              |     .--- latch ---.
                              |     |             |
                              v     v             |
                  x_1 = PHI <x_0,  x_3>           |
                           |                      |
                           v                      |
              .------- if (cond) -------.         |
              |                         |         |
              |                     [ SKIP ]      |
              |                         |         |
              |                     x_2 = ...     |
              |                         |         |
              '---- T ---->.<---- F ----'         |
                           |                      |
                           v                      |
                  x_3 = PHI <x_1, x_2>            |
                           |                      |
                           '----------------------'

     Suppose in certain iteration, execution flow in above graph goes through
     true branch, which means that one source value to define x_3 in false
     branch (x_2) is skipped, x_3 only comes from x_1, and x_1 in next
     iterations is defined by x_3, we know that x_1 will never changed if COND
     always chooses true branch from then on.  */

  /* Trace definitions backwards from the latch value until we either get
     back to NAME (closed copy cycle: semi-invariant) or hit a statement
     that introduces a genuinely new value (variant).  */
  while (from != name)
    {
      /* A new value comes from a CONSTANT.  */
      if (TREE_CODE (from) != SSA_NAME)
	return false;

      gimple *stmt = SSA_NAME_DEF_STMT (from);
      const_basic_block bb = gimple_bb (stmt);

      /* A new value comes from outside the loop.  */
      if (!bb || !flow_bb_inside_loop_p (loop, bb))
	return false;

      /* FROM is reset below only if STMT turns out to be a pure value
	 pass-through; staying NULL means "new value introduced".  */
      from = NULL_TREE;

      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  gphi *phi = as_a <gphi *> (stmt);

	  for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
	    {
	      if (skip_head)
		{
		  const_edge e = gimple_phi_arg_edge (phi, i);

		  /* Don't consider redefinitions in excluded basic blocks.  */
		  if (dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
		    continue;
		}

	      tree arg = gimple_phi_arg_def (phi, i);

	      if (!from)
		from = arg;
	      else if (!operand_equal_p (from, arg, 0))
		/* There are more than one source operands that provide
		   different values to the SSA name, it is variant.  */
		return false;
	    }
	}
      else if (gimple_code (stmt) == GIMPLE_ASSIGN)
	{
	  /* For simple value copy, check its rhs instead.  */
	  if (gimple_assign_ssa_name_copy_p (stmt))
	    from = gimple_assign_rhs1 (stmt);
	}

      /* Any other kind of definition is deemed to introduce a new value
	 to the SSA name.  */
      if (!from)
	return false;
    }
  return true;
}
1088*8feb0f0bSmrg 
1089*8feb0f0bSmrg /* Check whether conditional predicates that BB is control-dependent on, are
1090*8feb0f0bSmrg    semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if non-NULL),
1091*8feb0f0bSmrg    are excluded from LOOP.  Semi-invariant state of checked statement is cached
1092*8feb0f0bSmrg    in hash map STMT_STAT.  */
1093*8feb0f0bSmrg 
1094*8feb0f0bSmrg static bool
control_dep_semi_invariant_p(struct loop * loop,basic_block bb,const_basic_block skip_head,hash_map<gimple *,bool> & stmt_stat)1095*8feb0f0bSmrg control_dep_semi_invariant_p (struct loop *loop, basic_block bb,
1096*8feb0f0bSmrg 			      const_basic_block skip_head,
1097*8feb0f0bSmrg 			      hash_map<gimple *, bool> &stmt_stat)
1098*8feb0f0bSmrg {
1099*8feb0f0bSmrg   hash_set<basic_block> *dep_bbs = find_control_dep_blocks (loop, bb);
1100*8feb0f0bSmrg 
1101*8feb0f0bSmrg   if (!dep_bbs)
1102*8feb0f0bSmrg     return true;
1103*8feb0f0bSmrg 
1104*8feb0f0bSmrg   for (hash_set<basic_block>::iterator iter = dep_bbs->begin ();
1105*8feb0f0bSmrg        iter != dep_bbs->end (); ++iter)
1106*8feb0f0bSmrg     {
1107*8feb0f0bSmrg       gimple *last = last_stmt (*iter);
1108*8feb0f0bSmrg 
1109*8feb0f0bSmrg       if (!last)
1110*8feb0f0bSmrg 	return false;
1111*8feb0f0bSmrg 
1112*8feb0f0bSmrg       /* Only check condition predicates.  */
1113*8feb0f0bSmrg       if (gimple_code (last) != GIMPLE_COND
1114*8feb0f0bSmrg 	  && gimple_code (last) != GIMPLE_SWITCH)
1115*8feb0f0bSmrg 	return false;
1116*8feb0f0bSmrg 
1117*8feb0f0bSmrg       if (!stmt_semi_invariant_p_1 (loop, last, skip_head, stmt_stat))
1118*8feb0f0bSmrg 	return false;
1119*8feb0f0bSmrg     }
1120*8feb0f0bSmrg 
1121*8feb0f0bSmrg   return true;
1122*8feb0f0bSmrg }
1123*8feb0f0bSmrg 
/* Check whether STMT is semi-invariant in LOOP, iff all its operands are
   semi-invariant, consequently, all its defined values are semi-invariant.
   Basic blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP.
   Semi-invariant state of checked statement is cached in hash map
   STMT_STAT.  */

static bool
stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
			 const_basic_block skip_head,
			 hash_map<gimple *, bool> &stmt_stat)
{
  bool existed;
  bool &invar = stmt_stat.get_or_insert (stmt, &existed);

  /* Cached result from a previous (or in-progress) check.  */
  if (existed)
    return invar;

  /* A statement might depend on itself, which is treated as variant.  So set
     state of statement under check to be variant to ensure that.  */
  invar = false;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      gphi *phi = as_a <gphi *> (stmt);

      if (gimple_bb (stmt) == loop->header)
	{
	  /* If the entry value is subject to abnormal coalescing
	     avoid the transform since we're going to duplicate the
	     loop header and thus likely introduce overlapping life-ranges
	     between the PHI def and the entry on the path when the
	     first loop is skipped.  */
	  tree entry_def
	    = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
	  if (TREE_CODE (entry_def) == SSA_NAME
	      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (entry_def))
	    return false;
	  invar = loop_iter_phi_semi_invariant_p (loop, phi, skip_head);
	  return invar;
	}

      /* For a loop PHI node that does not locate in loop header, it is semi-
	 invariant only if two conditions are met.  The first is its source
	 values are derived from CONSTANT (including loop-invariant value), or
	 from SSA name defined by semi-invariant loop iteration PHI node.  The
	 second is its source incoming edges are control-dependent on semi-
	 invariant conditional predicates.  */
      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
	{
	  const_edge e = gimple_phi_arg_edge (phi, i);
	  tree arg = gimple_phi_arg_def (phi, i);

	  if (TREE_CODE (arg) == SSA_NAME)
	    {
	      if (!ssa_semi_invariant_p (loop, arg, skip_head, stmt_stat))
		return false;

	      /* If source value is defined in location from where the source
		 edge comes in, no need to check control dependency again
		 since this has been done in above SSA name check stage.  */
	      if (e->src == gimple_bb (SSA_NAME_DEF_STMT (arg)))
		continue;
	    }

	  if (!control_dep_semi_invariant_p (loop, e->src, skip_head,
					     stmt_stat))
	    return false;
	}
    }
  else
    {
      ssa_op_iter iter;
      tree use;

      /* Volatile memory load or return of normal (non-const/non-pure) call
	 should not be treated as constant in each iteration of loop.  */
      if (gimple_has_side_effects (stmt))
	return false;

      /* Check if any memory store may kill memory load at this place.  */
      if (gimple_vuse (stmt) && !vuse_semi_invariant_p (loop, stmt, skip_head))
	return false;

      /* Although operand of a statement might be SSA name, CONSTANT or
	 VARDECL, here we only need to check SSA name operands.  This is
	 because check on VARDECL operands, which involve memory loads,
	 must have been done prior to invocation of this function in
	 vuse_semi_invariant_p.  */
      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
	if (!ssa_semi_invariant_p (loop, use, skip_head, stmt_stat))
	  return false;
    }

  /* Finally the statement's own location must not sit under a variant
     conditional predicate.  */
  if (!control_dep_semi_invariant_p (loop, gimple_bb (stmt), skip_head,
				     stmt_stat))
    return false;

  /* Here we SHOULD NOT use invar = true, since hash map might be changed due
     to new insertion, and thus invar may point to invalid memory.  */
  stmt_stat.put (stmt, true);
  return true;
}
1226*8feb0f0bSmrg 
1227*8feb0f0bSmrg /* A helper function to check whether STMT is semi-invariant in LOOP.  Basic
1228*8feb0f0bSmrg    blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP.  */
1229*8feb0f0bSmrg 
1230*8feb0f0bSmrg static bool
stmt_semi_invariant_p(struct loop * loop,gimple * stmt,const_basic_block skip_head)1231*8feb0f0bSmrg stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
1232*8feb0f0bSmrg 		       const_basic_block skip_head)
1233*8feb0f0bSmrg {
1234*8feb0f0bSmrg   hash_map<gimple *, bool> stmt_stat;
1235*8feb0f0bSmrg   return stmt_semi_invariant_p_1 (loop, stmt, skip_head, stmt_stat);
1236*8feb0f0bSmrg }
1237*8feb0f0bSmrg 
1238*8feb0f0bSmrg /* Determine when conditional statement never transfers execution to one of its
1239*8feb0f0bSmrg    branch, whether we can remove the branch's leading basic block (BRANCH_BB)
1240*8feb0f0bSmrg    and those basic blocks dominated by BRANCH_BB.  */
1241*8feb0f0bSmrg 
1242*8feb0f0bSmrg static bool
branch_removable_p(basic_block branch_bb)1243*8feb0f0bSmrg branch_removable_p (basic_block branch_bb)
1244*8feb0f0bSmrg {
1245*8feb0f0bSmrg   edge_iterator ei;
1246*8feb0f0bSmrg   edge e;
1247*8feb0f0bSmrg 
1248*8feb0f0bSmrg   if (single_pred_p (branch_bb))
1249*8feb0f0bSmrg     return true;
1250*8feb0f0bSmrg 
1251*8feb0f0bSmrg   FOR_EACH_EDGE (e, ei, branch_bb->preds)
1252*8feb0f0bSmrg     {
1253*8feb0f0bSmrg       if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
1254*8feb0f0bSmrg 	continue;
1255*8feb0f0bSmrg 
1256*8feb0f0bSmrg       if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
1257*8feb0f0bSmrg 	continue;
1258*8feb0f0bSmrg 
1259*8feb0f0bSmrg        /* The branch can be reached from opposite branch, or from some
1260*8feb0f0bSmrg 	  statement not dominated by the conditional statement.  */
1261*8feb0f0bSmrg       return false;
1262*8feb0f0bSmrg     }
1263*8feb0f0bSmrg 
1264*8feb0f0bSmrg   return true;
1265*8feb0f0bSmrg }
1266*8feb0f0bSmrg 
/* Find out which branch of a conditional statement (COND) is invariant in the
   execution context of LOOP.  That is: once the branch is selected in certain
   iteration of the loop, any operand that contributes to computation of the
   conditional statement remains unchanged in all following iterations.
   Return the semi-invariant branch edge, or NULL if there is none, or if
   splitting on it would not be useful.  */

static edge
get_cond_invariant_branch (struct loop *loop, gcond *cond)
{
  basic_block cond_bb = gimple_bb (cond);
  basic_block targ_bb[2];
  /* invar[k] records whether branch k of COND is semi-invariant, i.e. once
     branch k is selected in some iteration, COND keeps evaluating the same
     way in all following iterations.  */
  bool invar[2];
  /* Number of stmt_semi_invariant_p () calls actually performed below.  */
  unsigned invar_checks = 0;

  for (unsigned i = 0; i < 2; i++)
    {
      targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;

      /* One branch directs to loop exit, no need to perform loop split upon
	 this conditional statement.  Firstly, it is trivial if the exit branch
	 is semi-invariant, for the statement is just to break loop.  Secondly,
	 if the opposite branch is semi-invariant, it means that the statement
	 is real loop-invariant, which is covered by loop unswitch.  */
      if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
	return NULL;
    }

  for (unsigned i = 0; i < 2; i++)
    {
      invar[!i] = false;

      /* Branch i must be removable once COND becomes constant in the split
	 loop; otherwise splitting gains nothing on this side.  */
      if (!branch_removable_p (targ_bb[i]))
	continue;

      /* Given a semi-invariant branch, if its opposite branch dominates
	 loop latch, it and its following trace will only be executed in
	 final iteration of loop, namely it is not part of repeated body
	 of the loop.  Similar to the above case that the branch is loop
	 exit, no need to split loop.  */
      if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
	continue;

      /* Check semi-invariance of COND while skipping the trace headed by
	 branch i; if it holds, the opposite branch !i is the invariant
	 one.  */
      invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
      invar_checks++;
    }

  /* With both branches being invariant (handled by loop unswitch) or
     variant is not what we want.  This condition is false exactly when
     one branch is semi-invariant and the other is not.  */
  if (invar[0] ^ !invar[1])
    return NULL;

  /* Found a real loop-invariant condition, do nothing.  If only one branch
     was actually checked above, COND might still be fully invariant, so
     verify with an unrestricted (skip_head == NULL) check.  */
  if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
    return NULL;

  return EDGE_SUCC (cond_bb, invar[0] ? 0 : 1);
}
1323*8feb0f0bSmrg 
/* Calculate increased code size measured by estimated insn number if applying
   loop split upon certain branch (BRANCH_EDGE) of a conditional statement.
   The increase is the size of the blocks that would be duplicated into the
   new loop, minus the size of the statements that only feed the condition
   (which becomes constant there).  */

static int
compute_added_num_insns (struct loop *loop, const_edge branch_edge)
{
  basic_block cond_bb = branch_edge->src;
  /* Index (0 or 1) of BRANCH_EDGE among the successors of its source.  */
  unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
  basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
  basic_block *bbs = ((split_info *) loop->aux)->bbs;
  int num = 0;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      /* Do not count basic blocks only in opposite branch.  */
      if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
	continue;

      num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
    }

  /* It is unnecessary to evaluate expression of the conditional statement
     in new loop that contains only invariant branch.  This expression should
     be constant value (either true or false).  Exclude code size of insns
     that contribute to computation of the expression.  */

  auto_vec<gimple *> worklist;
  hash_set<gimple *> removed;
  gimple *stmt = last_stmt (cond_bb);

  /* Seed the backward walk with the conditional statement itself.  */
  worklist.safe_push (stmt);
  removed.add (stmt);
  num -= estimate_num_insns (stmt, &eni_size_weights);

  do
    {
      ssa_op_iter opnd_iter;
      use_operand_p opnd_p;

      stmt = worklist.pop ();
      FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
	{
	  tree opnd = USE_FROM_PTR (opnd_p);

	  /* Default definitions have no defining statement to exclude.  */
	  if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
	    continue;

	  gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
	  use_operand_p use_p;
	  imm_use_iterator use_iter;

	  /* Skip definitions already counted as removed, or located
	     outside the loop (they are not duplicated anyway).  */
	  if (removed.contains (opnd_stmt)
	      || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
	    continue;

	  /* The defining statement can only be discounted when every one
	     of its non-debug uses is itself already discounted; otherwise
	     its result is still needed in the new loop.  */
	  FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
	    {
	      gimple *use_stmt = USE_STMT (use_p);

	      if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
		{
		  opnd_stmt = NULL;
		  break;
		}
	    }

	  if (opnd_stmt)
	    {
	      worklist.safe_push (opnd_stmt);
	      removed.add (opnd_stmt);
	      num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
	    }
	}
    } while (!worklist.is_empty ());

  gcc_assert (num >= 0);
  return num;
}
1402*8feb0f0bSmrg 
1403*8feb0f0bSmrg /* Find out loop-invariant branch of a conditional statement (COND) if it has,
1404*8feb0f0bSmrg    and check whether it is eligible and profitable to perform loop split upon
1405*8feb0f0bSmrg    this branch in LOOP.  */
1406*8feb0f0bSmrg 
1407*8feb0f0bSmrg static edge
get_cond_branch_to_split_loop(struct loop * loop,gcond * cond)1408*8feb0f0bSmrg get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
1409*8feb0f0bSmrg {
1410*8feb0f0bSmrg   edge invar_branch = get_cond_invariant_branch (loop, cond);
1411*8feb0f0bSmrg   if (!invar_branch)
1412*8feb0f0bSmrg     return NULL;
1413*8feb0f0bSmrg 
1414*8feb0f0bSmrg   /* When accurate profile information is available, and execution
1415*8feb0f0bSmrg      frequency of the branch is too low, just let it go.  */
1416*8feb0f0bSmrg   profile_probability prob = invar_branch->probability;
1417*8feb0f0bSmrg   if (prob.reliable_p ())
1418*8feb0f0bSmrg     {
1419*8feb0f0bSmrg       int thres = param_min_loop_cond_split_prob;
1420*8feb0f0bSmrg 
1421*8feb0f0bSmrg       if (prob < profile_probability::always ().apply_scale (thres, 100))
1422*8feb0f0bSmrg 	return NULL;
1423*8feb0f0bSmrg     }
1424*8feb0f0bSmrg 
1425*8feb0f0bSmrg   /* Add a threshold for increased code size to disable loop split.  */
1426*8feb0f0bSmrg   if (compute_added_num_insns (loop, invar_branch) > param_max_peeled_insns)
1427*8feb0f0bSmrg     return NULL;
1428*8feb0f0bSmrg 
1429*8feb0f0bSmrg   return invar_branch;
1430*8feb0f0bSmrg }
1431*8feb0f0bSmrg 
1432*8feb0f0bSmrg /* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
1433*8feb0f0bSmrg    conditional statement, perform loop split transformation illustrated
1434*8feb0f0bSmrg    as the following graph.
1435*8feb0f0bSmrg 
1436*8feb0f0bSmrg                .-------T------ if (true) ------F------.
1437*8feb0f0bSmrg                |                    .---------------. |
1438*8feb0f0bSmrg                |                    |               | |
1439*8feb0f0bSmrg                v                    |               v v
1440*8feb0f0bSmrg           pre-header                |            pre-header
1441*8feb0f0bSmrg                | .------------.     |                 | .------------.
1442*8feb0f0bSmrg                | |            |     |                 | |            |
1443*8feb0f0bSmrg                | v            |     |                 | v            |
1444*8feb0f0bSmrg              header           |     |               header           |
1445*8feb0f0bSmrg                |              |     |                 |              |
1446*8feb0f0bSmrg       .--- if (cond) ---.     |     |        .--- if (true) ---.     |
1447*8feb0f0bSmrg       |                 |     |     |        |                 |     |
1448*8feb0f0bSmrg   invariant             |     |     |    invariant             |     |
1449*8feb0f0bSmrg       |                 |     |     |        |                 |     |
1450*8feb0f0bSmrg       '---T--->.<---F---'     |     |        '---T--->.<---F---'     |
1451*8feb0f0bSmrg                |              |    /                  |              |
1452*8feb0f0bSmrg              stmts            |   /                 stmts            |
1453*8feb0f0bSmrg                |              F  T                    |              |
1454*8feb0f0bSmrg               / \             | /                    / \             |
1455*8feb0f0bSmrg      .-------*   *      [ if (cond) ]       .-------*   *            |
1456*8feb0f0bSmrg      |           |            |             |           |            |
1457*8feb0f0bSmrg      |         latch          |             |         latch          |
1458*8feb0f0bSmrg      |           |            |             |           |            |
1459*8feb0f0bSmrg      |           '------------'             |           '------------'
1460*8feb0f0bSmrg      '------------------------. .-----------'
1461*8feb0f0bSmrg              loop1            | |                   loop2
1462*8feb0f0bSmrg                               v v
1463*8feb0f0bSmrg                              exits
1464*8feb0f0bSmrg 
1465*8feb0f0bSmrg    In the graph, loop1 represents the part derived from original one, and
1466*8feb0f0bSmrg    loop2 is duplicated using loop_version (), which corresponds to the part
1467*8feb0f0bSmrg    of original one being splitted out.  In original latch edge of loop1, we
1468*8feb0f0bSmrg    insert a new conditional statement duplicated from the semi-invariant cond,
1469*8feb0f0bSmrg    and one of its branch goes back to loop1 header as a latch edge, and the
1470*8feb0f0bSmrg    other branch goes to loop2 pre-header as an entry edge.  And also in loop2,
1471*8feb0f0bSmrg    we abandon the variant branch of the conditional statement by setting a
1472*8feb0f0bSmrg    constant bool condition, based on which branch is semi-invariant.  */
1473*8feb0f0bSmrg 
/* See the diagram above: split LOOP1 on the semi-invariant INVAR_BRANCH of
   one of its conditional statements.  Return true on success.  */

static bool
do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
{
  basic_block cond_bb = invar_branch->src;
  /* Whether the semi-invariant branch is the true edge of the condition.  */
  bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
  gcond *cond = as_a <gcond *> (last_stmt (cond_bb));

  gcc_assert (cond_bb->loop_father == loop1);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, cond,
		     "loop split on semi-invariant condition at %s branch\n",
		     true_invar ? "true" : "false");

  initialize_original_copy_tables ();

  /* Duplicate the loop with a constant-true versioning guard: the actual
     transfer from loop1 to loop2 happens through the new conditional
     statement inserted on loop1's latch edge below, not through the
     guard created by loop_version ().  */
  struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
				     profile_probability::always (),
				     profile_probability::never (),
				     profile_probability::always (),
				     profile_probability::always (),
				     true);
  if (!loop2)
    {
      free_original_copy_tables ();
      return false;
    }

  basic_block cond_bb_copy = get_bb_copy (cond_bb);
  gcond *cond_copy = as_a<gcond *> (last_stmt (cond_bb_copy));

  /* Replace the condition in loop2 with a bool constant to let PassManager
     remove the variant branch after current pass completes.  */
  if (true_invar)
    gimple_cond_make_true (cond_copy);
  else
    gimple_cond_make_false (cond_copy);

  update_stmt (cond_copy);

  /* Insert a new conditional statement on latch edge of loop1, its condition
     is duplicated from the semi-invariant.  This statement acts as a switch
     to transfer execution from loop1 to loop2, when loop1 enters into
     invariant state.  */
  basic_block latch_bb = split_edge (loop_latch_edge (loop1));
  basic_block break_bb = split_edge (single_pred_edge (latch_bb));
  gimple *break_cond = gimple_build_cond (gimple_cond_code(cond),
					  gimple_cond_lhs (cond),
					  gimple_cond_rhs (cond),
					  NULL_TREE, NULL_TREE);

  gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);

  edge to_loop1 = single_succ_edge (break_bb);
  edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);

  /* When the invariant branch is the true edge, reaching the invariant
     state means the condition evaluated true, so the true edge of the
     switch leaves for loop2 and the false edge stays in loop1 — and
     symmetrically for a false-edge invariant.  */
  to_loop1->flags &= ~EDGE_FALLTHRU;
  to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
  to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;

  update_ssa (TODO_update_ssa);

  /* Due to introduction of a control flow edge from loop1 latch to loop2
     pre-header, we should update PHIs in loop2 to reflect this connection
     between loop1 and loop2.  */
  connect_loop_phis (loop1, loop2, to_loop2);

  free_original_copy_tables ();

  /* Values now flow out of loop1 along TO_LOOP2 as well; restore
     loop-closed SSA form for loop1.  */
  rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop1);

  return true;
}
1548*8feb0f0bSmrg 
1549*8feb0f0bSmrg /* Traverse all conditional statements in LOOP, to find out a good candidate
1550*8feb0f0bSmrg    upon which we can do loop split.  */
1551*8feb0f0bSmrg 
1552*8feb0f0bSmrg static bool
split_loop_on_cond(struct loop * loop)1553*8feb0f0bSmrg split_loop_on_cond (struct loop *loop)
1554*8feb0f0bSmrg {
1555*8feb0f0bSmrg   split_info *info = new split_info ();
1556*8feb0f0bSmrg   basic_block *bbs = info->bbs = get_loop_body (loop);
1557*8feb0f0bSmrg   bool do_split = false;
1558*8feb0f0bSmrg 
1559*8feb0f0bSmrg   /* Allocate an area to keep temporary info, and associate its address
1560*8feb0f0bSmrg      with loop aux field.  */
1561*8feb0f0bSmrg   loop->aux = info;
1562*8feb0f0bSmrg 
1563*8feb0f0bSmrg   for (unsigned i = 0; i < loop->num_nodes; i++)
1564*8feb0f0bSmrg     bbs[i]->aux = NULL;
1565*8feb0f0bSmrg 
1566*8feb0f0bSmrg   for (unsigned i = 0; i < loop->num_nodes; i++)
1567*8feb0f0bSmrg     {
1568*8feb0f0bSmrg       basic_block bb = bbs[i];
1569*8feb0f0bSmrg 
1570*8feb0f0bSmrg       /* We only consider conditional statement, which be executed at most once
1571*8feb0f0bSmrg 	 in each iteration of the loop.  So skip statements in inner loops.  */
1572*8feb0f0bSmrg       if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
1573*8feb0f0bSmrg 	continue;
1574*8feb0f0bSmrg 
1575*8feb0f0bSmrg       /* Actually this check is not a must constraint.  With it, we can ensure
1576*8feb0f0bSmrg 	 conditional statement will always be executed in each iteration.  */
1577*8feb0f0bSmrg       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1578*8feb0f0bSmrg 	continue;
1579*8feb0f0bSmrg 
1580*8feb0f0bSmrg       gimple *last = last_stmt (bb);
1581*8feb0f0bSmrg 
1582*8feb0f0bSmrg       if (!last || gimple_code (last) != GIMPLE_COND)
1583*8feb0f0bSmrg 	continue;
1584*8feb0f0bSmrg 
1585*8feb0f0bSmrg       gcond *cond = as_a <gcond *> (last);
1586*8feb0f0bSmrg       edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
1587*8feb0f0bSmrg 
1588*8feb0f0bSmrg       if (branch_edge)
1589*8feb0f0bSmrg 	{
1590*8feb0f0bSmrg 	  do_split_loop_on_cond (loop, branch_edge);
1591*8feb0f0bSmrg 	  do_split = true;
1592*8feb0f0bSmrg 	  break;
1593*8feb0f0bSmrg 	}
1594*8feb0f0bSmrg     }
1595*8feb0f0bSmrg 
1596*8feb0f0bSmrg   delete info;
1597*8feb0f0bSmrg   loop->aux = NULL;
1598*8feb0f0bSmrg 
1599*8feb0f0bSmrg   return do_split;
1600*8feb0f0bSmrg }
1601*8feb0f0bSmrg 
16023ad841b2Smrg /* Main entry point.  Perform loop splitting on all suitable loops.  */
16033ad841b2Smrg 
16043ad841b2Smrg static unsigned int
tree_ssa_split_loops(void)16053ad841b2Smrg tree_ssa_split_loops (void)
16063ad841b2Smrg {
1607*8feb0f0bSmrg   class loop *loop;
16083ad841b2Smrg   bool changed = false;
16093ad841b2Smrg 
16103ad841b2Smrg   gcc_assert (scev_initialized_p ());
1611*8feb0f0bSmrg 
1612*8feb0f0bSmrg   calculate_dominance_info (CDI_POST_DOMINATORS);
1613*8feb0f0bSmrg 
16143ad841b2Smrg   FOR_EACH_LOOP (loop, LI_INCLUDE_ROOT)
16153ad841b2Smrg     loop->aux = NULL;
16163ad841b2Smrg 
16173ad841b2Smrg   /* Go through all loops starting from innermost.  */
16183ad841b2Smrg   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
16193ad841b2Smrg     {
16203ad841b2Smrg       if (loop->aux)
16213ad841b2Smrg 	{
16223ad841b2Smrg 	  /* If any of our inner loops was split, don't split us,
16233ad841b2Smrg 	     and mark our containing loop as having had splits as well.  */
16243ad841b2Smrg 	  loop_outer (loop)->aux = loop;
16253ad841b2Smrg 	  continue;
16263ad841b2Smrg 	}
16273ad841b2Smrg 
1628*8feb0f0bSmrg       if (optimize_loop_for_size_p (loop))
1629*8feb0f0bSmrg 	continue;
1630*8feb0f0bSmrg 
1631*8feb0f0bSmrg       if (split_loop (loop) || split_loop_on_cond (loop))
16323ad841b2Smrg 	{
1633*8feb0f0bSmrg 	  /* Mark our containing loop as having had some split inner loops.  */
16343ad841b2Smrg 	  loop_outer (loop)->aux = loop;
16353ad841b2Smrg 	  changed = true;
16363ad841b2Smrg 	}
16373ad841b2Smrg     }
16383ad841b2Smrg 
16393ad841b2Smrg   FOR_EACH_LOOP (loop, LI_INCLUDE_ROOT)
16403ad841b2Smrg     loop->aux = NULL;
16413ad841b2Smrg 
1642*8feb0f0bSmrg   clear_aux_for_blocks ();
1643*8feb0f0bSmrg 
1644*8feb0f0bSmrg   free_dominance_info (CDI_POST_DOMINATORS);
1645*8feb0f0bSmrg 
16463ad841b2Smrg   if (changed)
16473ad841b2Smrg     return TODO_cleanup_cfg;
16483ad841b2Smrg   return 0;
16493ad841b2Smrg }
16503ad841b2Smrg 
/* Loop splitting pass.  */

namespace {

/* Static metadata for the loop splitting pass: a GIMPLE pass named
   "lsplit" in the loop optimization group, requiring only a valid CFG.  */
const pass_data pass_data_loop_split =
{
  GIMPLE_PASS, /* type */
  "lsplit", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LOOP_SPLIT, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
16673ad841b2Smrg 
/* Pass wrapper around tree_ssa_split_loops; see pass_data_loop_split for
   the pass metadata.  */

class pass_loop_split : public gimple_opt_pass
{
public:
  pass_loop_split (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_split, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when loop splitting has been requested (-fsplit-loops).  */
  virtual bool gate (function *) { return flag_split_loops != 0; }
  virtual unsigned int execute (function *);

}; // class pass_loop_split
16803ad841b2Smrg 
16813ad841b2Smrg unsigned int
execute(function * fun)16823ad841b2Smrg pass_loop_split::execute (function *fun)
16833ad841b2Smrg {
16843ad841b2Smrg   if (number_of_loops (fun) <= 1)
16853ad841b2Smrg     return 0;
16863ad841b2Smrg 
16873ad841b2Smrg   return tree_ssa_split_loops ();
16883ad841b2Smrg }
16893ad841b2Smrg 
16903ad841b2Smrg } // anon namespace
16913ad841b2Smrg 
/* Factory used by the pass manager to instantiate the loop splitting
   pass.  */

gimple_opt_pass *
make_pass_loop_split (gcc::context *ctxt)
{
  return new pass_loop_split (ctxt);
}
1697