xref: /netbsd-src/external/gpl3/gcc/dist/gcc/tree-ssa-loop-split.cc (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /* Loop splitting.
2    Copyright (C) 2015-2022 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "tree-pass.h"
27 #include "ssa.h"
28 #include "fold-const.h"
29 #include "tree-cfg.h"
30 #include "tree-ssa.h"
31 #include "tree-ssa-loop-niter.h"
32 #include "tree-ssa-loop.h"
33 #include "tree-ssa-loop-manip.h"
34 #include "tree-into-ssa.h"
35 #include "tree-inline.h"
36 #include "tree-cfgcleanup.h"
37 #include "cfgloop.h"
38 #include "tree-scalar-evolution.h"
39 #include "gimple-iterator.h"
40 #include "gimple-pretty-print.h"
41 #include "cfghooks.h"
42 #include "gimple-fold.h"
43 #include "gimplify-me.h"
44 
45 /* This file implements two kinds of loop splitting.
46 
47    One transformation of loops like:
48 
49    for (i = 0; i < 100; i++)
50      {
51        if (i < 50)
52          A;
53        else
54          B;
55      }
56 
57    into:
58 
59    for (i = 0; i < 50; i++)
60      {
61        A;
62      }
63    for (; i < 100; i++)
64      {
65        B;
66      }
67 
68    */
69 
70 /* Return true when BB inside LOOP is a potential iteration space
71    split point, i.e. ends with a condition like "IV < comp", which
72    is true on one side of the iteration space and false on the other,
73    and the split point can be computed.  If so, also return the border
74    point in *BORDER and the comparison induction variable in IV.  */
75 
static tree
split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
{
  gimple *last;
  gcond *stmt;
  affine_iv iv2;

  /* BB must end in a simple conditional jump.  */
  last = last_stmt (bb);
  if (!last || gimple_code (last) != GIMPLE_COND)
    return NULL_TREE;
  stmt = as_a <gcond *> (last);

  enum tree_code code = gimple_cond_code (stmt);

  /* Only handle relational comparisons, for equality and non-equality
     we'd have to split the loop into two loops and a middle statement.  */
  switch (code)
    {
      case LT_EXPR:
      case LE_EXPR:
      case GT_EXPR:
      case GE_EXPR:
	break;
      default:
	return NULL_TREE;
    }

  /* An exit condition controls the iteration count and cannot serve
     as an interior split point.  */
  if (loop_exits_from_bb_p (loop, bb))
    return NULL_TREE;

  tree op0 = gimple_cond_lhs (stmt);
  tree op1 = gimple_cond_rhs (stmt);
  class loop *useloop = loop_containing_stmt (stmt);

  /* Both operands must be affine IVs of LOOP (an invariant operand
     appears as an IV with zero step).  */
  if (!simple_iv (loop, useloop, op0, iv, false))
    return NULL_TREE;
  if (!simple_iv (loop, useloop, op1, &iv2, false))
    return NULL_TREE;

  /* Make it so that the first argument of the condition is
     the looping one.  */
  if (!integer_zerop (iv2.step))
    {
      /* NOTE: this canonicalization rewrites the condition in the IR
	 even if one of the later checks rejects the split point.  */
      std::swap (op0, op1);
      std::swap (*iv, iv2);
      code = swap_tree_comparison (code);
      gimple_cond_set_condition (stmt, code, op0, op1);
      update_stmt (stmt);
    }
  else if (integer_zerop (iv->step))
    /* Both sides are invariant in LOOP - there is nothing to split at.  */
    return NULL_TREE;
  if (!integer_zerop (iv2.step))
    /* Both sides vary in LOOP; the border would move every iteration.  */
    return NULL_TREE;
  if (!iv->no_overflow)
    /* A possibly wrapping IV makes the border computation unsound.  */
    return NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Found potential split point: ");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
      fprintf (dump_file, " { ");
      print_generic_expr (dump_file, iv->base, TDF_SLIM);
      fprintf (dump_file, " + I*");
      print_generic_expr (dump_file, iv->step, TDF_SLIM);
      fprintf (dump_file, " } %s ", get_tree_code_name (code));
      print_generic_expr (dump_file, iv2.base, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  /* Return the IV operand of the condition; the invariant side's base
     becomes the border value.  */
  *border = iv2.base;
  return op0;
}
149 
150 /* Given a GUARD conditional stmt inside LOOP, which we want to make always
151    true or false depending on INITIAL_TRUE, and adjusted values NEXTVAL
152    (a post-increment IV) and NEWBOUND (the comparator) adjust the loop
153    exit test statement to loop back only if the GUARD statement will
154    also be true/false in the next iteration.  */
155 
156 static void
patch_loop_exit(class loop * loop,gcond * guard,tree nextval,tree newbound,bool initial_true)157 patch_loop_exit (class loop *loop, gcond *guard, tree nextval, tree newbound,
158 		 bool initial_true)
159 {
160   edge exit = single_exit (loop);
161   gcond *stmt = as_a <gcond *> (last_stmt (exit->src));
162   gimple_cond_set_condition (stmt, gimple_cond_code (guard),
163 			     nextval, newbound);
164   update_stmt (stmt);
165 
166   edge stay = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
167 
168   exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
169   stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
170 
171   if (initial_true)
172     {
173       exit->flags |= EDGE_FALSE_VALUE;
174       stay->flags |= EDGE_TRUE_VALUE;
175     }
176   else
177     {
178       exit->flags |= EDGE_TRUE_VALUE;
179       stay->flags |= EDGE_FALSE_VALUE;
180     }
181 }
182 
183 /* Give an induction variable GUARD_IV, and its affine descriptor IV,
184    find the loop phi node in LOOP defining it directly, or create
185    such phi node.  Return that phi node.  */
186 
187 static gphi *
find_or_create_guard_phi(class loop * loop,tree guard_iv,affine_iv *)188 find_or_create_guard_phi (class loop *loop, tree guard_iv, affine_iv * /*iv*/)
189 {
190   gimple *def = SSA_NAME_DEF_STMT (guard_iv);
191   gphi *phi;
192   if ((phi = dyn_cast <gphi *> (def))
193       && gimple_bb (phi) == loop->header)
194     return phi;
195 
196   /* XXX Create the PHI instead.  */
197   return NULL;
198 }
199 
200 /* Returns true if the exit values of all loop phi nodes can be
201    determined easily (i.e. that connect_loop_phis can determine them).  */
202 
203 static bool
easy_exit_values(class loop * loop)204 easy_exit_values (class loop *loop)
205 {
206   edge exit = single_exit (loop);
207   edge latch = loop_latch_edge (loop);
208   gphi_iterator psi;
209 
210   /* Currently we regard the exit values as easy if they are the same
211      as the value over the backedge.  Which is the case if the definition
212      of the backedge value dominates the exit edge.  */
213   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
214     {
215       gphi *phi = psi.phi ();
216       tree next = PHI_ARG_DEF_FROM_EDGE (phi, latch);
217       basic_block bb;
218       if (TREE_CODE (next) == SSA_NAME
219 	  && (bb = gimple_bb (SSA_NAME_DEF_STMT (next)))
220 	  && !dominated_by_p (CDI_DOMINATORS, exit->src, bb))
221 	return false;
222     }
223 
224   return true;
225 }
226 
227 /* This function updates the SSA form after connect_loops made a new
228    edge NEW_E leading from LOOP1 exit to LOOP2 (via in intermediate
229    conditional).  I.e. the second loop can now be entered either
230    via the original entry or via NEW_E, so the entry values of LOOP2
231    phi nodes are either the original ones or those at the exit
232    of LOOP1.  Insert new phi nodes in LOOP2 pre-header reflecting
233    this.  The loops need to fulfill easy_exit_values().  */
234 
static void
connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
{
  /* REST is LOOP2's preheader block; it is now reachable both via the
     original guard and via NEW_E from LOOP1's exit path.  */
  basic_block rest = loop_preheader_edge (loop2)->src;
  gcc_assert (new_e->dest == rest);
  /* The predecessor edge of REST that is not NEW_E, i.e. the path on
     which LOOP1 was skipped entirely.  */
  edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e);

  edge firste = loop_preheader_edge (loop1);
  edge seconde = loop_preheader_edge (loop2);
  edge firstn = loop_latch_edge (loop1);
  gphi_iterator psi_first, psi_second;
  /* LOOP2 is a copy of LOOP1, so the header PHI nodes of the two loops
     correspond pairwise and can be walked in lock-step.  */
  for (psi_first = gsi_start_phis (loop1->header),
       psi_second = gsi_start_phis (loop2->header);
       !gsi_end_p (psi_first);
       gsi_next (&psi_first), gsi_next (&psi_second))
    {
      tree init, next, new_init;
      use_operand_p op;
      gphi *phi_first = psi_first.phi ();
      gphi *phi_second = psi_second.phi ();

      /* INIT is the value on loop entry, NEXT the value over LOOP1's
	 backedge - which, by easy_exit_values, is also LOOP1's exit
	 value for this PHI.  */
      init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
      next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
      op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
      gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));

      /* Prefer using original variable as a base for the new ssa name.
	 This is necessary for virtual ops, and useful in order to avoid
	 losing debug info for real ops.  */
      if (TREE_CODE (next) == SSA_NAME
	  && useless_type_conversion_p (TREE_TYPE (next),
					TREE_TYPE (init)))
	new_init = copy_ssa_name (next);
      else if (TREE_CODE (init) == SSA_NAME
	       && useless_type_conversion_p (TREE_TYPE (init),
					     TREE_TYPE (next)))
	new_init = copy_ssa_name (init);
      else if (useless_type_conversion_p (TREE_TYPE (next),
					  TREE_TYPE (init)))
	new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
				       "unrinittmp");
      else
	new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
				       "unrinittmp");

      /* The new PHI in REST selects the original entry value when
	 LOOP1 was skipped, and LOOP1's exit value when it ran; make
	 LOOP2's header PHI use it as entry value.  */
      gphi * newphi = create_phi_node (new_init, rest);
      add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
      SET_USE (op, new_init);
    }
}
286 
287 /* The two loops LOOP1 and LOOP2 were just created by loop versioning,
288    they are still equivalent and placed in two arms of a diamond, like so:
289 
290                .------if (cond)------.
291                v                     v
292              pre1                   pre2
293               |                      |
294         .--->h1                     h2<----.
295         |     |                      |     |
296         |    ex1---.            .---ex2    |
297         |    /     |            |     \    |
298         '---l1     X            |     l2---'
299                    |            |
300                    |            |
301                    '--->join<---'
302 
303    This function transforms the program such that LOOP1 is conditionally
304    falling through to LOOP2, or skipping it.  This is done by splitting
305    the ex1->join edge at X in the diagram above, and inserting a condition
306    whose one arm goes to pre2, resulting in this situation:
307 
308                .------if (cond)------.
309                v                     v
310              pre1       .---------->pre2
311               |         |            |
312         .--->h1         |           h2<----.
313         |     |         |            |     |
314         |    ex1---.    |       .---ex2    |
315         |    /     v    |       |     \    |
316         '---l1   skip---'       |     l2---'
317                    |            |
318                    |            |
319                    '--->join<---'
320 
321 
322    The condition used is the exit condition of LOOP1, which effectively means
323    that when the first loop exits (for whatever reason) but the real original
324    exit expression is still false the second loop will be entered.
325    The function returns the new edge cond->pre2.
326 
327    This doesn't update the SSA form, see connect_loop_phis for that.  */
328 
329 static edge
connect_loops(class loop * loop1,class loop * loop2)330 connect_loops (class loop *loop1, class loop *loop2)
331 {
332   edge exit = single_exit (loop1);
333   basic_block skip_bb = split_edge (exit);
334   gcond *skip_stmt;
335   gimple_stmt_iterator gsi;
336   edge new_e, skip_e;
337 
338   gimple *stmt = last_stmt (exit->src);
339   skip_stmt = gimple_build_cond (gimple_cond_code (stmt),
340 				 gimple_cond_lhs (stmt),
341 				 gimple_cond_rhs (stmt),
342 				 NULL_TREE, NULL_TREE);
343   gsi = gsi_last_bb (skip_bb);
344   gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT);
345 
346   skip_e = EDGE_SUCC (skip_bb, 0);
347   skip_e->flags &= ~EDGE_FALLTHRU;
348   new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0);
349   if (exit->flags & EDGE_TRUE_VALUE)
350     {
351       skip_e->flags |= EDGE_TRUE_VALUE;
352       new_e->flags |= EDGE_FALSE_VALUE;
353     }
354   else
355     {
356       skip_e->flags |= EDGE_FALSE_VALUE;
357       new_e->flags |= EDGE_TRUE_VALUE;
358     }
359 
360   new_e->probability = profile_probability::likely ();
361   skip_e->probability = new_e->probability.invert ();
362 
363   return new_e;
364 }
365 
366 /* This returns the new bound for iterations given the original iteration
367    space in NITER, an arbitrary new bound BORDER, assumed to be some
368    comparison value with a different IV, the initial value GUARD_INIT of
369    that other IV, and the comparison code GUARD_CODE that compares
370    that other IV with BORDER.  We return an SSA name, and place any
371    necessary statements for that computation into *STMTS.
372 
373    For example for such a loop:
374 
375      for (i = beg, j = guard_init; i < end; i++, j++)
376        if (j < border)  // this is supposed to be true/false
377          ...
378 
379    we want to return a new bound (on j) that makes the loop iterate
380    as long as the condition j < border stays true.  We also don't want
381    to iterate more often than the original loop, so we have to introduce
382    some cut-off as well (via min/max), effectively resulting in:
383 
384      newend = min (end+guard_init-beg, border)
385      for (i = beg; j = guard_init; j < newend; i++, j++)
386        if (j < c)
387          ...
388 
389    Depending on the direction of the IVs and if the exit tests
390    are strict or non-strict we need to use MIN or MAX,
391    and add or subtract 1.  This routine computes newend above.  */
392 
static tree
compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
			 tree border,
			 enum tree_code guard_code, tree guard_init)
{
  /* The niter structure contains the after-increment IV, we need
     the loop-enter base, so subtract STEP once.  */
  tree controlbase = force_gimple_operand (niter->control.base,
					   stmts, true, NULL_TREE);
  tree controlstep = niter->control.step;
  tree enddiff;
  if (POINTER_TYPE_P (TREE_TYPE (controlbase)))
    {
      /* Pointers have no MINUS_EXPR: negate the step and use
	 POINTER_PLUS_EXPR instead.  */
      controlstep = gimple_build (stmts, NEGATE_EXPR,
				  TREE_TYPE (controlstep), controlstep);
      enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
			      TREE_TYPE (controlbase),
			      controlbase, controlstep);
    }
  else
    enddiff = gimple_build (stmts, MINUS_EXPR,
			    TREE_TYPE (controlbase),
			    controlbase, controlstep);

  /* Compute end-beg.  */
  gimple_seq stmts2;
  tree end = force_gimple_operand (niter->bound, &stmts2,
					true, NULL_TREE);
  gimple_seq_add_seq_without_update (stmts, stmts2);
  if (POINTER_TYPE_P (TREE_TYPE (enddiff)))
    {
      /* end - enddiff, expressed as end + (-(sizetype)enddiff) because
	 pointer arithmetic only offers POINTER_PLUS_EXPR.  */
      tree tem = gimple_convert (stmts, sizetype, enddiff);
      tem = gimple_build (stmts, NEGATE_EXPR, sizetype, tem);
      enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
			      TREE_TYPE (enddiff),
			      end, tem);
    }
  else
    enddiff = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
			    end, enddiff);

  /* Compute guard_init + (end-beg).  */
  tree newbound;
  enddiff = gimple_convert (stmts, TREE_TYPE (guard_init), enddiff);
  if (POINTER_TYPE_P (TREE_TYPE (guard_init)))
    {
      enddiff = gimple_convert (stmts, sizetype, enddiff);
      newbound = gimple_build (stmts, POINTER_PLUS_EXPR,
			       TREE_TYPE (guard_init),
			       guard_init, enddiff);
    }
  else
    newbound = gimple_build (stmts, PLUS_EXPR, TREE_TYPE (guard_init),
			     guard_init, enddiff);

  /* Depending on the direction of the IVs the new bound for the first
     loop is the minimum or maximum of old bound and border.
     Also, if the guard condition isn't strictly less or greater,
     we need to adjust the bound.  */
  int addbound = 0;
  enum tree_code minmax;
  if (niter->cmp == LT_EXPR)
    {
      /* GT and LE are the same, inverted.  */
      if (guard_code == GT_EXPR || guard_code == LE_EXPR)
	addbound = -1;
      minmax = MIN_EXPR;
    }
  else
    {
      gcc_assert (niter->cmp == GT_EXPR);
      if (guard_code == GE_EXPR || guard_code == LT_EXPR)
	addbound = 1;
      minmax = MAX_EXPR;
    }

  if (addbound)
    {
      /* For pointer bounds the +-1 offset must be built in sizetype;
	 the signed -1 wraps to the all-ones sizetype offset, which is
	 how pointer decrements are expressed in GIMPLE.  */
      tree type2 = TREE_TYPE (newbound);
      if (POINTER_TYPE_P (type2))
	type2 = sizetype;
      newbound = gimple_build (stmts,
			       POINTER_TYPE_P (TREE_TYPE (newbound))
			       ? POINTER_PLUS_EXPR : PLUS_EXPR,
			       TREE_TYPE (newbound),
			       newbound,
			       build_int_cst (type2, addbound));
    }

  /* Cut off at BORDER so the first loop never iterates more often
     than the original loop did.  */
  tree newend = gimple_build (stmts, minmax, TREE_TYPE (border),
			      border, newbound);
  return newend;
}
486 
487 /* Fix the two loop's bb count after split based on the split edge probability,
488    don't adjust the bbs dominated by true branches of that loop to avoid
489    dropping 1s down.  */
490 static void
fix_loop_bb_probability(class loop * loop1,class loop * loop2,edge true_edge,edge false_edge)491 fix_loop_bb_probability (class loop *loop1, class loop *loop2, edge true_edge,
492 			 edge false_edge)
493 {
494   update_ssa (TODO_update_ssa);
495 
496   /* Proportion first loop's bb counts except those dominated by true
497      branch to avoid drop 1s down.  */
498   basic_block *bbs1, *bbs2;
499   bbs1 = get_loop_body (loop1);
500   unsigned j;
501   for (j = 0; j < loop1->num_nodes; j++)
502     if (bbs1[j] == loop1->latch
503 	|| !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
504       bbs1[j]->count
505 	= bbs1[j]->count.apply_probability (true_edge->probability);
506   free (bbs1);
507 
508   /* Proportion second loop's bb counts except those dominated by false
509      branch to avoid drop 1s down.  */
510   basic_block bbi_copy = get_bb_copy (false_edge->dest);
511   bbs2 = get_loop_body (loop2);
512   for (j = 0; j < loop2->num_nodes; j++)
513     if (bbs2[j] == loop2->latch
514 	|| !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
515       bbs2[j]->count
516 	= bbs2[j]->count.apply_probability (true_edge->probability.invert ());
517   free (bbs2);
518 }
519 
520 /* Checks if LOOP contains an conditional block whose condition
521    depends on which side in the iteration space it is, and if so
522    splits the iteration space into two loops.  Returns true if the
523    loop was split.  NITER must contain the iteration descriptor for the
524    single exit of LOOP.  */
525 
static bool
split_loop (class loop *loop1)
{
  class tree_niter_desc niter;
  basic_block *bbs;
  unsigned i;
  bool changed = false;
  tree guard_iv;
  tree border = NULL_TREE;
  affine_iv iv;
  edge exit1;

  /* The transformation requires a single-exit loop with a known
     iteration count and easily reconstructible exit values.  */
  if (!(exit1 = single_exit (loop1))
      || EDGE_COUNT (exit1->src->succs) != 2
      /* ??? We could handle non-empty latches when we split the latch edge
	 (not the exit edge), and put the new exit condition in the new block.
	 OTOH this executes some code unconditionally that might have been
	 skipped by the original exit before.  */
      || !empty_block_p (loop1->latch)
      || !easy_exit_values (loop1)
      || !number_of_iterations_exit (loop1, exit1, &niter, false, true)
      || niter.cmp == ERROR_MARK
      /* We can't yet handle loops controlled by a != predicate.  */
      || niter.cmp == NE_EXPR)
    return false;

  bbs = get_loop_body (loop1);

  if (!can_copy_bbs_p (bbs, loop1->num_nodes))
    {
      free (bbs);
      return false;
    }

  /* Find a splitting opportunity.  */
  for (i = 0; i < loop1->num_nodes; i++)
    if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv)))
      {
	/* Handling opposite steps is not implemented yet.  Neither
	   is handling different step sizes.  */
	if ((tree_int_cst_sign_bit (iv.step)
	     != tree_int_cst_sign_bit (niter.control.step))
	    || !tree_int_cst_equal (iv.step, niter.control.step))
	  continue;

	/* Find a loop PHI node that defines guard_iv directly,
	   or create one doing that.  */
	gphi *phi = find_or_create_guard_phi (loop1, guard_iv, &iv);
	if (!phi)
	  continue;
	gcond *guard_stmt = as_a<gcond *> (last_stmt (bbs[i]));
	tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
						 loop_preheader_edge (loop1));
	enum tree_code guard_code = gimple_cond_code (guard_stmt);

	/* Loop splitting is implemented by versioning the loop, placing
	   the new loop after the old loop, make the first loop iterate
	   as long as the conditional stays true (or false) and let the
	   second (new) loop handle the rest of the iterations.

	   First we need to determine if the condition will start being true
	   or false in the first loop.  */
	bool initial_true;
	switch (guard_code)
	  {
	    case LT_EXPR:
	    case LE_EXPR:
	      /* An upward-counting IV starts below the border.  */
	      initial_true = !tree_int_cst_sign_bit (iv.step);
	      break;
	    case GT_EXPR:
	    case GE_EXPR:
	      /* A downward-counting IV starts above the border.  */
	      initial_true = tree_int_cst_sign_bit (iv.step);
	      break;
	    default:
	      gcc_unreachable ();
	  }

	/* Build a condition that will skip the first loop when the
	   guard condition won't ever be true (or false).  */
	gimple_seq stmts2;
	border = force_gimple_operand (border, &stmts2, true, NULL_TREE);
	if (stmts2)
	  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
					    stmts2);
	tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
	if (!initial_true)
	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);

	edge true_edge, false_edge;
	extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge);

	/* Now version the loop, placing loop2 after loop1 connecting
	   them, and fix up SSA form for that.  */
	initialize_original_copy_tables ();
	basic_block cond_bb;

	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
					  true_edge->probability,
					  true_edge->probability.invert (),
					  profile_probability::always (),
					  profile_probability::always (),
					  true);
	gcc_assert (loop2);

	edge new_e = connect_loops (loop1, loop2);
	connect_loop_phis (loop1, loop2, new_e);

	/* The iterations of the second loop is now already
	   exactly those that the first loop didn't do, but the
	   iteration space of the first loop is still the original one.
	   Compute the new bound for the guarding IV and patch the
	   loop exit to use it instead of original IV and bound.  */
	gimple_seq stmts = NULL;
	tree newend = compute_new_first_bound (&stmts, &niter, border,
					       guard_code, guard_init);
	if (stmts)
	  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
					    stmts);
	tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
	patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true);

	fix_loop_bb_probability (loop1, loop2, true_edge, false_edge);

	/* Fix first loop's exit probability after scaling.  */
	edge exit_to_latch1;
	if (EDGE_SUCC (exit1->src, 0) == exit1)
	  exit_to_latch1 = EDGE_SUCC (exit1->src, 1);
	else
	  exit_to_latch1 = EDGE_SUCC (exit1->src, 0);
	exit_to_latch1->probability *= true_edge->probability;
	exit1->probability = exit_to_latch1->probability.invert ();

	/* Finally patch out the two copies of the condition to be always
	   true/false (or opposite).  The copy in loop1 now always takes
	   the branch the guard selected, the copy in loop2 the other.  */
	gcond *force_true = as_a<gcond *> (last_stmt (bbs[i]));
	gcond *force_false = as_a<gcond *> (last_stmt (get_bb_copy (bbs[i])));
	if (!initial_true)
	  std::swap (force_true, force_false);
	gimple_cond_make_true (force_true);
	gimple_cond_make_false (force_false);
	update_stmt (force_true);
	update_stmt (force_false);

	free_original_copy_tables ();

	changed = true;
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, ";; Loop split.\n");

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, guard_stmt, "loop split\n");

	/* Only deal with the first opportunity.  */
	break;
      }

  free (bbs);
  return changed;
}
685 
686 /* Another transformation of loops like:
687 
688    for (i = INIT (); CHECK (i); i = NEXT ())
689      {
690        if (expr (a_1, a_2, ..., a_n))  // expr is pure
691          a_j = ...;  // change at least one a_j
692        else
693          S;          // not change any a_j
694      }
695 
696    into:
697 
698    for (i = INIT (); CHECK (i); i = NEXT ())
699      {
700        if (expr (a_1, a_2, ..., a_n))
701          a_j = ...;
702        else
703          {
704            S;
705            i = NEXT ();
706            break;
707          }
708      }
709 
710    for (; CHECK (i); i = NEXT ())
711      {
712        S;
713      }
714 
715    */
716 
717 /* Data structure to hold temporary information during loop split upon
718    semi-invariant conditional statement.  */
719 class split_info {
720 public:
721   /* Array of all basic blocks in a loop, returned by get_loop_body().  */
722   basic_block *bbs;
723 
724   /* All memory store/clobber statements in a loop.  */
725   auto_vec<gimple *> memory_stores;
726 
727   /* Whether above memory stores vector has been filled.  */
728   int need_init;
729 
730   /* Control dependencies of basic blocks in a loop.  */
731   auto_vec<hash_set<basic_block> *> control_deps;
732 
split_info()733   split_info () : bbs (NULL),  need_init (true) { }
734 
~split_info()735   ~split_info ()
736     {
737       if (bbs)
738 	free (bbs);
739 
740       for (unsigned i = 0; i < control_deps.length (); i++)
741 	delete control_deps[i];
742     }
743 };
744 
745 /* Find all statements with memory-write effect in LOOP, including memory
746    store and non-pure function call, and keep those in a vector.  This work
747    is only done one time, for the vector should be constant during analysis
748    stage of semi-invariant condition.  */
749 
static void
find_vdef_in_loop (struct loop *loop)
{
  /* LOOP->aux is set up by the caller to hold the per-loop split_info;
     assumes the semi-invariant analysis installed it beforehand.  */
  split_info *info = (split_info *) loop->aux;
  gphi *vphi = get_virtual_phi (loop->header);

  /* Indicate memory store vector has been filled.  */
  info->need_init = false;

  /* If loop contains memory operation, there must be a virtual PHI node in
     loop header basic block.  */
  if (vphi == NULL)
    return;

  /* All virtual SSA names inside the loop are connected to be a cyclic
     graph via virtual PHI nodes.  The virtual PHI node in loop header just
     links the first and the last virtual SSA names, by using the last as
     PHI operand to define the first.  */
  const edge latch = loop_latch_edge (loop);
  const tree first = gimple_phi_result (vphi);
  const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);

  /* The virtual SSA cyclic graph might consist of only one SSA name, who
     is defined by itself.

       .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>

     This means the loop contains only memory loads, so we can skip it.  */
  if (first == last)
    return;

  auto_vec<gimple *> other_stores;
  auto_vec<tree> worklist;
  auto_bitmap visited;

  bitmap_set_bit (visited, SSA_NAME_VERSION (first));
  bitmap_set_bit (visited, SSA_NAME_VERSION (last));
  worklist.safe_push (last);

  do
    {
      tree vuse = worklist.pop ();
      gimple *stmt = SSA_NAME_DEF_STMT (vuse);

      /* We mark the first and last SSA names as visited at the beginning,
	 and reversely start the process from the last SSA name towards the
	 first, which ensures that this do-while will not touch SSA names
	 defined outside the loop.  */
      gcc_assert (gimple_bb (stmt)
		  && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));

      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  gphi *phi = as_a <gphi *> (stmt);

	  /* Follow every incoming virtual definition of the PHI.  */
	  for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
	    {
	      tree arg = gimple_phi_arg_def (stmt, i);

	      /* bitmap_set_bit returns true only on the first insertion,
		 so each SSA name is queued at most once.  */
	      if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
		worklist.safe_push (arg);
	    }
	}
      else
	{
	  tree prev = gimple_vuse (stmt);

	  /* Non-pure call statement is conservatively assumed to impact all
	     memory locations.  So place call statements ahead of other memory
	     stores in the vector with an idea of using them as shortcut
	     terminators to memory alias analysis.  */
	  if (gimple_code (stmt) == GIMPLE_CALL)
	    info->memory_stores.safe_push (stmt);
	  else
	    other_stores.safe_push (stmt);

	  if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
	    worklist.safe_push (prev);
	}
    } while (!worklist.is_empty ());

  /* Append the non-call stores after all calls collected above.  */
  info->memory_stores.safe_splice (other_stores);
}
833 
834 /* Two basic blocks have equivalent control dependency if one dominates to
835    the other, and it is post-dominated by the latter.  Given a basic block
836    BB in LOOP, find farest equivalent dominating basic block.  For BB, there
837    is a constraint that BB does not post-dominate loop header of LOOP, this
838    means BB is control-dependent on at least one basic block in LOOP.  */
839 
840 static basic_block
get_control_equiv_head_block(struct loop * loop,basic_block bb)841 get_control_equiv_head_block (struct loop *loop, basic_block bb)
842 {
843   while (!bb->aux)
844     {
845       basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
846 
847       gcc_checking_assert (dom_bb && flow_bb_inside_loop_p (loop, dom_bb));
848 
849       if (!dominated_by_p (CDI_POST_DOMINATORS, dom_bb, bb))
850 	break;
851 
852       bb = dom_bb;
853     }
854   return bb;
855 }
856 
/* Given a BB in LOOP, find out all basic blocks in LOOP that BB is control-
   dependent on.  The resulting set is cached in BB->aux (and in the aux
   field of every block control-equivalent to BB), and is owned by the
   split_info attached to LOOP->aux.  Returns NULL if BB is not control-
   dependent on any block inside LOOP.  */

static hash_set<basic_block> *
find_control_dep_blocks (struct loop *loop, basic_block bb)
{
  /* BB has same control dependency as loop header, then it is not control-
     dependent on any basic block in LOOP.  */
  if (dominated_by_p (CDI_POST_DOMINATORS, loop->header, bb))
    return NULL;

  basic_block equiv_head = get_control_equiv_head_block (loop, bb);

  if (equiv_head->aux)
    {
      /* There is a basic block containing control dependency equivalent
	 to BB.  No need to recompute that, and also set this information
	 to other equivalent basic blocks.  */
      for (; bb != equiv_head;
	   bb = get_immediate_dominator (CDI_DOMINATORS, bb))
	bb->aux = equiv_head->aux;
      return (hash_set<basic_block> *) equiv_head->aux;
    }

  /* A basic block X is control-dependent on another Y iff there exists
     a path from X to Y, in which every basic block other than X and Y
     is post-dominated by Y, but X is not post-dominated by Y.

     According to this rule, traverse basic blocks in the loop backwards
     starting from BB, if a basic block is post-dominated by BB, extend
     current post-dominating path to this block, otherwise it is another
     one that BB is control-dependent on.  */

  auto_vec<basic_block> pdom_worklist;
  hash_set<basic_block> pdom_visited;
  hash_set<basic_block> *dep_bbs = new hash_set<basic_block>;

  pdom_worklist.safe_push (equiv_head);

  do
    {
      basic_block pdom_bb = pdom_worklist.pop ();
      edge_iterator ei;
      edge e;

      /* hash_set::add returns true when the element was already present,
	 so this skips blocks processed earlier.  */
      if (pdom_visited.add (pdom_bb))
	continue;

      FOR_EACH_EDGE (e, ei, pdom_bb->preds)
	{
	  basic_block pred_bb = e->src;

	  /* Predecessor not post-dominated by BB: BB is control-dependent
	     on it.  */
	  if (!dominated_by_p (CDI_POST_DOMINATORS, pred_bb, bb))
	    {
	      dep_bbs->add (pred_bb);
	      continue;
	    }

	  pred_bb = get_control_equiv_head_block (loop, pred_bb);

	  if (pdom_visited.contains (pred_bb))
	    continue;

	  if (!pred_bb->aux)
	    {
	      pdom_worklist.safe_push (pred_bb);
	      continue;
	    }

	  /* If control dependency of basic block is available, fast extend
	     post-dominating path using the information instead of advancing
	     forward step-by-step.  */
	  hash_set<basic_block> *pred_dep_bbs
			= (hash_set<basic_block> *) pred_bb->aux;

	  for (hash_set<basic_block>::iterator iter = pred_dep_bbs->begin ();
	       iter != pred_dep_bbs->end (); ++iter)
	    {
	      basic_block pred_dep_bb = *iter;

	      /* Basic blocks can either be in control dependency of BB, or
		 must be post-dominated by BB, if so, extend the path from
		 these basic blocks.  */
	      if (!dominated_by_p (CDI_POST_DOMINATORS, pred_dep_bb, bb))
		dep_bbs->add (pred_dep_bb);
	      else if (!pdom_visited.contains (pred_dep_bb))
		pdom_worklist.safe_push (pred_dep_bb);
	    }
	}
    } while (!pdom_worklist.is_empty ());

  /* Record computed control dependencies in loop so that we can reach them
     when reclaiming resources.  */
  ((split_info *) loop->aux)->control_deps.safe_push (dep_bbs);

  /* Associate control dependence with related equivalent basic blocks.  */
  for (equiv_head->aux = dep_bbs; bb != equiv_head;
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    bb->aux = dep_bbs;

  return dep_bbs;
}
959 
960 /* Forward declaration */
961 
962 static bool
963 stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
964 			 const_basic_block skip_head,
965 			 hash_map<gimple *, bool> &stmt_stat);
966 
967 /* Given STMT, memory load or pure call statement, check whether it is impacted
968    by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the
969    trace is composed of SKIP_HEAD and those basic block dominated by it, always
970    corresponds to one branch of a conditional statement).  If SKIP_HEAD is
971    NULL, all basic blocks of LOOP are checked.  */
972 
973 static bool
vuse_semi_invariant_p(struct loop * loop,gimple * stmt,const_basic_block skip_head)974 vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
975 		       const_basic_block skip_head)
976 {
977   split_info *info = (split_info *) loop->aux;
978   tree rhs = NULL_TREE;
979   ao_ref ref;
980   gimple *store;
981   unsigned i;
982 
983   /* Collect memory store/clobber statements if haven't done that.  */
984   if (info->need_init)
985     find_vdef_in_loop (loop);
986 
987   if (is_gimple_assign (stmt))
988     rhs = gimple_assign_rhs1 (stmt);
989 
990   ao_ref_init (&ref, rhs);
991 
992   FOR_EACH_VEC_ELT (info->memory_stores, i, store)
993     {
994       /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL.  */
995       if (skip_head
996 	  && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
997 	continue;
998 
999       if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
1000 	return false;
1001     }
1002 
1003   return true;
1004 }
1005 
1006 /* Suppose one condition branch, led by SKIP_HEAD, is not executed since
1007    certain iteration of LOOP, check whether an SSA name (NAME) remains
1008    unchanged in next iteration.  We call this characteristic semi-
1009    invariantness.  SKIP_HEAD might be NULL, if so, nothing excluded, all basic
1010    blocks and control flows in the loop will be considered.  Semi-invariant
1011    state of checked statement is cached in hash map STMT_STAT to avoid
1012    redundant computation in possible following re-check.  */
1013 
1014 static inline bool
ssa_semi_invariant_p(struct loop * loop,tree name,const_basic_block skip_head,hash_map<gimple *,bool> & stmt_stat)1015 ssa_semi_invariant_p (struct loop *loop, tree name,
1016 		      const_basic_block skip_head,
1017 		      hash_map<gimple *, bool> &stmt_stat)
1018 {
1019   gimple *def = SSA_NAME_DEF_STMT (name);
1020   const_basic_block def_bb = gimple_bb (def);
1021 
1022   /* An SSA name defined outside loop is definitely semi-invariant.  */
1023   if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
1024     return true;
1025 
1026   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1027     return false;
1028 
1029   return stmt_semi_invariant_p_1 (loop, def, skip_head, stmt_stat);
1030 }
1031 
/* Check whether a loop iteration PHI node (LOOP_PHI) defines a value that is
   semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if non-NULL),
   are excluded from LOOP.  */

static bool
loop_iter_phi_semi_invariant_p (struct loop *loop, gphi *loop_phi,
				const_basic_block skip_head)
{
  const_edge latch = loop_latch_edge (loop);
  tree name = gimple_phi_result (loop_phi);
  tree from = PHI_ARG_DEF_FROM_EDGE (loop_phi, latch);

  gcc_checking_assert (from);

  /* Loop iteration PHI node locates in loop header, and it has two source
     operands, one is an initial value coming from outside the loop, the other
     is a value through latch of the loop, which is derived in last iteration,
     we call the latter latch value.  From the PHI node to definition of latch
     value, if excluding branch trace starting from SKIP_HEAD, except copy-
     assignment or likewise, there is no other kind of value redefinition, SSA
     name defined by the PHI node is semi-invariant.

                         loop entry
                              |     .--- latch ---.
                              |     |             |
                              v     v             |
                  x_1 = PHI <x_0,  x_3>           |
                           |                      |
                           v                      |
              .------- if (cond) -------.         |
              |                         |         |
              |                     [ SKIP ]      |
              |                         |         |
              |                     x_2 = ...     |
              |                         |         |
              '---- T ---->.<---- F ----'         |
                           |                      |
                           v                      |
                  x_3 = PHI <x_1, x_2>            |
                           |                      |
                           '----------------------'

     Suppose in certain iteration, execution flow in above graph goes through
     true branch, which means that one source value to define x_3 in false
     branch (x_2) is skipped, x_3 only comes from x_1, and x_1 in next
     iterations is defined by x_3, we know that x_1 will never changed if COND
     always chooses true branch from then on.  */

  /* Walk the use-def chain backwards from the latch value; the walk
     terminates successfully when it cycles back to the PHI result.  */
  while (from != name)
    {
      /* A new value comes from a CONSTANT.  */
      if (TREE_CODE (from) != SSA_NAME)
	return false;

      gimple *stmt = SSA_NAME_DEF_STMT (from);
      const_basic_block bb = gimple_bb (stmt);

      /* A new value comes from outside the loop.  */
      if (!bb || !flow_bb_inside_loop_p (loop, bb))
	return false;

      /* FROM is re-set to the single value reaching STMT, or stays
	 NULL_TREE when STMT introduces a genuinely new value.  */
      from = NULL_TREE;

      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  gphi *phi = as_a <gphi *> (stmt);

	  for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
	    {
	      if (skip_head)
		{
		  const_edge e = gimple_phi_arg_edge (phi, i);

		  /* Don't consider redefinitions in excluded basic blocks.  */
		  if (dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
		    continue;
		}

	      tree arg = gimple_phi_arg_def (phi, i);

	      if (!from)
		from = arg;
	      else if (!operand_equal_p (from, arg, 0))
		/* There are more than one source operands that provide
		   different values to the SSA name, it is variant.  */
		return false;
	    }
	}
      else if (gimple_code (stmt) == GIMPLE_ASSIGN)
	{
	  /* For simple value copy, check its rhs instead.  */
	  if (gimple_assign_ssa_name_copy_p (stmt))
	    from = gimple_assign_rhs1 (stmt);
	}

      /* Any other kind of definition is deemed to introduce a new value
	 to the SSA name.  */
      if (!from)
	return false;
    }
  return true;
}
1134 
1135 /* Check whether conditional predicates that BB is control-dependent on, are
1136    semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if non-NULL),
1137    are excluded from LOOP.  Semi-invariant state of checked statement is cached
1138    in hash map STMT_STAT.  */
1139 
1140 static bool
control_dep_semi_invariant_p(struct loop * loop,basic_block bb,const_basic_block skip_head,hash_map<gimple *,bool> & stmt_stat)1141 control_dep_semi_invariant_p (struct loop *loop, basic_block bb,
1142 			      const_basic_block skip_head,
1143 			      hash_map<gimple *, bool> &stmt_stat)
1144 {
1145   hash_set<basic_block> *dep_bbs = find_control_dep_blocks (loop, bb);
1146 
1147   if (!dep_bbs)
1148     return true;
1149 
1150   for (hash_set<basic_block>::iterator iter = dep_bbs->begin ();
1151        iter != dep_bbs->end (); ++iter)
1152     {
1153       gimple *last = last_stmt (*iter);
1154 
1155       if (!last)
1156 	return false;
1157 
1158       /* Only check condition predicates.  */
1159       if (gimple_code (last) != GIMPLE_COND
1160 	  && gimple_code (last) != GIMPLE_SWITCH)
1161 	return false;
1162 
1163       if (!stmt_semi_invariant_p_1 (loop, last, skip_head, stmt_stat))
1164 	return false;
1165     }
1166 
1167   return true;
1168 }
1169 
/* Check whether STMT is semi-invariant in LOOP, iff all its operands are
   semi-invariant, consequently, all its defined values are semi-invariant.
   Basic blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP.
   Semi-invariant state of checked statement is cached in hash map
   STMT_STAT.  */

static bool
stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
			 const_basic_block skip_head,
			 hash_map<gimple *, bool> &stmt_stat)
{
  bool existed;
  bool &invar = stmt_stat.get_or_insert (stmt, &existed);

  /* Cached result from an earlier check.  */
  if (existed)
    return invar;

  /* A statement might depend on itself, which is treated as variant.  So set
     state of statement under check to be variant to ensure that.  */
  invar = false;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      gphi *phi = as_a <gphi *> (stmt);

      if (gimple_bb (stmt) == loop->header)
	{
	  /* If the entry value is subject to abnormal coalescing
	     avoid the transform since we're going to duplicate the
	     loop header and thus likely introduce overlapping life-ranges
	     between the PHI def and the entry on the path when the
	     first loop is skipped.  */
	  tree entry_def
	    = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
	  if (TREE_CODE (entry_def) == SSA_NAME
	      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (entry_def))
	    return false;
	  invar = loop_iter_phi_semi_invariant_p (loop, phi, skip_head);
	  return invar;
	}

      /* For a loop PHI node that does not locate in loop header, it is semi-
	 invariant only if two conditions are met.  The first is its source
	 values are derived from CONSTANT (including loop-invariant value), or
	 from SSA name defined by semi-invariant loop iteration PHI node.  The
	 second is its source incoming edges are control-dependent on semi-
	 invariant conditional predicates.  */
      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
	{
	  const_edge e = gimple_phi_arg_edge (phi, i);
	  tree arg = gimple_phi_arg_def (phi, i);

	  if (TREE_CODE (arg) == SSA_NAME)
	    {
	      if (!ssa_semi_invariant_p (loop, arg, skip_head, stmt_stat))
		return false;

	      /* If source value is defined in location from where the source
		 edge comes in, no need to check control dependency again
		 since this has been done in above SSA name check stage.  */
	      if (e->src == gimple_bb (SSA_NAME_DEF_STMT (arg)))
		continue;
	    }

	  if (!control_dep_semi_invariant_p (loop, e->src, skip_head,
					     stmt_stat))
	    return false;
	}
    }
  else
    {
      ssa_op_iter iter;
      tree use;

      /* Volatile memory load or return of normal (non-const/non-pure) call
	 should not be treated as constant in each iteration of loop.  */
      if (gimple_has_side_effects (stmt))
	return false;

      /* Check if any memory store may kill memory load at this place.  */
      if (gimple_vuse (stmt) && !vuse_semi_invariant_p (loop, stmt, skip_head))
	return false;

      /* Although operand of a statement might be SSA name, CONSTANT or
	 VARDECL, here we only need to check SSA name operands.  This is
	 because check on VARDECL operands, which involve memory loads,
	 must have been done prior to invocation of this function in
	 vuse_semi_invariant_p.  */
      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
	if (!ssa_semi_invariant_p (loop, use, skip_head, stmt_stat))
	  return false;
    }

  /* Finally the statement's own execution must be governed by semi-
     invariant predicates.  */
  if (!control_dep_semi_invariant_p (loop, gimple_bb (stmt), skip_head,
				     stmt_stat))
    return false;

  /* Here we SHOULD NOT use invar = true, since hash map might be changed due
     to new insertion, and thus invar may point to invalid memory.  */
  stmt_stat.put (stmt, true);
  return true;
}
1272 
1273 /* A helper function to check whether STMT is semi-invariant in LOOP.  Basic
1274    blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP.  */
1275 
1276 static bool
stmt_semi_invariant_p(struct loop * loop,gimple * stmt,const_basic_block skip_head)1277 stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
1278 		       const_basic_block skip_head)
1279 {
1280   hash_map<gimple *, bool> stmt_stat;
1281   return stmt_semi_invariant_p_1 (loop, stmt, skip_head, stmt_stat);
1282 }
1283 
1284 /* Determine when conditional statement never transfers execution to one of its
1285    branch, whether we can remove the branch's leading basic block (BRANCH_BB)
1286    and those basic blocks dominated by BRANCH_BB.  */
1287 
1288 static bool
branch_removable_p(basic_block branch_bb)1289 branch_removable_p (basic_block branch_bb)
1290 {
1291   edge_iterator ei;
1292   edge e;
1293 
1294   if (single_pred_p (branch_bb))
1295     return true;
1296 
1297   FOR_EACH_EDGE (e, ei, branch_bb->preds)
1298     {
1299       if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
1300 	continue;
1301 
1302       if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
1303 	continue;
1304 
1305        /* The branch can be reached from opposite branch, or from some
1306 	  statement not dominated by the conditional statement.  */
1307       return false;
1308     }
1309 
1310   return true;
1311 }
1312 
/* Find out which branch of a conditional statement (COND) is invariant in the
   execution context of LOOP.  That is: once the branch is selected in certain
   iteration of the loop, any operand that contributes to computation of the
   conditional statement remains unchanged in all following iterations.
   Returns the semi-invariant edge, or NULL if there is no suitable one.  */

static edge
get_cond_invariant_branch (struct loop *loop, gcond *cond)
{
  basic_block cond_bb = gimple_bb (cond);
  basic_block targ_bb[2];
  bool invar[2];
  unsigned invar_checks = 0;

  for (unsigned i = 0; i < 2; i++)
    {
      targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;

      /* One branch directs to loop exit, no need to perform loop split upon
	 this conditional statement.  Firstly, it is trivial if the exit branch
	 is semi-invariant, for the statement is just to break loop.  Secondly,
	 if the opposite branch is semi-invariant, it means that the statement
	 is real loop-invariant, which is covered by loop unswitch.  */
      if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
	return NULL;
    }

  for (unsigned i = 0; i < 2; i++)
    {
      /* invar[!i] records whether branch !i is semi-invariant when branch i
	 (its opposite) is the one being excluded from the loop.  */
      invar[!i] = false;

      if (!branch_removable_p (targ_bb[i]))
	continue;

      /* Given a semi-invariant branch, if its opposite branch dominates
	 loop latch, it and its following trace will only be executed in
	 final iteration of loop, namely it is not part of repeated body
	 of the loop.  Similar to the above case that the branch is loop
	 exit, no need to split loop.  */
      if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
	continue;

      invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
      invar_checks++;
    }

  /* With both branches being invariant (handled by loop unswitch) or
     variant is not what we want.  The xor is true iff invar[0] == invar[1],
     i.e. exactly when we do NOT have a single semi-invariant branch.  */
  if (invar[0] ^ !invar[1])
    return NULL;

  /* Found a real loop-invariant condition, do nothing.  */
  if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
    return NULL;

  return EDGE_SUCC (cond_bb, invar[0] ? 0 : 1);
}
1369 
/* Calculate increased code size measured by estimated insn number if applying
   loop split upon certain branch (BRANCH_EDGE) of a conditional statement.  */

static int
compute_added_num_insns (struct loop *loop, const_edge branch_edge)
{
  basic_block cond_bb = branch_edge->src;
  /* Index (0 or 1) of BRANCH_EDGE among COND_BB's successors.  */
  unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
  basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
  basic_block *bbs = ((split_info *) loop->aux)->bbs;
  int num = 0;

  /* Sum size estimates of all blocks that would be duplicated, i.e.
     everything except those reachable only through the opposite branch.  */
  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      /* Do no count basic blocks only in opposite branch.  */
      if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
	continue;

      num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
    }

  /* It is unnecessary to evaluate expression of the conditional statement
     in new loop that contains only invariant branch.  This expression should
     be constant value (either true or false).  Exclude code size of insns
     that contribute to computation of the expression.  */

  auto_vec<gimple *> worklist;
  hash_set<gimple *> removed;
  gimple *stmt = last_stmt (cond_bb);

  worklist.safe_push (stmt);
  removed.add (stmt);
  num -= estimate_num_insns (stmt, &eni_size_weights);

  /* Backwards walk over the operands of removable statements: a defining
     statement becomes removable once all its non-debug uses are.  */
  do
    {
      ssa_op_iter opnd_iter;
      use_operand_p opnd_p;

      stmt = worklist.pop ();
      FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
	{
	  tree opnd = USE_FROM_PTR (opnd_p);

	  if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
	    continue;

	  gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
	  use_operand_p use_p;
	  imm_use_iterator use_iter;

	  if (removed.contains (opnd_stmt)
	      || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
	    continue;

	  /* OPND_STMT stays live (set to NULL below) if it has any
	     remaining non-debug use outside the removed set.  */
	  FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
	    {
	      gimple *use_stmt = USE_STMT (use_p);

	      if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
		{
		  opnd_stmt = NULL;
		  break;
		}
	    }

	  if (opnd_stmt)
	    {
	      worklist.safe_push (opnd_stmt);
	      removed.add (opnd_stmt);
	      num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
	    }
	}
    } while (!worklist.is_empty ());

  gcc_assert (num >= 0);
  return num;
}
1448 
1449 /* Find out loop-invariant branch of a conditional statement (COND) if it has,
1450    and check whether it is eligible and profitable to perform loop split upon
1451    this branch in LOOP.  */
1452 
1453 static edge
get_cond_branch_to_split_loop(struct loop * loop,gcond * cond)1454 get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
1455 {
1456   edge invar_branch = get_cond_invariant_branch (loop, cond);
1457   if (!invar_branch)
1458     return NULL;
1459 
1460   /* When accurate profile information is available, and execution
1461      frequency of the branch is too low, just let it go.  */
1462   profile_probability prob = invar_branch->probability;
1463   if (prob.reliable_p ())
1464     {
1465       int thres = param_min_loop_cond_split_prob;
1466 
1467       if (prob < profile_probability::always ().apply_scale (thres, 100))
1468 	return NULL;
1469     }
1470 
1471   /* Add a threshold for increased code size to disable loop split.  */
1472   if (compute_added_num_insns (loop, invar_branch) > param_max_peeled_insns)
1473     return NULL;
1474 
1475   return invar_branch;
1476 }
1477 
/* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
   conditional statement, perform loop split transformation illustrated
   as the following graph.

               .-------T------ if (true) ------F------.
               |                    .---------------. |
               |                    |               | |
               v                    |               v v
          pre-header                |            pre-header
               | .------------.     |                 | .------------.
               | |            |     |                 | |            |
               | v            |     |                 | v            |
             header           |     |               header           |
               |              |     |                 |              |
      .--- if (cond) ---.     |     |        .--- if (true) ---.     |
      |                 |     |     |        |                 |     |
  invariant             |     |     |    invariant             |     |
      |                 |     |     |        |                 |     |
      '---T--->.<---F---'     |     |        '---T--->.<---F---'     |
               |              |    /                  |              |
             stmts            |   /                 stmts            |
               |              F  T                    |              |
              / \             | /                    / \             |
     .-------*   *      [ if (cond) ]       .-------*   *            |
     |           |            |             |           |            |
     |         latch          |             |         latch          |
     |           |            |             |           |            |
     |           '------------'             |           '------------'
     '------------------------. .-----------'
             loop1            | |                   loop2
                              v v
                             exits

   In the graph, loop1 represents the part derived from original one, and
   loop2 is duplicated using loop_version (), which corresponds to the part
   of original one being splitted out.  In original latch edge of loop1, we
   insert a new conditional statement duplicated from the semi-invariant cond,
   and one of its branch goes back to loop1 header as a latch edge, and the
   other branch goes to loop2 pre-header as an entry edge.  And also in loop2,
   we abandon the variant branch of the conditional statement by setting a
   constant bool condition, based on which branch is semi-invariant.
   Returns true iff the transformation was performed.  */

static bool
do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
{
  basic_block cond_bb = invar_branch->src;
  bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
  gcond *cond = as_a <gcond *> (last_stmt (cond_bb));

  gcc_assert (cond_bb->loop_father == loop1);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, cond,
		     "loop split on semi-invariant condition at %s branch\n",
		     true_invar ? "true" : "false");

  initialize_original_copy_tables ();

  /* Duplicate LOOP1 as LOOP2, guarded by a (temporarily) always-true
     condition; probabilities are scaled by the invariant branch's.  */
  struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
				     invar_branch->probability.invert (),
				     invar_branch->probability,
				     profile_probability::always (),
				     profile_probability::always (),
				     true);
  if (!loop2)
    {
      free_original_copy_tables ();
      return false;
    }

  basic_block cond_bb_copy = get_bb_copy (cond_bb);
  gcond *cond_copy = as_a<gcond *> (last_stmt (cond_bb_copy));

  /* Replace the condition in loop2 with a bool constant to let PassManager
     remove the variant branch after current pass completes.  */
  if (true_invar)
    gimple_cond_make_true (cond_copy);
  else
    gimple_cond_make_false (cond_copy);

  update_stmt (cond_copy);

  /* Insert a new conditional statement on latch edge of loop1, its condition
     is duplicated from the semi-invariant.  This statement acts as a switch
     to transfer execution from loop1 to loop2, when loop1 enters into
     invariant state.  */
  basic_block latch_bb = split_edge (loop_latch_edge (loop1));
  basic_block break_bb = split_edge (single_pred_edge (latch_bb));
  gimple *break_cond = gimple_build_cond (gimple_cond_code(cond),
					  gimple_cond_lhs (cond),
					  gimple_cond_rhs (cond),
					  NULL_TREE, NULL_TREE);

  gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);

  edge to_loop1 = single_succ_edge (break_bb);
  edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);

  /* The edge staying in loop1 takes the still-variant direction; the new
     edge to loop2 takes the semi-invariant one.  */
  to_loop1->flags &= ~EDGE_FALLTHRU;
  to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
  to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;

  /* Due to introduction of a control flow edge from loop1 latch to loop2
     pre-header, we should update PHIs in loop2 to reflect this connection
     between loop1 and loop2.  */
  connect_loop_phis (loop1, loop2, to_loop2);

  edge true_edge, false_edge, skip_edge1, skip_edge2;
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  skip_edge1 = true_invar ? false_edge : true_edge;
  skip_edge2 = true_invar ? true_edge : false_edge;
  fix_loop_bb_probability (loop1, loop2, skip_edge1, skip_edge2);

  /* Fix first loop's exit probability after scaling.  */
  to_loop1->probability = invar_branch->probability.invert ();
  to_loop2->probability = invar_branch->probability;

  free_original_copy_tables ();

  return true;
}
1601 
1602 /* Traverse all conditional statements in LOOP, to find out a good candidate
1603    upon which we can do loop split.  */
1604 
1605 static bool
split_loop_on_cond(struct loop * loop)1606 split_loop_on_cond (struct loop *loop)
1607 {
1608   split_info *info = new split_info ();
1609   basic_block *bbs = info->bbs = get_loop_body (loop);
1610   bool do_split = false;
1611 
1612   /* Allocate an area to keep temporary info, and associate its address
1613      with loop aux field.  */
1614   loop->aux = info;
1615 
1616   for (unsigned i = 0; i < loop->num_nodes; i++)
1617     bbs[i]->aux = NULL;
1618 
1619   for (unsigned i = 0; i < loop->num_nodes; i++)
1620     {
1621       basic_block bb = bbs[i];
1622 
1623       /* We only consider conditional statement, which be executed at most once
1624 	 in each iteration of the loop.  So skip statements in inner loops.  */
1625       if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
1626 	continue;
1627 
1628       /* Actually this check is not a must constraint.  With it, we can ensure
1629 	 conditional statement will always be executed in each iteration.  */
1630       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1631 	continue;
1632 
1633       gimple *last = last_stmt (bb);
1634 
1635       if (!last || gimple_code (last) != GIMPLE_COND)
1636 	continue;
1637 
1638       gcond *cond = as_a <gcond *> (last);
1639       edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
1640 
1641       if (branch_edge)
1642 	{
1643 	  do_split_loop_on_cond (loop, branch_edge);
1644 	  do_split = true;
1645 	  break;
1646 	}
1647     }
1648 
1649   delete info;
1650   loop->aux = NULL;
1651 
1652   return do_split;
1653 }
1654 
/* Main entry point.  Perform loop splitting on all suitable loops.
   Returns TODO_cleanup_cfg when any loop was split, 0 otherwise.  */

static unsigned int
tree_ssa_split_loops (void)
{
  bool changed = false;

  gcc_assert (scev_initialized_p ());

  calculate_dominance_info (CDI_POST_DOMINATORS);

  /* Loop aux fields are (ab)used below to mark loops containing split
     inner loops; start from a clean slate.  */
  for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
    loop->aux = NULL;

  /* Go through all loops starting from innermost.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      if (loop->aux)
	{
	  /* If any of our inner loops was split, don't split us,
	     and mark our containing loop as having had splits as well.  */
	  loop_outer (loop)->aux = loop;
	  continue;
	}

      if (optimize_loop_for_size_p (loop))
	continue;

      /* Try iteration-space splitting first, then condition-based
	 splitting.  */
      if (split_loop (loop) || split_loop_on_cond (loop))
	{
	  /* Mark our containing loop as having had some split inner loops.  */
	  loop_outer (loop)->aux = loop;
	  changed = true;
	}
    }

  for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
    loop->aux = NULL;

  clear_aux_for_blocks ();

  free_dominance_info (CDI_POST_DOMINATORS);

  if (changed)
    {
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
      return TODO_cleanup_cfg;
    }
  return 0;
}
1705 
1706 /* Loop splitting pass.  */
1707 
namespace {

/* Pass descriptor for the "lsplit" GIMPLE loop-splitting pass.  */
const pass_data pass_data_loop_split =
{
  GIMPLE_PASS, /* type */
  "lsplit", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LOOP_SPLIT, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_split : public gimple_opt_pass
{
public:
  pass_loop_split (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_split, ctxt)
  {}

  /* opt_pass methods: */
  /* The pass only runs when -fsplit-loops is enabled (implied by -O3).  */
  virtual bool gate (function *) { return flag_split_loops != 0; }
  virtual unsigned int execute (function *);

}; // class pass_loop_split

unsigned int
pass_loop_split::execute (function *fun)
{
  /* number_of_loops counts the artificial outermost "loop" representing
     the whole function, so <= 1 means there are no real loops at all.  */
  if (number_of_loops (fun) <= 1)
    return 0;

  return tree_ssa_split_loops ();
}

} // anon namespace
1746 
1747 gimple_opt_pass *
make_pass_loop_split(gcc::context * ctxt)1748 make_pass_loop_split (gcc::context *ctxt)
1749 {
1750   return new pass_loop_split (ctxt);
1751 }
1752