1 /* Loop splitting.
2 Copyright (C) 2015-2022 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "tree-pass.h"
27 #include "ssa.h"
28 #include "fold-const.h"
29 #include "tree-cfg.h"
30 #include "tree-ssa.h"
31 #include "tree-ssa-loop-niter.h"
32 #include "tree-ssa-loop.h"
33 #include "tree-ssa-loop-manip.h"
34 #include "tree-into-ssa.h"
35 #include "tree-inline.h"
36 #include "tree-cfgcleanup.h"
37 #include "cfgloop.h"
38 #include "tree-scalar-evolution.h"
39 #include "gimple-iterator.h"
40 #include "gimple-pretty-print.h"
41 #include "cfghooks.h"
42 #include "gimple-fold.h"
43 #include "gimplify-me.h"
44
45 /* This file implements two kinds of loop splitting.
46
47 One transformation of loops like:
48
49 for (i = 0; i < 100; i++)
50 {
51 if (i < 50)
52 A;
53 else
54 B;
55 }
56
57 into:
58
59 for (i = 0; i < 50; i++)
60 {
61 A;
62 }
63 for (; i < 100; i++)
64 {
65 B;
66 }
67
68 */
69
/* Return true when BB inside LOOP is a potential iteration space
   split point, i.e. ends with a condition like "IV < comp", which
   is true on one side of the iteration space and false on the other,
   and the split point can be computed.  If so, also return the border
   point in *BORDER and the comparison induction variable in IV.
   Returns the SSA name of the controlling IV on success, NULL_TREE
   otherwise.  */

static tree
split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
{
  gimple *last;
  gcond *stmt;
  affine_iv iv2;

  /* BB must end in a simple conditional jump.  */
  last = last_stmt (bb);
  if (!last || gimple_code (last) != GIMPLE_COND)
    return NULL_TREE;
  stmt = as_a <gcond *> (last);

  enum tree_code code = gimple_cond_code (stmt);

  /* Only handle relational comparisons, for equality and non-equality
     we'd have to split the loop into two loops and a middle statement.  */
  switch (code)
    {
    case LT_EXPR:
    case LE_EXPR:
    case GT_EXPR:
    case GE_EXPR:
      break;
    default:
      return NULL_TREE;
    }

  /* The guard must be purely internal to the loop; a condition that
     also exits the loop is not a usable split point.  */
  if (loop_exits_from_bb_p (loop, bb))
    return NULL_TREE;

  tree op0 = gimple_cond_lhs (stmt);
  tree op1 = gimple_cond_rhs (stmt);
  class loop *useloop = loop_containing_stmt (stmt);

  /* Both operands must be analyzable as affine IVs of LOOP.  */
  if (!simple_iv (loop, useloop, op0, iv, false))
    return NULL_TREE;
  if (!simple_iv (loop, useloop, op1, &iv2, false))
    return NULL_TREE;

  /* Make it so that the first argument of the condition is
     the looping one.  Note this rewrites the condition statement
     in place when a swap is needed.  */
  if (!integer_zerop (iv2.step))
    {
      std::swap (op0, op1);
      std::swap (*iv, iv2);
      code = swap_tree_comparison (code);
      gimple_cond_set_condition (stmt, code, op0, op1);
      update_stmt (stmt);
    }
  else if (integer_zerop (iv->step))
    /* Both operands are loop-invariant; nothing to split on.  */
    return NULL_TREE;
  /* After the canonicalization above the second operand must be
     invariant, i.e. the original condition may not involve two
     evolving IVs.  */
  if (!integer_zerop (iv2.step))
    return NULL_TREE;
  /* Overflow of the guard IV would invalidate the computed border.  */
  if (!iv->no_overflow)
    return NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Found potential split point: ");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
      fprintf (dump_file, " { ");
      print_generic_expr (dump_file, iv->base, TDF_SLIM);
      fprintf (dump_file, " + I*");
      print_generic_expr (dump_file, iv->step, TDF_SLIM);
      fprintf (dump_file, " } %s ", get_tree_code_name (code));
      print_generic_expr (dump_file, iv2.base, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  *border = iv2.base;
  return op0;
}
149
150 /* Given a GUARD conditional stmt inside LOOP, which we want to make always
151 true or false depending on INITIAL_TRUE, and adjusted values NEXTVAL
152 (a post-increment IV) and NEWBOUND (the comparator) adjust the loop
153 exit test statement to loop back only if the GUARD statement will
154 also be true/false in the next iteration. */
155
156 static void
patch_loop_exit(class loop * loop,gcond * guard,tree nextval,tree newbound,bool initial_true)157 patch_loop_exit (class loop *loop, gcond *guard, tree nextval, tree newbound,
158 bool initial_true)
159 {
160 edge exit = single_exit (loop);
161 gcond *stmt = as_a <gcond *> (last_stmt (exit->src));
162 gimple_cond_set_condition (stmt, gimple_cond_code (guard),
163 nextval, newbound);
164 update_stmt (stmt);
165
166 edge stay = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
167
168 exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
169 stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
170
171 if (initial_true)
172 {
173 exit->flags |= EDGE_FALSE_VALUE;
174 stay->flags |= EDGE_TRUE_VALUE;
175 }
176 else
177 {
178 exit->flags |= EDGE_TRUE_VALUE;
179 stay->flags |= EDGE_FALSE_VALUE;
180 }
181 }
182
183 /* Give an induction variable GUARD_IV, and its affine descriptor IV,
184 find the loop phi node in LOOP defining it directly, or create
185 such phi node. Return that phi node. */
186
187 static gphi *
find_or_create_guard_phi(class loop * loop,tree guard_iv,affine_iv *)188 find_or_create_guard_phi (class loop *loop, tree guard_iv, affine_iv * /*iv*/)
189 {
190 gimple *def = SSA_NAME_DEF_STMT (guard_iv);
191 gphi *phi;
192 if ((phi = dyn_cast <gphi *> (def))
193 && gimple_bb (phi) == loop->header)
194 return phi;
195
196 /* XXX Create the PHI instead. */
197 return NULL;
198 }
199
200 /* Returns true if the exit values of all loop phi nodes can be
201 determined easily (i.e. that connect_loop_phis can determine them). */
202
203 static bool
easy_exit_values(class loop * loop)204 easy_exit_values (class loop *loop)
205 {
206 edge exit = single_exit (loop);
207 edge latch = loop_latch_edge (loop);
208 gphi_iterator psi;
209
210 /* Currently we regard the exit values as easy if they are the same
211 as the value over the backedge. Which is the case if the definition
212 of the backedge value dominates the exit edge. */
213 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
214 {
215 gphi *phi = psi.phi ();
216 tree next = PHI_ARG_DEF_FROM_EDGE (phi, latch);
217 basic_block bb;
218 if (TREE_CODE (next) == SSA_NAME
219 && (bb = gimple_bb (SSA_NAME_DEF_STMT (next)))
220 && !dominated_by_p (CDI_DOMINATORS, exit->src, bb))
221 return false;
222 }
223
224 return true;
225 }
226
/* This function updates the SSA form after connect_loops made a new
   edge NEW_E leading from LOOP1 exit to LOOP2 (via an intermediate
   conditional).  I.e. the second loop can now be entered either
   via the original entry or via NEW_E, so the entry values of LOOP2
   phi nodes are either the original ones or those at the exit
   of LOOP1.  Insert new phi nodes in LOOP2 pre-header reflecting
   this.  The loops need to fulfill easy_exit_values ().  */

static void
connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
{
  basic_block rest = loop_preheader_edge (loop2)->src;
  gcc_assert (new_e->dest == rest);
  /* The other predecessor of REST, taken when LOOP1 was skipped
     entirely.  */
  edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e);

  edge firste = loop_preheader_edge (loop1);
  edge seconde = loop_preheader_edge (loop2);
  edge firstn = loop_latch_edge (loop1);
  gphi_iterator psi_first, psi_second;
  /* LOOP2 is a copy of LOOP1, so the header PHI nodes of the two
     loops correspond one-to-one and can be walked in lockstep.  */
  for (psi_first = gsi_start_phis (loop1->header),
       psi_second = gsi_start_phis (loop2->header);
       !gsi_end_p (psi_first);
       gsi_next (&psi_first), gsi_next (&psi_second))
    {
      tree init, next, new_init;
      use_operand_p op;
      gphi *phi_first = psi_first.phi ();
      gphi *phi_second = psi_second.phi ();

      init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
      next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
      op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
      /* Both loops were just versioned from the same original, so they
	 must still agree on the entry value.  */
      gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));

      /* Prefer using original variable as a base for the new ssa name.
	 This is necessary for virtual ops, and useful in order to avoid
	 losing debug info for real ops.  */
      if (TREE_CODE (next) == SSA_NAME
	  && useless_type_conversion_p (TREE_TYPE (next),
					TREE_TYPE (init)))
	new_init = copy_ssa_name (next);
      else if (TREE_CODE (init) == SSA_NAME
	       && useless_type_conversion_p (TREE_TYPE (init),
					     TREE_TYPE (next)))
	new_init = copy_ssa_name (init);
      else if (useless_type_conversion_p (TREE_TYPE (next),
					  TREE_TYPE (init)))
	new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
				       "unrinittmp");
      else
	new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
				       "unrinittmp");

      /* Merge the two possible entry values in a new PHI in REST:
	 the original init when LOOP1 was skipped, the LOOP1 backedge
	 value (equal to its exit value per easy_exit_values) when
	 entered via NEW_E.  Then make LOOP2 use that merged value.  */
      gphi * newphi = create_phi_node (new_init, rest);
      add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
      SET_USE (op, new_init);
    }
}
286
/* The two loops LOOP1 and LOOP2 were just created by loop versioning,
   they are still equivalent and placed in two arms of a diamond, like so:

               .------if (cond)------.
               v                     v
             pre1                   pre2
               |                     |
       .--->h1                       h2<----.
       |      |                      |      |
       |     ex1---.            .---ex2     |
       |    /      |            |      \    |
       '---l1      X            |      l2---'
                   |            |
                   |            |
                   '--->join<---'

   This function transforms the program such that LOOP1 is conditionally
   falling through to LOOP2, or skipping it.  This is done by splitting
   the ex1->join edge at X in the diagram above, and inserting a condition
   whose one arm goes to pre2, resulting in this situation:

               .------if (cond)------.
               v                     v
             pre1       .---------->pre2
               |        |            |
       .--->h1          |            h2<----.
       |      |         |            |      |
       |     ex1---.    |       .---ex2     |
       |    /      v    |       |      \    |
       '---l1     skip---'      |      l2---'
                   |            |
                   |            |
                   '--->join<---'


   The condition used is the exit condition of LOOP1, which effectively means
   that when the first loop exits (for whatever reason) but the real original
   exit expression is still false the second loop will be entered.
   The function returns the new edge cond->pre2.

   This doesn't update the SSA form, see connect_loop_phis for that.  */

static edge
connect_loops (class loop *loop1, class loop *loop2)
{
  edge exit = single_exit (loop1);
  /* SKIP_BB is the new block on the former exit edge holding the
     inserted condition.  */
  basic_block skip_bb = split_edge (exit);
  gcond *skip_stmt;
  gimple_stmt_iterator gsi;
  edge new_e, skip_e;

  /* Duplicate LOOP1's exit condition into SKIP_BB.  */
  gimple *stmt = last_stmt (exit->src);
  skip_stmt = gimple_build_cond (gimple_cond_code (stmt),
				 gimple_cond_lhs (stmt),
				 gimple_cond_rhs (stmt),
				 NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (skip_bb);
  gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT);

  /* The single successor of SKIP_BB (towards join) stops being a
     fallthru edge now that SKIP_BB ends in a condition; the new edge
     goes to LOOP2's preheader.  The new condition's edges get the same
     true/false orientation the original exit had.  */
  skip_e = EDGE_SUCC (skip_bb, 0);
  skip_e->flags &= ~EDGE_FALLTHRU;
  new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0);
  if (exit->flags & EDGE_TRUE_VALUE)
    {
      skip_e->flags |= EDGE_TRUE_VALUE;
      new_e->flags |= EDGE_FALSE_VALUE;
    }
  else
    {
      skip_e->flags |= EDGE_FALSE_VALUE;
      new_e->flags |= EDGE_TRUE_VALUE;
    }

  /* Assume entering the second loop is the likely outcome.  */
  new_e->probability = profile_probability::likely ();
  skip_e->probability = new_e->probability.invert ();

  return new_e;
}
365
/* This returns the new bound for iterations given the original iteration
   space in NITER, an arbitrary new bound BORDER, assumed to be some
   comparison value with a different IV, the initial value GUARD_INIT of
   that other IV, and the comparison code GUARD_CODE that compares
   that other IV with BORDER.  We return an SSA name, and place any
   necessary statements for that computation into *STMTS.

   For example for such a loop:

     for (i = beg, j = guard_init; i < end; i++, j++)
       if (j < border)	// this is supposed to be true/false
         ...

   we want to return a new bound (on j) that makes the loop iterate
   as long as the condition j < border stays true.  We also don't want
   to iterate more often than the original loop, so we have to introduce
   some cut-off as well (via min/max), effectively resulting in:

     newend = min (end+guard_init-beg, border)
     for (i = beg; j = guard_init; j < newend; i++, j++)
       if (j < c)
         ...

   Depending on the direction of the IVs and if the exit tests
   are strict or non-strict we need to use MIN or MAX,
   and add or subtract 1.  This routine computes newend above.  */

static tree
compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
			 tree border,
			 enum tree_code guard_code, tree guard_init)
{
  /* The niter structure contains the after-increment IV, we need
     the loop-enter base, so subtract STEP once.  For pointers the
     subtraction is expressed as POINTER_PLUS of the negated step.  */
  tree controlbase = force_gimple_operand (niter->control.base,
					   stmts, true, NULL_TREE);
  tree controlstep = niter->control.step;
  tree enddiff;
  if (POINTER_TYPE_P (TREE_TYPE (controlbase)))
    {
      controlstep = gimple_build (stmts, NEGATE_EXPR,
				  TREE_TYPE (controlstep), controlstep);
      enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
			      TREE_TYPE (controlbase),
			      controlbase, controlstep);
    }
  else
    enddiff = gimple_build (stmts, MINUS_EXPR,
			    TREE_TYPE (controlbase),
			    controlbase, controlstep);

  /* Compute end-beg.  */
  gimple_seq stmts2;
  tree end = force_gimple_operand (niter->bound, &stmts2,
				   true, NULL_TREE);
  gimple_seq_add_seq_without_update (stmts, stmts2);
  if (POINTER_TYPE_P (TREE_TYPE (enddiff)))
    {
      /* end - beg == end + (-(sizetype) beg) in pointer arithmetic.  */
      tree tem = gimple_convert (stmts, sizetype, enddiff);
      tem = gimple_build (stmts, NEGATE_EXPR, sizetype, tem);
      enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
			      TREE_TYPE (enddiff),
			      end, tem);
    }
  else
    enddiff = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
			    end, enddiff);

  /* Compute guard_init + (end-beg).  */
  tree newbound;
  enddiff = gimple_convert (stmts, TREE_TYPE (guard_init), enddiff);
  if (POINTER_TYPE_P (TREE_TYPE (guard_init)))
    {
      enddiff = gimple_convert (stmts, sizetype, enddiff);
      newbound = gimple_build (stmts, POINTER_PLUS_EXPR,
			       TREE_TYPE (guard_init),
			       guard_init, enddiff);
    }
  else
    newbound = gimple_build (stmts, PLUS_EXPR, TREE_TYPE (guard_init),
			     guard_init, enddiff);

  /* Depending on the direction of the IVs the new bound for the first
     loop is the minimum or maximum of old bound and border.
     Also, if the guard condition isn't strictly less or greater,
     we need to adjust the bound.  */
  int addbound = 0;
  enum tree_code minmax;
  if (niter->cmp == LT_EXPR)
    {
      /* GT and LE are the same, inverted.  */
      if (guard_code == GT_EXPR || guard_code == LE_EXPR)
	addbound = -1;
      minmax = MIN_EXPR;
    }
  else
    {
      /* The only other cmp split_loop accepts is GT (NE and ERROR_MARK
	 were rejected up front).  */
      gcc_assert (niter->cmp == GT_EXPR);
      if (guard_code == GE_EXPR || guard_code == LT_EXPR)
	addbound = 1;
      minmax = MAX_EXPR;
    }

  if (addbound)
    {
      /* Pointer adjustment must use a sizetype offset.  */
      tree type2 = TREE_TYPE (newbound);
      if (POINTER_TYPE_P (type2))
	type2 = sizetype;
      newbound = gimple_build (stmts,
			       POINTER_TYPE_P (TREE_TYPE (newbound))
			       ? POINTER_PLUS_EXPR : PLUS_EXPR,
			       TREE_TYPE (newbound),
			       newbound,
			       build_int_cst (type2, addbound));
    }

  tree newend = gimple_build (stmts, minmax, TREE_TYPE (border),
			      border, newbound);
  return newend;
}
486
487 /* Fix the two loop's bb count after split based on the split edge probability,
488 don't adjust the bbs dominated by true branches of that loop to avoid
489 dropping 1s down. */
490 static void
fix_loop_bb_probability(class loop * loop1,class loop * loop2,edge true_edge,edge false_edge)491 fix_loop_bb_probability (class loop *loop1, class loop *loop2, edge true_edge,
492 edge false_edge)
493 {
494 update_ssa (TODO_update_ssa);
495
496 /* Proportion first loop's bb counts except those dominated by true
497 branch to avoid drop 1s down. */
498 basic_block *bbs1, *bbs2;
499 bbs1 = get_loop_body (loop1);
500 unsigned j;
501 for (j = 0; j < loop1->num_nodes; j++)
502 if (bbs1[j] == loop1->latch
503 || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
504 bbs1[j]->count
505 = bbs1[j]->count.apply_probability (true_edge->probability);
506 free (bbs1);
507
508 /* Proportion second loop's bb counts except those dominated by false
509 branch to avoid drop 1s down. */
510 basic_block bbi_copy = get_bb_copy (false_edge->dest);
511 bbs2 = get_loop_body (loop2);
512 for (j = 0; j < loop2->num_nodes; j++)
513 if (bbs2[j] == loop2->latch
514 || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
515 bbs2[j]->count
516 = bbs2[j]->count.apply_probability (true_edge->probability.invert ());
517 free (bbs2);
518 }
519
/* Checks if LOOP contains a conditional block whose condition
   depends on which side in the iteration space it is, and if so
   splits the iteration space into two loops.  Returns true if the
   loop was split.  NITER must contain the iteration descriptor for the
   single exit of LOOP.  */

static bool
split_loop (class loop *loop1)
{
  class tree_niter_desc niter;
  basic_block *bbs;
  unsigned i;
  bool changed = false;
  tree guard_iv;
  tree border = NULL_TREE;
  affine_iv iv;
  edge exit1;

  /* Applicability checks: a single countable exit, an empty latch,
     easily determinable exit values, and a <, <= , > or >= style exit
     condition.  */
  if (!(exit1 = single_exit (loop1))
      || EDGE_COUNT (exit1->src->succs) != 2
      /* ??? We could handle non-empty latches when we split the latch edge
	 (not the exit edge), and put the new exit condition in the new block.
	 OTOH this executes some code unconditionally that might have been
	 skipped by the original exit before.  */
      || !empty_block_p (loop1->latch)
      || !easy_exit_values (loop1)
      || !number_of_iterations_exit (loop1, exit1, &niter, false, true)
      || niter.cmp == ERROR_MARK
      /* We can't yet handle loops controlled by a != predicate.  */
      || niter.cmp == NE_EXPR)
    return false;

  bbs = get_loop_body (loop1);

  if (!can_copy_bbs_p (bbs, loop1->num_nodes))
    {
      free (bbs);
      return false;
    }

  /* Find a splitting opportunity.  */
  for (i = 0; i < loop1->num_nodes; i++)
    if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv)))
      {
	/* Handling opposite steps is not implemented yet.  Neither
	   is handling different step sizes.  */
	if ((tree_int_cst_sign_bit (iv.step)
	     != tree_int_cst_sign_bit (niter.control.step))
	    || !tree_int_cst_equal (iv.step, niter.control.step))
	  continue;

	/* Find a loop PHI node that defines guard_iv directly,
	   or create one doing that.  */
	gphi *phi = find_or_create_guard_phi (loop1, guard_iv, &iv);
	if (!phi)
	  continue;
	gcond *guard_stmt = as_a<gcond *> (last_stmt (bbs[i]));
	tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
						 loop_preheader_edge (loop1));
	enum tree_code guard_code = gimple_cond_code (guard_stmt);

	/* Loop splitting is implemented by versioning the loop, placing
	   the new loop after the old loop, make the first loop iterate
	   as long as the conditional stays true (or false) and let the
	   second (new) loop handle the rest of the iterations.

	   First we need to determine if the condition will start being true
	   or false in the first loop.  */
	bool initial_true;
	switch (guard_code)
	  {
	  case LT_EXPR:
	  case LE_EXPR:
	    /* An upward-counting IV starts below the border, so the
	       guard starts out true; downward-counting the opposite.  */
	    initial_true = !tree_int_cst_sign_bit (iv.step);
	    break;
	  case GT_EXPR:
	  case GE_EXPR:
	    initial_true = tree_int_cst_sign_bit (iv.step);
	    break;
	  default:
	    gcc_unreachable ();
	  }

	/* Build a condition that will skip the first loop when the
	   guard condition won't ever be true (or false).  */
	gimple_seq stmts2;
	border = force_gimple_operand (border, &stmts2, true, NULL_TREE);
	if (stmts2)
	  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
					    stmts2);
	tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
	if (!initial_true)
	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);

	edge true_edge, false_edge;
	extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge);

	/* Now version the loop, placing loop2 after loop1 connecting
	   them, and fix up SSA form for that.  */
	initialize_original_copy_tables ();
	basic_block cond_bb;

	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
					  true_edge->probability,
					  true_edge->probability.invert (),
					  profile_probability::always (),
					  profile_probability::always (),
					  true);
	gcc_assert (loop2);

	edge new_e = connect_loops (loop1, loop2);
	connect_loop_phis (loop1, loop2, new_e);

	/* The iterations of the second loop is now already
	   exactly those that the first loop didn't do, but the
	   iteration space of the first loop is still the original one.
	   Compute the new bound for the guarding IV and patch the
	   loop exit to use it instead of original IV and bound.  */
	gimple_seq stmts = NULL;
	tree newend = compute_new_first_bound (&stmts, &niter, border,
					       guard_code, guard_init);
	if (stmts)
	  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
					    stmts);
	tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
	patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true);

	fix_loop_bb_probability (loop1, loop2, true_edge, false_edge);

	/* Fix first loop's exit probability after scaling.  */
	edge exit_to_latch1;
	if (EDGE_SUCC (exit1->src, 0) == exit1)
	  exit_to_latch1 = EDGE_SUCC (exit1->src, 1);
	else
	  exit_to_latch1 = EDGE_SUCC (exit1->src, 0);
	exit_to_latch1->probability *= true_edge->probability;
	exit1->probability = exit_to_latch1->probability.invert ();

	/* Finally patch out the two copies of the condition to be always
	   true/false (or opposite).  */
	gcond *force_true = as_a<gcond *> (last_stmt (bbs[i]));
	gcond *force_false = as_a<gcond *> (last_stmt (get_bb_copy (bbs[i])));
	if (!initial_true)
	  std::swap (force_true, force_false);
	gimple_cond_make_true (force_true);
	gimple_cond_make_false (force_false);
	update_stmt (force_true);
	update_stmt (force_false);

	free_original_copy_tables ();

	changed = true;
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, ";; Loop split.\n");

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, guard_stmt, "loop split\n");

	/* Only deal with the first opportunity.  */
	break;
      }

  free (bbs);
  return changed;
}
685
686 /* Another transformation of loops like:
687
688 for (i = INIT (); CHECK (i); i = NEXT ())
689 {
690 if (expr (a_1, a_2, ..., a_n)) // expr is pure
691 a_j = ...; // change at least one a_j
692 else
693 S; // not change any a_j
694 }
695
696 into:
697
698 for (i = INIT (); CHECK (i); i = NEXT ())
699 {
700 if (expr (a_1, a_2, ..., a_n))
701 a_j = ...;
702 else
703 {
704 S;
705 i = NEXT ();
706 break;
707 }
708 }
709
710 for (; CHECK (i); i = NEXT ())
711 {
712 S;
713 }
714
715 */
716
717 /* Data structure to hold temporary information during loop split upon
718 semi-invariant conditional statement. */
719 class split_info {
720 public:
721 /* Array of all basic blocks in a loop, returned by get_loop_body(). */
722 basic_block *bbs;
723
724 /* All memory store/clobber statements in a loop. */
725 auto_vec<gimple *> memory_stores;
726
727 /* Whether above memory stores vector has been filled. */
728 int need_init;
729
730 /* Control dependencies of basic blocks in a loop. */
731 auto_vec<hash_set<basic_block> *> control_deps;
732
split_info()733 split_info () : bbs (NULL), need_init (true) { }
734
~split_info()735 ~split_info ()
736 {
737 if (bbs)
738 free (bbs);
739
740 for (unsigned i = 0; i < control_deps.length (); i++)
741 delete control_deps[i];
742 }
743 };
744
/* Find all statements with memory-write effect in LOOP, including memory
   store and non-pure function call, and keep those in a vector.  This work
   is only done one time, for the vector should be constant during analysis
   stage of semi-invariant condition.  */

static void
find_vdef_in_loop (struct loop *loop)
{
  split_info *info = (split_info *) loop->aux;
  gphi *vphi = get_virtual_phi (loop->header);

  /* Indicate memory store vector has been filled.  */
  info->need_init = false;

  /* If loop contains memory operation, there must be a virtual PHI node in
     loop header basic block.  */
  if (vphi == NULL)
    return;

  /* All virtual SSA names inside the loop are connected to be a cyclic
     graph via virtual PHI nodes.  The virtual PHI node in loop header just
     links the first and the last virtual SSA names, by using the last as
     PHI operand to define the first.  */
  const edge latch = loop_latch_edge (loop);
  const tree first = gimple_phi_result (vphi);
  const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);

  /* The virtual SSA cyclic graph might consist of only one SSA name, who
     is defined by itself.

       .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>

     This means the loop contains only memory loads, so we can skip it.  */
  if (first == last)
    return;

  auto_vec<gimple *> other_stores;
  auto_vec<tree> worklist;
  auto_bitmap visited;

  bitmap_set_bit (visited, SSA_NAME_VERSION (first));
  bitmap_set_bit (visited, SSA_NAME_VERSION (last));
  worklist.safe_push (last);

  do
    {
      tree vuse = worklist.pop ();
      gimple *stmt = SSA_NAME_DEF_STMT (vuse);

      /* We mark the first and last SSA names as visited at the beginning,
	 and reversely start the process from the last SSA name towards the
	 first, which ensures that this do-while will not touch SSA names
	 defined outside the loop.  */
      gcc_assert (gimple_bb (stmt)
		  && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));

      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  gphi *phi = as_a <gphi *> (stmt);

	  /* Continue the walk through every incoming virtual operand.  */
	  for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
	    {
	      tree arg = gimple_phi_arg_def (stmt, i);

	      if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
		worklist.safe_push (arg);
	    }
	}
      else
	{
	  tree prev = gimple_vuse (stmt);

	  /* Non-pure call statement is conservatively assumed to impact all
	     memory locations.  So place call statements ahead of other memory
	     stores in the vector with an idea of using them as shortcut
	     terminators to memory alias analysis.  */
	  if (gimple_code (stmt) == GIMPLE_CALL)
	    info->memory_stores.safe_push (stmt);
	  else
	    other_stores.safe_push (stmt);

	  if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
	    worklist.safe_push (prev);
	}
    } while (!worklist.is_empty ());

  /* Calls first (see above), then the remaining stores.  */
  info->memory_stores.safe_splice (other_stores);
}
833
834 /* Two basic blocks have equivalent control dependency if one dominates to
835 the other, and it is post-dominated by the latter. Given a basic block
836 BB in LOOP, find farest equivalent dominating basic block. For BB, there
837 is a constraint that BB does not post-dominate loop header of LOOP, this
838 means BB is control-dependent on at least one basic block in LOOP. */
839
840 static basic_block
get_control_equiv_head_block(struct loop * loop,basic_block bb)841 get_control_equiv_head_block (struct loop *loop, basic_block bb)
842 {
843 while (!bb->aux)
844 {
845 basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
846
847 gcc_checking_assert (dom_bb && flow_bb_inside_loop_p (loop, dom_bb));
848
849 if (!dominated_by_p (CDI_POST_DOMINATORS, dom_bb, bb))
850 break;
851
852 bb = dom_bb;
853 }
854 return bb;
855 }
856
/* Given a BB in LOOP, find out all basic blocks in LOOP that BB is control-
   dependent on.  The returned set is owned by LOOP's split_info and is
   also cached on BB->aux (and on all control-equivalent blocks up to its
   equivalence head).  Returns NULL if BB depends on nothing in LOOP.  */

static hash_set<basic_block> *
find_control_dep_blocks (struct loop *loop, basic_block bb)
{
  /* BB has same control dependency as loop header, then it is not control-
     dependent on any basic block in LOOP.  */
  if (dominated_by_p (CDI_POST_DOMINATORS, loop->header, bb))
    return NULL;

  basic_block equiv_head = get_control_equiv_head_block (loop, bb);

  if (equiv_head->aux)
    {
      /* There is a basic block containing control dependency equivalent
	 to BB.  No need to recompute that, and also set this information
	 to other equivalent basic blocks.  */
      for (; bb != equiv_head;
	   bb = get_immediate_dominator (CDI_DOMINATORS, bb))
	bb->aux = equiv_head->aux;
      return (hash_set<basic_block> *) equiv_head->aux;
    }

  /* A basic block X is control-dependent on another Y iff there exists
     a path from X to Y, in which every basic block other than X and Y
     is post-dominated by Y, but X is not post-dominated by Y.

     According to this rule, traverse basic blocks in the loop backwards
     starting from BB, if a basic block is post-dominated by BB, extend
     current post-dominating path to this block, otherwise it is another
     one that BB is control-dependent on.  */

  auto_vec<basic_block> pdom_worklist;
  hash_set<basic_block> pdom_visited;
  hash_set<basic_block> *dep_bbs = new hash_set<basic_block>;

  pdom_worklist.safe_push (equiv_head);

  do
    {
      basic_block pdom_bb = pdom_worklist.pop ();
      edge_iterator ei;
      edge e;

      if (pdom_visited.add (pdom_bb))
	continue;

      FOR_EACH_EDGE (e, ei, pdom_bb->preds)
	{
	  basic_block pred_bb = e->src;

	  /* Predecessor not post-dominated by BB: a control dependence.  */
	  if (!dominated_by_p (CDI_POST_DOMINATORS, pred_bb, bb))
	    {
	      dep_bbs->add (pred_bb);
	      continue;
	    }

	  pred_bb = get_control_equiv_head_block (loop, pred_bb);

	  if (pdom_visited.contains (pred_bb))
	    continue;

	  if (!pred_bb->aux)
	    {
	      pdom_worklist.safe_push (pred_bb);
	      continue;
	    }

	  /* If control dependency of basic block is available, fast extend
	     post-dominating path using the information instead of advancing
	     forward step-by-step.  */
	  hash_set<basic_block> *pred_dep_bbs
	    = (hash_set<basic_block> *) pred_bb->aux;

	  for (hash_set<basic_block>::iterator iter = pred_dep_bbs->begin ();
	       iter != pred_dep_bbs->end (); ++iter)
	    {
	      basic_block pred_dep_bb = *iter;

	      /* Basic blocks can either be in control dependency of BB, or
		 must be post-dominated by BB, if so, extend the path from
		 these basic blocks.  */
	      if (!dominated_by_p (CDI_POST_DOMINATORS, pred_dep_bb, bb))
		dep_bbs->add (pred_dep_bb);
	      else if (!pdom_visited.contains (pred_dep_bb))
		pdom_worklist.safe_push (pred_dep_bb);
	    }
	}
    } while (!pdom_worklist.is_empty ());

  /* Record computed control dependencies in loop so that we can reach them
     when reclaiming resources.  */
  ((split_info *) loop->aux)->control_deps.safe_push (dep_bbs);

  /* Associate control dependence with related equivalent basic blocks.  */
  for (equiv_head->aux = dep_bbs; bb != equiv_head;
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    bb->aux = dep_bbs;

  return dep_bbs;
}
959
/* Forward declaration; defined later in this file.  */

static bool
stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
			 const_basic_block skip_head,
			 hash_map<gimple *, bool> &stmt_stat);
966
/* Given STMT, memory load or pure call statement, check whether it is impacted
   by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the
   trace is composed of SKIP_HEAD and those basic block dominated by it, always
   corresponds to one branch of a conditional statement).  If SKIP_HEAD is
   NULL, all basic blocks of LOOP are checked.  Returns true if no store
   may clobber the memory STMT reads.  */

static bool
vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
		       const_basic_block skip_head)
{
  split_info *info = (split_info *) loop->aux;
  tree rhs = NULL_TREE;
  ao_ref ref;
  gimple *store;
  unsigned i;

  /* Collect memory store/clobber statements if haven't done that.  */
  if (info->need_init)
    find_vdef_in_loop (loop);

  /* For a load the reference is the assignment's rhs; for a pure call
     RHS stays NULL_TREE and the check below is fully conservative.  */
  if (is_gimple_assign (stmt))
    rhs = gimple_assign_rhs1 (stmt);

  ao_ref_init (&ref, rhs);

  FOR_EACH_VEC_ELT (info->memory_stores, i, store)
    {
      /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL.  */
      if (skip_head
	  && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
	continue;

      /* Without a concrete reference (RHS is NULL) any store is assumed
	 to clobber.  */
      if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
	return false;
    }

  return true;
}
1005
1006 /* Suppose one condition branch, led by SKIP_HEAD, is not executed since
1007 certain iteration of LOOP, check whether an SSA name (NAME) remains
1008 unchanged in next iteration. We call this characteristic semi-
1009 invariantness. SKIP_HEAD might be NULL, if so, nothing excluded, all basic
1010 blocks and control flows in the loop will be considered. Semi-invariant
1011 state of checked statement is cached in hash map STMT_STAT to avoid
1012 redundant computation in possible following re-check. */
1013
1014 static inline bool
ssa_semi_invariant_p(struct loop * loop,tree name,const_basic_block skip_head,hash_map<gimple *,bool> & stmt_stat)1015 ssa_semi_invariant_p (struct loop *loop, tree name,
1016 const_basic_block skip_head,
1017 hash_map<gimple *, bool> &stmt_stat)
1018 {
1019 gimple *def = SSA_NAME_DEF_STMT (name);
1020 const_basic_block def_bb = gimple_bb (def);
1021
1022 /* An SSA name defined outside loop is definitely semi-invariant. */
1023 if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
1024 return true;
1025
1026 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1027 return false;
1028
1029 return stmt_semi_invariant_p_1 (loop, def, skip_head, stmt_stat);
1030 }
1031
1032 /* Check whether a loop iteration PHI node (LOOP_PHI) defines a value that is
1033 semi-invariant in LOOP. Basic blocks dominated by SKIP_HEAD (if non-NULL),
1034 are excluded from LOOP. */
1035
1036 static bool
loop_iter_phi_semi_invariant_p(struct loop * loop,gphi * loop_phi,const_basic_block skip_head)1037 loop_iter_phi_semi_invariant_p (struct loop *loop, gphi *loop_phi,
1038 const_basic_block skip_head)
1039 {
1040 const_edge latch = loop_latch_edge (loop);
1041 tree name = gimple_phi_result (loop_phi);
1042 tree from = PHI_ARG_DEF_FROM_EDGE (loop_phi, latch);
1043
1044 gcc_checking_assert (from);
1045
1046 /* Loop iteration PHI node locates in loop header, and it has two source
1047 operands, one is an initial value coming from outside the loop, the other
1048 is a value through latch of the loop, which is derived in last iteration,
1049 we call the latter latch value. From the PHI node to definition of latch
1050 value, if excluding branch trace starting from SKIP_HEAD, except copy-
1051 assignment or likewise, there is no other kind of value redefinition, SSA
1052 name defined by the PHI node is semi-invariant.
1053
1054 loop entry
1055 | .--- latch ---.
1056 | | |
1057 v v |
1058 x_1 = PHI <x_0, x_3> |
1059 | |
1060 v |
1061 .------- if (cond) -------. |
1062 | | |
1063 | [ SKIP ] |
1064 | | |
1065 | x_2 = ... |
1066 | | |
1067 '---- T ---->.<---- F ----' |
1068 | |
1069 v |
1070 x_3 = PHI <x_1, x_2> |
1071 | |
1072 '----------------------'
1073
1074 Suppose in certain iteration, execution flow in above graph goes through
1075 true branch, which means that one source value to define x_3 in false
1076 branch (x_2) is skipped, x_3 only comes from x_1, and x_1 in next
1077 iterations is defined by x_3, we know that x_1 will never changed if COND
1078 always chooses true branch from then on. */
1079
1080 while (from != name)
1081 {
1082 /* A new value comes from a CONSTANT. */
1083 if (TREE_CODE (from) != SSA_NAME)
1084 return false;
1085
1086 gimple *stmt = SSA_NAME_DEF_STMT (from);
1087 const_basic_block bb = gimple_bb (stmt);
1088
1089 /* A new value comes from outside the loop. */
1090 if (!bb || !flow_bb_inside_loop_p (loop, bb))
1091 return false;
1092
1093 from = NULL_TREE;
1094
1095 if (gimple_code (stmt) == GIMPLE_PHI)
1096 {
1097 gphi *phi = as_a <gphi *> (stmt);
1098
1099 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
1100 {
1101 if (skip_head)
1102 {
1103 const_edge e = gimple_phi_arg_edge (phi, i);
1104
1105 /* Don't consider redefinitions in excluded basic blocks. */
1106 if (dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
1107 continue;
1108 }
1109
1110 tree arg = gimple_phi_arg_def (phi, i);
1111
1112 if (!from)
1113 from = arg;
1114 else if (!operand_equal_p (from, arg, 0))
1115 /* There are more than one source operands that provide
1116 different values to the SSA name, it is variant. */
1117 return false;
1118 }
1119 }
1120 else if (gimple_code (stmt) == GIMPLE_ASSIGN)
1121 {
1122 /* For simple value copy, check its rhs instead. */
1123 if (gimple_assign_ssa_name_copy_p (stmt))
1124 from = gimple_assign_rhs1 (stmt);
1125 }
1126
1127 /* Any other kind of definition is deemed to introduce a new value
1128 to the SSA name. */
1129 if (!from)
1130 return false;
1131 }
1132 return true;
1133 }
1134
1135 /* Check whether conditional predicates that BB is control-dependent on, are
1136 semi-invariant in LOOP. Basic blocks dominated by SKIP_HEAD (if non-NULL),
1137 are excluded from LOOP. Semi-invariant state of checked statement is cached
1138 in hash map STMT_STAT. */
1139
1140 static bool
control_dep_semi_invariant_p(struct loop * loop,basic_block bb,const_basic_block skip_head,hash_map<gimple *,bool> & stmt_stat)1141 control_dep_semi_invariant_p (struct loop *loop, basic_block bb,
1142 const_basic_block skip_head,
1143 hash_map<gimple *, bool> &stmt_stat)
1144 {
1145 hash_set<basic_block> *dep_bbs = find_control_dep_blocks (loop, bb);
1146
1147 if (!dep_bbs)
1148 return true;
1149
1150 for (hash_set<basic_block>::iterator iter = dep_bbs->begin ();
1151 iter != dep_bbs->end (); ++iter)
1152 {
1153 gimple *last = last_stmt (*iter);
1154
1155 if (!last)
1156 return false;
1157
1158 /* Only check condition predicates. */
1159 if (gimple_code (last) != GIMPLE_COND
1160 && gimple_code (last) != GIMPLE_SWITCH)
1161 return false;
1162
1163 if (!stmt_semi_invariant_p_1 (loop, last, skip_head, stmt_stat))
1164 return false;
1165 }
1166
1167 return true;
1168 }
1169
1170 /* Check whether STMT is semi-invariant in LOOP, iff all its operands are
1171 semi-invariant, consequently, all its defined values are semi-invariant.
1172 Basic blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP.
1173 Semi-invariant state of checked statement is cached in hash map
1174 STMT_STAT. */
1175
1176 static bool
stmt_semi_invariant_p_1(struct loop * loop,gimple * stmt,const_basic_block skip_head,hash_map<gimple *,bool> & stmt_stat)1177 stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
1178 const_basic_block skip_head,
1179 hash_map<gimple *, bool> &stmt_stat)
1180 {
1181 bool existed;
1182 bool &invar = stmt_stat.get_or_insert (stmt, &existed);
1183
1184 if (existed)
1185 return invar;
1186
1187 /* A statement might depend on itself, which is treated as variant. So set
1188 state of statement under check to be variant to ensure that. */
1189 invar = false;
1190
1191 if (gimple_code (stmt) == GIMPLE_PHI)
1192 {
1193 gphi *phi = as_a <gphi *> (stmt);
1194
1195 if (gimple_bb (stmt) == loop->header)
1196 {
1197 /* If the entry value is subject to abnormal coalescing
1198 avoid the transform since we're going to duplicate the
1199 loop header and thus likely introduce overlapping life-ranges
1200 between the PHI def and the entry on the path when the
1201 first loop is skipped. */
1202 tree entry_def
1203 = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1204 if (TREE_CODE (entry_def) == SSA_NAME
1205 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (entry_def))
1206 return false;
1207 invar = loop_iter_phi_semi_invariant_p (loop, phi, skip_head);
1208 return invar;
1209 }
1210
1211 /* For a loop PHI node that does not locate in loop header, it is semi-
1212 invariant only if two conditions are met. The first is its source
1213 values are derived from CONSTANT (including loop-invariant value), or
1214 from SSA name defined by semi-invariant loop iteration PHI node. The
1215 second is its source incoming edges are control-dependent on semi-
1216 invariant conditional predicates. */
1217 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
1218 {
1219 const_edge e = gimple_phi_arg_edge (phi, i);
1220 tree arg = gimple_phi_arg_def (phi, i);
1221
1222 if (TREE_CODE (arg) == SSA_NAME)
1223 {
1224 if (!ssa_semi_invariant_p (loop, arg, skip_head, stmt_stat))
1225 return false;
1226
1227 /* If source value is defined in location from where the source
1228 edge comes in, no need to check control dependency again
1229 since this has been done in above SSA name check stage. */
1230 if (e->src == gimple_bb (SSA_NAME_DEF_STMT (arg)))
1231 continue;
1232 }
1233
1234 if (!control_dep_semi_invariant_p (loop, e->src, skip_head,
1235 stmt_stat))
1236 return false;
1237 }
1238 }
1239 else
1240 {
1241 ssa_op_iter iter;
1242 tree use;
1243
1244 /* Volatile memory load or return of normal (non-const/non-pure) call
1245 should not be treated as constant in each iteration of loop. */
1246 if (gimple_has_side_effects (stmt))
1247 return false;
1248
1249 /* Check if any memory store may kill memory load at this place. */
1250 if (gimple_vuse (stmt) && !vuse_semi_invariant_p (loop, stmt, skip_head))
1251 return false;
1252
1253 /* Although operand of a statement might be SSA name, CONSTANT or
1254 VARDECL, here we only need to check SSA name operands. This is
1255 because check on VARDECL operands, which involve memory loads,
1256 must have been done prior to invocation of this function in
1257 vuse_semi_invariant_p. */
1258 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1259 if (!ssa_semi_invariant_p (loop, use, skip_head, stmt_stat))
1260 return false;
1261 }
1262
1263 if (!control_dep_semi_invariant_p (loop, gimple_bb (stmt), skip_head,
1264 stmt_stat))
1265 return false;
1266
1267 /* Here we SHOULD NOT use invar = true, since hash map might be changed due
1268 to new insertion, and thus invar may point to invalid memory. */
1269 stmt_stat.put (stmt, true);
1270 return true;
1271 }
1272
1273 /* A helper function to check whether STMT is semi-invariant in LOOP. Basic
1274 blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP. */
1275
1276 static bool
stmt_semi_invariant_p(struct loop * loop,gimple * stmt,const_basic_block skip_head)1277 stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
1278 const_basic_block skip_head)
1279 {
1280 hash_map<gimple *, bool> stmt_stat;
1281 return stmt_semi_invariant_p_1 (loop, stmt, skip_head, stmt_stat);
1282 }
1283
1284 /* Determine when conditional statement never transfers execution to one of its
1285 branch, whether we can remove the branch's leading basic block (BRANCH_BB)
1286 and those basic blocks dominated by BRANCH_BB. */
1287
1288 static bool
branch_removable_p(basic_block branch_bb)1289 branch_removable_p (basic_block branch_bb)
1290 {
1291 edge_iterator ei;
1292 edge e;
1293
1294 if (single_pred_p (branch_bb))
1295 return true;
1296
1297 FOR_EACH_EDGE (e, ei, branch_bb->preds)
1298 {
1299 if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
1300 continue;
1301
1302 if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
1303 continue;
1304
1305 /* The branch can be reached from opposite branch, or from some
1306 statement not dominated by the conditional statement. */
1307 return false;
1308 }
1309
1310 return true;
1311 }
1312
1313 /* Find out which branch of a conditional statement (COND) is invariant in the
1314 execution context of LOOP. That is: once the branch is selected in certain
1315 iteration of the loop, any operand that contributes to computation of the
1316 conditional statement remains unchanged in all following iterations. */
1317
1318 static edge
get_cond_invariant_branch(struct loop * loop,gcond * cond)1319 get_cond_invariant_branch (struct loop *loop, gcond *cond)
1320 {
1321 basic_block cond_bb = gimple_bb (cond);
1322 basic_block targ_bb[2];
1323 bool invar[2];
1324 unsigned invar_checks = 0;
1325
1326 for (unsigned i = 0; i < 2; i++)
1327 {
1328 targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;
1329
1330 /* One branch directs to loop exit, no need to perform loop split upon
1331 this conditional statement. Firstly, it is trivial if the exit branch
1332 is semi-invariant, for the statement is just to break loop. Secondly,
1333 if the opposite branch is semi-invariant, it means that the statement
1334 is real loop-invariant, which is covered by loop unswitch. */
1335 if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
1336 return NULL;
1337 }
1338
1339 for (unsigned i = 0; i < 2; i++)
1340 {
1341 invar[!i] = false;
1342
1343 if (!branch_removable_p (targ_bb[i]))
1344 continue;
1345
1346 /* Given a semi-invariant branch, if its opposite branch dominates
1347 loop latch, it and its following trace will only be executed in
1348 final iteration of loop, namely it is not part of repeated body
1349 of the loop. Similar to the above case that the branch is loop
1350 exit, no need to split loop. */
1351 if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
1352 continue;
1353
1354 invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
1355 invar_checks++;
1356 }
1357
1358 /* With both branches being invariant (handled by loop unswitch) or
1359 variant is not what we want. */
1360 if (invar[0] ^ !invar[1])
1361 return NULL;
1362
1363 /* Found a real loop-invariant condition, do nothing. */
1364 if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
1365 return NULL;
1366
1367 return EDGE_SUCC (cond_bb, invar[0] ? 0 : 1);
1368 }
1369
1370 /* Calculate increased code size measured by estimated insn number if applying
1371 loop split upon certain branch (BRANCH_EDGE) of a conditional statement. */
1372
1373 static int
compute_added_num_insns(struct loop * loop,const_edge branch_edge)1374 compute_added_num_insns (struct loop *loop, const_edge branch_edge)
1375 {
1376 basic_block cond_bb = branch_edge->src;
1377 unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
1378 basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
1379 basic_block *bbs = ((split_info *) loop->aux)->bbs;
1380 int num = 0;
1381
1382 for (unsigned i = 0; i < loop->num_nodes; i++)
1383 {
1384 /* Do no count basic blocks only in opposite branch. */
1385 if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
1386 continue;
1387
1388 num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
1389 }
1390
1391 /* It is unnecessary to evaluate expression of the conditional statement
1392 in new loop that contains only invariant branch. This expression should
1393 be constant value (either true or false). Exclude code size of insns
1394 that contribute to computation of the expression. */
1395
1396 auto_vec<gimple *> worklist;
1397 hash_set<gimple *> removed;
1398 gimple *stmt = last_stmt (cond_bb);
1399
1400 worklist.safe_push (stmt);
1401 removed.add (stmt);
1402 num -= estimate_num_insns (stmt, &eni_size_weights);
1403
1404 do
1405 {
1406 ssa_op_iter opnd_iter;
1407 use_operand_p opnd_p;
1408
1409 stmt = worklist.pop ();
1410 FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
1411 {
1412 tree opnd = USE_FROM_PTR (opnd_p);
1413
1414 if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
1415 continue;
1416
1417 gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
1418 use_operand_p use_p;
1419 imm_use_iterator use_iter;
1420
1421 if (removed.contains (opnd_stmt)
1422 || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
1423 continue;
1424
1425 FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
1426 {
1427 gimple *use_stmt = USE_STMT (use_p);
1428
1429 if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
1430 {
1431 opnd_stmt = NULL;
1432 break;
1433 }
1434 }
1435
1436 if (opnd_stmt)
1437 {
1438 worklist.safe_push (opnd_stmt);
1439 removed.add (opnd_stmt);
1440 num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
1441 }
1442 }
1443 } while (!worklist.is_empty ());
1444
1445 gcc_assert (num >= 0);
1446 return num;
1447 }
1448
1449 /* Find out loop-invariant branch of a conditional statement (COND) if it has,
1450 and check whether it is eligible and profitable to perform loop split upon
1451 this branch in LOOP. */
1452
1453 static edge
get_cond_branch_to_split_loop(struct loop * loop,gcond * cond)1454 get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
1455 {
1456 edge invar_branch = get_cond_invariant_branch (loop, cond);
1457 if (!invar_branch)
1458 return NULL;
1459
1460 /* When accurate profile information is available, and execution
1461 frequency of the branch is too low, just let it go. */
1462 profile_probability prob = invar_branch->probability;
1463 if (prob.reliable_p ())
1464 {
1465 int thres = param_min_loop_cond_split_prob;
1466
1467 if (prob < profile_probability::always ().apply_scale (thres, 100))
1468 return NULL;
1469 }
1470
1471 /* Add a threshold for increased code size to disable loop split. */
1472 if (compute_added_num_insns (loop, invar_branch) > param_max_peeled_insns)
1473 return NULL;
1474
1475 return invar_branch;
1476 }
1477
1478 /* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
1479 conditional statement, perform loop split transformation illustrated
1480 as the following graph.
1481
1482 .-------T------ if (true) ------F------.
1483 | .---------------. |
1484 | | | |
1485 v | v v
1486 pre-header | pre-header
1487 | .------------. | | .------------.
1488 | | | | | | |
1489 | v | | | v |
1490 header | | header |
1491 | | | | |
1492 .--- if (cond) ---. | | .--- if (true) ---. |
1493 | | | | | | |
1494 invariant | | | invariant | |
1495 | | | | | | |
1496 '---T--->.<---F---' | | '---T--->.<---F---' |
1497 | | / | |
1498 stmts | / stmts |
1499 | F T | |
1500 / \ | / / \ |
1501 .-------* * [ if (cond) ] .-------* * |
1502 | | | | | |
1503 | latch | | latch |
1504 | | | | | |
1505 | '------------' | '------------'
1506 '------------------------. .-----------'
1507 loop1 | | loop2
1508 v v
1509 exits
1510
1511 In the graph, loop1 represents the part derived from original one, and
1512 loop2 is duplicated using loop_version (), which corresponds to the part
1513 of original one being splitted out. In original latch edge of loop1, we
1514 insert a new conditional statement duplicated from the semi-invariant cond,
1515 and one of its branch goes back to loop1 header as a latch edge, and the
1516 other branch goes to loop2 pre-header as an entry edge. And also in loop2,
1517 we abandon the variant branch of the conditional statement by setting a
1518 constant bool condition, based on which branch is semi-invariant. */
1519
1520 static bool
do_split_loop_on_cond(struct loop * loop1,edge invar_branch)1521 do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
1522 {
1523 basic_block cond_bb = invar_branch->src;
1524 bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
1525 gcond *cond = as_a <gcond *> (last_stmt (cond_bb));
1526
1527 gcc_assert (cond_bb->loop_father == loop1);
1528
1529 if (dump_enabled_p ())
1530 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, cond,
1531 "loop split on semi-invariant condition at %s branch\n",
1532 true_invar ? "true" : "false");
1533
1534 initialize_original_copy_tables ();
1535
1536 struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
1537 invar_branch->probability.invert (),
1538 invar_branch->probability,
1539 profile_probability::always (),
1540 profile_probability::always (),
1541 true);
1542 if (!loop2)
1543 {
1544 free_original_copy_tables ();
1545 return false;
1546 }
1547
1548 basic_block cond_bb_copy = get_bb_copy (cond_bb);
1549 gcond *cond_copy = as_a<gcond *> (last_stmt (cond_bb_copy));
1550
1551 /* Replace the condition in loop2 with a bool constant to let PassManager
1552 remove the variant branch after current pass completes. */
1553 if (true_invar)
1554 gimple_cond_make_true (cond_copy);
1555 else
1556 gimple_cond_make_false (cond_copy);
1557
1558 update_stmt (cond_copy);
1559
1560 /* Insert a new conditional statement on latch edge of loop1, its condition
1561 is duplicated from the semi-invariant. This statement acts as a switch
1562 to transfer execution from loop1 to loop2, when loop1 enters into
1563 invariant state. */
1564 basic_block latch_bb = split_edge (loop_latch_edge (loop1));
1565 basic_block break_bb = split_edge (single_pred_edge (latch_bb));
1566 gimple *break_cond = gimple_build_cond (gimple_cond_code(cond),
1567 gimple_cond_lhs (cond),
1568 gimple_cond_rhs (cond),
1569 NULL_TREE, NULL_TREE);
1570
1571 gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
1572 gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
1573
1574 edge to_loop1 = single_succ_edge (break_bb);
1575 edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
1576
1577 to_loop1->flags &= ~EDGE_FALLTHRU;
1578 to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
1579 to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
1580
1581 /* Due to introduction of a control flow edge from loop1 latch to loop2
1582 pre-header, we should update PHIs in loop2 to reflect this connection
1583 between loop1 and loop2. */
1584 connect_loop_phis (loop1, loop2, to_loop2);
1585
1586 edge true_edge, false_edge, skip_edge1, skip_edge2;
1587 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
1588
1589 skip_edge1 = true_invar ? false_edge : true_edge;
1590 skip_edge2 = true_invar ? true_edge : false_edge;
1591 fix_loop_bb_probability (loop1, loop2, skip_edge1, skip_edge2);
1592
1593 /* Fix first loop's exit probability after scaling. */
1594 to_loop1->probability = invar_branch->probability.invert ();
1595 to_loop2->probability = invar_branch->probability;
1596
1597 free_original_copy_tables ();
1598
1599 return true;
1600 }
1601
1602 /* Traverse all conditional statements in LOOP, to find out a good candidate
1603 upon which we can do loop split. */
1604
1605 static bool
split_loop_on_cond(struct loop * loop)1606 split_loop_on_cond (struct loop *loop)
1607 {
1608 split_info *info = new split_info ();
1609 basic_block *bbs = info->bbs = get_loop_body (loop);
1610 bool do_split = false;
1611
1612 /* Allocate an area to keep temporary info, and associate its address
1613 with loop aux field. */
1614 loop->aux = info;
1615
1616 for (unsigned i = 0; i < loop->num_nodes; i++)
1617 bbs[i]->aux = NULL;
1618
1619 for (unsigned i = 0; i < loop->num_nodes; i++)
1620 {
1621 basic_block bb = bbs[i];
1622
1623 /* We only consider conditional statement, which be executed at most once
1624 in each iteration of the loop. So skip statements in inner loops. */
1625 if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
1626 continue;
1627
1628 /* Actually this check is not a must constraint. With it, we can ensure
1629 conditional statement will always be executed in each iteration. */
1630 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1631 continue;
1632
1633 gimple *last = last_stmt (bb);
1634
1635 if (!last || gimple_code (last) != GIMPLE_COND)
1636 continue;
1637
1638 gcond *cond = as_a <gcond *> (last);
1639 edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
1640
1641 if (branch_edge)
1642 {
1643 do_split_loop_on_cond (loop, branch_edge);
1644 do_split = true;
1645 break;
1646 }
1647 }
1648
1649 delete info;
1650 loop->aux = NULL;
1651
1652 return do_split;
1653 }
1654
1655 /* Main entry point. Perform loop splitting on all suitable loops. */
1656
1657 static unsigned int
tree_ssa_split_loops(void)1658 tree_ssa_split_loops (void)
1659 {
1660 bool changed = false;
1661
1662 gcc_assert (scev_initialized_p ());
1663
1664 calculate_dominance_info (CDI_POST_DOMINATORS);
1665
1666 for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
1667 loop->aux = NULL;
1668
1669 /* Go through all loops starting from innermost. */
1670 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1671 {
1672 if (loop->aux)
1673 {
1674 /* If any of our inner loops was split, don't split us,
1675 and mark our containing loop as having had splits as well. */
1676 loop_outer (loop)->aux = loop;
1677 continue;
1678 }
1679
1680 if (optimize_loop_for_size_p (loop))
1681 continue;
1682
1683 if (split_loop (loop) || split_loop_on_cond (loop))
1684 {
1685 /* Mark our containing loop as having had some split inner loops. */
1686 loop_outer (loop)->aux = loop;
1687 changed = true;
1688 }
1689 }
1690
1691 for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
1692 loop->aux = NULL;
1693
1694 clear_aux_for_blocks ();
1695
1696 free_dominance_info (CDI_POST_DOMINATORS);
1697
1698 if (changed)
1699 {
1700 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1701 return TODO_cleanup_cfg;
1702 }
1703 return 0;
1704 }
1705
1706 /* Loop splitting pass. */
1707
1708 namespace {
1709
1710 const pass_data pass_data_loop_split =
1711 {
1712 GIMPLE_PASS, /* type */
1713 "lsplit", /* name */
1714 OPTGROUP_LOOP, /* optinfo_flags */
1715 TV_LOOP_SPLIT, /* tv_id */
1716 PROP_cfg, /* properties_required */
1717 0, /* properties_provided */
1718 0, /* properties_destroyed */
1719 0, /* todo_flags_start */
1720 0, /* todo_flags_finish */
1721 };
1722
1723 class pass_loop_split : public gimple_opt_pass
1724 {
1725 public:
pass_loop_split(gcc::context * ctxt)1726 pass_loop_split (gcc::context *ctxt)
1727 : gimple_opt_pass (pass_data_loop_split, ctxt)
1728 {}
1729
1730 /* opt_pass methods: */
gate(function *)1731 virtual bool gate (function *) { return flag_split_loops != 0; }
1732 virtual unsigned int execute (function *);
1733
1734 }; // class pass_loop_split
1735
1736 unsigned int
execute(function * fun)1737 pass_loop_split::execute (function *fun)
1738 {
1739 if (number_of_loops (fun) <= 1)
1740 return 0;
1741
1742 return tree_ssa_split_loops ();
1743 }
1744
1745 } // anon namespace
1746
1747 gimple_opt_pass *
make_pass_loop_split(gcc::context * ctxt)1748 make_pass_loop_split (gcc::context *ctxt)
1749 {
1750 return new pass_loop_split (ctxt);
1751 }
1752