1 /* Loop header copying on trees.
2 Copyright (C) 2004-2022 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"
28 #include "gimple-ssa.h"
29 #include "gimple-iterator.h"
30 #include "tree-cfg.h"
31 #include "tree-into-ssa.h"
32 #include "cfgloop.h"
33 #include "tree-inline.h"
34 #include "tree-ssa-threadedge.h"
35 #include "tree-ssa-sccvn.h"
36 #include "tree-phinodes.h"
37 #include "ssa-iterators.h"
38 #include "value-range.h"
39 #include "gimple-range.h"
40 #include "gimple-range-path.h"
41 #include "cfganal.h"
42
43 /* Duplicates headers of loops if they are small enough, so that the statements
44 in the loop body are always executed when the loop is entered. This
45 increases effectiveness of code motion optimizations, and reduces the need
46 for loop preconditioning. */
47
48 /* Return true if the condition on the first iteration of the loop can
49 be statically determined. */
50
51 static bool
entry_loop_condition_is_static(class loop * l,path_range_query * query)52 entry_loop_condition_is_static (class loop *l, path_range_query *query)
53 {
54 edge e = loop_preheader_edge (l);
55 gcond *last = safe_dyn_cast <gcond *> (last_stmt (e->dest));
56
57 if (!last
58 || !irange::supports_type_p (TREE_TYPE (gimple_cond_lhs (last))))
59 return false;
60
61 edge true_e, false_e;
62 extract_true_false_edges_from_block (e->dest, &true_e, &false_e);
63
64 /* If neither edge is the exit edge, this is not a case we'd like to
65 special-case. */
66 if (!loop_exit_edge_p (l, true_e) && !loop_exit_edge_p (l, false_e))
67 return false;
68
69 tree desired_static_value;
70 if (loop_exit_edge_p (l, true_e))
71 desired_static_value = boolean_false_node;
72 else
73 desired_static_value = boolean_true_node;
74
75 int_range<2> r;
76 query->compute_ranges (e);
77 query->range_of_stmt (r, last);
78 return r == int_range<2> (desired_static_value, desired_static_value);
79 }
80
81 /* Check whether we should duplicate HEADER of LOOP. At most *LIMIT
82 instructions should be duplicated, limit is decreased by the actual
83 amount. */
84
85 static bool
should_duplicate_loop_header_p(basic_block header,class loop * loop,int * limit)86 should_duplicate_loop_header_p (basic_block header, class loop *loop,
87 int *limit)
88 {
89 gimple_stmt_iterator bsi;
90
91 gcc_assert (!header->aux);
92
93 gcc_assert (EDGE_COUNT (header->succs) > 0);
94 if (single_succ_p (header))
95 {
96 if (dump_file && (dump_flags & TDF_DETAILS))
97 fprintf (dump_file,
98 " Not duplicating bb %i: it is single succ.\n",
99 header->index);
100 return false;
101 }
102
103 if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)
104 && flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 1)->dest))
105 {
106 if (dump_file && (dump_flags & TDF_DETAILS))
107 fprintf (dump_file,
108 " Not duplicating bb %i: both successors are in loop.\n",
109 loop->num);
110 return false;
111 }
112
113 /* If this is not the original loop header, we want it to have just
114 one predecessor in order to match the && pattern. */
115 if (header != loop->header && !single_pred_p (header))
116 {
117 if (dump_file && (dump_flags & TDF_DETAILS))
118 fprintf (dump_file,
119 " Not duplicating bb %i: it has mutiple predecestors.\n",
120 header->index);
121 return false;
122 }
123
124 gcond *last = safe_dyn_cast <gcond *> (last_stmt (header));
125 if (!last)
126 {
127 if (dump_file && (dump_flags & TDF_DETAILS))
128 fprintf (dump_file,
129 " Not duplicating bb %i: it does not end by conditional.\n",
130 header->index);
131 return false;
132 }
133
134 for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (psi);
135 gsi_next (&psi))
136 {
137 gphi *phi = psi.phi ();
138 tree res = gimple_phi_result (phi);
139 if (INTEGRAL_TYPE_P (TREE_TYPE (res))
140 || POINTER_TYPE_P (TREE_TYPE (res)))
141 gimple_set_uid (phi, 1 /* IV */);
142 else
143 gimple_set_uid (phi, 0);
144 }
145
146 /* Count number of instructions and punt on calls.
147 Populate stmts INV/IV flag to later apply heuristics to the
148 kind of conditions we want to copy. */
149 for (bsi = gsi_start_bb (header); !gsi_end_p (bsi); gsi_next (&bsi))
150 {
151 gimple *last = gsi_stmt (bsi);
152
153 if (gimple_code (last) == GIMPLE_LABEL)
154 continue;
155
156 if (is_gimple_debug (last))
157 continue;
158
159 if (gimple_code (last) == GIMPLE_CALL
160 && (!gimple_inexpensive_call_p (as_a <gcall *> (last))
161 /* IFN_LOOP_DIST_ALIAS means that inner loop is distributed
162 at current loop's header. Don't copy in this case. */
163 || gimple_call_internal_p (last, IFN_LOOP_DIST_ALIAS)))
164 {
165 if (dump_file && (dump_flags & TDF_DETAILS))
166 fprintf (dump_file,
167 " Not duplicating bb %i: it contains call.\n",
168 header->index);
169 return false;
170 }
171
172 *limit -= estimate_num_insns (last, &eni_size_weights);
173 if (*limit < 0)
174 {
175 if (dump_file && (dump_flags & TDF_DETAILS))
176 fprintf (dump_file,
177 " Not duplicating bb %i contains too many insns.\n",
178 header->index);
179 return false;
180 }
181
182 /* Classify the stmt based on whether its computation is based
183 on a IV or whether it is invariant in the loop. */
184 gimple_set_uid (last, 0);
185 if (!gimple_vuse (last))
186 {
187 bool inv = true;
188 bool iv = false;
189 ssa_op_iter i;
190 tree op;
191 FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE)
192 if (!SSA_NAME_IS_DEFAULT_DEF (op)
193 && flow_bb_inside_loop_p (loop,
194 gimple_bb (SSA_NAME_DEF_STMT (op))))
195 {
196 if (!(gimple_uid (SSA_NAME_DEF_STMT (op)) & 2 /* INV */))
197 inv = false;
198 if (gimple_uid (SSA_NAME_DEF_STMT (op)) & 1 /* IV */)
199 iv = true;
200 }
201 gimple_set_uid (last, (iv ? 1 : 0) | (inv ? 2 : 0));
202 }
203 }
204
205 /* If the condition tests a non-IV loop variant we do not want to rotate
206 the loop further. Unless this is the original loop header. */
207 tree lhs = gimple_cond_lhs (last);
208 tree rhs = gimple_cond_rhs (last);
209 if (header != loop->header
210 && ((TREE_CODE (lhs) == SSA_NAME
211 && !SSA_NAME_IS_DEFAULT_DEF (lhs)
212 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (lhs)))
213 && gimple_uid (SSA_NAME_DEF_STMT (lhs)) == 0)
214 || (TREE_CODE (rhs) == SSA_NAME
215 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
216 && flow_bb_inside_loop_p (loop,
217 gimple_bb (SSA_NAME_DEF_STMT (rhs)))
218 && gimple_uid (SSA_NAME_DEF_STMT (rhs)) == 0)))
219 {
220 if (dump_file && (dump_flags & TDF_DETAILS))
221 fprintf (dump_file,
222 " Not duplicating bb %i: condition based on non-IV loop"
223 " variant.\n", header->index);
224 return false;
225 }
226
227 return true;
228 }
229
230 /* Checks whether LOOP is a do-while style loop. */
231
232 static bool
do_while_loop_p(class loop * loop)233 do_while_loop_p (class loop *loop)
234 {
235 gimple *stmt = last_stmt (loop->latch);
236
237 /* If the latch of the loop is not empty, it is not a do-while loop. */
238 if (stmt
239 && gimple_code (stmt) != GIMPLE_LABEL)
240 {
241 if (dump_file && (dump_flags & TDF_DETAILS))
242 fprintf (dump_file,
243 "Loop %i is not do-while loop: latch is not empty.\n",
244 loop->num);
245 return false;
246 }
247
248 /* If the latch does not have a single predecessor, it is not a
249 do-while loop. */
250 if (!single_pred_p (loop->latch))
251 {
252 if (dump_file && (dump_flags & TDF_DETAILS))
253 fprintf (dump_file,
254 "Loop %i is not do-while loop: latch has multiple "
255 "predecessors.\n", loop->num);
256 return false;
257 }
258
259 /* If the latch predecessor doesn't exit the loop, it is not a
260 do-while loop. */
261 if (!loop_exits_from_bb_p (loop, single_pred (loop->latch)))
262 {
263 if (dump_file && (dump_flags & TDF_DETAILS))
264 fprintf (dump_file,
265 "Loop %i is not do-while loop: latch predecessor "
266 "does not exit loop.\n", loop->num);
267 return false;
268 }
269
270 if (dump_file && (dump_flags & TDF_DETAILS))
271 fprintf (dump_file, "Loop %i is do-while loop\n", loop->num);
272
273 return true;
274 }
275
276 namespace {
277
278 /* Common superclass for both header-copying phases. */
279 class ch_base : public gimple_opt_pass
280 {
281 protected:
ch_base(pass_data data,gcc::context * ctxt)282 ch_base (pass_data data, gcc::context *ctxt)
283 : gimple_opt_pass (data, ctxt)
284 {}
285
286 /* Copies headers of all loops in FUN for which process_loop_p is true. */
287 unsigned int copy_headers (function *fun);
288
289 /* Return true to copy headers of LOOP or false to skip. */
290 virtual bool process_loop_p (class loop *loop) = 0;
291 };
292
293 const pass_data pass_data_ch =
294 {
295 GIMPLE_PASS, /* type */
296 "ch", /* name */
297 OPTGROUP_LOOP, /* optinfo_flags */
298 TV_TREE_CH, /* tv_id */
299 ( PROP_cfg | PROP_ssa ), /* properties_required */
300 0, /* properties_provided */
301 0, /* properties_destroyed */
302 0, /* todo_flags_start */
303 0, /* todo_flags_finish */
304 };
305
306 class pass_ch : public ch_base
307 {
308 public:
pass_ch(gcc::context * ctxt)309 pass_ch (gcc::context *ctxt)
310 : ch_base (pass_data_ch, ctxt)
311 {}
312
313 /* opt_pass methods: */
gate(function *)314 virtual bool gate (function *) { return flag_tree_ch != 0; }
315
316 /* Initialize and finalize loop structures, copying headers inbetween. */
317 virtual unsigned int execute (function *);
318
clone()319 opt_pass * clone () { return new pass_ch (m_ctxt); }
320
321 protected:
322 /* ch_base method: */
323 virtual bool process_loop_p (class loop *loop);
324 }; // class pass_ch
325
326 const pass_data pass_data_ch_vect =
327 {
328 GIMPLE_PASS, /* type */
329 "ch_vect", /* name */
330 OPTGROUP_LOOP, /* optinfo_flags */
331 TV_TREE_CH, /* tv_id */
332 ( PROP_cfg | PROP_ssa ), /* properties_required */
333 0, /* properties_provided */
334 0, /* properties_destroyed */
335 0, /* todo_flags_start */
336 0, /* todo_flags_finish */
337 };
338
339 /* This is a more aggressive version of the same pass, designed to run just
340 before if-conversion and vectorization, to put more loops into the form
341 required for those phases. */
342 class pass_ch_vect : public ch_base
343 {
344 public:
pass_ch_vect(gcc::context * ctxt)345 pass_ch_vect (gcc::context *ctxt)
346 : ch_base (pass_data_ch_vect, ctxt)
347 {}
348
349 /* opt_pass methods: */
gate(function * fun)350 virtual bool gate (function *fun)
351 {
352 return flag_tree_ch != 0
353 && (flag_tree_loop_vectorize != 0 || fun->has_force_vectorize_loops);
354 }
355
356 /* Just copy headers, no initialization/finalization of loop structures. */
357 virtual unsigned int execute (function *);
358
359 protected:
360 /* ch_base method: */
361 virtual bool process_loop_p (class loop *loop);
362 }; // class pass_ch_vect
363
364 /* For all loops, copy the condition at the end of the loop body in front
365 of the loop. This is beneficial since it increases efficiency of
366 code motion optimizations. It also saves one jump on entry to the loop. */
367
368 unsigned int
copy_headers(function * fun)369 ch_base::copy_headers (function *fun)
370 {
371 basic_block header;
372 edge exit, entry;
373 basic_block *bbs, *copied_bbs;
374 unsigned n_bbs;
375 unsigned bbs_size;
376 bool changed = false;
377
378 if (number_of_loops (fun) <= 1)
379 return 0;
380
381 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
382 copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
383 bbs_size = n_basic_blocks_for_fn (fun);
384
385 auto_vec<loop_p> candidates;
386 auto_vec<std::pair<edge, loop_p> > copied;
387
388 mark_dfs_back_edges ();
389 path_range_query *query = new path_range_query;
390 for (auto loop : loops_list (cfun, 0))
391 {
392 int initial_limit = param_max_loop_header_insns;
393 int remaining_limit = initial_limit;
394 if (dump_file && (dump_flags & TDF_DETAILS))
395 fprintf (dump_file,
396 "Analyzing loop %i\n", loop->num);
397
398 header = loop->header;
399
400 /* If the loop is already a do-while style one (either because it was
401 written as such, or because jump threading transformed it into one),
402 we might be in fact peeling the first iteration of the loop. This
403 in general is not a good idea. Also avoid touching infinite loops. */
404 if (!loop_has_exit_edges (loop)
405 || !process_loop_p (loop))
406 continue;
407
408 /* Avoid loop header copying when optimizing for size unless we can
409 determine that the loop condition is static in the first
410 iteration. */
411 if (optimize_loop_for_size_p (loop)
412 && !loop->force_vectorize
413 && !entry_loop_condition_is_static (loop, query))
414 {
415 if (dump_file && (dump_flags & TDF_DETAILS))
416 fprintf (dump_file,
417 " Not duplicating bb %i: optimizing for size.\n",
418 header->index);
419 continue;
420 }
421
422 if (should_duplicate_loop_header_p (header, loop, &remaining_limit))
423 candidates.safe_push (loop);
424 }
425 /* Do not use ranger after we change the IL and not have updated SSA. */
426 delete query;
427
428 for (auto loop : candidates)
429 {
430 int initial_limit = param_max_loop_header_insns;
431 int remaining_limit = initial_limit;
432 if (dump_file && (dump_flags & TDF_DETAILS))
433 fprintf (dump_file,
434 "Copying headers of loop %i\n", loop->num);
435
436 header = loop->header;
437
438 /* Iterate the header copying up to limit; this takes care of the cases
439 like while (a && b) {...}, where we want to have both of the conditions
440 copied. TODO -- handle while (a || b) - like cases, by not requiring
441 the header to have just a single successor and copying up to
442 postdominator. */
443
444 exit = NULL;
445 n_bbs = 0;
446 while (should_duplicate_loop_header_p (header, loop, &remaining_limit))
447 {
448 if (dump_file && (dump_flags & TDF_DETAILS))
449 fprintf (dump_file, " Will duplicate bb %i\n", header->index);
450
451 /* Find a successor of header that is inside a loop; i.e. the new
452 header after the condition is copied. */
453 if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
454 exit = EDGE_SUCC (header, 0);
455 else
456 exit = EDGE_SUCC (header, 1);
457 bbs[n_bbs++] = header;
458 gcc_assert (bbs_size > n_bbs);
459 header = exit->dest;
460 }
461
462 if (!exit)
463 continue;
464
465 if (dump_file && (dump_flags & TDF_DETAILS))
466 fprintf (dump_file,
467 "Duplicating header of the loop %d up to edge %d->%d,"
468 " %i insns.\n",
469 loop->num, exit->src->index, exit->dest->index,
470 initial_limit - remaining_limit);
471
472 /* Ensure that the header will have just the latch as a predecessor
473 inside the loop. */
474 if (!single_pred_p (exit->dest))
475 exit = single_pred_edge (split_edge (exit));
476
477 entry = loop_preheader_edge (loop);
478
479 propagate_threaded_block_debug_into (exit->dest, entry->dest);
480 if (!gimple_duplicate_sese_region (entry, exit, bbs, n_bbs, copied_bbs,
481 true))
482 {
483 if (dump_file && (dump_flags & TDF_DETAILS))
484 fprintf (dump_file, "Duplication failed.\n");
485 continue;
486 }
487 copied.safe_push (std::make_pair (entry, loop));
488
489 /* If the loop has the form "for (i = j; i < j + 10; i++)" then
490 this copying can introduce a case where we rely on undefined
491 signed overflow to eliminate the preheader condition, because
492 we assume that "j < j + 10" is true. We don't want to warn
493 about that case for -Wstrict-overflow, because in general we
494 don't warn about overflow involving loops. Prevent the
495 warning by setting the no_warning flag in the condition. */
496 if (warn_strict_overflow > 0)
497 {
498 unsigned int i;
499
500 for (i = 0; i < n_bbs; ++i)
501 {
502 gimple_stmt_iterator bsi;
503
504 for (bsi = gsi_start_bb (copied_bbs[i]);
505 !gsi_end_p (bsi);
506 gsi_next (&bsi))
507 {
508 gimple *stmt = gsi_stmt (bsi);
509 if (gimple_code (stmt) == GIMPLE_COND)
510 {
511 tree lhs = gimple_cond_lhs (stmt);
512 if (gimple_cond_code (stmt) != EQ_EXPR
513 && gimple_cond_code (stmt) != NE_EXPR
514 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
515 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
516 suppress_warning (stmt, OPT_Wstrict_overflow_);
517 }
518 else if (is_gimple_assign (stmt))
519 {
520 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
521 tree rhs1 = gimple_assign_rhs1 (stmt);
522 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison
523 && rhs_code != EQ_EXPR
524 && rhs_code != NE_EXPR
525 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
526 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1)))
527 suppress_warning (stmt, OPT_Wstrict_overflow_);
528 }
529 }
530 }
531 }
532
533 /* Ensure that the latch and the preheader is simple (we know that they
534 are not now, since there was the loop exit condition. */
535 split_edge (loop_preheader_edge (loop));
536 split_edge (loop_latch_edge (loop));
537
538 if (dump_file && (dump_flags & TDF_DETAILS))
539 {
540 if (do_while_loop_p (loop))
541 fprintf (dump_file, "Loop %d is now do-while loop.\n", loop->num);
542 else
543 fprintf (dump_file, "Loop %d is still not do-while loop.\n",
544 loop->num);
545 }
546
547 changed = true;
548 }
549
550 if (changed)
551 {
552 update_ssa (TODO_update_ssa);
553 /* After updating SSA form perform CSE on the loop header
554 copies. This is esp. required for the pass before
555 vectorization since nothing cleans up copied exit tests
556 that can now be simplified. CSE from the entry of the
557 region we copied till all loop exit blocks but not
558 entering the loop itself. */
559 for (unsigned i = 0; i < copied.length (); ++i)
560 {
561 edge entry = copied[i].first;
562 loop_p loop = copied[i].second;
563 auto_vec<edge> exit_edges = get_loop_exit_edges (loop);
564 bitmap exit_bbs = BITMAP_ALLOC (NULL);
565 for (unsigned j = 0; j < exit_edges.length (); ++j)
566 bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index);
567 bitmap_set_bit (exit_bbs, loop->header->index);
568 do_rpo_vn (cfun, entry, exit_bbs);
569 BITMAP_FREE (exit_bbs);
570 }
571 }
572 free (bbs);
573 free (copied_bbs);
574
575 return changed ? TODO_cleanup_cfg : 0;
576 }
577
578 /* Initialize the loop structures we need, and finalize after. */
579
580 unsigned int
execute(function * fun)581 pass_ch::execute (function *fun)
582 {
583 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
584 | LOOPS_HAVE_SIMPLE_LATCHES
585 | LOOPS_HAVE_RECORDED_EXITS);
586
587 unsigned int res = copy_headers (fun);
588
589 loop_optimizer_finalize ();
590 return res;
591 }
592
593 /* Assume an earlier phase has already initialized all the loop structures that
594 we need here (and perhaps others too), and that these will be finalized by
595 a later phase. */
596
597 unsigned int
execute(function * fun)598 pass_ch_vect::execute (function *fun)
599 {
600 return copy_headers (fun);
601 }
602
603 /* Apply header copying according to a very simple test of do-while shape. */
604
605 bool
process_loop_p(class loop * loop)606 pass_ch::process_loop_p (class loop *loop)
607 {
608 return !do_while_loop_p (loop);
609 }
610
611 /* Apply header-copying to loops where we might enable vectorization. */
612
613 bool
process_loop_p(class loop * loop)614 pass_ch_vect::process_loop_p (class loop *loop)
615 {
616 if (!flag_tree_loop_vectorize && !loop->force_vectorize)
617 return false;
618
619 if (loop->dont_vectorize)
620 return false;
621
622 /* The vectorizer won't handle anything with multiple exits, so skip. */
623 edge exit = single_exit (loop);
624 if (!exit)
625 return false;
626
627 if (!do_while_loop_p (loop))
628 return true;
629
630 return false;
631 }
632
633 } // anon namespace
634
635 gimple_opt_pass *
make_pass_ch_vect(gcc::context * ctxt)636 make_pass_ch_vect (gcc::context *ctxt)
637 {
638 return new pass_ch_vect (ctxt);
639 }
640
641 gimple_opt_pass *
make_pass_ch(gcc::context * ctxt)642 make_pass_ch (gcc::context *ctxt)
643 {
644 return new pass_ch (ctxt);
645 }
646