xref: /openbsd-src/gnu/gcc/gcc/tree-ssa-loop-ivcanon.c (revision 404b540a9034ac75a6199ad1a32d1bbc7a0d4210)
1 /* Induction variable canonicalization.
2    Copyright (C) 2004, 2005 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING.  If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA.  */
20 
21 /* This pass detects the loops that iterate a constant number of times,
22    adds a canonical induction variable (step -1, tested against 0)
23    and replaces the exit test.  This enables the less powerful rtl
24    level analysis to use this information.
25 
26    This might spoil the code in some cases (by increasing register pressure).
27    Note that in the case the new variable is not needed, ivopts will get rid
28    of it, so it might only be a problem when there are no other linear induction
29    variables.  In that case the created optimization possibilities are likely
30    to pay up.
31 
32    Additionally in case we detect that it is beneficial to unroll the
33    loop completely, we do it right here to expose the optimization
34    possibilities to the following passes.  */
35 
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tm.h"
40 #include "tree.h"
41 #include "rtl.h"
42 #include "tm_p.h"
43 #include "hard-reg-set.h"
44 #include "basic-block.h"
45 #include "output.h"
46 #include "diagnostic.h"
47 #include "tree-flow.h"
48 #include "tree-dump.h"
49 #include "cfgloop.h"
50 #include "tree-pass.h"
51 #include "ggc.h"
52 #include "tree-chrec.h"
53 #include "tree-scalar-evolution.h"
54 #include "params.h"
55 #include "flags.h"
56 #include "tree-inline.h"
57 
58 /* Specifies types of loops that may be unrolled.  */
59 
60 enum unroll_level
61 {
62   UL_SINGLE_ITER,	/* Only loops that exit immediately in the first
63 			   iteration.  */
64   UL_NO_GROWTH,		/* Only loops whose unrolling will not cause increase
65 			   of code size.  */
66   UL_ALL		/* All suitable loops.  */
67 };
68 
69 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
70    is the exit edge whose condition is replaced.  */
71 
72 static void
create_canonical_iv(struct loop * loop,edge exit,tree niter)73 create_canonical_iv (struct loop *loop, edge exit, tree niter)
74 {
75   edge in;
76   tree cond, type, var;
77   block_stmt_iterator incr_at;
78   enum tree_code cmp;
79 
80   if (dump_file && (dump_flags & TDF_DETAILS))
81     {
82       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
83       print_generic_expr (dump_file, niter, TDF_SLIM);
84       fprintf (dump_file, " iterations.\n");
85     }
86 
87   cond = last_stmt (exit->src);
88   in = EDGE_SUCC (exit->src, 0);
89   if (in == exit)
90     in = EDGE_SUCC (exit->src, 1);
91 
92   /* Note that we do not need to worry about overflows, since
93      type of niter is always unsigned and all comparisons are
94      just for equality/nonequality -- i.e. everything works
95      with a modulo arithmetics.  */
96 
97   type = TREE_TYPE (niter);
98   niter = fold_build2 (PLUS_EXPR, type,
99 		       niter,
100 		       build_int_cst (type, 1));
101   incr_at = bsi_last (in->src);
102   create_iv (niter,
103 	     build_int_cst (type, -1),
104 	     NULL_TREE, loop,
105 	     &incr_at, false, NULL, &var);
106 
107   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
108   COND_EXPR_COND (cond) = build2 (cmp, boolean_type_node,
109 				  var,
110 				  build_int_cst (type, 0));
111   update_stmt (cond);
112 }
113 
114 /* Computes an estimated number of insns in LOOP.  */
115 
116 unsigned
tree_num_loop_insns(struct loop * loop)117 tree_num_loop_insns (struct loop *loop)
118 {
119   basic_block *body = get_loop_body (loop);
120   block_stmt_iterator bsi;
121   unsigned size = 1, i;
122 
123   for (i = 0; i < loop->num_nodes; i++)
124     for (bsi = bsi_start (body[i]); !bsi_end_p (bsi); bsi_next (&bsi))
125       size += estimate_num_insns (bsi_stmt (bsi));
126   free (body);
127 
128   return size;
129 }
130 
131 /* Estimate number of insns of completely unrolled loop.  We assume
132    that the size of the unrolled loop is decreased in the
133    following way (the numbers of insns are based on what
134    estimate_num_insns returns for appropriate statements):
135 
136    1) exit condition gets removed (2 insns)
137    2) increment of the control variable gets removed (2 insns)
138    3) All remaining statements are likely to get simplified
139       due to constant propagation.  Hard to estimate; just
140       as a heuristics we decrease the rest by 1/3.
141 
142    NINSNS is the number of insns in the loop before unrolling.
143    NUNROLL is the number of times the loop is unrolled.  */
144 
145 static unsigned HOST_WIDE_INT
estimated_unrolled_size(unsigned HOST_WIDE_INT ninsns,unsigned HOST_WIDE_INT nunroll)146 estimated_unrolled_size (unsigned HOST_WIDE_INT ninsns,
147 			 unsigned HOST_WIDE_INT nunroll)
148 {
149   HOST_WIDE_INT unr_insns = 2 * ((HOST_WIDE_INT) ninsns - 4) / 3;
150   if (unr_insns <= 0)
151     unr_insns = 1;
152   unr_insns *= (nunroll + 1);
153 
154   return unr_insns;
155 }
156 
157 /* Tries to unroll LOOP completely, i.e. NITER times.  LOOPS is the
158    loop tree.  UL determines which loops we are allowed to unroll.
159    EXIT is the exit of the loop that should be eliminated.  */
160 
161 static bool
try_unroll_loop_completely(struct loops * loops ATTRIBUTE_UNUSED,struct loop * loop,edge exit,tree niter,enum unroll_level ul)162 try_unroll_loop_completely (struct loops *loops ATTRIBUTE_UNUSED,
163 			    struct loop *loop,
164 			    edge exit, tree niter,
165 			    enum unroll_level ul)
166 {
167   unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
168   tree old_cond, cond, dont_exit, do_exit;
169 
170   if (loop->inner)
171     return false;
172 
173   if (!host_integerp (niter, 1))
174     return false;
175   n_unroll = tree_low_cst (niter, 1);
176 
177   max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
178   if (n_unroll > max_unroll)
179     return false;
180 
181   if (n_unroll)
182     {
183       if (ul == UL_SINGLE_ITER)
184 	return false;
185 
186       ninsns = tree_num_loop_insns (loop);
187 
188       if (n_unroll * ninsns
189 	  > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
190 	return false;
191 
192       if (ul == UL_NO_GROWTH)
193 	{
194 	  unr_insns = estimated_unrolled_size (ninsns, n_unroll);
195 
196 	  if (dump_file && (dump_flags & TDF_DETAILS))
197 	    {
198 	      fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
199 	      fprintf (dump_file, "  Estimated size after unrolling: %d\n",
200 		       (int) unr_insns);
201 	    }
202 
203 	  if (unr_insns > ninsns)
204 	    {
205 	      if (dump_file && (dump_flags & TDF_DETAILS))
206 		fprintf (dump_file, "Not unrolling loop %d:\n", loop->num);
207 	      return false;
208 	    }
209 	}
210     }
211 
212   if (exit->flags & EDGE_TRUE_VALUE)
213     {
214       dont_exit = boolean_false_node;
215       do_exit = boolean_true_node;
216     }
217   else
218     {
219       dont_exit = boolean_true_node;
220       do_exit = boolean_false_node;
221     }
222   cond = last_stmt (exit->src);
223 
224   if (n_unroll)
225     {
226       sbitmap wont_exit;
227       edge *edges_to_remove = XNEWVEC (edge, n_unroll);
228       unsigned int n_to_remove = 0;
229 
230       old_cond = COND_EXPR_COND (cond);
231       COND_EXPR_COND (cond) = dont_exit;
232       update_stmt (cond);
233       initialize_original_copy_tables ();
234 
235       wont_exit = sbitmap_alloc (n_unroll + 1);
236       sbitmap_ones (wont_exit);
237       RESET_BIT (wont_exit, 0);
238 
239       if (!tree_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
240 					       loops, n_unroll, wont_exit,
241 					       exit, edges_to_remove,
242 					       &n_to_remove,
243 					       DLTHE_FLAG_UPDATE_FREQ
244 					       | DLTHE_FLAG_COMPLETTE_PEEL))
245 	{
246 	  COND_EXPR_COND (cond) = old_cond;
247 	  update_stmt (cond);
248           free_original_copy_tables ();
249 	  free (wont_exit);
250 	  free (edges_to_remove);
251 	  return false;
252 	}
253       free (wont_exit);
254       free (edges_to_remove);
255       free_original_copy_tables ();
256     }
257 
258   COND_EXPR_COND (cond) = do_exit;
259   update_stmt (cond);
260 
261   update_ssa (TODO_update_ssa);
262 
263   if (dump_file && (dump_flags & TDF_DETAILS))
264     fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);
265 
266   return true;
267 }
268 
269 /* Adds a canonical induction variable to LOOP if suitable.  LOOPS is the loops
270    tree.  CREATE_IV is true if we may create a new iv.  UL determines
271    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
272    to determine the number of iterations of a loop by direct evaluation.
273    Returns true if cfg is changed.  */
274 
275 static bool
canonicalize_loop_induction_variables(struct loops * loops,struct loop * loop,bool create_iv,enum unroll_level ul,bool try_eval)276 canonicalize_loop_induction_variables (struct loops *loops, struct loop *loop,
277 				       bool create_iv, enum unroll_level ul,
278 				       bool try_eval)
279 {
280   edge exit = NULL;
281   tree niter;
282 
283   niter = number_of_iterations_in_loop (loop);
284   if (TREE_CODE (niter) == INTEGER_CST)
285     {
286       exit = loop->single_exit;
287       if (!just_once_each_iteration_p (loop, exit->src))
288 	return false;
289 
290       /* The result of number_of_iterations_in_loop is by one higher than
291 	 we expect (i.e. it returns number of executions of the exit
292 	 condition, not of the loop latch edge).  */
293       niter = fold_build2 (MINUS_EXPR, TREE_TYPE (niter), niter,
294 			   build_int_cst (TREE_TYPE (niter), 1));
295     }
296   else
297     {
298       /* If the loop has more than one exit, try checking all of them
299 	 for # of iterations determinable through scev.  */
300       if (!loop->single_exit)
301 	niter = find_loop_niter (loop, &exit);
302 
303       /* Finally if everything else fails, try brute force evaluation.  */
304       if (try_eval
305 	  && (chrec_contains_undetermined (niter)
306 	      || TREE_CODE (niter) != INTEGER_CST))
307 	niter = find_loop_niter_by_eval (loop, &exit);
308 
309       if (chrec_contains_undetermined (niter)
310 	  || TREE_CODE (niter) != INTEGER_CST)
311 	return false;
312     }
313 
314   if (dump_file && (dump_flags & TDF_DETAILS))
315     {
316       fprintf (dump_file, "Loop %d iterates ", loop->num);
317       print_generic_expr (dump_file, niter, TDF_SLIM);
318       fprintf (dump_file, " times.\n");
319     }
320 
321   if (try_unroll_loop_completely (loops, loop, exit, niter, ul))
322     return true;
323 
324   if (create_iv)
325     create_canonical_iv (loop, exit, niter);
326 
327   return false;
328 }
329 
330 /* The main entry point of the pass.  Adds canonical induction variables
331    to the suitable LOOPS.  */
332 
333 unsigned int
canonicalize_induction_variables(struct loops * loops)334 canonicalize_induction_variables (struct loops *loops)
335 {
336   unsigned i;
337   struct loop *loop;
338   bool changed = false;
339 
340   for (i = 1; i < loops->num; i++)
341     {
342       loop = loops->parray[i];
343 
344       if (loop)
345 	changed |= canonicalize_loop_induction_variables (loops, loop,
346 							  true, UL_SINGLE_ITER,
347 							  true);
348     }
349 
350   /* Clean up the information about numbers of iterations, since brute force
351      evaluation could reveal new information.  */
352   scev_reset ();
353 
354   if (changed)
355     return TODO_cleanup_cfg;
356   return 0;
357 }
358 
359 /* Unroll LOOPS completely if they iterate just few times.  Unless
360    MAY_INCREASE_SIZE is true, perform the unrolling only if the
361    size of the code does not increase.  */
362 
363 unsigned int
tree_unroll_loops_completely(struct loops * loops,bool may_increase_size)364 tree_unroll_loops_completely (struct loops *loops, bool may_increase_size)
365 {
366   unsigned i;
367   struct loop *loop;
368   bool changed = false;
369   enum unroll_level ul;
370 
371   for (i = 1; i < loops->num; i++)
372     {
373       loop = loops->parray[i];
374 
375       if (!loop)
376 	continue;
377 
378       if (may_increase_size && maybe_hot_bb_p (loop->header))
379 	ul = UL_ALL;
380       else
381 	ul = UL_NO_GROWTH;
382       changed |= canonicalize_loop_induction_variables (loops, loop,
383 							false, ul,
384 							!flag_tree_loop_ivcanon);
385     }
386 
387   /* Clean up the information about numbers of iterations, since complete
388      unrolling might have invalidated it.  */
389   scev_reset ();
390 
391   if (changed)
392     return TODO_cleanup_cfg;
393   return 0;
394 }
395 
396 /* Checks whether LOOP is empty.  */
397 
398 static bool
empty_loop_p(struct loop * loop)399 empty_loop_p (struct loop *loop)
400 {
401   edge exit;
402   struct tree_niter_desc niter;
403   tree phi, def;
404   basic_block *body;
405   block_stmt_iterator bsi;
406   unsigned i;
407   tree stmt;
408 
409   /* If the loop has multiple exits, it is too hard for us to handle.
410      Similarly, if the exit is not dominating, we cannot determine
411      whether the loop is not infinite.  */
412   exit = single_dom_exit (loop);
413   if (!exit)
414     return false;
415 
416   /* The loop must be finite.  */
417   if (!number_of_iterations_exit (loop, exit, &niter, false))
418     return false;
419 
420   /* Values of all loop exit phi nodes must be invariants.  */
421   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
422     {
423       if (!is_gimple_reg (PHI_RESULT (phi)))
424 	continue;
425 
426       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
427 
428       if (!expr_invariant_in_loop_p (loop, def))
429 	return false;
430     }
431 
432   /* And there should be no memory modifying or from other reasons
433      unremovable statements.  */
434   body = get_loop_body (loop);
435   for (i = 0; i < loop->num_nodes; i++)
436     {
437       /* Irreducible region might be infinite.  */
438       if (body[i]->flags & BB_IRREDUCIBLE_LOOP)
439 	{
440 	  free (body);
441 	  return false;
442 	}
443 
444       for (bsi = bsi_start (body[i]); !bsi_end_p (bsi); bsi_next (&bsi))
445 	{
446 	  stmt = bsi_stmt (bsi);
447 	  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS)
448 	      || stmt_ann (stmt)->has_volatile_ops)
449 	    {
450 	      free (body);
451 	      return false;
452 	    }
453 
454 	  /* Also, asm statements and calls may have side effects and we
455 	     cannot change the number of times they are executed.  */
456 	  switch (TREE_CODE (stmt))
457 	    {
458 	    case RETURN_EXPR:
459 	    case MODIFY_EXPR:
460 	      stmt = get_call_expr_in (stmt);
461 	      if (!stmt)
462 		break;
463 
464 	    case CALL_EXPR:
465 	      if (TREE_SIDE_EFFECTS (stmt))
466 		{
467 		  free (body);
468 		  return false;
469 		}
470 	      break;
471 
472 	    case ASM_EXPR:
473 	      /* We cannot remove volatile assembler.  */
474 	      if (ASM_VOLATILE_P (stmt))
475 		{
476 		  free (body);
477 		  return false;
478 		}
479 	      break;
480 
481 	    default:
482 	      break;
483 	    }
484 	}
485       }
486   free (body);
487 
488   return true;
489 }
490 
491 /* Remove LOOP by making it exit in the first iteration.  */
492 
493 static void
remove_empty_loop(struct loop * loop)494 remove_empty_loop (struct loop *loop)
495 {
496   edge exit = single_dom_exit (loop), non_exit;
497   tree cond_stmt = last_stmt (exit->src);
498   tree do_exit;
499   basic_block *body;
500   unsigned n_before, freq_in, freq_h;
501   gcov_type exit_count = exit->count;
502 
503   non_exit = EDGE_SUCC (exit->src, 0);
504   if (non_exit == exit)
505     non_exit = EDGE_SUCC (exit->src, 1);
506 
507   if (exit->flags & EDGE_TRUE_VALUE)
508     do_exit = boolean_true_node;
509   else
510     do_exit = boolean_false_node;
511 
512   COND_EXPR_COND (cond_stmt) = do_exit;
513   update_stmt (cond_stmt);
514 
515   /* Let us set the probabilities of the edges coming from the exit block.  */
516   exit->probability = REG_BR_PROB_BASE;
517   non_exit->probability = 0;
518   non_exit->count = 0;
519 
520   /* Update frequencies and counts.  Everything before
521      the exit needs to be scaled FREQ_IN/FREQ_H times,
522      where FREQ_IN is the frequency of the entry edge
523      and FREQ_H is the frequency of the loop header.
524      Everything after the exit has zero frequency.  */
525   freq_h = loop->header->frequency;
526   freq_in = EDGE_FREQUENCY (loop_preheader_edge (loop));
527   if (freq_h != 0)
528     {
529       body = get_loop_body_in_dom_order (loop);
530       for (n_before = 1; n_before <= loop->num_nodes; n_before++)
531 	if (body[n_before - 1] == exit->src)
532 	  break;
533       scale_bbs_frequencies_int (body, n_before, freq_in, freq_h);
534       scale_bbs_frequencies_int (body + n_before, loop->num_nodes - n_before,
535 				 0, 1);
536       free (body);
537     }
538 
539   /* Number of executions of exit is not changed, thus we need to restore
540      the original value.  */
541   exit->count = exit_count;
542 }
543 
544 /* Removes LOOP if it is empty.  Returns true if LOOP is removed.  CHANGED
545    is set to true if LOOP or any of its subloops is removed.  */
546 
547 static bool
try_remove_empty_loop(struct loop * loop,bool * changed)548 try_remove_empty_loop (struct loop *loop, bool *changed)
549 {
550   bool nonempty_subloop = false;
551   struct loop *sub;
552 
553   /* First, all subloops must be removed.  */
554   for (sub = loop->inner; sub; sub = sub->next)
555     nonempty_subloop |= !try_remove_empty_loop (sub, changed);
556 
557   if (nonempty_subloop || !empty_loop_p (loop))
558     return false;
559 
560   remove_empty_loop (loop);
561   *changed = true;
562   return true;
563 }
564 
565 /* Remove the empty LOOPS.  */
566 
567 unsigned int
remove_empty_loops(struct loops * loops)568 remove_empty_loops (struct loops *loops)
569 {
570   bool changed = false;
571   struct loop *loop;
572 
573   for (loop = loops->tree_root->inner; loop; loop = loop->next)
574     try_remove_empty_loop (loop, &changed);
575 
576   if (changed)
577     {
578       scev_reset ();
579       return TODO_cleanup_cfg;
580     }
581   return 0;
582 }
583