xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-ssa-loop-ivcanon.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /* Induction variable canonicalization.
2    Copyright (C) 2004, 2005, 2007, 2008, 2010
3    Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* This pass detects the loops that iterate a constant number of times,
22    adds a canonical induction variable (step -1, tested against 0)
23    and replaces the exit test.  This enables the less powerful rtl
24    level analysis to use this information.
25 
26    This might spoil the code in some cases (by increasing register pressure).
27    Note that in the case the new variable is not needed, ivopts will get rid
28    of it, so it might only be a problem when there are no other linear induction
29    variables.  In that case the created optimization possibilities are likely
30    to pay up.
31 
32    Additionally in case we detect that it is beneficial to unroll the
33    loop completely, we do it right here to expose the optimization
34    possibilities to the following passes.  */
35 
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tm.h"
40 #include "tree.h"
41 #include "rtl.h"
42 #include "tm_p.h"
43 #include "hard-reg-set.h"
44 #include "basic-block.h"
45 #include "output.h"
46 #include "diagnostic.h"
47 #include "tree-flow.h"
48 #include "tree-dump.h"
49 #include "cfgloop.h"
50 #include "tree-pass.h"
51 #include "ggc.h"
52 #include "tree-chrec.h"
53 #include "tree-scalar-evolution.h"
54 #include "params.h"
55 #include "flags.h"
56 #include "tree-inline.h"
57 #include "target.h"
58 
/* Specifies types of loops that may be unrolled.  Passed to the unrolling
   machinery to control how aggressive complete unrolling may be.  */

enum unroll_level
{
  UL_SINGLE_ITER,	/* Only loops that exit immediately in the first
			   iteration, i.e. unrolling is a pure win since the
			   loop body is executed at most once.  */
  UL_NO_GROWTH,		/* Only loops whose unrolling will not cause increase
			   of code size.  */
  UL_ALL		/* All suitable loops.  */
};
69 
70 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
71    is the exit edge whose condition is replaced.  */
72 
73 static void
74 create_canonical_iv (struct loop *loop, edge exit, tree niter)
75 {
76   edge in;
77   tree type, var;
78   gimple cond;
79   gimple_stmt_iterator incr_at;
80   enum tree_code cmp;
81 
82   if (dump_file && (dump_flags & TDF_DETAILS))
83     {
84       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
85       print_generic_expr (dump_file, niter, TDF_SLIM);
86       fprintf (dump_file, " iterations.\n");
87     }
88 
89   cond = last_stmt (exit->src);
90   in = EDGE_SUCC (exit->src, 0);
91   if (in == exit)
92     in = EDGE_SUCC (exit->src, 1);
93 
94   /* Note that we do not need to worry about overflows, since
95      type of niter is always unsigned and all comparisons are
96      just for equality/nonequality -- i.e. everything works
97      with a modulo arithmetics.  */
98 
99   type = TREE_TYPE (niter);
100   niter = fold_build2 (PLUS_EXPR, type,
101 		       niter,
102 		       build_int_cst (type, 1));
103   incr_at = gsi_last_bb (in->src);
104   create_iv (niter,
105 	     build_int_cst (type, -1),
106 	     NULL_TREE, loop,
107 	     &incr_at, false, NULL, &var);
108 
109   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
110   gimple_cond_set_code (cond, cmp);
111   gimple_cond_set_lhs (cond, var);
112   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
113   update_stmt (cond);
114 }
115 
116 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.  */
117 
118 unsigned
119 tree_num_loop_insns (struct loop *loop, eni_weights *weights)
120 {
121   basic_block *body = get_loop_body (loop);
122   gimple_stmt_iterator gsi;
123   unsigned size = 0, i;
124 
125   for (i = 0; i < loop->num_nodes; i++)
126     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
127       size += estimate_num_insns (gsi_stmt (gsi), weights);
128   free (body);
129 
130   return size;
131 }
132 
/* Describe size of loop as detected by tree_estimate_loop_size.  All
   counts are in units of estimate_num_insns with eni_size_weights.  */
struct loop_size
{
  /* Number of instructions in the loop.  */
  int overall;

  /* Number of instructions that will be likely optimized out in
     peeled iterations of loop  (i.e. computation based on induction
     variable where induction variable starts at known constant.)  */
  int eliminated_by_peeling;

  /* Same statistics for last iteration of loop: it is smaller because
     instructions after exit are not executed.  */
  int last_iteration;
  int last_iteration_eliminated_by_peeling;
};
149 
150 /* Return true if OP in STMT will be constant after peeling LOOP.  */
151 
152 static bool
153 constant_after_peeling (tree op, gimple stmt, struct loop *loop)
154 {
155   affine_iv iv;
156 
157   if (is_gimple_min_invariant (op))
158     return true;
159 
160   /* We can still fold accesses to constant arrays when index is known.  */
161   if (TREE_CODE (op) != SSA_NAME)
162     {
163       tree base = op;
164 
165       /* First make fast look if we see constant array inside.  */
166       while (handled_component_p (base))
167 	base = TREE_OPERAND (base, 0);
168       if ((DECL_P (base)
169       	   && TREE_STATIC (base)
170 	   && TREE_READONLY (base)
171            && (DECL_INITIAL (base)
172 	       || (!DECL_EXTERNAL (base)
173 		   && targetm.binds_local_p (base))))
174 	  || CONSTANT_CLASS_P (base))
175 	{
176 	  /* If so, see if we understand all the indices.  */
177 	  base = op;
178 	  while (handled_component_p (base))
179 	    {
180 	      if (TREE_CODE (base) == ARRAY_REF
181 		  && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
182 		return false;
183 	      base = TREE_OPERAND (base, 0);
184 	    }
185 	  return true;
186 	}
187       return false;
188     }
189 
190   /* Induction variables are constants.  */
191   if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
192     return false;
193   if (!is_gimple_min_invariant (iv.base))
194     return false;
195   if (!is_gimple_min_invariant (iv.step))
196     return false;
197   return true;
198 }
199 
200 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.
201    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.  */
202 
203 static void
204 tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
205 {
206   basic_block *body = get_loop_body (loop);
207   gimple_stmt_iterator gsi;
208   unsigned int i;
209   bool after_exit;
210 
211   size->overall = 0;
212   size->eliminated_by_peeling = 0;
213   size->last_iteration = 0;
214   size->last_iteration_eliminated_by_peeling = 0;
215 
216   if (dump_file && (dump_flags & TDF_DETAILS))
217     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
218   for (i = 0; i < loop->num_nodes; i++)
219     {
220       if (exit && body[i] != exit->src
221 	  && dominated_by_p (CDI_DOMINATORS, body[i], exit->src))
222 	after_exit = true;
223       else
224 	after_exit = false;
225       if (dump_file && (dump_flags & TDF_DETAILS))
226 	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);
227 
228       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
229 	{
230 	  gimple stmt = gsi_stmt (gsi);
231 	  int num = estimate_num_insns (stmt, &eni_size_weights);
232 	  bool likely_eliminated = false;
233 
234 	  if (dump_file && (dump_flags & TDF_DETAILS))
235 	    {
236 	      fprintf (dump_file, "  size: %3i ", num);
237 	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
238 	    }
239 
240 	  /* Look for reasons why we might optimize this stmt away. */
241 
242 	  /* Exit conditional.  */
243 	  if (body[i] == exit->src && stmt == last_stmt (exit->src))
244 	    {
245 	      if (dump_file && (dump_flags & TDF_DETAILS))
246 	        fprintf (dump_file, "   Exit condition will be eliminated.\n");
247 	      likely_eliminated = true;
248 	    }
249 	  /* Sets of IV variables  */
250 	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
251 	      && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
252 	    {
253 	      if (dump_file && (dump_flags & TDF_DETAILS))
254 	        fprintf (dump_file, "   Induction variable computation will"
255 			 " be folded away.\n");
256 	      likely_eliminated = true;
257 	    }
258 	  /* Assignments of IV variables.  */
259 	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
260 		   && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
261 		   && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,loop)
262 		   && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
263 		       || constant_after_peeling (gimple_assign_rhs2 (stmt),
264 		       				  stmt, loop)))
265 	    {
266 	      if (dump_file && (dump_flags & TDF_DETAILS))
267 	        fprintf (dump_file, "   Constant expression will be folded away.\n");
268 	      likely_eliminated = true;
269 	    }
270 	  /* Conditionals.  */
271 	  else if (gimple_code (stmt) == GIMPLE_COND
272 		   && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
273 		   && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
274 	    {
275 	      if (dump_file && (dump_flags & TDF_DETAILS))
276 	        fprintf (dump_file, "   Constant conditional.\n");
277 	      likely_eliminated = true;
278 	    }
279 
280 	  size->overall += num;
281 	  if (likely_eliminated)
282 	    size->eliminated_by_peeling += num;
283 	  if (!after_exit)
284 	    {
285 	      size->last_iteration += num;
286 	      if (likely_eliminated)
287 		size->last_iteration_eliminated_by_peeling += num;
288 	    }
289 	}
290     }
291   if (dump_file && (dump_flags & TDF_DETAILS))
292     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
293     	     size->eliminated_by_peeling, size->last_iteration,
294 	     size->last_iteration_eliminated_by_peeling);
295 
296   free (body);
297 }
298 
299 /* Estimate number of insns of completely unrolled loop.
300    It is (NUNROLL + 1) * size of loop body with taking into account
301    the fact that in last copy everything after exit conditional
302    is dead and that some instructions will be eliminated after
303    peeling.
304 
305    Loop body is likely going to simplify futher, this is difficult
306    to guess, we just decrease the result by 1/3.  */
307 
308 static unsigned HOST_WIDE_INT
309 estimated_unrolled_size (struct loop_size *size,
310 			 unsigned HOST_WIDE_INT nunroll)
311 {
312   HOST_WIDE_INT unr_insns = ((nunroll)
313   			     * (HOST_WIDE_INT) (size->overall
314 			     			- size->eliminated_by_peeling));
315   if (!nunroll)
316     unr_insns = 0;
317   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
318 
319   unr_insns = unr_insns * 2 / 3;
320   if (unr_insns <= 0)
321     unr_insns = 1;
322 
323   return unr_insns;
324 }
325 
/* Tries to unroll LOOP completely, i.e. NITER times.
   UL determines which loops we are allowed to unroll.
   EXIT is the exit of the loop that should be eliminated.
   Returns true if the loop was completely unrolled.  */

static bool
try_unroll_loop_completely (struct loop *loop,
			    edge exit, tree niter,
			    enum unroll_level ul)
{
  unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
  gimple cond;
  struct loop_size size;

  /* Only innermost loops are handled here.  */
  if (loop->inner)
    return false;

  /* NITER must be a known, unsigned-host-representable constant.  */
  if (!host_integerp (niter, 1))
    return false;
  n_unroll = tree_low_cst (niter, 1);

  max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
  if (n_unroll > max_unroll)
    return false;

  /* N_UNROLL == 0 means the body executes exactly once; no copies are
     needed and only the exit test is rewritten below.  */
  if (n_unroll)
    {
      if (ul == UL_SINGLE_ITER)
	return false;

      tree_estimate_loop_size (loop, exit, &size);
      ninsns = size.overall;

      unr_insns = estimated_unrolled_size (&size, n_unroll);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
		   (int) unr_insns);
	}

      /* Refuse when the unrolled size both grows and exceeds the
	 --param max-completely-peeled-insns limit.  */
      if (unr_insns > ninsns
	  && (unr_insns
	      > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d "
		     "(--param max-completely-peeled-insns limit reached).\n",
		     loop->num);
	  return false;
	}

      if (ul == UL_NO_GROWTH
	  && unr_insns > ninsns)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
	  return false;
	}
    }

  if (n_unroll)
    {
      sbitmap wont_exit;
      edge e;
      unsigned i;
      VEC (edge, heap) *to_remove = NULL;

      initialize_original_copy_tables ();
      /* All copies except the first one (the peeled-off original
	 iteration, bit 0) cannot exit the loop.  */
      wont_exit = sbitmap_alloc (n_unroll + 1);
      sbitmap_ones (wont_exit);
      RESET_BIT (wont_exit, 0);

      /* NOTE: DLTHE_FLAG_COMPLETTE_PEEL is GCC's own (misspelled)
	 flag name; do not "correct" it.  */
      if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
						 n_unroll, wont_exit,
						 exit, &to_remove,
						 DLTHE_FLAG_UPDATE_FREQ
						 | DLTHE_FLAG_COMPLETTE_PEEL))
	{
          free_original_copy_tables ();
	  free (wont_exit);
	  return false;
	}

      /* Remove the now-dead exit edges collected by the duplicator.  */
      for (i = 0; VEC_iterate (edge, to_remove, i, e); i++)
	{
	  bool ok = remove_path (e);
	  gcc_assert (ok);
	}

      VEC_free (edge, heap, to_remove);
      free (wont_exit);
      free_original_copy_tables ();
    }

  /* Force the remaining exit condition to leave the loop so the body
     is executed exactly once and the back edge becomes dead.  */
  cond = last_stmt (exit->src);
  if (exit->flags & EDGE_TRUE_VALUE)
    gimple_cond_make_true (cond);
  else
    gimple_cond_make_false (cond);
  update_stmt (cond);
  update_ssa (TODO_update_ssa);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);

  return true;
}
433 
434 /* Adds a canonical induction variable to LOOP if suitable.
435    CREATE_IV is true if we may create a new iv.  UL determines
436    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
437    to determine the number of iterations of a loop by direct evaluation.
438    Returns true if cfg is changed.  */
439 
440 static bool
441 canonicalize_loop_induction_variables (struct loop *loop,
442 				       bool create_iv, enum unroll_level ul,
443 				       bool try_eval)
444 {
445   edge exit = NULL;
446   tree niter;
447 
448   niter = number_of_latch_executions (loop);
449   if (TREE_CODE (niter) == INTEGER_CST)
450     {
451       exit = single_exit (loop);
452       if (!just_once_each_iteration_p (loop, exit->src))
453 	return false;
454     }
455   else
456     {
457       /* If the loop has more than one exit, try checking all of them
458 	 for # of iterations determinable through scev.  */
459       if (!single_exit (loop))
460 	niter = find_loop_niter (loop, &exit);
461 
462       /* Finally if everything else fails, try brute force evaluation.  */
463       if (try_eval
464 	  && (chrec_contains_undetermined (niter)
465 	      || TREE_CODE (niter) != INTEGER_CST))
466 	niter = find_loop_niter_by_eval (loop, &exit);
467 
468       if (chrec_contains_undetermined (niter)
469 	  || TREE_CODE (niter) != INTEGER_CST)
470 	return false;
471     }
472 
473   if (dump_file && (dump_flags & TDF_DETAILS))
474     {
475       fprintf (dump_file, "Loop %d iterates ", loop->num);
476       print_generic_expr (dump_file, niter, TDF_SLIM);
477       fprintf (dump_file, " times.\n");
478     }
479 
480   if (try_unroll_loop_completely (loop, exit, niter, ul))
481     return true;
482 
483   if (create_iv)
484     create_canonical_iv (loop, exit, niter);
485 
486   return false;
487 }
488 
489 /* The main entry point of the pass.  Adds canonical induction variables
490    to the suitable loops.  */
491 
492 unsigned int
493 canonicalize_induction_variables (void)
494 {
495   loop_iterator li;
496   struct loop *loop;
497   bool changed = false;
498 
499   FOR_EACH_LOOP (li, loop, 0)
500     {
501       changed |= canonicalize_loop_induction_variables (loop,
502 							true, UL_SINGLE_ITER,
503 							true);
504     }
505 
506   /* Clean up the information about numbers of iterations, since brute force
507      evaluation could reveal new information.  */
508   scev_reset ();
509 
510   if (changed)
511     return TODO_cleanup_cfg;
512   return 0;
513 }
514 
515 /* Unroll LOOPS completely if they iterate just few times.  Unless
516    MAY_INCREASE_SIZE is true, perform the unrolling only if the
517    size of the code does not increase.  */
518 
519 unsigned int
520 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
521 {
522   loop_iterator li;
523   struct loop *loop;
524   bool changed;
525   enum unroll_level ul;
526   int iteration = 0;
527 
528   do
529     {
530       changed = false;
531 
532       FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
533 	{
534 	  if (may_increase_size && optimize_loop_for_speed_p (loop)
535 	      /* Unroll outermost loops only if asked to do so or they do
536 		 not cause code growth.  */
537 	      && (unroll_outer
538 		  || loop_outer (loop_outer (loop))))
539 	    ul = UL_ALL;
540 	  else
541 	    ul = UL_NO_GROWTH;
542 	  changed |= canonicalize_loop_induction_variables
543 		       (loop, false, ul, !flag_tree_loop_ivcanon);
544 	}
545 
546       if (changed)
547 	{
548 	  /* This will take care of removing completely unrolled loops
549 	     from the loop structures so we can continue unrolling now
550 	     innermost loops.  */
551 	  if (cleanup_tree_cfg ())
552 	    update_ssa (TODO_update_ssa_only_virtuals);
553 
554 	  /* Clean up the information about numbers of iterations, since
555 	     complete unrolling might have invalidated it.  */
556 	  scev_reset ();
557 	}
558     }
559   while (changed
560 	 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
561 
562   return 0;
563 }
564