/* Natural loop analysis code for GNU compiler.
   Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hard-reg-set.h"
#include "obstack.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "expr.h"
#include "output.h"
#include "graphds.h"
#include "params.h"

/* Checks whether BB is executed exactly once in each LOOP iteration.  */

bool
just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
{
  /* It must be executed at least once each iteration.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return false;

  /* And just once.  */
  if (bb->loop_father != loop)
    return false;

  /* But this was not enough.  We might have some irreducible loop here.  */
  if (bb->flags & BB_IRREDUCIBLE_LOOP)
    return false;

  return true;
}
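
/* Rationale for the three checks above (an explanatory note): if BB
   dominates the latch, every iteration passes through BB, so it runs at
   least once per iteration.  If BB moreover belongs to LOOP itself and
   not to a subloop, no inner cycle can make it run twice -- unless BB
   lies in an irreducible region, where a cycle invisible to the loop
   tree may re-enter it; hence the final BB_IRREDUCIBLE_LOOP test.  */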

/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to the fact that we do not want to
   do this for parts of cycles that only "pass" through some loop -- i.e.
   for each cycle, we want to mark the blocks that belong directly to the
   innermost loop containing the whole cycle.

   The loop tree of CURRENT_LOOPS is used.  */

#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block)
#define BB_REPR(BB) ((BB)->index + 1)
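
/* A note on the vertex numbering above: the auxiliary graph built below
   has last_basic_block + number_of_loops () vertices.  The block with
   index I is represented by vertex I + 1 and the loop with number N by
   vertex last_basic_block + N; vertex 0 and the representative of the
   root loop are effectively unused.  */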

bool
mark_irreducible_loops (void)
{
  basic_block act;
  struct graph_edge *ge;
  edge e;
  edge_iterator ei;
  int src, dest;
  unsigned depth;
  struct graph *g;
  int num = number_of_loops ();
  struct loop *cloop;
  bool irred_loop_found = false;
  int i;

  gcc_assert (current_loops != NULL);

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
        e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Create the edge lists.  */
  g = new_graph (last_basic_block + num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
        /* Ignore edges to exit.  */
        if (e->dest == EXIT_BLOCK_PTR)
          continue;

        src = BB_REPR (act);
        dest = BB_REPR (e->dest);

        /* Ignore latch edges.  */
        if (e->dest->loop_father->header == e->dest
            && e->dest->loop_father->latch == act)
          continue;

        /* Edges inside a single loop should be left where they are.  Edges
           to subloop headers should lead to the representative of the
           subloop, but from the same place.

           Edges exiting loops should lead from the representative of the
           son of the nearest common ancestor of the loops in which ACT
           lies.  */
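
        /* For example (an illustrative case): if ACT lies in a loop L2
           nested in L1 and E exits L2 for a block directly in the root,
           the common ancestor is the root, DEPTH is 1, and the edge is
           attached to the representative of L1, the depth-1 superloop
           of ACT.  */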

        if (e->dest->loop_father->header == e->dest)
          dest = LOOP_REPR (e->dest->loop_father);

        if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
          {
            depth = 1 + loop_depth (find_common_loop (act->loop_father,
                                                      e->dest->loop_father));
            if (depth == loop_depth (act->loop_father))
              cloop = act->loop_father;
            else
              cloop = VEC_index (loop_p, act->loop_father->superloops, depth);

            src = LOOP_REPR (cloop);
          }

        add_edge (g, src, dest)->data = e;
      }

  /* Find the strongly connected components.  */
  graphds_scc (g, NULL);

  /* Mark the irreducible loops.  */
  for (i = 0; i < g->n_vertices; i++)
    for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
      {
        edge real = (edge) ge->data;
        /* An edge E in graph G is irreducible if it connects two vertices
           in the same SCC.  */

        /* All edges should lead from a component with a higher number to
           one with a lower number.  */
        gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);

        if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
          continue;

        real->flags |= EDGE_IRREDUCIBLE_LOOP;
        irred_loop_found = true;
        if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
          real->src->flags |= BB_IRREDUCIBLE_LOOP;
      }

  free_graph (g);

  loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
  return irred_loop_found;
}

/* Counts number of insns inside LOOP.  */
int
num_loop_insns (const struct loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, ninsns = 0;
  rtx insn;

  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      FOR_BB_INSNS (bb, insn)
        if (NONDEBUG_INSN_P (insn))
          ninsns++;
    }
  free (bbs);

  if (!ninsns)
    ninsns = 1;  /* To avoid division by zero.  */

  return ninsns;
}

/* Counts number of insns executed on average per iteration of LOOP.  */
int
average_num_loop_insns (const struct loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, binsns, ninsns, ratio;
  rtx insn;

  ninsns = 0;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      binsns = 0;
      FOR_BB_INSNS (bb, insn)
        if (NONDEBUG_INSN_P (insn))
          binsns++;

      ratio = loop->header->frequency == 0
              ? BB_FREQ_MAX
              : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
      ninsns += binsns * ratio;
    }
  free (bbs);

  ninsns /= BB_FREQ_MAX;
  if (!ninsns)
    ninsns = 1; /* To avoid division by zero.  */

  return ninsns;
}
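
/* A worked example for the weighting above (with made-up numbers): if
   the header has frequency 1000 and a body block containing 4 insns has
   frequency 500, the block contributes 4 * (500 * BB_FREQ_MAX / 1000)
   = 2 * BB_FREQ_MAX to the sum, i.e. 2 insns per average iteration
   after the final division by BB_FREQ_MAX.  */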

/* Returns expected number of iterations of LOOP, according to
   measured or guessed profile.  No bounding is done on the
   value.  */

gcov_type
expected_loop_iterations_unbounded (const struct loop *loop)
{
  edge e;
  edge_iterator ei;

  if (loop->latch->count || loop->header->count)
    {
      gcov_type count_in, count_latch, expected;

      count_in = 0;
      count_latch = 0;

      FOR_EACH_EDGE (e, ei, loop->header->preds)
        if (e->src == loop->latch)
          count_latch = e->count;
        else
          count_in += e->count;

      if (count_in == 0)
        expected = count_latch * 2;
      else
        expected = (count_latch + count_in - 1) / count_in;

      return expected;
    }
  else
    {
      int freq_in, freq_latch;

      freq_in = 0;
      freq_latch = 0;

      FOR_EACH_EDGE (e, ei, loop->header->preds)
        if (e->src == loop->latch)
          freq_latch = EDGE_FREQUENCY (e);
        else
          freq_in += EDGE_FREQUENCY (e);

      if (freq_in == 0)
        return freq_latch * 2;

      return (freq_latch + freq_in - 1) / freq_in;
    }
}
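
/* A worked example for the count-based branch above (hypothetical
   profile): if the latch edge has count 990 and the entry edges have a
   combined count of 10, the expected iteration count is
   (990 + 10 - 1) / 10 = 99; the formula computes the ceiling of
   count_latch / count_in.  */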

/* Returns expected number of LOOP iterations.  The returned value is bounded
   by REG_BR_PROB_BASE.  */

unsigned
expected_loop_iterations (const struct loop *loop)
{
  gcov_type expected = expected_loop_iterations_unbounded (loop);
  return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
}

/* Returns the maximum level of nesting of subloops of LOOP.  */

unsigned
get_loop_level (const struct loop *loop)
{
  const struct loop *ploop;
  unsigned mx = 0, l;

  for (ploop = loop->inner; ploop; ploop = ploop->next)
    {
      l = get_loop_level (ploop);
      if (l >= mx)
        mx = l + 1;
    }
  return mx;
}
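
/* E.g. a loop without subloops has level 0 and a loop whose deepest
   subloop chain is two loops long has level 2.  */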

/* Returns an estimate of the cost of computing SEQ.  */

static unsigned
seq_cost (const_rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += rtx_cost (set, SET, speed);
      else
        cost++;
    }

  return cost;
}
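
/* E.g. an insn recognized by single_set is charged the rtx_cost of its
   SET, while anything else (say a multi-set PARALLEL) is charged a flat
   cost of 1.  */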

/* The properties of the target.  */

unsigned target_avail_regs;     /* Number of available registers.  */
unsigned target_res_regs;       /* Number of registers reserved for temporary
                                   expressions.  */
unsigned target_reg_cost[2];    /* The cost of using a register when there
                                   is still some reserve, but we are
                                   approaching the number of available
                                   registers.  */
unsigned target_spill_cost[2];  /* The cost of using a register when we
                                   need to spill.  */

/* Initialize the constants for computing set costs.  */

void
init_set_costs (void)
{
  int speed;
  rtx seq;
  rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
  rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
  rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  target_avail_regs = 0;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
        && !fixed_regs[i])
      target_avail_regs++;

  target_res_regs = 3;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      /* Set up the costs for using extra registers:

         1) If not many free registers remain, we should prefer having an
            additional move to decreasing the number of available registers.
            (TARGET_REG_COST).
         2) If no registers are available, we need to spill, which may require
            storing the old value to memory and loading it back
            (TARGET_SPILL_COST).  */

      start_sequence ();
      emit_move_insn (reg1, reg2);
      seq = get_insns ();
      end_sequence ();
      target_reg_cost [speed] = seq_cost (seq, speed);

      start_sequence ();
      emit_move_insn (mem, reg1);
      emit_move_insn (reg2, mem);
      seq = get_insns ();
      end_sequence ();
      target_spill_cost [speed] = seq_cost (seq, speed);
    }
  default_rtl_profile ();
}
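
/* To illustrate (hypothetical target): if a register-to-register move
   expands to one insn with cost 2 and a spill requires a store and a
   reload costing 4 each, the loop above sets target_reg_cost[speed]
   to 2 and target_spill_cost[speed] to 8.  These constants feed
   estimate_reg_pressure_cost below.  */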

/* Estimates cost of increased register pressure caused by making N_NEW new
   registers live around the loop.  N_OLD is the number of registers live
   around the loop.  */

unsigned
estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed)
{
  unsigned cost;
  unsigned regs_needed = n_new + n_old;

  /* If we have enough registers, we should use them and not restrict
     the transformations unnecessarily.  */
  if (regs_needed + target_res_regs <= target_avail_regs)
    return 0;

  if (regs_needed <= target_avail_regs)
    /* If we are close to running out of registers, try to preserve
       them.  */
    cost = target_reg_cost [speed] * n_new;
  else
    /* If we run out of registers, it is very expensive to add another
       one.  */
    cost = target_spill_cost [speed] * n_new;
  if (optimize && (flag_ira_region == IRA_REGION_ALL
                   || flag_ira_region == IRA_REGION_MIXED)
      && number_of_loops () <= (unsigned) IRA_MAX_LOOPS_NUM)
    /* IRA regional allocation deals with high register pressure
       better.  So decrease the cost (to make the cost calculation for
       IRA more accurate, we would need to know how many registers live
       through the loop transparently).  */
    cost /= 2;

  return cost;
}
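
/* A worked example (hypothetical target with target_avail_regs == 16
   and target_res_regs == 3): for n_old == 10, n_new == 2 gives
   12 + 3 <= 16, so the cost is 0; n_new == 4 gives 14 + 3 > 16 but
   14 <= 16, so the cost is 4 * target_reg_cost[speed]; n_new == 8
   gives 18 > 16, so the cost is 8 * target_spill_cost[speed],
   possibly halved when IRA allocates regionally.  */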

/* Sets EDGE_LOOP_EXIT flag for all loop exits.  */

void
mark_loop_exit_edges (void)
{
  basic_block bb;
  edge e;

  if (number_of_loops () <= 1)
    return;

  FOR_EACH_BB (bb)
    {
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          if (loop_outer (bb->loop_father)
              && loop_exit_edge_p (bb->loop_father, e))
            e->flags |= EDGE_LOOP_EXIT;
          else
            e->flags &= ~EDGE_LOOP_EXIT;
        }
    }
}