/* Natural loop analysis code for GNU compiler.
   Copyright (C) 2002-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hard-reg-set.h"
#include "obstack.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "expr.h"
#include "graphds.h"
#include "params.h"

struct target_cfgloop default_target_cfgloop;
#if SWITCHABLE_TARGET
struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
#endif

/* Checks whether BB is executed exactly once in each LOOP iteration.  */

bool
just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
{
  /* It must be executed at least once each iteration.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return false;
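  /* E.g. a block on only one arm of a conditional inside the loop does
     not dominate the latch, so the test above rejects it.  */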

  /* And just once.  */
  if (bb->loop_father != loop)
    return false;

  /* But this was not enough.  We might have some irreducible loop here.  */
  if (bb->flags & BB_IRREDUCIBLE_LOOP)
    return false;

  return true;
}

/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to the fact that we do not want to
   do this for parts of cycles that merely "pass" through some loop -- i.e.
   for each cycle, we want to mark only the blocks that belong directly to
   the innermost loop containing the whole cycle.

   Operates on the loop tree stored in CURRENT_LOOPS.  */

#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block)
#define BB_REPR(BB) ((BB)->index + 1)
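
/* In the auxiliary graph built below, basic block B is represented by
   vertex BB_REPR (B) and loop L by vertex LOOP_REPR (L).  The
   representative of the root loop (number 0) is never needed, since no
   edge targets the root's header and exits are attributed to sons of
   common ancestors, so the two ranges do not clash.  */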

bool
mark_irreducible_loops (void)
{
  basic_block act;
  struct graph_edge *ge;
  edge e;
  edge_iterator ei;
  int src, dest;
  unsigned depth;
  struct graph *g;
  int num = number_of_loops ();
  struct loop *cloop;
  bool irred_loop_found = false;
  int i;

  gcc_assert (current_loops != NULL);

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
	e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Create the edge lists.  */
  g = new_graph (last_basic_block + num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
	/* Ignore edges to exit.  */
	if (e->dest == EXIT_BLOCK_PTR)
	  continue;

	src = BB_REPR (act);
	dest = BB_REPR (e->dest);

	/* Ignore latch edges.  */
	if (e->dest->loop_father->header == e->dest
	    && e->dest->loop_father->latch == act)
	  continue;
	/* Edges inside a single loop should be left where they are.  Edges
	   to subloop headers should lead to the representative of the
	   subloop, but from the same place.

	   Edges exiting loops should lead from the representative of the
	   son of the nearest common ancestor of the source and destination
	   loops, namely the son in which ACT lies.  */

	if (e->dest->loop_father->header == e->dest)
	  dest = LOOP_REPR (e->dest->loop_father);

	if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
	  {
	    depth = 1 + loop_depth (find_common_loop (act->loop_father,
						      e->dest->loop_father));
	    if (depth == loop_depth (act->loop_father))
	      cloop = act->loop_father;
	    else
	      cloop = (*act->loop_father->superloops)[depth];

	    src = LOOP_REPR (cloop);
	  }

	add_edge (g, src, dest)->data = e;
      }

  /* Find the strongly connected components.  */
  graphds_scc (g, NULL);

  /* Mark the irreducible loops.  */
  for (i = 0; i < g->n_vertices; i++)
    for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
      {
	edge real = (edge) ge->data;
	/* An edge in graph G is irreducible if it connects two vertices in
	   the same strongly connected component.  */

	/* All edges should lead from a component with a higher number to
	   one with a lower or equal number.  */
	gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);

	if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
	  continue;

	real->flags |= EDGE_IRREDUCIBLE_LOOP;
	irred_loop_found = true;
	if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
	  real->src->flags |= BB_IRREDUCIBLE_LOOP;
      }

  free_graph (g);

  loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
  return irred_loop_found;
}

/* Counts number of insns inside LOOP.  */
int
num_loop_insns (const struct loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, ninsns = 0;
  rtx insn;

  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  ninsns++;
    }
  free (bbs);

  if (!ninsns)
    ninsns = 1;	/* To avoid division by zero.  */

  return ninsns;
}

/* Counts number of insns executed on average per iteration of LOOP.  */
int
average_num_loop_insns (const struct loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, binsns, ninsns, ratio;
  rtx insn;

  ninsns = 0;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      binsns = 0;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  binsns++;

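      /* Weight the block's insn count by its execution frequency relative
	 to the loop header, in fixed point with scale BB_FREQ_MAX.  */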
      ratio = loop->header->frequency == 0
	      ? BB_FREQ_MAX
	      : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
      ninsns += binsns * ratio;
    }
  free (bbs);

  ninsns /= BB_FREQ_MAX;
  if (!ninsns)
    ninsns = 1; /* To avoid division by zero.  */

  return ninsns;
}

/* Returns expected number of iterations of LOOP, according to
   measured or guessed profile.  No bounding is done on the
   value.  */

gcov_type
expected_loop_iterations_unbounded (const struct loop *loop)
{
  edge e;
  edge_iterator ei;

  if (loop->latch->count || loop->header->count)
    {
      gcov_type count_in, count_latch, expected;

      count_in = 0;
      count_latch = 0;

      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src == loop->latch)
	  count_latch = e->count;
	else
	  count_in += e->count;

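      /* The header runs count_latch + count_in times in total and the
	 loop is entered count_in times, so the expected iteration count
	 is roughly their ratio; the -1 below makes the integer division
	 of the latch contribution round up.  */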
      if (count_in == 0)
	expected = count_latch * 2;
      else
	expected = (count_latch + count_in - 1) / count_in;

      return expected;
    }
  else
    {
      int freq_in, freq_latch;

      freq_in = 0;
      freq_latch = 0;

      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src == loop->latch)
	  freq_latch = EDGE_FREQUENCY (e);
	else
	  freq_in += EDGE_FREQUENCY (e);

      if (freq_in == 0)
	return freq_latch * 2;

      return (freq_latch + freq_in - 1) / freq_in;
    }
}

/* Returns expected number of LOOP iterations.  The returned value is bounded
   by REG_BR_PROB_BASE.  */

unsigned
expected_loop_iterations (const struct loop *loop)
{
  gcov_type expected = expected_loop_iterations_unbounded (loop);
  return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
}

/* Returns the maximum level of nesting of subloops of LOOP (zero when
   LOOP has no subloops).  */

unsigned
get_loop_level (const struct loop *loop)
{
  const struct loop *ploop;
  unsigned mx = 0, l;

  for (ploop = loop->inner; ploop; ploop = ploop->next)
    {
      l = get_loop_level (ploop);
      if (l >= mx)
	mx = l + 1;
    }
  return mx;
}

/* Returns an estimate of the cost of computing SEQ.  */

static unsigned
seq_cost (const_rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
	cost += set_rtx_cost (set, speed);
      else
	cost++;
    }

  return cost;
}

/* Initialize the constants for computing set costs.  */

void
init_set_costs (void)
{
  int speed;
  rtx seq;
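  /* Registers numbered from FIRST_PSEUDO_REGISTER up are pseudos, so the
     moves generated below are generic reg-reg and reg-mem moves rather
     than moves between specific hard registers.  */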
  rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
  rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
  rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  target_avail_regs = 0;
  target_clobbered_regs = 0;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
	&& !fixed_regs[i])
      {
	target_avail_regs++;
	if (call_used_regs[i])
	  target_clobbered_regs++;
      }

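  /* Number of registers reserved for temporary expressions; the value 3
     is a heuristic constant rather than a derived one.  */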
  target_res_regs = 3;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      /* Set up the costs for using extra registers:

	 1) If only a few free registers remain, prefer an additional move
	    over decreasing the number of available registers
	    (TARGET_REG_COST).
	 2) If no registers are available, we need to spill, which may
	    require storing the old value to memory and loading it back
	    (TARGET_SPILL_COST).  */

      start_sequence ();
      emit_move_insn (reg1, reg2);
      seq = get_insns ();
      end_sequence ();
      target_reg_cost [speed] = seq_cost (seq, speed);

      start_sequence ();
      emit_move_insn (mem, reg1);
      emit_move_insn (reg2, mem);
      seq = get_insns ();
      end_sequence ();
      target_spill_cost [speed] = seq_cost (seq, speed);
    }
  default_rtl_profile ();
}

/* Estimates cost of increased register pressure caused by making N_NEW new
   registers live around the loop.  N_OLD is the number of registers live
   around the loop.  If CALL_P is true, also take into account that
   call-used registers may be clobbered in the loop body, reducing the
   number of available registers before we spill.  */
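/* For instance (illustrative numbers only): with target_avail_regs == 16,
   target_res_regs == 3, n_old == 12, n_new == 2 and no call in the body,
   regs_needed == 14; 14 + 3 > 16 but 14 <= 16, so the estimate below is
   2 * target_reg_cost rather than the (higher) spill cost.  */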

unsigned
estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
			    bool call_p)
{
  unsigned cost;
  unsigned regs_needed = n_new + n_old;
  unsigned available_regs = target_avail_regs;

  /* If there is a call in the loop body, the call-clobbered registers
     are not available for loop invariants.  */
  if (call_p)
    available_regs = available_regs - target_clobbered_regs;

  /* If we have enough registers, we should use them and not restrict
     the transformations unnecessarily.  */
  if (regs_needed + target_res_regs <= available_regs)
    return 0;

  if (regs_needed <= available_regs)
    /* If we are close to running out of registers, try to preserve
       them.  */
    cost = target_reg_cost [speed] * n_new;
  else
    /* If we run out of registers, it is very expensive to add another
       one.  */
    cost = target_spill_cost [speed] * n_new;

  if (optimize && (flag_ira_region == IRA_REGION_ALL
		   || flag_ira_region == IRA_REGION_MIXED)
      && number_of_loops () <= (unsigned) IRA_MAX_LOOPS_NUM)
    /* IRA regional allocation deals with high register pressure
       better.  So decrease the cost (to make the cost calculation
       more accurate for IRA, we would need to know how many registers
       live through the loop transparently).  */
    cost /= 2;

  return cost;
}

/* Sets EDGE_LOOP_EXIT flag for all loop exits.  */

void
mark_loop_exit_edges (void)
{
  basic_block bb;
  edge e;

  if (number_of_loops () <= 1)
    return;

  FOR_EACH_BB (bb)
    {
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (loop_outer (bb->loop_father)
	      && loop_exit_edge_p (bb->loop_father, e))
	    e->flags |= EDGE_LOOP_EXIT;
	  else
	    e->flags &= ~EDGE_LOOP_EXIT;
	}
    }
}

/* Returns the exit edge if LOOP has only one exit that is likely to be
   executed at runtime, i.e. an exit that is not an EH edge and does not
   lead to a noreturn call.  */

edge
single_likely_exit (struct loop *loop)
{
  edge found = single_exit (loop);
  vec<edge> exits;
  unsigned i;
  edge ex;

  if (found)
    return found;
  exits = get_loop_exit_edges (loop);
  FOR_EACH_VEC_ELT (exits, i, ex)
    {
      if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL))
	continue;
      /* The constant of 5 is chosen so that noreturn calls are ruled
	 out by this test; the static branch prediction algorithm does
	 not assign such a low probability to ordinary conditionals.  */
      if (profile_status != PROFILE_ABSENT
	  && ex->probability < 5 && !ex->count)
	continue;
      if (!found)
	found = ex;
      else
	{
	  exits.release ();
	  return NULL;
	}
    }
  exits.release ();
  return found;
}


/* Returns the hot path of LOOP, i.e. the blocks on the path that starts
   at the header and repeatedly follows the most probable successor edge
   staying inside the loop, stopping when an exit or an already visited
   block would be entered.  */

vec<basic_block>
get_loop_hot_path (const struct loop *loop)
{
  basic_block bb = loop->header;
  vec<basic_block> path = vNULL;
  bitmap visited = BITMAP_ALLOC (NULL);

  while (true)
    {
      edge_iterator ei;
      edge e;
      edge best = NULL;

      path.safe_push (bb);
      bitmap_set_bit (visited, bb->index);
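      /* Among the successors that stay inside the loop and have not been
	 visited yet, pick the edge with the highest probability.  */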
      FOR_EACH_EDGE (e, ei, bb->succs)
	if ((!best || e->probability > best->probability)
	    && !loop_exit_edge_p (loop, e)
	    && !bitmap_bit_p (visited, e->dest->index))
	  best = e;
      if (!best || best->dest == loop->header)
	break;
      bb = best->dest;
    }
  BITMAP_FREE (visited);
  return path;
}