/* Utilities for ipa analysis.
   Copyright (C) 2005-2017 Free Software Foundation, Inc.
   Contributed by Kenneth Zadeck <zadeck@naturalbridge.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "cgraph.h"
#include "lto-streamer.h"
#include "dumpfile.h"
#include "splay-tree.h"
#include "ipa-utils.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "ipa-inline.h"

/* Debugging function for postorder and inorder code.  NOTE is a string
   that is printed before the nodes are printed.  ORDER is an array of
   cgraph_nodes that has COUNT useful nodes in it.  */

void
ipa_print_order (FILE* out,
		 const char * note,
		 struct cgraph_node** order,
		 int count)
{
  int i;
  fprintf (out, "\n\n ordered call graph: %s\n", note);

  for (i = count - 1; i >= 0; i--)
    order[i]->dump (out);
  fprintf (out, "\n");
  fflush (out);
}
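
/* Example (illustrative sketch, not part of the original file): dumping an
   order produced by one of the functions below, assuming ORDER and N were
   obtained from ipa_reduced_postorder or ipa_reverse_postorder and DUMP_FILE
   is the pass dump file.

     if (dump_file)
       ipa_print_order (dump_file, "reduced postorder", order, n);
*/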


struct searchc_env {
  struct cgraph_node **stack;
  struct cgraph_node **result;
  int stack_size;
  int order_pos;
  splay_tree nodes_marked_new;
  bool reduce;
  bool allow_overwritable;
  int count;
};

/* This is an implementation of Tarjan's strongly connected component
   finder as reprinted in Aho, Hopcroft and Ullman's The Design and
   Analysis of Computer Algorithms (1974), pages 192-193.  This version
   has been customized for cgraph_nodes.  ENV holds the traversal state
   and would be unnecessary if C had nested functions; this function
   should only be called from itself or ipa_reduced_postorder.  V is
   the node to start searching from.  */

static void
searchc (struct searchc_env* env, struct cgraph_node *v,
	 bool (*ignore_edge) (struct cgraph_edge *))
{
  struct cgraph_edge *edge;
  struct ipa_dfs_info *v_info = (struct ipa_dfs_info *) v->aux;

  /* Mark the node as old.  */
  v_info->new_node = false;
  splay_tree_remove (env->nodes_marked_new, v->uid);

  v_info->dfn_number = env->count;
  v_info->low_link = env->count;
  env->count++;
  env->stack[(env->stack_size)++] = v;
  v_info->on_stack = true;

  for (edge = v->callees; edge; edge = edge->next_callee)
    {
      struct ipa_dfs_info * w_info;
      enum availability avail;
      struct cgraph_node *w = edge->callee->ultimate_alias_target (&avail);

      if (!w || (ignore_edge && ignore_edge (edge)))
        continue;

      if (w->aux
	  && (avail > AVAIL_INTERPOSABLE
	      || (env->allow_overwritable && avail == AVAIL_INTERPOSABLE)))
	{
	  w_info = (struct ipa_dfs_info *) w->aux;
	  if (w_info->new_node)
	    {
	      searchc (env, w, ignore_edge);
	      v_info->low_link =
		(v_info->low_link < w_info->low_link) ?
		v_info->low_link : w_info->low_link;
	    }
	  else
	    if ((w_info->dfn_number < v_info->dfn_number)
		&& (w_info->on_stack))
	      v_info->low_link =
		(w_info->dfn_number < v_info->low_link) ?
		w_info->dfn_number : v_info->low_link;
	}
    }


  if (v_info->low_link == v_info->dfn_number)
    {
      struct cgraph_node *last = NULL;
      struct cgraph_node *x;
      struct ipa_dfs_info *x_info;
      do {
	x = env->stack[--(env->stack_size)];
	x_info = (struct ipa_dfs_info *) x->aux;
	x_info->on_stack = false;
	x_info->scc_no = v_info->dfn_number;

	if (env->reduce)
	  {
	    x_info->next_cycle = last;
	    last = x;
	  }
	else
	  env->result[env->order_pos++] = x;
      }
      while (v != x);
      if (env->reduce)
	env->result[env->order_pos++] = v;
    }
}
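
/* Illustrative note (not part of the original file): if functions A and B
   call only each other, searchc places them in one SCC.  With env->reduce
   set, both ipa_dfs_info entries receive the same scc_no, only the SCC root
   (A, say) is stored into env->result, and B remains reachable from A
   through the next_cycle chain that ipa_get_nodes_in_cycle walks below.  */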
/* Topsort the call graph by caller relation.  Put the result in ORDER.

   The REDUCE flag is true if you want the cycles reduced to single nodes.
   You can use ipa_get_nodes_in_cycle to obtain a vector containing all real
   call graph nodes in a reduced node.

   Set ALLOW_OVERWRITABLE if nodes with such availability should be included.
   IGNORE_EDGE, if non-NULL, is a hook that may make some edges insignificant
   for the topological sort.  */

int
ipa_reduced_postorder (struct cgraph_node **order,
		       bool reduce, bool allow_overwritable,
		       bool (*ignore_edge) (struct cgraph_edge *))
{
  struct cgraph_node *node;
  struct searchc_env env;
  splay_tree_node result;
  env.stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  env.stack_size = 0;
  env.result = order;
  env.order_pos = 0;
  env.nodes_marked_new = splay_tree_new (splay_tree_compare_ints, 0, 0);
  env.count = 1;
  env.reduce = reduce;
  env.allow_overwritable = allow_overwritable;

  FOR_EACH_DEFINED_FUNCTION (node)
    {
      enum availability avail = node->get_availability ();

      if (avail > AVAIL_INTERPOSABLE
	  || (allow_overwritable
	      && (avail == AVAIL_INTERPOSABLE)))
	{
	  /* Reuse the info if it is already there.  */
	  struct ipa_dfs_info *info = (struct ipa_dfs_info *) node->aux;
	  if (!info)
	    info = XCNEW (struct ipa_dfs_info);
	  info->new_node = true;
	  info->on_stack = false;
	  info->next_cycle = NULL;
	  node->aux = info;

	  splay_tree_insert (env.nodes_marked_new,
			     (splay_tree_key)node->uid,
			     (splay_tree_value)node);
	}
      else
	node->aux = NULL;
    }
  result = splay_tree_min (env.nodes_marked_new);
  while (result)
    {
      node = (struct cgraph_node *)result->value;
      searchc (&env, node, ignore_edge);
      result = splay_tree_min (env.nodes_marked_new);
    }
  splay_tree_delete (env.nodes_marked_new);
  free (env.stack);

  return env.order_pos;
}
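
/* Example usage (illustrative sketch, not part of the original file): the
   usual driver pattern for an IPA pass that wants to process strongly
   connected components of the call graph.  The allocation size and cleanup
   calls mirror what this file itself does; the processing step is a
   placeholder.

     struct cgraph_node **order
       = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
     int n = ipa_reduced_postorder (order, true, false, NULL);
     for (int i = 0; i < n; i++)
       ... process the SCC represented by order[i] ...
     ipa_free_postorder_info ();
     free (order);
*/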

/* Deallocate all ipa_dfs_info structures pointed to by the aux pointer of call
   graph nodes.  */

void
ipa_free_postorder_info (void)
{
  struct cgraph_node *node;
  FOR_EACH_DEFINED_FUNCTION (node)
    {
      /* Get rid of the aux information.  */
      if (node->aux)
	{
	  free (node->aux);
	  node->aux = NULL;
	}
    }
}

/* Get the set of nodes for the cycle in the reduced call graph starting
   from NODE.  */

vec<cgraph_node *>
ipa_get_nodes_in_cycle (struct cgraph_node *node)
{
  vec<cgraph_node *> v = vNULL;
  struct ipa_dfs_info *node_dfs_info;
  while (node)
    {
      v.safe_push (node);
      node_dfs_info = (struct ipa_dfs_info *) node->aux;
      node = node_dfs_info->next_cycle;
    }
  return v;
}
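
/* Example (illustrative sketch, not part of the original file): visiting the
   members of one reduced SCC, where NODE stands for an element of the array
   filled by ipa_reduced_postorder with REDUCE set.  The caller owns the
   vector and must release it.

     unsigned ix;
     cgraph_node *member;
     vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
     FOR_EACH_VEC_ELT (cycle_nodes, ix, member)
       ... visit MEMBER ...
     cycle_nodes.release ();
*/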

/* Return true iff CS is an edge within a strongly connected component as
   computed by ipa_reduced_postorder.  */

bool
ipa_edge_within_scc (struct cgraph_edge *cs)
{
  struct ipa_dfs_info *caller_dfs = (struct ipa_dfs_info *) cs->caller->aux;
  struct ipa_dfs_info *callee_dfs;
  struct cgraph_node *callee = cs->callee->function_symbol ();

  callee_dfs = (struct ipa_dfs_info *) callee->aux;
  return (caller_dfs
	  && callee_dfs
	  && caller_dfs->scc_no == callee_dfs->scc_no);
}
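
/* Example (illustrative sketch, not part of the original file): a propagation
   step that stays conservative for edges whose endpoints are in the same SCC,
   since the callee's summary within the cycle is not final yet; CS names the
   cgraph_edge being examined.

     if (ipa_edge_within_scc (cs))
       ... assume the worst for this edge ...
     else
       ... rely on the callee's already-computed summary ...
*/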

struct postorder_stack
{
  struct cgraph_node *node;
  struct cgraph_edge *edge;
  int ref;
};

/* Fill array ORDER with all nodes with output flag set in the reverse
   topological order.  Return the number of elements in the array.
   FIXME: While walking, consider aliases, too.  */

int
ipa_reverse_postorder (struct cgraph_node **order)
{
  struct cgraph_node *node, *node2;
  int stack_size = 0;
  int order_pos = 0;
  struct cgraph_edge *edge;
  int pass;
  struct ipa_ref *ref = NULL;

  struct postorder_stack *stack =
    XCNEWVEC (struct postorder_stack, symtab->cgraph_count);

  /* We have to deal with cycles nicely, so use a depth first traversal
     output algorithm.  Ignore the fact that some functions won't need
     to be output and put them into order as well, so we get dependencies
     right through inline functions.  */
  FOR_EACH_FUNCTION (node)
    node->aux = NULL;
  for (pass = 0; pass < 2; pass++)
    FOR_EACH_FUNCTION (node)
      if (!node->aux
	  && (pass
	      || (!node->address_taken
		  && !node->global.inlined_to
		  && !node->alias && !node->thunk.thunk_p
		  && !node->only_called_directly_p ())))
	{
	  stack_size = 0;
          stack[stack_size].node = node;
	  stack[stack_size].edge = node->callers;
	  stack[stack_size].ref = 0;
	  node->aux = (void *)(size_t)1;
	  while (stack_size >= 0)
	    {
	      while (true)
		{
		  node2 = NULL;
		  while (stack[stack_size].edge && !node2)
		    {
		      edge = stack[stack_size].edge;
		      node2 = edge->caller;
		      stack[stack_size].edge = edge->next_caller;
		      /* Break possible cycles involving always-inline
			 functions by ignoring edges from always-inline
			 functions to non-always-inline functions.  */
		      if (DECL_DISREGARD_INLINE_LIMITS (edge->caller->decl)
			  && !DECL_DISREGARD_INLINE_LIMITS
			    (edge->callee->function_symbol ()->decl))
			node2 = NULL;
		    }
		  for (; stack[stack_size].node->iterate_referring (
						       stack[stack_size].ref,
						       ref) && !node2;
		       stack[stack_size].ref++)
		    {
		      if (ref->use == IPA_REF_ALIAS)
			node2 = dyn_cast <cgraph_node *> (ref->referring);
		    }
		  if (!node2)
		    break;
		  if (!node2->aux)
		    {
		      stack[++stack_size].node = node2;
		      stack[stack_size].edge = node2->callers;
		      stack[stack_size].ref = 0;
		      node2->aux = (void *)(size_t)1;
		    }
		}
	      order[order_pos++] = stack[stack_size--].node;
	    }
	}
  free (stack);
  FOR_EACH_FUNCTION (node)
    node->aux = NULL;
  return order_pos;
}
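
/* Example usage (illustrative sketch, not part of the original file):
   obtaining the reverse topological order described above.  Unlike
   ipa_reduced_postorder, this function clears the aux pointers itself, so no
   ipa_free_postorder_info call is needed afterwards.

     struct cgraph_node **order
       = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
     int n = ipa_reverse_postorder (order);
     for (int i = 0; i < n; i++)
       ... visit order[i] ...
     free (order);
*/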



/* Given a memory reference T, return the variable at the bottom
   of the access.  Unlike get_base_address, this will recurse through
   INDIRECT_REFS.  */

tree
get_base_var (tree t)
{
  while (!SSA_VAR_P (t)
	 && (!CONSTANT_CLASS_P (t))
	 && TREE_CODE (t) != LABEL_DECL
	 && TREE_CODE (t) != FUNCTION_DECL
	 && TREE_CODE (t) != CONST_DECL
	 && TREE_CODE (t) != CONSTRUCTOR)
    {
      t = TREE_OPERAND (t, 0);
    }
  return t;
}
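
/* Example (illustrative, not part of the original file): for a reference such
   as a.b[i].c the loop above strips the COMPONENT_REFs and the ARRAY_REF and
   returns the VAR_DECL for "a"; for (*p).f it also walks through the
   INDIRECT_REF and returns "p", which get_base_address would not do.  */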


/* SRC and DST are going to be merged.  Take SRC's profile and merge it into
   DST so it is not going to be lost.  Possibly destroy SRC's body on the way
   unless PRESERVE_BODY is set.  */

void
ipa_merge_profiles (struct cgraph_node *dst,
		    struct cgraph_node *src,
		    bool preserve_body)
{
  tree oldsrcdecl = src->decl;
  struct function *srccfun, *dstcfun;
  bool match = true;

  if (!src->definition
      || !dst->definition)
    return;
  if (src->frequency < dst->frequency)
    src->frequency = dst->frequency;

  /* Time profiles are merged.  */
  if (dst->tp_first_run > src->tp_first_run && src->tp_first_run)
    dst->tp_first_run = src->tp_first_run;

  if (src->profile_id && !dst->profile_id)
    dst->profile_id = src->profile_id;

  if (!dst->count)
    return;
  if (!src->count || src->alias)
    return;
  if (symtab->dump_file)
    {
      fprintf (symtab->dump_file, "Merging profiles of %s/%i to %s/%i\n",
	       xstrdup_for_dump (src->name ()), src->order,
	       xstrdup_for_dump (dst->name ()), dst->order);
    }
  dst->count += src->count;

  /* This is ugly.  We need to get both function bodies into memory.
     If the declaration is merged, we need to duplicate it to be able
     to load the body that is being replaced.  This makes the symbol table
     temporarily inconsistent.  */
  if (src->decl == dst->decl)
    {
      struct lto_in_decl_state temp;
      struct lto_in_decl_state *state;

      /* We are going to move the decl; we want to remove its file decl data
	 and link it with the new decl.  */
      temp.fn_decl = src->decl;
      lto_in_decl_state **slot
	= src->lto_file_data->function_decl_states->find_slot (&temp,
							       NO_INSERT);
      state = *slot;
      src->lto_file_data->function_decl_states->clear_slot (slot);
      gcc_assert (state);

      /* Duplicate the decl and be sure it does not link into body of DST.  */
      src->decl = copy_node (src->decl);
      DECL_STRUCT_FUNCTION (src->decl) = NULL;
      DECL_ARGUMENTS (src->decl) = NULL;
      DECL_INITIAL (src->decl) = NULL;
      DECL_RESULT (src->decl) = NULL;

      /* Associate the decl state with the new declaration, so the LTO
	 streamer can look it up.  */
      state->fn_decl = src->decl;
      slot
	= src->lto_file_data->function_decl_states->find_slot (state, INSERT);
      gcc_assert (!*slot);
      *slot = state;
    }
  src->get_untransformed_body ();
  dst->get_untransformed_body ();
  srccfun = DECL_STRUCT_FUNCTION (src->decl);
  dstcfun = DECL_STRUCT_FUNCTION (dst->decl);
  if (n_basic_blocks_for_fn (srccfun)
      != n_basic_blocks_for_fn (dstcfun))
    {
      if (symtab->dump_file)
	fprintf (symtab->dump_file,
		 "Giving up; number of basic blocks mismatch.\n");
      match = false;
    }
  else if (last_basic_block_for_fn (srccfun)
	   != last_basic_block_for_fn (dstcfun))
    {
      if (symtab->dump_file)
	fprintf (symtab->dump_file,
		 "Giving up; last block mismatch.\n");
      match = false;
    }
  else
    {
      basic_block srcbb, dstbb;

      FOR_ALL_BB_FN (srcbb, srccfun)
	{
	  unsigned int i;

	  dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
	  if (dstbb == NULL)
	    {
	      if (symtab->dump_file)
		fprintf (symtab->dump_file,
			 "No matching block for bb %i.\n",
			 srcbb->index);
	      match = false;
	      break;
	    }
	  if (EDGE_COUNT (srcbb->succs) != EDGE_COUNT (dstbb->succs))
	    {
	      if (symtab->dump_file)
		fprintf (symtab->dump_file,
			 "Edge count mismatch for bb %i.\n",
			 srcbb->index);
	      match = false;
	      break;
	    }
	  for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
	    {
	      edge srce = EDGE_SUCC (srcbb, i);
	      edge dste = EDGE_SUCC (dstbb, i);
	      if (srce->dest->index != dste->dest->index)
		{
		  if (symtab->dump_file)
		    fprintf (symtab->dump_file,
			     "Succ edge mismatch for bb %i.\n",
			     srce->dest->index);
		  match = false;
		  break;
		}
	    }
	}
    }
  if (match)
    {
      struct cgraph_edge *e, *e2;
      basic_block srcbb, dstbb;

      /* TODO: merge also statement histograms.  */
      FOR_ALL_BB_FN (srcbb, srccfun)
	{
	  unsigned int i;

	  dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
	  dstbb->count += srcbb->count;
	  for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
	    {
	      edge srce = EDGE_SUCC (srcbb, i);
	      edge dste = EDGE_SUCC (dstbb, i);
	      dste->count += srce->count;
	    }
	}
      push_cfun (dstcfun);
      counts_to_freqs ();
      compute_function_frequency ();
      pop_cfun ();
      for (e = dst->callees; e; e = e->next_callee)
	{
	  if (e->speculative)
	    continue;
	  e->count = gimple_bb (e->call_stmt)->count;
	  e->frequency = compute_call_stmt_bb_frequency
			     (dst->decl,
			      gimple_bb (e->call_stmt));
	}
      for (e = dst->indirect_calls, e2 = src->indirect_calls; e;
	   e2 = (e2 ? e2->next_callee : NULL), e = e->next_callee)
	{
	  gcov_type count = gimple_bb (e->call_stmt)->count;
	  int freq = compute_call_stmt_bb_frequency
			(dst->decl,
			 gimple_bb (e->call_stmt));
	  /* When the call is speculative, we need to re-distribute
	     probabilities the same way as they were.  This is not really
	     correct because the speculation may differ in the other copy,
	     but it is probably not worth the effort.  */
	  if (e->speculative)
	    {
	      cgraph_edge *direct, *indirect;
	      cgraph_edge *direct2 = NULL, *indirect2 = NULL;
	      ipa_ref *ref;

	      e->speculative_call_info (direct, indirect, ref);
	      gcc_assert (e == indirect);
	      if (e2 && e2->speculative)
	        e2->speculative_call_info (direct2, indirect2, ref);
	      if (indirect->count || direct->count)
		{
		  /* We should have mismatched earlier if there is no matching
		     indirect edge.  */
		  if (!e2)
		    {
		      if (dump_file)
		        fprintf (dump_file,
				 "Mismatch in merging indirect edges\n");
		    }
		  else if (!e2->speculative)
		    indirect->count += e2->count;
		  else if (e2->speculative)
		    {
		      if (DECL_ASSEMBLER_NAME (direct2->callee->decl)
			  != DECL_ASSEMBLER_NAME (direct->callee->decl))
			{
			  if (direct2->count >= direct->count)
			    {
			      direct->redirect_callee (direct2->callee);
			      indirect->count += indirect2->count
						 + direct->count;
			      direct->count = direct2->count;
			    }
			  else
			    indirect->count += indirect2->count + direct2->count;
			}
		      else
			{
			   direct->count += direct2->count;
			   indirect->count += indirect2->count;
			}
		    }
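		  /* The merged count is split between the direct and the
		     indirect edge in proportion to their counts.  For
		     instance (illustrative numbers only), with
		     direct->count == 30, indirect->count == 10 and
		     REG_BR_PROB_BASE == 10000, PROB becomes 7500, so the
		     direct edge receives 3/4 of FREQ and the indirect edge
		     the remaining 1/4.  */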
		  int prob = RDIV (direct->count * REG_BR_PROB_BASE,
				   direct->count + indirect->count);
		  direct->frequency = RDIV (freq * prob, REG_BR_PROB_BASE);
		  indirect->frequency = RDIV (freq * (REG_BR_PROB_BASE - prob),
					      REG_BR_PROB_BASE);
		}
	      else
		/* At the moment we should have only profile feedback based
		   speculations when merging.  */
		gcc_unreachable ();
	    }
	  else if (e2 && e2->speculative)
	    {
	      cgraph_edge *direct, *indirect;
	      ipa_ref *ref;

	      e2->speculative_call_info (direct, indirect, ref);
	      e->count = count;
	      e->frequency = freq;
	      int prob = RDIV (direct->count * REG_BR_PROB_BASE, e->count);
	      e->make_speculative (direct->callee, direct->count,
				   RDIV (freq * prob, REG_BR_PROB_BASE));
	    }
	  else
	    {
	      e->count = count;
	      e->frequency = freq;
	    }
	}
      if (!preserve_body)
        src->release_body ();
      inline_update_overall_summary (dst);
    }
  /* TODO: if there is no match, we can scale up.  */
  src->decl = oldsrcdecl;
}

/* Return true if the call to DEST is known to be a self-recursive call
   within FUNC.  */

bool
recursive_call_p (tree func, tree dest)
{
  struct cgraph_node *dest_node = cgraph_node::get_create (dest);
  struct cgraph_node *cnode = cgraph_node::get_create (func);
  ipa_ref *alias;
  enum availability avail;

  gcc_assert (!cnode->alias);
  if (cnode != dest_node->ultimate_alias_target (&avail))
    return false;
  if (avail >= AVAIL_AVAILABLE)
    return true;
  if (!dest_node->semantically_equivalent_p (cnode))
    return false;
  /* If there is only one way to call the function or we know all of them
     are semantically equivalent, we still can consider the call recursive.  */
  FOR_EACH_ALIAS (cnode, alias)
    if (!dest_node->semantically_equivalent_p (alias->referring))
      return false;
  return true;
}
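
/* Example (illustrative sketch, not part of the original file): a typical
   caller-side check from a pass walking gimple call statements in the body
   of the current function; STMT names a gcall and is hypothetical here.

     tree callee = gimple_call_fndecl (stmt);
     if (callee && recursive_call_p (current_function_decl, callee))
       ... the call is known to be self-recursive ...
*/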