xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/ipa-profile.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
1 /* Basic IPA optimizations based on profile.
2    Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* ipa-profile pass implements the following analysis propagating profille
21    inter-procedurally.
22 
23    - Count histogram construction.  This is a histogram analyzing how much
24      time is spent executing statements with a given execution count read
25      from profile feedback. This histogram is complete only with LTO,
26      otherwise it contains information only about the current unit.
27 
28      The information is used to set hot/cold thresholds.
29    - Next speculative indirect call resolution is performed:  the local
30      profile pass assigns profile-id to each function and provide us with a
31      histogram specifying the most common target.  We look up the callgraph
32      node corresponding to the target and produce a speculative call.
33 
34      This call may or may not survive through IPA optimization based on decision
35      of inliner.
36    - Finally we propagate the following flags: unlikely executed, executed
37      once, executed at startup and executed at exit.  These flags are used to
38      control code size/performance threshold and code placement (by producing
39      .text.unlikely/.text.hot/.text.startup/.text.exit subsections).  */
40 #include "config.h"
41 #include "system.h"
42 #include "coretypes.h"
43 #include "backend.h"
44 #include "tree.h"
45 #include "gimple.h"
46 #include "predict.h"
47 #include "alloc-pool.h"
48 #include "tree-pass.h"
49 #include "cgraph.h"
50 #include "data-streamer.h"
51 #include "gimple-iterator.h"
52 #include "ipa-utils.h"
53 #include "profile.h"
54 #include "value-prof.h"
55 #include "tree-inline.h"
56 #include "symbol-summary.h"
57 #include "tree-vrp.h"
58 #include "ipa-prop.h"
59 #include "ipa-fnsummary.h"
60 
61 /* Entry in the histogram.  */
62 
63 struct histogram_entry
64 {
65   gcov_type count;
66   int time;
67   int size;
68 };
69 
70 /* Histogram of profile values.
71    The histogram is represented as an ordered vector of entries allocated via
72    histogram_pool. During construction a separate hashtable is kept to lookup
73    duplicate entries.  */
74 
75 vec<histogram_entry *> histogram;
76 static object_allocator<histogram_entry> histogram_pool ("IPA histogram");
77 
78 /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR.  */
79 
80 struct histogram_hash : nofree_ptr_hash <histogram_entry>
81 {
82   static inline hashval_t hash (const histogram_entry *);
83   static inline int equal (const histogram_entry *, const histogram_entry *);
84 };
85 
86 inline hashval_t
hash(const histogram_entry * val)87 histogram_hash::hash (const histogram_entry *val)
88 {
89   return val->count;
90 }
91 
92 inline int
equal(const histogram_entry * val,const histogram_entry * val2)93 histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
94 {
95   return val->count == val2->count;
96 }
97 
98 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
99    HASHTABLE is the on-side hash kept to avoid duplicates.  */
100 
101 static void
account_time_size(hash_table<histogram_hash> * hashtable,vec<histogram_entry * > & histogram,gcov_type count,int time,int size)102 account_time_size (hash_table<histogram_hash> *hashtable,
103 		   vec<histogram_entry *> &histogram,
104 		   gcov_type count, int time, int size)
105 {
106   histogram_entry key = {count, 0, 0};
107   histogram_entry **val = hashtable->find_slot (&key, INSERT);
108 
109   if (!*val)
110     {
111       *val = histogram_pool.allocate ();
112       **val = key;
113       histogram.safe_push (*val);
114     }
115   (*val)->time += time;
116   (*val)->size += size;
117 }
118 
119 int
cmp_counts(const void * v1,const void * v2)120 cmp_counts (const void *v1, const void *v2)
121 {
122   const histogram_entry *h1 = *(const histogram_entry * const *)v1;
123   const histogram_entry *h2 = *(const histogram_entry * const *)v2;
124   if (h1->count < h2->count)
125     return 1;
126   if (h1->count > h2->count)
127     return -1;
128   return 0;
129 }
130 
131 /* Dump HISTOGRAM to FILE.  */
132 
133 static void
dump_histogram(FILE * file,vec<histogram_entry * > histogram)134 dump_histogram (FILE *file, vec<histogram_entry *> histogram)
135 {
136   unsigned int i;
137   gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0,
138 	    overall_size = 0;
139 
140   fprintf (dump_file, "Histogram:\n");
141   for (i = 0; i < histogram.length (); i++)
142     {
143       overall_time += histogram[i]->count * histogram[i]->time;
144       overall_size += histogram[i]->size;
145     }
146   if (!overall_time)
147     overall_time = 1;
148   if (!overall_size)
149     overall_size = 1;
150   for (i = 0; i < histogram.length (); i++)
151     {
152       cumulated_time += histogram[i]->count * histogram[i]->time;
153       cumulated_size += histogram[i]->size;
154       fprintf (file, "  %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n",
155 	       (int64_t) histogram[i]->count,
156 	       histogram[i]->time,
157 	       cumulated_time * 100.0 / overall_time,
158 	       histogram[i]->size,
159 	       cumulated_size * 100.0 / overall_size);
160    }
161 }
162 
163 /* Structure containing speculative target information from profile.  */
164 
165 struct speculative_call_target
166 {
167   speculative_call_target (unsigned int id = 0, int prob = 0)
target_idspeculative_call_target168     : target_id (id), target_probability (prob)
169   {
170   }
171 
172   /* Profile_id of target obtained from profile.  */
173   unsigned int target_id;
174   /* Probability that call will land in function with target_id.  */
175   unsigned int target_probability;
176 };
177 
178 class speculative_call_summary
179 {
180 public:
speculative_call_summary()181   speculative_call_summary () : speculative_call_targets ()
182   {}
183 
184   auto_vec<speculative_call_target> speculative_call_targets;
185 
186   void dump (FILE *f);
187 
188 };
189 
190   /* Class to manage call summaries.  */
191 
192 class ipa_profile_call_summaries
193   : public call_summary<speculative_call_summary *>
194 {
195 public:
ipa_profile_call_summaries(symbol_table * table)196   ipa_profile_call_summaries (symbol_table *table)
197     : call_summary<speculative_call_summary *> (table)
198   {}
199 
200   /* Duplicate info when an edge is cloned.  */
201   virtual void duplicate (cgraph_edge *, cgraph_edge *,
202 			  speculative_call_summary *old_sum,
203 			  speculative_call_summary *new_sum);
204 };
205 
206 static ipa_profile_call_summaries *call_sums = NULL;
207 
208 /* Dump all information in speculative call summary to F.  */
209 
210 void
dump(FILE * f)211 speculative_call_summary::dump (FILE *f)
212 {
213   cgraph_node *n2;
214 
215   unsigned spec_count = speculative_call_targets.length ();
216   for (unsigned i = 0; i < spec_count; i++)
217     {
218       speculative_call_target item = speculative_call_targets[i];
219       n2 = find_func_by_profile_id (item.target_id);
220       if (n2)
221 	fprintf (f, "    The %i speculative target is %s with prob %3.2f\n", i,
222 		 n2->dump_name (),
223 		 item.target_probability / (float) REG_BR_PROB_BASE);
224       else
225 	fprintf (f, "    The %i speculative target is %u with prob %3.2f\n", i,
226 		 item.target_id,
227 		 item.target_probability / (float) REG_BR_PROB_BASE);
228     }
229 }
230 
231 /* Duplicate info when an edge is cloned.  */
232 
233 void
duplicate(cgraph_edge *,cgraph_edge *,speculative_call_summary * old_sum,speculative_call_summary * new_sum)234 ipa_profile_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
235 				       speculative_call_summary *old_sum,
236 				       speculative_call_summary *new_sum)
237 {
238   if (!old_sum)
239     return;
240 
241   unsigned old_count = old_sum->speculative_call_targets.length ();
242   if (!old_count)
243     return;
244 
245   new_sum->speculative_call_targets.reserve_exact (old_count);
246   new_sum->speculative_call_targets.quick_grow_cleared (old_count);
247 
248   for (unsigned i = 0; i < old_count; i++)
249     {
250       new_sum->speculative_call_targets[i]
251 	= old_sum->speculative_call_targets[i];
252     }
253 }
254 
255 /* Collect histogram and speculative target summaries from CFG profiles.  */
256 
257 static void
ipa_profile_generate_summary(void)258 ipa_profile_generate_summary (void)
259 {
260   struct cgraph_node *node;
261   gimple_stmt_iterator gsi;
262   basic_block bb;
263 
264   hash_table<histogram_hash> hashtable (10);
265 
266   gcc_checking_assert (!call_sums);
267   call_sums = new ipa_profile_call_summaries (symtab);
268 
269   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
270     if (ENTRY_BLOCK_PTR_FOR_FN
271 	  (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ())
272       FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
273 	{
274 	  int time = 0;
275 	  int size = 0;
276 	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
277 	    {
278 	      gimple *stmt = gsi_stmt (gsi);
279 	      if (gimple_code (stmt) == GIMPLE_CALL
280 		  && !gimple_call_fndecl (stmt))
281 		{
282 		  histogram_value h;
283 		  h = gimple_histogram_value_of_type
284 			(DECL_STRUCT_FUNCTION (node->decl),
285 			 stmt, HIST_TYPE_INDIR_CALL);
286 		  /* No need to do sanity check: gimple_ic_transform already
287 		     takes away bad histograms.  */
288 		  if (h)
289 		    {
290 		      gcov_type val, count, all;
291 		      struct cgraph_edge *e = node->get_edge (stmt);
292 		      if (e && !e->indirect_unknown_callee)
293 			continue;
294 
295 		      speculative_call_summary *csum
296 			= call_sums->get_create (e);
297 
298 		      for (unsigned j = 0; j < GCOV_TOPN_VALUES; j++)
299 			{
300 			  if (!get_nth_most_common_value (NULL, "indirect call",
301 							  h, &val, &count, &all,
302 							  j))
303 			    continue;
304 
305 			  if (val == 0 || count == 0)
306 			    continue;
307 
308 			  if (count > all)
309 			    {
310 			      if (dump_file)
311 				fprintf (dump_file,
312 					 "Probability capped to 1\n");
313 			      count = all;
314 			    }
315 			  speculative_call_target item (
316 			    val, GCOV_COMPUTE_SCALE (count, all));
317 			  csum->speculative_call_targets.safe_push (item);
318 			}
319 
320 		      gimple_remove_histogram_value
321 			 (DECL_STRUCT_FUNCTION (node->decl), stmt, h);
322 		    }
323 		}
324 	      time += estimate_num_insns (stmt, &eni_time_weights);
325 	      size += estimate_num_insns (stmt, &eni_size_weights);
326 	    }
327 	  if (bb->count.ipa_p () && bb->count.initialized_p ())
328 	    account_time_size (&hashtable, histogram,
329 			       bb->count.ipa ().to_gcov_type (),
330 			       time, size);
331 	}
332   histogram.qsort (cmp_counts);
333 }
334 
335 /* Serialize the speculative summary info for LTO.  */
336 
337 static void
ipa_profile_write_edge_summary(lto_simple_output_block * ob,speculative_call_summary * csum)338 ipa_profile_write_edge_summary (lto_simple_output_block *ob,
339 				speculative_call_summary *csum)
340 {
341   unsigned len = 0;
342 
343   len = csum->speculative_call_targets.length ();
344 
345   gcc_assert (len <= GCOV_TOPN_VALUES);
346 
347   streamer_write_hwi_stream (ob->main_stream, len);
348 
349   if (len)
350     {
351       unsigned spec_count = csum->speculative_call_targets.length ();
352       for (unsigned i = 0; i < spec_count; i++)
353 	{
354 	  speculative_call_target item = csum->speculative_call_targets[i];
355 	  gcc_assert (item.target_id);
356 	  streamer_write_hwi_stream (ob->main_stream, item.target_id);
357 	  streamer_write_hwi_stream (ob->main_stream, item.target_probability);
358 	}
359     }
360 }
361 
362 /* Serialize the ipa info for lto.  */
363 
364 static void
ipa_profile_write_summary(void)365 ipa_profile_write_summary (void)
366 {
367   struct lto_simple_output_block *ob
368     = lto_create_simple_output_block (LTO_section_ipa_profile);
369   unsigned int i;
370 
371   streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
372   for (i = 0; i < histogram.length (); i++)
373     {
374       streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
375       streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
376       streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
377     }
378 
379   if (!call_sums)
380     return;
381 
382   /* Serialize speculative targets information.  */
383   unsigned int count = 0;
384   lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
385   lto_symtab_encoder_iterator lsei;
386   cgraph_node *node;
387 
388   for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
389        lsei_next_function_in_partition (&lsei))
390     {
391       node = lsei_cgraph_node (lsei);
392       if (node->definition && node->has_gimple_body_p ()
393 	  && node->indirect_calls)
394 	count++;
395     }
396 
397   streamer_write_uhwi_stream (ob->main_stream, count);
398 
399   /* Process all of the functions.  */
400   for (lsei = lsei_start_function_in_partition (encoder);
401        !lsei_end_p (lsei) && count; lsei_next_function_in_partition (&lsei))
402     {
403       cgraph_node *node = lsei_cgraph_node (lsei);
404       if (node->definition && node->has_gimple_body_p ()
405 	  && node->indirect_calls)
406 	{
407 	  int node_ref = lto_symtab_encoder_encode (encoder, node);
408 	  streamer_write_uhwi_stream (ob->main_stream, node_ref);
409 
410 	  for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
411 	    {
412 	      speculative_call_summary *csum = call_sums->get_create (e);
413 	      ipa_profile_write_edge_summary (ob, csum);
414 	    }
415       }
416     }
417 
418   lto_destroy_simple_output_block (ob);
419 }
420 
421 /* Dump all profile summary data for all cgraph nodes and edges to file F.  */
422 
423 static void
ipa_profile_dump_all_summaries(FILE * f)424 ipa_profile_dump_all_summaries (FILE *f)
425 {
426   fprintf (dump_file,
427 	   "\n========== IPA-profile speculative targets: ==========\n");
428   cgraph_node *node;
429   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
430     {
431       fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
432       for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
433 	{
434 	  fprintf (f, "  Summary for %s of indirect edge %d:\n",
435 		   e->caller->dump_name (), e->lto_stmt_uid);
436 	  speculative_call_summary *csum = call_sums->get_create (e);
437 	  csum->dump (f);
438 	}
439     }
440   fprintf (f, "\n\n");
441 }
442 
443 /* Read speculative targets information about edge for LTO WPA.  */
444 
445 static void
ipa_profile_read_edge_summary(class lto_input_block * ib,cgraph_edge * edge)446 ipa_profile_read_edge_summary (class lto_input_block *ib, cgraph_edge *edge)
447 {
448   unsigned i, len;
449 
450   len = streamer_read_hwi (ib);
451   gcc_assert (len <= GCOV_TOPN_VALUES);
452 
453   speculative_call_summary *csum = call_sums->get_create (edge);
454 
455   for (i = 0; i < len; i++)
456   {
457     unsigned int target_id = streamer_read_hwi (ib);
458     int target_probability = streamer_read_hwi (ib);
459     speculative_call_target item (target_id, target_probability);
460     csum->speculative_call_targets.safe_push (item);
461   }
462 }
463 
464 /* Read profile speculative targets section information for LTO WPA.  */
465 
466 static void
ipa_profile_read_summary_section(struct lto_file_decl_data * file_data,class lto_input_block * ib)467 ipa_profile_read_summary_section (struct lto_file_decl_data *file_data,
468 				  class lto_input_block *ib)
469 {
470   if (!ib)
471     return;
472 
473   lto_symtab_encoder_t encoder = file_data->symtab_node_encoder;
474 
475   unsigned int count = streamer_read_uhwi (ib);
476 
477   unsigned int i;
478   unsigned int index;
479   cgraph_node * node;
480 
481   for (i = 0; i < count; i++)
482     {
483       index = streamer_read_uhwi (ib);
484       encoder = file_data->symtab_node_encoder;
485       node
486 	= dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder, index));
487 
488       for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
489 	ipa_profile_read_edge_summary (ib, e);
490     }
491 }
492 
493 /* Deserialize the IPA histogram and speculative targets summary info for LTO.
494    */
495 
496 static void
ipa_profile_read_summary(void)497 ipa_profile_read_summary (void)
498 {
499   struct lto_file_decl_data ** file_data_vec
500     = lto_get_file_decl_data ();
501   struct lto_file_decl_data * file_data;
502   int j = 0;
503 
504   hash_table<histogram_hash> hashtable (10);
505 
506   gcc_checking_assert (!call_sums);
507   call_sums = new ipa_profile_call_summaries (symtab);
508 
509   while ((file_data = file_data_vec[j++]))
510     {
511       const char *data;
512       size_t len;
513       class lto_input_block *ib
514 	= lto_create_simple_input_block (file_data,
515 					 LTO_section_ipa_profile,
516 					 &data, &len);
517       if (ib)
518 	{
519           unsigned int num = streamer_read_uhwi (ib);
520 	  unsigned int n;
521 	  for (n = 0; n < num; n++)
522 	    {
523 	      gcov_type count = streamer_read_gcov_count (ib);
524 	      int time = streamer_read_uhwi (ib);
525 	      int size = streamer_read_uhwi (ib);
526 	      account_time_size (&hashtable, histogram,
527 				 count, time, size);
528 	    }
529 
530 	  ipa_profile_read_summary_section (file_data, ib);
531 
532 	  lto_destroy_simple_input_block (file_data,
533 					  LTO_section_ipa_profile,
534 					  ib, data, len);
535 	}
536     }
537   histogram.qsort (cmp_counts);
538 }
539 
540 /* Data used by ipa_propagate_frequency.  */
541 
542 struct ipa_propagate_frequency_data
543 {
544   cgraph_node *function_symbol;
545   bool maybe_unlikely_executed;
546   bool maybe_executed_once;
547   bool only_called_at_startup;
548   bool only_called_at_exit;
549 };
550 
551 /* Worker for ipa_propagate_frequency_1.  */
552 
553 static bool
ipa_propagate_frequency_1(struct cgraph_node * node,void * data)554 ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
555 {
556   struct ipa_propagate_frequency_data *d;
557   struct cgraph_edge *edge;
558 
559   d = (struct ipa_propagate_frequency_data *)data;
560   for (edge = node->callers;
561        edge && (d->maybe_unlikely_executed || d->maybe_executed_once
562 	        || d->only_called_at_startup || d->only_called_at_exit);
563        edge = edge->next_caller)
564     {
565       if (edge->caller != d->function_symbol)
566 	{
567           d->only_called_at_startup &= edge->caller->only_called_at_startup;
568 	  /* It makes sense to put main() together with the static constructors.
569 	     It will be executed for sure, but rest of functions called from
570 	     main are definitely not at startup only.  */
571 	  if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
572 	    d->only_called_at_startup = 0;
573           d->only_called_at_exit &= edge->caller->only_called_at_exit;
574 	}
575 
576       /* When profile feedback is available, do not try to propagate too hard;
577 	 counts are already good guide on function frequencies and roundoff
578 	 errors can make us to push function into unlikely section even when
579 	 it is executed by the train run.  Transfer the function only if all
580 	 callers are unlikely executed.  */
581       if (profile_info
582 	  && !(edge->callee->count.ipa () == profile_count::zero ())
583 	  && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
584 	      || (edge->caller->inlined_to
585 		  && edge->caller->inlined_to->frequency
586 		     != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
587 	  d->maybe_unlikely_executed = false;
588       if (edge->count.ipa ().initialized_p ()
589 	  && !edge->count.ipa ().nonzero_p ())
590 	continue;
591       switch (edge->caller->frequency)
592         {
593 	case NODE_FREQUENCY_UNLIKELY_EXECUTED:
594 	  break;
595 	case NODE_FREQUENCY_EXECUTED_ONCE:
596 	  {
597 	    if (dump_file && (dump_flags & TDF_DETAILS))
598 	      fprintf (dump_file, "  Called by %s that is executed once\n",
599 		       edge->caller->dump_name ());
600 	    d->maybe_unlikely_executed = false;
601 	    ipa_call_summary *s = ipa_call_summaries->get (edge);
602 	    if (s != NULL && s->loop_depth)
603 	      {
604 		d->maybe_executed_once = false;
605 		if (dump_file && (dump_flags & TDF_DETAILS))
606 		  fprintf (dump_file, "  Called in loop\n");
607 	      }
608 	    break;
609 	  }
610 	case NODE_FREQUENCY_HOT:
611 	case NODE_FREQUENCY_NORMAL:
612 	  if (dump_file && (dump_flags & TDF_DETAILS))
613 	    fprintf (dump_file, "  Called by %s that is normal or hot\n",
614 		     edge->caller->dump_name ());
615 	  d->maybe_unlikely_executed = false;
616 	  d->maybe_executed_once = false;
617 	  break;
618 	}
619     }
620   return edge != NULL;
621 }
622 
623 /* Return ture if NODE contains hot calls.  */
624 
625 bool
contains_hot_call_p(struct cgraph_node * node)626 contains_hot_call_p (struct cgraph_node *node)
627 {
628   struct cgraph_edge *e;
629   for (e = node->callees; e; e = e->next_callee)
630     if (e->maybe_hot_p ())
631       return true;
632     else if (!e->inline_failed
633 	     && contains_hot_call_p (e->callee))
634       return true;
635   for (e = node->indirect_calls; e; e = e->next_callee)
636     if (e->maybe_hot_p ())
637       return true;
638   return false;
639 }
640 
641 /* See if the frequency of NODE can be updated based on frequencies of its
642    callers.  */
643 bool
ipa_propagate_frequency(struct cgraph_node * node)644 ipa_propagate_frequency (struct cgraph_node *node)
645 {
646   struct ipa_propagate_frequency_data d = {node, true, true, true, true};
647   bool changed = false;
648 
649   /* We cannot propagate anything useful about externally visible functions
650      nor about virtuals.  */
651   if (!node->local
652       || node->alias
653       || (opt_for_fn (node->decl, flag_devirtualize)
654 	  && DECL_VIRTUAL_P (node->decl)))
655     return false;
656   gcc_assert (node->analyzed);
657   if (dump_file && (dump_flags & TDF_DETAILS))
658     fprintf (dump_file, "Processing frequency %s\n", node->dump_name ());
659 
660   node->call_for_symbol_and_aliases (ipa_propagate_frequency_1, &d,
661 				     true);
662 
663   if ((d.only_called_at_startup && !d.only_called_at_exit)
664       && !node->only_called_at_startup)
665     {
666        node->only_called_at_startup = true;
667        if (dump_file)
668          fprintf (dump_file, "Node %s promoted to only called at startup.\n",
669 		  node->dump_name ());
670        changed = true;
671     }
672   if ((d.only_called_at_exit && !d.only_called_at_startup)
673       && !node->only_called_at_exit)
674     {
675        node->only_called_at_exit = true;
676        if (dump_file)
677          fprintf (dump_file, "Node %s promoted to only called at exit.\n",
678 		  node->dump_name ());
679        changed = true;
680     }
681 
682   /* With profile we can decide on hot/normal based on count.  */
683   if (node->count. ipa().initialized_p ())
684     {
685       bool hot = false;
686       if (!(node->count. ipa() == profile_count::zero ())
687 	  && node->count. ipa() >= get_hot_bb_threshold ())
688 	hot = true;
689       if (!hot)
690 	hot |= contains_hot_call_p (node);
691       if (hot)
692 	{
693 	  if (node->frequency != NODE_FREQUENCY_HOT)
694 	    {
695 	      if (dump_file)
696 		fprintf (dump_file, "Node %s promoted to hot.\n",
697 			 node->dump_name ());
698 	      node->frequency = NODE_FREQUENCY_HOT;
699 	      return true;
700 	    }
701 	  return false;
702 	}
703       else if (node->frequency == NODE_FREQUENCY_HOT)
704 	{
705 	  if (dump_file)
706 	    fprintf (dump_file, "Node %s reduced to normal.\n",
707 		     node->dump_name ());
708 	  node->frequency = NODE_FREQUENCY_NORMAL;
709 	  changed = true;
710 	}
711     }
712   /* These come either from profile or user hints; never update them.  */
713   if (node->frequency == NODE_FREQUENCY_HOT
714       || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
715     return changed;
716   if (d.maybe_unlikely_executed)
717     {
718       node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
719       if (dump_file)
720 	fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
721 		 node->dump_name ());
722       changed = true;
723     }
724   else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
725     {
726       node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
727       if (dump_file)
728 	fprintf (dump_file, "Node %s promoted to executed once.\n",
729 		 node->dump_name ());
730       changed = true;
731     }
732   return changed;
733 }
734 
735 /* Check that number of arguments of N agrees with E.
736    Be conservative when summaries are not present.  */
737 
738 static bool
check_argument_count(struct cgraph_node * n,struct cgraph_edge * e)739 check_argument_count (struct cgraph_node *n, struct cgraph_edge *e)
740 {
741   if (!ipa_node_params_sum || !ipa_edge_args_sum)
742     return true;
743   class ipa_node_params *info = IPA_NODE_REF (n->function_symbol ());
744   if (!info)
745     return true;
746   ipa_edge_args *e_info = IPA_EDGE_REF (e);
747   if (!e_info)
748     return true;
749   if (ipa_get_param_count (info) != ipa_get_cs_argument_count (e_info)
750       && (ipa_get_param_count (info) >= ipa_get_cs_argument_count (e_info)
751 	  || !stdarg_p (TREE_TYPE (n->decl))))
752     return false;
753   return true;
754 }
755 
756 /* Simple ipa profile pass propagating frequencies across the callgraph.  */
757 
758 static unsigned int
ipa_profile(void)759 ipa_profile (void)
760 {
761   struct cgraph_node **order;
762   struct cgraph_edge *e;
763   int order_pos;
764   bool something_changed = false;
765   int i;
766   gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
767   struct cgraph_node *n,*n2;
768   int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
769   int nmismatch = 0, nimpossible = 0;
770   bool node_map_initialized = false;
771   gcov_type threshold;
772 
773   if (dump_file)
774     dump_histogram (dump_file, histogram);
775   for (i = 0; i < (int)histogram.length (); i++)
776     {
777       overall_time += histogram[i]->count * histogram[i]->time;
778       overall_size += histogram[i]->size;
779     }
780   threshold = 0;
781   if (overall_time)
782     {
783       gcc_assert (overall_size);
784 
785       cutoff = (overall_time * param_hot_bb_count_ws_permille + 500) / 1000;
786       for (i = 0; cumulated < cutoff; i++)
787 	{
788 	  cumulated += histogram[i]->count * histogram[i]->time;
789           threshold = histogram[i]->count;
790 	}
791       if (!threshold)
792 	threshold = 1;
793       if (dump_file)
794 	{
795 	  gcov_type cumulated_time = 0, cumulated_size = 0;
796 
797           for (i = 0;
798 	       i < (int)histogram.length () && histogram[i]->count >= threshold;
799 	       i++)
800 	    {
801 	      cumulated_time += histogram[i]->count * histogram[i]->time;
802 	      cumulated_size += histogram[i]->size;
803 	    }
804 	  fprintf (dump_file, "Determined min count: %" PRId64
805 		   " Time:%3.2f%% Size:%3.2f%%\n",
806 		   (int64_t)threshold,
807 		   cumulated_time * 100.0 / overall_time,
808 		   cumulated_size * 100.0 / overall_size);
809 	}
810 
811       if (in_lto_p)
812 	{
813 	  if (dump_file)
814 	    fprintf (dump_file, "Setting hotness threshold in LTO mode.\n");
815           set_hot_bb_threshold (threshold);
816 	}
817     }
818   histogram.release ();
819   histogram_pool.release ();
820 
821   /* Produce speculative calls: we saved common target from profiling into
822      e->target_id.  Now, at link time, we can look up corresponding
823      function node and produce speculative call.  */
824 
825   gcc_checking_assert (call_sums);
826 
827   if (dump_file)
828     {
829       if (!node_map_initialized)
830 	init_node_map (false);
831       node_map_initialized = true;
832 
833       ipa_profile_dump_all_summaries (dump_file);
834     }
835 
836   FOR_EACH_DEFINED_FUNCTION (n)
837     {
838       bool update = false;
839 
840       if (!opt_for_fn (n->decl, flag_ipa_profile))
841 	continue;
842 
843       for (e = n->indirect_calls; e; e = e->next_callee)
844 	{
845 	  if (n->count.initialized_p ())
846 	    nindirect++;
847 
848 	  speculative_call_summary *csum = call_sums->get_create (e);
849 	  unsigned spec_count = csum->speculative_call_targets.length ();
850 	  if (spec_count)
851 	    {
852 	      if (!node_map_initialized)
853 		init_node_map (false);
854 	      node_map_initialized = true;
855 	      ncommon++;
856 
857 	      if (in_lto_p)
858 		{
859 		  if (dump_file)
860 		    {
861 		      fprintf (dump_file,
862 			       "Updating hotness threshold in LTO mode.\n");
863 		      fprintf (dump_file, "Updated min count: %" PRId64 "\n",
864 			       (int64_t) threshold / spec_count);
865 		    }
866 		  set_hot_bb_threshold (threshold / spec_count);
867 		}
868 
869 	      unsigned speculative_id = 0;
870 	      profile_count orig = e->count;
871 	      for (unsigned i = 0; i < spec_count; i++)
872 		{
873 		  speculative_call_target item
874 		    = csum->speculative_call_targets[i];
875 		  n2 = find_func_by_profile_id (item.target_id);
876 		  if (n2)
877 		    {
878 		      if (dump_file)
879 			{
880 			  fprintf (dump_file,
881 				   "Indirect call -> direct call from"
882 				   " other module %s => %s, prob %3.2f\n",
883 				   n->dump_name (),
884 				   n2->dump_name (),
885 				   item.target_probability
886 				     / (float) REG_BR_PROB_BASE);
887 			}
888 		      if (item.target_probability
889 		 	  < REG_BR_PROB_BASE / GCOV_TOPN_VALUES / 2)
890 			{
891 			  nuseless++;
892 			  if (dump_file)
893 			    fprintf (dump_file,
894 				     "Not speculating: "
895 				     "probability is too low.\n");
896 			}
897 		      else if (!e->maybe_hot_p ())
898 			{
899 			  nuseless++;
900 			  if (dump_file)
901 			    fprintf (dump_file,
902 				     "Not speculating: call is cold.\n");
903 			}
904 		      else if (n2->get_availability () <= AVAIL_INTERPOSABLE
905 			       && n2->can_be_discarded_p ())
906 			{
907 			  nuseless++;
908 			  if (dump_file)
909 			    fprintf (dump_file,
910 				     "Not speculating: target is overwritable "
911 				     "and can be discarded.\n");
912 			}
913 		      else if (!check_argument_count (n2, e))
914 			{
915 			  nmismatch++;
916 			  if (dump_file)
917 			    fprintf (dump_file,
918 				     "Not speculating: "
919 				     "parameter count mismatch\n");
920 			}
921 		      else if (e->indirect_info->polymorphic
922 			       && !opt_for_fn (n->decl, flag_devirtualize)
923 			       && !possible_polymorphic_call_target_p (e, n2))
924 			{
925 			  nimpossible++;
926 			  if (dump_file)
927 			    fprintf (dump_file,
928 				     "Not speculating: "
929 				     "function is not in the polymorphic "
930 				     "call target list\n");
931 			}
932 		      else
933 			{
934 			  /* Target may be overwritable, but profile says that
935 			     control flow goes to this particular implementation
936 			     of N2.  Speculate on the local alias to allow
937 			     inlining.  */
938 			  if (!n2->can_be_discarded_p ())
939 			    {
940 			      cgraph_node *alias;
941 			      alias = dyn_cast<cgraph_node *>
942 				   (n2->noninterposable_alias ());
943 			      if (alias)
944 				n2 = alias;
945 			    }
946 			  nconverted++;
947 			  profile_probability prob
948 				 = profile_probability::from_reg_br_prob_base
949 					(item.target_probability).adjusted ();
950 			  e->make_speculative (n2,
951 					       orig.apply_probability (prob),
952 					       speculative_id);
953 			  update = true;
954 			  speculative_id++;
955 			}
956 		    }
957 		  else
958 		    {
959 		      if (dump_file)
960 			fprintf (dump_file,
961 				 "Function with profile-id %i not found.\n",
962 				 item.target_id);
963 		      nunknown++;
964 		    }
965 		}
966 	    }
967 	}
968       if (update)
969 	ipa_update_overall_fn_summary (n);
970     }
971   if (node_map_initialized)
972     del_node_map ();
973   if (dump_file && nindirect)
974     fprintf (dump_file,
975 	     "%i indirect calls trained.\n"
976 	     "%i (%3.2f%%) have common target.\n"
977 	     "%i (%3.2f%%) targets was not found.\n"
978 	     "%i (%3.2f%%) targets had parameter count mismatch.\n"
979 	     "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
980 	     "%i (%3.2f%%) speculations seems useless.\n"
981 	     "%i (%3.2f%%) speculations produced.\n",
982 	     nindirect,
983 	     ncommon, ncommon * 100.0 / nindirect,
984 	     nunknown, nunknown * 100.0 / nindirect,
985 	     nmismatch, nmismatch * 100.0 / nindirect,
986 	     nimpossible, nimpossible * 100.0 / nindirect,
987 	     nuseless, nuseless * 100.0 / nindirect,
988 	     nconverted, nconverted * 100.0 / nindirect);
989 
990   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
991   order_pos = ipa_reverse_postorder (order);
992   for (i = order_pos - 1; i >= 0; i--)
993     {
994       if (order[i]->local
995 	  && opt_for_fn (order[i]->decl, flag_ipa_profile)
996 	  && ipa_propagate_frequency (order[i]))
997 	{
998 	  for (e = order[i]->callees; e; e = e->next_callee)
999 	    if (e->callee->local && !e->callee->aux)
1000 	      {
1001 	        something_changed = true;
1002 	        e->callee->aux = (void *)1;
1003 	      }
1004 	}
1005       order[i]->aux = NULL;
1006     }
1007 
1008   while (something_changed)
1009     {
1010       something_changed = false;
1011       for (i = order_pos - 1; i >= 0; i--)
1012 	{
1013 	  if (order[i]->aux
1014 	      && opt_for_fn (order[i]->decl, flag_ipa_profile)
1015 	      && ipa_propagate_frequency (order[i]))
1016 	    {
1017 	      for (e = order[i]->callees; e; e = e->next_callee)
1018 		if (e->callee->local && !e->callee->aux)
1019 		  {
1020 		    something_changed = true;
1021 		    e->callee->aux = (void *)1;
1022 		  }
1023 	    }
1024 	  order[i]->aux = NULL;
1025 	}
1026     }
1027   free (order);
1028 
1029   if (dump_file && (dump_flags & TDF_DETAILS))
1030     symtab->dump (dump_file);
1031 
1032   delete call_sums;
1033   call_sums = NULL;
1034 
1035   return 0;
1036 }
1037 
1038 namespace {
1039 
1040 const pass_data pass_data_ipa_profile =
1041 {
1042   IPA_PASS, /* type */
1043   "profile_estimate", /* name */
1044   OPTGROUP_NONE, /* optinfo_flags */
1045   TV_IPA_PROFILE, /* tv_id */
1046   0, /* properties_required */
1047   0, /* properties_provided */
1048   0, /* properties_destroyed */
1049   0, /* todo_flags_start */
1050   0, /* todo_flags_finish */
1051 };
1052 
1053 class pass_ipa_profile : public ipa_opt_pass_d
1054 {
1055 public:
pass_ipa_profile(gcc::context * ctxt)1056   pass_ipa_profile (gcc::context *ctxt)
1057     : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
1058 		      ipa_profile_generate_summary, /* generate_summary */
1059 		      ipa_profile_write_summary, /* write_summary */
1060 		      ipa_profile_read_summary, /* read_summary */
1061 		      NULL, /* write_optimization_summary */
1062 		      NULL, /* read_optimization_summary */
1063 		      NULL, /* stmt_fixup */
1064 		      0, /* function_transform_todo_flags_start */
1065 		      NULL, /* function_transform */
1066 		      NULL) /* variable_transform */
1067   {}
1068 
1069   /* opt_pass methods: */
gate(function *)1070   virtual bool gate (function *) { return flag_ipa_profile || in_lto_p; }
execute(function *)1071   virtual unsigned int execute (function *) { return ipa_profile (); }
1072 
1073 }; // class pass_ipa_profile
1074 
1075 } // anon namespace
1076 
1077 ipa_opt_pass_d *
make_pass_ipa_profile(gcc::context * ctxt)1078 make_pass_ipa_profile (gcc::context *ctxt)
1079 {
1080   return new pass_ipa_profile (ctxt);
1081 }
1082