xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/auto-profile.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /* Read and annotate call graph profile from the auto profile data file.
2    Copyright (C) 2014-2015 Free Software Foundation, Inc.
3    Contributed by Dehao Chen (dehao@google.com)
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #define INCLUDE_MAP
23 #define INCLUDE_SET
24 #include "system.h"
25 
26 #include "coretypes.h"
27 #include "hash-set.h"
28 #include "machmode.h"
29 #include "vec.h"
30 #include "double-int.h"
31 #include "input.h"
32 #include "alias.h"
33 #include "symtab.h"
34 #include "options.h"
35 #include "wide-int.h"
36 #include "inchash.h"
37 #include "tree.h"
38 #include "fold-const.h"
39 #include "tree-pass.h"
40 #include "flags.h"
41 #include "predict.h"
42 #include "vec.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "machmode.h"
46 #include "tm.h"
47 #include "hard-reg-set.h"
48 #include "input.h"
49 #include "function.h"
50 #include "dominance.h"
51 #include "cfg.h"
52 #include "basic-block.h"
53 #include "diagnostic-core.h"
54 #include "gcov-io.h"
55 #include "profile.h"
56 #include "langhooks.h"
57 #include "opts.h"
58 #include "tree-pass.h"
59 #include "cfgloop.h"
60 #include "tree-ssa-alias.h"
61 #include "tree-cfg.h"
62 #include "tree-cfgcleanup.h"
63 #include "tree-ssa-operands.h"
64 #include "tree-into-ssa.h"
65 #include "internal-fn.h"
66 #include "is-a.h"
67 #include "gimple-expr.h"
68 #include "gimple.h"
69 #include "gimple-iterator.h"
70 #include "gimple-ssa.h"
71 #include "hash-map.h"
72 #include "plugin-api.h"
73 #include "ipa-ref.h"
74 #include "cgraph.h"
75 #include "value-prof.h"
76 #include "coverage.h"
77 #include "params.h"
78 #include "alloc-pool.h"
79 #include "symbol-summary.h"
80 #include "ipa-prop.h"
81 #include "ipa-inline.h"
82 #include "tree-inline.h"
83 #include "stringpool.h"
84 #include "auto-profile.h"
85 
86 /* The following routines implements AutoFDO optimization.
87 
88    This optimization uses sampling profiles to annotate basic block counts
89    and uses heuristics to estimate branch probabilities.
90 
91    There are three phases in AutoFDO:
92 
93    Phase 1: Read profile from the profile data file.
94      The following info is read from the profile datafile:
95         * string_table: a map between function name and its index.
96         * autofdo_source_profile: a map from function_instance name to
97           function_instance. This is represented as a forest of
98           function_instances.
99         * WorkingSet: a histogram of how many instructions are covered for a
100           given percentage of total cycles. This is describing the binary
101           level information (not source level). This info is used to help
102           decide if we want aggressive optimizations that could increase
103           code footprint (e.g. loop unroll etc.)
104      A function instance is an instance of function that could either be a
105      standalone symbol, or a clone of a function that is inlined into another
106      function.
107 
108    Phase 2: Early inline + value profile transformation.
109      Early inline uses autofdo_source_profile to find if a callsite is:
110         * inlined in the profiled binary.
111         * callee body is hot in the profiling run.
112      If both condition satisfies, early inline will inline the callsite
113      regardless of the code growth.
114      Phase 2 is an iterative process. During each iteration, we also check
115      if an indirect callsite is promoted and inlined in the profiling run.
116      If yes, vpt will happen to force promote it and in the next iteration,
117      einline will inline the promoted callsite in the next iteration.
118 
119    Phase 3: Annotate control flow graph.
120      AutoFDO uses a separate pass to:
121         * Annotate basic block count
122         * Estimate branch probability
123 
124    After the above 3 phases, all profile is readily annotated on the GCC IR.
125    AutoFDO tries to reuse all FDO infrastructure as much as possible to make
126    use of the profile. E.g. it uses existing mechanism to calculate the basic
127    block/edge frequency, as well as the cgraph node/edge count.
128 */
129 
130 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
131 #define AUTO_PROFILE_VERSION 1
132 
133 namespace autofdo
134 {
135 
136 /* Represent a source location: (function_decl, lineno).  */
137 typedef std::pair<tree, unsigned> decl_lineno;
138 
139 /* Represent an inline stack. vector[0] is the leaf node.  */
140 typedef auto_vec<decl_lineno> inline_stack;
141 
142 /* String array that stores function names.  */
143 typedef auto_vec<char *> string_vector;
144 
145 /* Map from function name's index in string_table to target's
146    execution count.  */
147 typedef std::map<unsigned, gcov_type> icall_target_map;
148 
149 /* Set of gimple stmts. Used to track if the stmt has already been promoted
150    to direct call.  */
151 typedef std::set<gimple> stmt_set;
152 
153 /* Represent count info of an inline stack.  */
154 struct count_info
155 {
156   /* Sampled count of the inline stack.  */
157   gcov_type count;
158 
159   /* Map from indirect call target to its sample count.  */
160   icall_target_map targets;
161 
162   /* Whether this inline stack is already used in annotation.
163 
164      Each inline stack should only be used to annotate IR once.
165      This will be enforced when instruction-level discriminator
166      is supported.  */
167   bool annotated;
168 };
169 
170 /* operator< for "const char *".  */
171 struct string_compare
172 {
173   bool operator()(const char *a, const char *b) const
174   {
175     return strcmp (a, b) < 0;
176   }
177 };
178 
179 /* Store a string array, indexed by string position in the array.  */
180 class string_table
181 {
182 public:
183   string_table ()
184   {}
185 
186   ~string_table ();
187 
188   /* For a given string, returns its index.  */
189   int get_index (const char *name) const;
190 
191   /* For a given decl, returns the index of the decl name.  */
192   int get_index_by_decl (tree decl) const;
193 
194   /* For a given index, returns the string.  */
195   const char *get_name (int index) const;
196 
197   /* Read profile, return TRUE on success.  */
198   bool read ();
199 
200 private:
201   typedef std::map<const char *, unsigned, string_compare> string_index_map;
202   string_vector vector_;
203   string_index_map map_;
204 };
205 
206 /* Profile of a function instance:
207      1. total_count of the function.
208      2. head_count (entry basic block count) of the function (only valid when
209         function is a top-level function_instance, i.e. it is the original copy
210         instead of the inlined copy).
211      3. map from source location (decl_lineno) to profile (count_info).
212      4. map from callsite to callee function_instance.  */
213 class function_instance
214 {
215 public:
216   typedef auto_vec<function_instance *> function_instance_stack;
217 
218   /* Read the profile and return a function_instance with head count as
219      HEAD_COUNT. Recursively read callsites to create nested function_instances
220      too. STACK is used to track the recursive creation process.  */
221   static function_instance *
222   read_function_instance (function_instance_stack *stack,
223                           gcov_type head_count);
224 
225   /* Recursively deallocate all callsites (nested function_instances).  */
226   ~function_instance ();
227 
228   /* Accessors.  */
229   int
230   name () const
231   {
232     return name_;
233   }
234   gcov_type
235   total_count () const
236   {
237     return total_count_;
238   }
239   gcov_type
240   head_count () const
241   {
242     return head_count_;
243   }
244 
245   /* Traverse callsites of the current function_instance to find one at the
246      location of LINENO and callee name represented in DECL.  */
247   function_instance *get_function_instance_by_decl (unsigned lineno,
248                                                     tree decl) const;
249 
250   /* Store the profile info for LOC in INFO. Return TRUE if profile info
251      is found.  */
252   bool get_count_info (location_t loc, count_info *info) const;
253 
254   /* Read the inlined indirect call target profile for STMT and store it in
255      MAP, return the total count for all inlined indirect calls.  */
256   gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
257 
258   /* Sum of counts that is used during annotation.  */
259   gcov_type total_annotated_count () const;
260 
261   /* Mark LOC as annotated.  */
262   void mark_annotated (location_t loc);
263 
264 private:
265   /* Callsite, represented as (decl_lineno, callee_function_name_index).  */
266   typedef std::pair<unsigned, unsigned> callsite;
267 
268   /* Map from callsite to callee function_instance.  */
269   typedef std::map<callsite, function_instance *> callsite_map;
270 
271   function_instance (unsigned name, gcov_type head_count)
272       : name_ (name), total_count_ (0), head_count_ (head_count)
273   {
274   }
275 
276   /* Map from source location (decl_lineno) to profile (count_info).  */
277   typedef std::map<unsigned, count_info> position_count_map;
278 
279   /* function_instance name index in the string_table.  */
280   unsigned name_;
281 
282   /* Total sample count.  */
283   gcov_type total_count_;
284 
285   /* Entry BB's sample count.  */
286   gcov_type head_count_;
287 
288   /* Map from callsite location to callee function_instance.  */
289   callsite_map callsites;
290 
291   /* Map from source location to count_info.  */
292   position_count_map pos_counts;
293 };
294 
295 /* Profile for all functions.  */
296 class autofdo_source_profile
297 {
298 public:
299   static autofdo_source_profile *
300   create ()
301   {
302     autofdo_source_profile *map = new autofdo_source_profile ();
303 
304     if (map->read ())
305       return map;
306     delete map;
307     return NULL;
308   }
309 
310   ~autofdo_source_profile ();
311 
312   /* For a given DECL, returns the top-level function_instance.  */
313   function_instance *get_function_instance_by_decl (tree decl) const;
314 
315   /* Find count_info for a given gimple STMT. If found, store the count_info
316      in INFO and return true; otherwise return false.  */
317   bool get_count_info (gimple stmt, count_info *info) const;
318 
319   /* Find total count of the callee of EDGE.  */
320   gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
321 
322   /* Update value profile INFO for STMT from the inlined indirect callsite.
323      Return true if INFO is updated.  */
324   bool update_inlined_ind_target (gcall *stmt, count_info *info);
325 
326   /* Mark LOC as annotated.  */
327   void mark_annotated (location_t loc);
328 
329 private:
330   /* Map from function_instance name index (in string_table) to
331      function_instance.  */
332   typedef std::map<unsigned, function_instance *> name_function_instance_map;
333 
334   autofdo_source_profile () {}
335 
336   /* Read AutoFDO profile and returns TRUE on success.  */
337   bool read ();
338 
339   /* Return the function_instance in the profile that correspond to the
340      inline STACK.  */
341   function_instance *
342   get_function_instance_by_inline_stack (const inline_stack &stack) const;
343 
344   name_function_instance_map map_;
345 };
346 
347 /* Store the strings read from the profile data file.  */
348 static string_table *afdo_string_table;
349 
350 /* Store the AutoFDO source profile.  */
351 static autofdo_source_profile *afdo_source_profile;
352 
353 /* gcov_ctr_summary structure to store the profile_info.  */
354 static struct gcov_ctr_summary *afdo_profile_info;
355 
356 /* Helper functions.  */
357 
358 /* Return the original name of NAME: strip the suffix that starts
359    with '.' Caller is responsible for freeing RET.  */
360 
361 static char *
362 get_original_name (const char *name)
363 {
364   char *ret = xstrdup (name);
365   char *find = strchr (ret, '.');
366   if (find != NULL)
367     *find = 0;
368   return ret;
369 }
370 
371 /* Return the combined location, which is a 32bit integer in which
372    higher 16 bits stores the line offset of LOC to the start lineno
373    of DECL, The lower 16 bits stores the discriminator.  */
374 
375 static unsigned
376 get_combined_location (location_t loc, tree decl)
377 {
378   /* TODO: allow more bits for line and less bits for discriminator.  */
379   if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
380     warning_at (loc, OPT_Woverflow, "Offset exceeds 16 bytes.");
381   return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
382 }
383 
384 /* Return the function decl of a given lexical BLOCK.  */
385 
386 static tree
387 get_function_decl_from_block (tree block)
388 {
389   tree decl;
390 
391   if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) == UNKNOWN_LOCATION)
392     return NULL_TREE;
393 
394   for (decl = BLOCK_ABSTRACT_ORIGIN (block);
395        decl && (TREE_CODE (decl) == BLOCK);
396        decl = BLOCK_ABSTRACT_ORIGIN (decl))
397     if (TREE_CODE (decl) == FUNCTION_DECL)
398       break;
399   return decl;
400 }
401 
402 /* Store inline stack for STMT in STACK.  */
403 
404 static void
405 get_inline_stack (location_t locus, inline_stack *stack)
406 {
407   if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
408     return;
409 
410   tree block = LOCATION_BLOCK (locus);
411   if (block && TREE_CODE (block) == BLOCK)
412     {
413       int level = 0;
414       for (block = BLOCK_SUPERCONTEXT (block);
415            block && (TREE_CODE (block) == BLOCK);
416            block = BLOCK_SUPERCONTEXT (block))
417         {
418           location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
419           if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
420             continue;
421 
422           tree decl = get_function_decl_from_block (block);
423           stack->safe_push (
424               std::make_pair (decl, get_combined_location (locus, decl)));
425           locus = tmp_locus;
426           level++;
427         }
428     }
429   stack->safe_push (
430       std::make_pair (current_function_decl,
431                       get_combined_location (locus, current_function_decl)));
432 }
433 
434 /* Return STMT's combined location, which is a 32bit integer in which
435    higher 16 bits stores the line offset of LOC to the start lineno
436    of DECL, The lower 16 bits stores the discriminator.  */
437 
438 static unsigned
439 get_relative_location_for_stmt (gimple stmt)
440 {
441   location_t locus = gimple_location (stmt);
442   if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
443     return UNKNOWN_LOCATION;
444 
445   for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
446        block = BLOCK_SUPERCONTEXT (block))
447     if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
448       return get_combined_location (locus,
449                                     get_function_decl_from_block (block));
450   return get_combined_location (locus, current_function_decl);
451 }
452 
453 /* Return true if BB contains indirect call.  */
454 
455 static bool
456 has_indirect_call (basic_block bb)
457 {
458   gimple_stmt_iterator gsi;
459 
460   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
461     {
462       gimple stmt = gsi_stmt (gsi);
463       if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
464           && (gimple_call_fn (stmt) == NULL
465               || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
466         return true;
467     }
468   return false;
469 }
470 
471 /* Member functions for string_table.  */
472 
473 /* Deconstructor.  */
474 
475 string_table::~string_table ()
476 {
477   for (unsigned i = 0; i < vector_.length (); i++)
478     free (vector_[i]);
479 }
480 
481 
482 /* Return the index of a given function NAME. Return -1 if NAME is not
483    found in string table.  */
484 
485 int
486 string_table::get_index (const char *name) const
487 {
488   if (name == NULL)
489     return -1;
490   string_index_map::const_iterator iter = map_.find (name);
491   if (iter == map_.end ())
492     return -1;
493 
494   return iter->second;
495 }
496 
497 /* Return the index of a given function DECL. Return -1 if DECL is not
498    found in string table.  */
499 
500 int
501 string_table::get_index_by_decl (tree decl) const
502 {
503   char *name
504       = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
505   int ret = get_index (name);
506   free (name);
507   if (ret != -1)
508     return ret;
509   ret = get_index (lang_hooks.dwarf_name (decl, 0));
510   if (ret != -1)
511     return ret;
512   if (DECL_ABSTRACT_ORIGIN (decl))
513     return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
514 
515   return -1;
516 }
517 
518 /* Return the function name of a given INDEX.  */
519 
520 const char *
521 string_table::get_name (int index) const
522 {
523   gcc_assert (index > 0 && index < (int)vector_.length ());
524   return vector_[index];
525 }
526 
527 /* Read the string table. Return TRUE if reading is successful.  */
528 
529 bool
530 string_table::read ()
531 {
532   if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
533     return false;
534   /* Skip the length of the section.  */
535   gcov_read_unsigned ();
536   /* Read in the file name table.  */
537   unsigned string_num = gcov_read_unsigned ();
538   for (unsigned i = 0; i < string_num; i++)
539     {
540       vector_.safe_push (get_original_name (gcov_read_string ()));
541       map_[vector_.last ()] = i;
542     }
543   return true;
544 }
545 
546 /* Member functions for function_instance.  */
547 
548 function_instance::~function_instance ()
549 {
550   for (callsite_map::iterator iter = callsites.begin ();
551        iter != callsites.end (); ++iter)
552     delete iter->second;
553 }
554 
555 /* Traverse callsites of the current function_instance to find one at the
556    location of LINENO and callee name represented in DECL.  */
557 
558 function_instance *
559 function_instance::get_function_instance_by_decl (unsigned lineno,
560                                                   tree decl) const
561 {
562   int func_name_idx = afdo_string_table->get_index_by_decl (decl);
563   if (func_name_idx != -1)
564     {
565       callsite_map::const_iterator ret
566           = callsites.find (std::make_pair (lineno, func_name_idx));
567       if (ret != callsites.end ())
568         return ret->second;
569     }
570   func_name_idx
571       = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
572   if (func_name_idx != -1)
573     {
574       callsite_map::const_iterator ret
575           = callsites.find (std::make_pair (lineno, func_name_idx));
576       if (ret != callsites.end ())
577         return ret->second;
578     }
579   if (DECL_ABSTRACT_ORIGIN (decl))
580     return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
581 
582   return NULL;
583 }
584 
585 /* Store the profile info for LOC in INFO. Return TRUE if profile info
586    is found.  */
587 
588 bool
589 function_instance::get_count_info (location_t loc, count_info *info) const
590 {
591   position_count_map::const_iterator iter = pos_counts.find (loc);
592   if (iter == pos_counts.end ())
593     return false;
594   *info = iter->second;
595   return true;
596 }
597 
598 /* Mark LOC as annotated.  */
599 
600 void
601 function_instance::mark_annotated (location_t loc)
602 {
603   position_count_map::iterator iter = pos_counts.find (loc);
604   if (iter == pos_counts.end ())
605     return;
606   iter->second.annotated = true;
607 }
608 
609 /* Read the inlined indirect call target profile for STMT and store it in
610    MAP, return the total count for all inlined indirect calls.  */
611 
612 gcov_type
613 function_instance::find_icall_target_map (gcall *stmt,
614                                           icall_target_map *map) const
615 {
616   gcov_type ret = 0;
617   unsigned stmt_offset = get_relative_location_for_stmt (stmt);
618 
619   for (callsite_map::const_iterator iter = callsites.begin ();
620        iter != callsites.end (); ++iter)
621     {
622       unsigned callee = iter->second->name ();
623       /* Check if callsite location match the stmt.  */
624       if (iter->first.first != stmt_offset)
625         continue;
626       struct cgraph_node *node = cgraph_node::get_for_asmname (
627           get_identifier (afdo_string_table->get_name (callee)));
628       if (node == NULL)
629         continue;
630       if (!check_ic_target (stmt, node))
631         continue;
632       (*map)[callee] = iter->second->total_count ();
633       ret += iter->second->total_count ();
634     }
635   return ret;
636 }
637 
638 /* Read the profile and create a function_instance with head count as
639    HEAD_COUNT. Recursively read callsites to create nested function_instances
640    too. STACK is used to track the recursive creation process.  */
641 
642 /* function instance profile format:
643 
644    ENTRY_COUNT: 8 bytes
645    NAME_INDEX: 4 bytes
646    NUM_POS_COUNTS: 4 bytes
647    NUM_CALLSITES: 4 byte
648    POS_COUNT_1:
649      POS_1_OFFSET: 4 bytes
650      NUM_TARGETS: 4 bytes
651      COUNT: 8 bytes
652      TARGET_1:
653        VALUE_PROFILE_TYPE: 4 bytes
654        TARGET_IDX: 8 bytes
655        COUNT: 8 bytes
656      TARGET_2
657      ...
658      TARGET_n
659    POS_COUNT_2
660    ...
661    POS_COUNT_N
662    CALLSITE_1:
663      CALLSITE_1_OFFSET: 4 bytes
664      FUNCTION_INSTANCE_PROFILE (nested)
665    CALLSITE_2
666    ...
667    CALLSITE_n.  */
668 
669 function_instance *
670 function_instance::read_function_instance (function_instance_stack *stack,
671                                            gcov_type head_count)
672 {
673   unsigned name = gcov_read_unsigned ();
674   unsigned num_pos_counts = gcov_read_unsigned ();
675   unsigned num_callsites = gcov_read_unsigned ();
676   function_instance *s = new function_instance (name, head_count);
677   stack->safe_push (s);
678 
679   for (unsigned i = 0; i < num_pos_counts; i++)
680     {
681       unsigned offset = gcov_read_unsigned () & 0xffff0000;
682       unsigned num_targets = gcov_read_unsigned ();
683       gcov_type count = gcov_read_counter ();
684       s->pos_counts[offset].count = count;
685       for (unsigned j = 0; j < stack->length (); j++)
686         (*stack)[j]->total_count_ += count;
687       for (unsigned j = 0; j < num_targets; j++)
688         {
689           /* Only indirect call target histogram is supported now.  */
690           gcov_read_unsigned ();
691           gcov_type target_idx = gcov_read_counter ();
692           s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
693         }
694     }
695   for (unsigned i = 0; i < num_callsites; i++)
696     {
697       unsigned offset = gcov_read_unsigned ();
698       function_instance *callee_function_instance
699           = read_function_instance (stack, 0);
700       s->callsites[std::make_pair (offset, callee_function_instance->name ())]
701           = callee_function_instance;
702     }
703   stack->pop ();
704   return s;
705 }
706 
707 /* Sum of counts that is used during annotation.  */
708 
709 gcov_type
710 function_instance::total_annotated_count () const
711 {
712   gcov_type ret = 0;
713   for (callsite_map::const_iterator iter = callsites.begin ();
714        iter != callsites.end (); ++iter)
715     ret += iter->second->total_annotated_count ();
716   for (position_count_map::const_iterator iter = pos_counts.begin ();
717        iter != pos_counts.end (); ++iter)
718     if (iter->second.annotated)
719       ret += iter->second.count;
720   return ret;
721 }
722 
723 /* Member functions for autofdo_source_profile.  */
724 
725 autofdo_source_profile::~autofdo_source_profile ()
726 {
727   for (name_function_instance_map::const_iterator iter = map_.begin ();
728        iter != map_.end (); ++iter)
729     delete iter->second;
730 }
731 
732 /* For a given DECL, returns the top-level function_instance.  */
733 
734 function_instance *
735 autofdo_source_profile::get_function_instance_by_decl (tree decl) const
736 {
737   int index = afdo_string_table->get_index_by_decl (decl);
738   if (index == -1)
739     return NULL;
740   name_function_instance_map::const_iterator ret = map_.find (index);
741   return ret == map_.end () ? NULL : ret->second;
742 }
743 
744 /* Find count_info for a given gimple STMT. If found, store the count_info
745    in INFO and return true; otherwise return false.  */
746 
747 bool
748 autofdo_source_profile::get_count_info (gimple stmt, count_info *info) const
749 {
750   if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
751     return false;
752 
753   inline_stack stack;
754   get_inline_stack (gimple_location (stmt), &stack);
755   if (stack.length () == 0)
756     return false;
757   function_instance *s = get_function_instance_by_inline_stack (stack);
758   if (s == NULL)
759     return false;
760   return s->get_count_info (stack[0].second, info);
761 }
762 
763 /* Mark LOC as annotated.  */
764 
765 void
766 autofdo_source_profile::mark_annotated (location_t loc)
767 {
768   inline_stack stack;
769   get_inline_stack (loc, &stack);
770   if (stack.length () == 0)
771     return;
772   function_instance *s = get_function_instance_by_inline_stack (stack);
773   if (s == NULL)
774     return;
775   s->mark_annotated (stack[0].second);
776 }
777 
778 /* Update value profile INFO for STMT from the inlined indirect callsite.
779    Return true if INFO is updated.  */
780 
781 bool
782 autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
783                                                    count_info *info)
784 {
785   if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
786     return false;
787 
788   count_info old_info;
789   get_count_info (stmt, &old_info);
790   gcov_type total = 0;
791   for (icall_target_map::const_iterator iter = old_info.targets.begin ();
792        iter != old_info.targets.end (); ++iter)
793     total += iter->second;
794 
795   /* Program behavior changed, original promoted (and inlined) target is not
796      hot any more. Will avoid promote the original target.
797 
798      To check if original promoted target is still hot, we check the total
799      count of the unpromoted targets (stored in old_info). If it is no less
800      than half of the callsite count (stored in INFO), the original promoted
801      target is considered not hot any more.  */
802   if (total >= info->count / 2)
803     return false;
804 
805   inline_stack stack;
806   get_inline_stack (gimple_location (stmt), &stack);
807   if (stack.length () == 0)
808     return false;
809   function_instance *s = get_function_instance_by_inline_stack (stack);
810   if (s == NULL)
811     return false;
812   icall_target_map map;
813   if (s->find_icall_target_map (stmt, &map) == 0)
814     return false;
815   for (icall_target_map::const_iterator iter = map.begin ();
816        iter != map.end (); ++iter)
817     info->targets[iter->first] = iter->second;
818   return true;
819 }
820 
821 /* Find total count of the callee of EDGE.  */
822 
823 gcov_type
824 autofdo_source_profile::get_callsite_total_count (
825     struct cgraph_edge *edge) const
826 {
827   inline_stack stack;
828   stack.safe_push (std::make_pair (edge->callee->decl, 0));
829   get_inline_stack (gimple_location (edge->call_stmt), &stack);
830 
831   function_instance *s = get_function_instance_by_inline_stack (stack);
832   if (s == NULL
833       || afdo_string_table->get_index (IDENTIFIER_POINTER (
834              DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
835     return 0;
836 
837   return s->total_count ();
838 }
839 
840 /* Read AutoFDO profile and returns TRUE on success.  */
841 
842 /* source profile format:
843 
844    GCOV_TAG_AFDO_FUNCTION: 4 bytes
845    LENGTH: 4 bytes
846    NUM_FUNCTIONS: 4 bytes
847    FUNCTION_INSTANCE_1
848    FUNCTION_INSTANCE_2
849    ...
850    FUNCTION_INSTANCE_N.  */
851 
852 bool
853 autofdo_source_profile::read ()
854 {
855   if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
856     {
857       inform (0, "Not expected TAG.");
858       return false;
859     }
860 
861   /* Skip the length of the section.  */
862   gcov_read_unsigned ();
863 
864   /* Read in the function/callsite profile, and store it in local
865      data structure.  */
866   unsigned function_num = gcov_read_unsigned ();
867   for (unsigned i = 0; i < function_num; i++)
868     {
869       function_instance::function_instance_stack stack;
870       function_instance *s = function_instance::read_function_instance (
871           &stack, gcov_read_counter ());
872       afdo_profile_info->sum_all += s->total_count ();
873       map_[s->name ()] = s;
874     }
875   return true;
876 }
877 
878 /* Return the function_instance in the profile that correspond to the
879    inline STACK.  */
880 
881 function_instance *
882 autofdo_source_profile::get_function_instance_by_inline_stack (
883     const inline_stack &stack) const
884 {
885   name_function_instance_map::const_iterator iter = map_.find (
886       afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
887   if (iter == map_.end())
888     return NULL;
889   function_instance *s = iter->second;
890   for (unsigned i = stack.length() - 1; i > 0; i--)
891     {
892       s = s->get_function_instance_by_decl (
893           stack[i].second, stack[i - 1].first);
894       if (s == NULL)
895         return NULL;
896     }
897   return s;
898 }
899 
900 /* Module profile is only used by LIPO. Here we simply ignore it.  */
901 
902 static void
903 fake_read_autofdo_module_profile ()
904 {
905   /* Read in the module info.  */
906   gcov_read_unsigned ();
907 
908   /* Skip the length of the section.  */
909   gcov_read_unsigned ();
910 
911   /* Read in the file name table.  */
912   unsigned total_module_num = gcov_read_unsigned ();
913   gcc_assert (total_module_num == 0);
914 }
915 
916 /* Read data from profile data file.  */
917 
918 static void
919 read_profile (void)
920 {
921   if (gcov_open (auto_profile_file, 1) == 0)
922     error ("Cannot open profile file %s.", auto_profile_file);
923 
924   if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
925     error ("AutoFDO profile magic number does not mathch.");
926 
927   /* Skip the version number.  */
928   unsigned version = gcov_read_unsigned ();
929   if (version != AUTO_PROFILE_VERSION)
930     error ("AutoFDO profile version %u does match %u.",
931            version, AUTO_PROFILE_VERSION);
932 
933   /* Skip the empty integer.  */
934   gcov_read_unsigned ();
935 
936   /* string_table.  */
937   afdo_string_table = new string_table ();
938   if (!afdo_string_table->read())
939     error ("Cannot read string table from %s.", auto_profile_file);
940 
941   /* autofdo_source_profile.  */
942   afdo_source_profile = autofdo_source_profile::create ();
943   if (afdo_source_profile == NULL)
944     error ("Cannot read function profile from %s.", auto_profile_file);
945 
946   /* autofdo_module_profile.  */
947   fake_read_autofdo_module_profile ();
948 
949   /* Read in the working set.  */
950   if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
951     error ("Cannot read working set from %s.", auto_profile_file);
952 
953   /* Skip the length of the section.  */
954   gcov_read_unsigned ();
955   gcov_working_set_t set[128];
956   for (unsigned i = 0; i < 128; i++)
957     {
958       set[i].num_counters = gcov_read_unsigned ();
959       set[i].min_counter = gcov_read_counter ();
960     }
961   add_working_set (set);
962 }
963 
964 /* From AutoFDO profiles, find values inside STMT for that we want to measure
965    histograms for indirect-call optimization.
966 
967    This function is actually served for 2 purposes:
968      * before annotation, we need to mark histogram, promote and inline
969      * after annotation, we just need to mark, and let follow-up logic to
970        decide if it needs to promote and inline.  */
971 
972 static void
973 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
974                     bool transform)
975 {
976   gimple gs = gsi_stmt (*gsi);
977   tree callee;
978 
979   if (map.size () == 0)
980     return;
981   gcall *stmt = dyn_cast <gcall *> (gs);
982   if ((!stmt) || gimple_call_fndecl (stmt) != NULL_TREE)
983     return;
984 
985   callee = gimple_call_fn (stmt);
986 
987   histogram_value hist = gimple_alloc_histogram_value (
988       cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
989   hist->n_counters = 3;
990   hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
991   gimple_add_histogram_value (cfun, stmt, hist);
992 
993   gcov_type total = 0;
994   icall_target_map::const_iterator max_iter = map.end ();
995 
996   for (icall_target_map::const_iterator iter = map.begin ();
997        iter != map.end (); ++iter)
998     {
999       total += iter->second;
1000       if (max_iter == map.end () || max_iter->second < iter->second)
1001         max_iter = iter;
1002     }
1003 
1004   hist->hvalue.counters[0]
1005       = (unsigned long long)afdo_string_table->get_name (max_iter->first);
1006   hist->hvalue.counters[1] = max_iter->second;
1007   hist->hvalue.counters[2] = total;
1008 
1009   if (!transform)
1010     return;
1011 
1012   struct cgraph_edge *indirect_edge
1013       = cgraph_node::get (current_function_decl)->get_edge (stmt);
1014   struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
1015       get_identifier ((const char *) hist->hvalue.counters[0]));
1016 
1017   if (direct_call == NULL || !check_ic_target (stmt, direct_call))
1018     return;
1019   if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
1020     return;
1021   struct cgraph_edge *new_edge
1022       = indirect_edge->make_speculative (direct_call, 0, 0);
1023   new_edge->redirect_call_stmt_to_callee ();
1024   gimple_remove_histogram_value (cfun, stmt, hist);
1025   inline_call (new_edge, true, NULL, NULL, false);
1026 }
1027 
1028 /* From AutoFDO profiles, find values inside STMT for that we want to measure
1029    histograms and adds them to list VALUES.  */
1030 
1031 static void
1032 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
1033           bool transform)
1034 {
1035   afdo_indirect_call (gsi, map, transform);
1036 }
1037 
1038 typedef std::set<basic_block> bb_set;
1039 typedef std::set<edge> edge_set;
1040 
1041 static bool
1042 is_bb_annotated (const basic_block bb, const bb_set &annotated)
1043 {
1044   return annotated.find (bb) != annotated.end ();
1045 }
1046 
1047 static void
1048 set_bb_annotated (basic_block bb, bb_set *annotated)
1049 {
1050   annotated->insert (bb);
1051 }
1052 
1053 static bool
1054 is_edge_annotated (const edge e, const edge_set &annotated)
1055 {
1056   return annotated.find (e) != annotated.end ();
1057 }
1058 
1059 static void
1060 set_edge_annotated (edge e, edge_set *annotated)
1061 {
1062   annotated->insert (e);
1063 }
1064 
1065 /* For a given BB, set its execution count. Attach value profile if a stmt
1066    is not in PROMOTED, because we only want to promote an indirect call once.
1067    Return TRUE if BB is annotated.  */
1068 
1069 static bool
1070 afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1071 {
1072   gimple_stmt_iterator gsi;
1073   edge e;
1074   edge_iterator ei;
1075   gcov_type max_count = 0;
1076   bool has_annotated = false;
1077 
1078   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1079     {
1080       count_info info;
1081       gimple stmt = gsi_stmt (gsi);
1082       if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1083         continue;
1084       if (afdo_source_profile->get_count_info (stmt, &info))
1085         {
1086           if (info.count > max_count)
1087             max_count = info.count;
1088           has_annotated = true;
1089           if (info.targets.size () > 0
1090               && promoted.find (stmt) == promoted.end ())
1091             afdo_vpt (&gsi, info.targets, false);
1092         }
1093     }
1094 
1095   if (!has_annotated)
1096     return false;
1097 
1098   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1099     afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1100   for (gphi_iterator gpi = gsi_start_phis (bb);
1101        !gsi_end_p (gpi);
1102        gsi_next (&gpi))
1103     {
1104       gphi *phi = gpi.phi ();
1105       size_t i;
1106       for (i = 0; i < gimple_phi_num_args (phi); i++)
1107         afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1108     }
1109   FOR_EACH_EDGE (e, ei, bb->succs)
1110   afdo_source_profile->mark_annotated (e->goto_locus);
1111 
1112   bb->count = max_count;
1113   return true;
1114 }
1115 
1116 /* BB1 and BB2 are in an equivalent class iff:
1117    1. BB1 dominates BB2.
1118    2. BB2 post-dominates BB1.
1119    3. BB1 and BB2 are in the same loop nest.
1120    This function finds the equivalent class for each basic block, and
1121    stores a pointer to the first BB in its equivalent class. Meanwhile,
1122    set bb counts for the same equivalent class to be idenical. Update
1123    ANNOTATED_BB for the first BB in its equivalent class.  */
1124 
1125 static void
1126 afdo_find_equiv_class (bb_set *annotated_bb)
1127 {
1128   basic_block bb;
1129 
1130   FOR_ALL_BB_FN (bb, cfun)
1131   bb->aux = NULL;
1132 
1133   FOR_ALL_BB_FN (bb, cfun)
1134   {
1135     vec<basic_block> dom_bbs;
1136     basic_block bb1;
1137     int i;
1138 
1139     if (bb->aux != NULL)
1140       continue;
1141     bb->aux = bb;
1142     dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1143     FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1144     if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1145         && bb1->loop_father == bb->loop_father)
1146       {
1147         bb1->aux = bb;
1148         if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1149           {
1150             bb->count = bb1->count;
1151             set_bb_annotated (bb, annotated_bb);
1152           }
1153       }
1154     dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
1155     FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1156     if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1157         && bb1->loop_father == bb->loop_father)
1158       {
1159         bb1->aux = bb;
1160         if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1161           {
1162             bb->count = bb1->count;
1163             set_bb_annotated (bb, annotated_bb);
1164           }
1165       }
1166   }
1167 }
1168 
1169 /* If a basic block's count is known, and only one of its in/out edges' count
1170    is unknown, its count can be calculated. Meanwhile, if all of the in/out
1171    edges' counts are known, then the basic block's unknown count can also be
1172    calculated.
1173    IS_SUCC is true if out edges of a basic blocks are examined.
1174    Update ANNOTATED_BB and ANNOTATED_EDGE accordingly.
1175    Return TRUE if any basic block/edge count is changed.  */
1176 
1177 static bool
1178 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb,
1179                      edge_set *annotated_edge)
1180 {
1181   basic_block bb;
1182   bool changed = false;
1183 
1184   FOR_EACH_BB_FN (bb, cfun)
1185   {
1186     edge e, unknown_edge = NULL;
1187     edge_iterator ei;
1188     int num_unknown_edge = 0;
1189     gcov_type total_known_count = 0;
1190 
1191     FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1192     if (!is_edge_annotated (e, *annotated_edge))
1193       num_unknown_edge++, unknown_edge = e;
1194     else
1195       total_known_count += e->count;
1196 
1197     if (num_unknown_edge == 0)
1198       {
1199         if (total_known_count > bb->count)
1200           {
1201             bb->count = total_known_count;
1202             changed = true;
1203           }
1204         if (!is_bb_annotated (bb, *annotated_bb))
1205           {
1206             set_bb_annotated (bb, annotated_bb);
1207             changed = true;
1208           }
1209       }
1210     else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1211       {
1212         if (bb->count >= total_known_count)
1213           unknown_edge->count = bb->count - total_known_count;
1214         else
1215           unknown_edge->count = 0;
1216         set_edge_annotated (unknown_edge, annotated_edge);
1217         changed = true;
1218       }
1219   }
1220   return changed;
1221 }
1222 
1223 /* Special propagation for circuit expressions. Because GCC translates
1224    control flow into data flow for circuit expressions. E.g.
1225    BB1:
1226    if (a && b)
1227      BB2
1228    else
1229      BB3
1230 
1231    will be translated into:
1232 
1233    BB1:
1234      if (a)
1235        goto BB.t1
1236      else
1237        goto BB.t3
1238    BB.t1:
1239      if (b)
1240        goto BB.t2
1241      else
1242        goto BB.t3
1243    BB.t2:
1244      goto BB.t3
1245    BB.t3:
1246      tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1247      if (tmp)
1248        goto BB2
1249      else
1250        goto BB3
1251 
1252    In this case, we need to propagate through PHI to determine the edge
1253    count of BB1->BB.t1, BB.t1->BB.t2.
1254    Update ANNOTATED_EDGE accordingly.  */
1255 
1256 static void
1257 afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge)
1258 {
1259   basic_block bb;
1260   FOR_ALL_BB_FN (bb, cfun)
1261   {
1262     gimple def_stmt;
1263     tree cmp_rhs, cmp_lhs;
1264     gimple cmp_stmt = last_stmt (bb);
1265     edge e;
1266     edge_iterator ei;
1267 
1268     if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1269       continue;
1270     cmp_rhs = gimple_cond_rhs (cmp_stmt);
1271     cmp_lhs = gimple_cond_lhs (cmp_stmt);
1272     if (!TREE_CONSTANT (cmp_rhs)
1273         || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1274       continue;
1275     if (TREE_CODE (cmp_lhs) != SSA_NAME)
1276       continue;
1277     if (!is_bb_annotated (bb, annotated_bb))
1278       continue;
1279     def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1280     while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
1281            && gimple_assign_single_p (def_stmt)
1282            && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
1283       def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
1284     if (!def_stmt)
1285       continue;
1286     gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
1287     if (!phi_stmt)
1288       continue;
1289     FOR_EACH_EDGE (e, ei, bb->succs)
1290     {
1291       unsigned i, total = 0;
1292       edge only_one;
1293       bool check_value_one = (((integer_onep (cmp_rhs))
1294                                ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1295                               ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1296       if (!is_edge_annotated (e, *annotated_edge))
1297         continue;
1298       for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1299         {
1300           tree val = gimple_phi_arg_def (phi_stmt, i);
1301           edge ep = gimple_phi_arg_edge (phi_stmt, i);
1302 
1303           if (!TREE_CONSTANT (val)
1304               || !(integer_zerop (val) || integer_onep (val)))
1305             continue;
1306           if (check_value_one ^ integer_onep (val))
1307             continue;
1308           total++;
1309           only_one = ep;
1310           if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge))
1311             {
1312               ep->probability = 0;
1313               ep->count = 0;
1314               set_edge_annotated (ep, annotated_edge);
1315             }
1316         }
1317       if (total == 1 && !is_edge_annotated (only_one, *annotated_edge))
1318         {
1319           only_one->probability = e->probability;
1320           only_one->count = e->count;
1321           set_edge_annotated (only_one, annotated_edge);
1322         }
1323     }
1324   }
1325 }
1326 
1327 /* Propagate the basic block count and edge count on the control flow
1328    graph. We do the propagation iteratively until stablize.  */
1329 
1330 static void
1331 afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge)
1332 {
1333   basic_block bb;
1334   bool changed = true;
1335   int i = 0;
1336 
1337   FOR_ALL_BB_FN (bb, cfun)
1338   {
1339     bb->count = ((basic_block)bb->aux)->count;
1340     if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb))
1341       set_bb_annotated (bb, annotated_bb);
1342   }
1343 
1344   while (changed && i++ < 10)
1345     {
1346       changed = false;
1347 
1348       if (afdo_propagate_edge (true, annotated_bb, annotated_edge))
1349         changed = true;
1350       if (afdo_propagate_edge (false, annotated_bb, annotated_edge))
1351         changed = true;
1352       afdo_propagate_circuit (*annotated_bb, annotated_edge);
1353     }
1354 }
1355 
1356 /* Propagate counts on control flow graph and calculate branch
1357    probabilities.  */
1358 
1359 static void
1360 afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge)
1361 {
1362   basic_block bb;
1363   bool has_sample = false;
1364 
1365   FOR_EACH_BB_FN (bb, cfun)
1366   if (bb->count > 0)
1367     has_sample = true;
1368 
1369   if (!has_sample)
1370     return;
1371 
1372   calculate_dominance_info (CDI_POST_DOMINATORS);
1373   calculate_dominance_info (CDI_DOMINATORS);
1374   loop_optimizer_init (0);
1375 
1376   afdo_find_equiv_class (annotated_bb);
1377   afdo_propagate (annotated_bb, annotated_edge);
1378 
1379   FOR_EACH_BB_FN (bb, cfun)
1380   {
1381     edge e;
1382     edge_iterator ei;
1383     int num_unknown_succ = 0;
1384     gcov_type total_count = 0;
1385 
1386     FOR_EACH_EDGE (e, ei, bb->succs)
1387     {
1388       if (!is_edge_annotated (e, *annotated_edge))
1389         num_unknown_succ++;
1390       else
1391         total_count += e->count;
1392     }
1393     if (num_unknown_succ == 0 && total_count > 0)
1394       {
1395         FOR_EACH_EDGE (e, ei, bb->succs)
1396         e->probability = (double)e->count * REG_BR_PROB_BASE / total_count;
1397       }
1398   }
1399   FOR_ALL_BB_FN (bb, cfun)
1400   {
1401     edge e;
1402     edge_iterator ei;
1403 
1404     FOR_EACH_EDGE (e, ei, bb->succs)
1405     e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE;
1406     bb->aux = NULL;
1407   }
1408 
1409   loop_optimizer_finalize ();
1410   free_dominance_info (CDI_DOMINATORS);
1411   free_dominance_info (CDI_POST_DOMINATORS);
1412 }
1413 
1414 /* Perform value profile transformation using AutoFDO profile. Add the
1415    promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1416    indirect call promoted.  */
1417 
1418 static bool
1419 afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1420 {
1421   basic_block bb;
1422   if (afdo_source_profile->get_function_instance_by_decl (
1423           current_function_decl) == NULL)
1424     return false;
1425 
1426   compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1427 
1428   bool has_vpt = false;
1429   FOR_EACH_BB_FN (bb, cfun)
1430   {
1431     if (!has_indirect_call (bb))
1432       continue;
1433     gimple_stmt_iterator gsi;
1434 
1435     gcov_type bb_count = 0;
1436     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1437       {
1438         count_info info;
1439         gimple stmt = gsi_stmt (gsi);
1440         if (afdo_source_profile->get_count_info (stmt, &info))
1441           bb_count = MAX (bb_count, info.count);
1442       }
1443 
1444     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1445       {
1446         gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
1447         /* IC_promotion and early_inline_2 is done in multiple iterations.
1448            No need to promoted the stmt if its in promoted_stmts (means
1449            it is already been promoted in the previous iterations).  */
1450         if ((!stmt) || gimple_call_fn (stmt) == NULL
1451             || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1452             || promoted_stmts->find (stmt) != promoted_stmts->end ())
1453           continue;
1454 
1455         count_info info;
1456         afdo_source_profile->get_count_info (stmt, &info);
1457         info.count = bb_count;
1458         if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1459           {
1460             /* Promote the indirect call and update the promoted_stmts.  */
1461             promoted_stmts->insert (stmt);
1462             afdo_vpt (&gsi, info.targets, true);
1463             has_vpt = true;
1464           }
1465       }
1466   }
1467 
1468   if (has_vpt)
1469     {
1470       unsigned todo = optimize_inline_calls (current_function_decl);
1471       if (todo & TODO_update_ssa_any)
1472        update_ssa (TODO_update_ssa);
1473       return true;
1474     }
1475 
1476   return false;
1477 }
1478 
1479 /* Annotate auto profile to the control flow graph. Do not annotate value
1480    profile for stmts in PROMOTED_STMTS.  */
1481 
1482 static void
1483 afdo_annotate_cfg (const stmt_set &promoted_stmts)
1484 {
1485   basic_block bb;
1486   bb_set annotated_bb;
1487   edge_set annotated_edge;
1488   const function_instance *s
1489       = afdo_source_profile->get_function_instance_by_decl (
1490           current_function_decl);
1491 
1492   if (s == NULL)
1493     return;
1494   cgraph_node::get (current_function_decl)->count = s->head_count ();
1495   ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count ();
1496   gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1497 
1498   FOR_EACH_BB_FN (bb, cfun)
1499   {
1500     edge e;
1501     edge_iterator ei;
1502 
1503     bb->count = 0;
1504     FOR_EACH_EDGE (e, ei, bb->succs)
1505     e->count = 0;
1506 
1507     if (afdo_set_bb_count (bb, promoted_stmts))
1508       set_bb_annotated (bb, &annotated_bb);
1509     if (bb->count > max_count)
1510       max_count = bb->count;
1511   }
1512   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1513       > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1514     {
1515       ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1516           = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1517       set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1518     }
1519   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1520       > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1521     {
1522       EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1523           = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1524       set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1525     }
1526   afdo_source_profile->mark_annotated (
1527       DECL_SOURCE_LOCATION (current_function_decl));
1528   afdo_source_profile->mark_annotated (cfun->function_start_locus);
1529   afdo_source_profile->mark_annotated (cfun->function_end_locus);
1530   if (max_count > 0)
1531     {
1532       afdo_calculate_branch_prob (&annotated_bb, &annotated_edge);
1533       counts_to_freqs ();
1534       profile_status_for_fn (cfun) = PROFILE_READ;
1535     }
1536   if (flag_value_profile_transformations)
1537     {
1538       gimple_value_profile_transformations ();
1539       free_dominance_info (CDI_DOMINATORS);
1540       free_dominance_info (CDI_POST_DOMINATORS);
1541       update_ssa (TODO_update_ssa);
1542     }
1543 }
1544 
1545 /* Wrapper function to invoke early inliner.  */
1546 
1547 static void
1548 early_inline ()
1549 {
1550   compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1551   unsigned todo = early_inliner (cfun);
1552   if (todo & TODO_update_ssa_any)
1553     update_ssa (TODO_update_ssa);
1554 }
1555 
1556 /* Use AutoFDO profile to annoate the control flow graph.
1557    Return the todo flag.  */
1558 
1559 static unsigned int
1560 auto_profile (void)
1561 {
1562   struct cgraph_node *node;
1563 
1564   if (symtab->state == FINISHED)
1565     return 0;
1566 
1567   init_node_map (true);
1568   profile_info = autofdo::afdo_profile_info;
1569 
1570   FOR_EACH_FUNCTION (node)
1571   {
1572     if (!gimple_has_body_p (node->decl))
1573       continue;
1574 
1575     /* Don't profile functions produced for builtin stuff.  */
1576     if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1577       continue;
1578 
1579     push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1580 
1581     /* First do indirect call promotion and early inline to make the
1582        IR match the profiled binary before actual annotation.
1583 
1584        This is needed because an indirect call might have been promoted
1585        and inlined in the profiled binary. If we do not promote and
1586        inline these indirect calls before annotation, the profile for
1587        these promoted functions will be lost.
1588 
1589        e.g. foo() --indirect_call--> bar()
1590        In profiled binary, the callsite is promoted and inlined, making
1591        the profile look like:
1592 
1593        foo: {
1594          loc_foo_1: count_1
1595          bar@loc_foo_2: {
1596            loc_bar_1: count_2
1597            loc_bar_2: count_3
1598          }
1599        }
1600 
1601        Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1602        If we perform annotation on it, the profile inside bar@loc_foo2
1603        will be wasted.
1604 
1605        To avoid this, we promote loc_foo_2 and inline the promoted bar
1606        function before annotation, so the profile inside bar@loc_foo2
1607        will be useful.  */
1608     autofdo::stmt_set promoted_stmts;
1609     for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
1610       {
1611         if (!flag_value_profile_transformations
1612             || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1613           break;
1614         early_inline ();
1615       }
1616 
1617     early_inline ();
1618     autofdo::afdo_annotate_cfg (promoted_stmts);
1619     compute_function_frequency ();
1620 
1621     /* Local pure-const may imply need to fixup the cfg.  */
1622     if (execute_fixup_cfg () & TODO_cleanup_cfg)
1623       cleanup_tree_cfg ();
1624 
1625     free_dominance_info (CDI_DOMINATORS);
1626     free_dominance_info (CDI_POST_DOMINATORS);
1627     cgraph_edge::rebuild_edges ();
1628     compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1629     pop_cfun ();
1630   }
1631 
1632   return TODO_rebuild_cgraph_edges;
1633 }
1634 } /* namespace autofdo.  */
1635 
1636 /* Read the profile from the profile data file.  */
1637 
1638 void
1639 read_autofdo_file (void)
1640 {
1641   if (auto_profile_file == NULL)
1642     auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1643 
1644   autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc (
1645       1, sizeof (struct gcov_ctr_summary));
1646   autofdo::afdo_profile_info->runs = 1;
1647   autofdo::afdo_profile_info->sum_max = 0;
1648   autofdo::afdo_profile_info->sum_all = 0;
1649 
1650   /* Read the profile from the profile file.  */
1651   autofdo::read_profile ();
1652 }
1653 
1654 /* Free the resources.  */
1655 
1656 void
1657 end_auto_profile (void)
1658 {
1659   delete autofdo::afdo_source_profile;
1660   delete autofdo::afdo_string_table;
1661   profile_info = NULL;
1662 }
1663 
1664 /* Returns TRUE if EDGE is hot enough to be inlined early.  */
1665 
1666 bool
1667 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1668 {
1669   gcov_type count
1670       = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1671 
1672   if (count > 0)
1673     {
1674       bool is_hot;
1675       const struct gcov_ctr_summary *saved_profile_info = profile_info;
1676       /* At early inline stage, profile_info is not set yet. We need to
1677          temporarily set it to afdo_profile_info to calculate hotness.  */
1678       profile_info = autofdo::afdo_profile_info;
1679       is_hot = maybe_hot_count_p (NULL, count);
1680       profile_info = saved_profile_info;
1681       return is_hot;
1682     }
1683 
1684   return false;
1685 }
1686 
1687 namespace
1688 {
1689 
1690 const pass_data pass_data_ipa_auto_profile = {
1691   SIMPLE_IPA_PASS, "afdo", /* name */
1692   OPTGROUP_NONE,           /* optinfo_flags */
1693   TV_IPA_AUTOFDO,          /* tv_id */
1694   0,                       /* properties_required */
1695   0,                       /* properties_provided */
1696   0,                       /* properties_destroyed */
1697   0,                       /* todo_flags_start */
1698   0,                       /* todo_flags_finish */
1699 };
1700 
1701 class pass_ipa_auto_profile : public simple_ipa_opt_pass
1702 {
1703 public:
1704   pass_ipa_auto_profile (gcc::context *ctxt)
1705       : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1706   {
1707   }
1708 
1709   /* opt_pass methods: */
1710   virtual bool
1711   gate (function *)
1712   {
1713     return flag_auto_profile;
1714   }
1715   virtual unsigned int
1716   execute (function *)
1717   {
1718     return autofdo::auto_profile ();
1719   }
1720 }; // class pass_ipa_auto_profile
1721 
1722 } // anon namespace
1723 
1724 simple_ipa_opt_pass *
1725 make_pass_ipa_auto_profile (gcc::context *ctxt)
1726 {
1727   return new pass_ipa_auto_profile (ctxt);
1728 }
1729