xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/auto-profile.c (revision d90047b5d07facf36e6c01dcc0bded8997ce9cc2)
1 /* Read and annotate call graph profile from the auto profile data file.
2    Copyright (C) 2014-2017 Free Software Foundation, Inc.
3    Contributed by Dehao Chen (dehao@google.com)
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #define INCLUDE_MAP
23 #define INCLUDE_SET
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "predict.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "ssa.h"
33 #include "cgraph.h"
34 #include "gcov-io.h"
35 #include "diagnostic-core.h"
36 #include "profile.h"
37 #include "langhooks.h"
38 #include "cfgloop.h"
39 #include "tree-cfg.h"
40 #include "tree-cfgcleanup.h"
41 #include "tree-into-ssa.h"
42 #include "gimple-iterator.h"
43 #include "value-prof.h"
44 #include "params.h"
45 #include "symbol-summary.h"
46 #include "ipa-prop.h"
47 #include "ipa-inline.h"
48 #include "tree-inline.h"
49 #include "auto-profile.h"
50 #include "tree-pretty-print.h"
51 #include "gimple-pretty-print.h"
52 
53 /* The following routines implements AutoFDO optimization.
54 
55    This optimization uses sampling profiles to annotate basic block counts
56    and uses heuristics to estimate branch probabilities.
57 
58    There are three phases in AutoFDO:
59 
60    Phase 1: Read profile from the profile data file.
61      The following info is read from the profile datafile:
62         * string_table: a map between function name and its index.
63         * autofdo_source_profile: a map from function_instance name to
64           function_instance. This is represented as a forest of
65           function_instances.
66         * WorkingSet: a histogram of how many instructions are covered for a
67           given percentage of total cycles. This is describing the binary
68           level information (not source level). This info is used to help
69           decide if we want aggressive optimizations that could increase
70           code footprint (e.g. loop unroll etc.)
71      A function instance is an instance of function that could either be a
72      standalone symbol, or a clone of a function that is inlined into another
73      function.
74 
75    Phase 2: Early inline + value profile transformation.
76      Early inline uses autofdo_source_profile to find if a callsite is:
77         * inlined in the profiled binary.
78         * callee body is hot in the profiling run.
79      If both condition satisfies, early inline will inline the callsite
80      regardless of the code growth.
81      Phase 2 is an iterative process. During each iteration, we also check
82      if an indirect callsite is promoted and inlined in the profiling run.
83      If yes, vpt will happen to force promote it and in the next iteration,
84      einline will inline the promoted callsite in the next iteration.
85 
86    Phase 3: Annotate control flow graph.
87      AutoFDO uses a separate pass to:
88         * Annotate basic block count
89         * Estimate branch probability
90 
91    After the above 3 phases, all profile is readily annotated on the GCC IR.
92    AutoFDO tries to reuse all FDO infrastructure as much as possible to make
93    use of the profile. E.g. it uses existing mechanism to calculate the basic
94    block/edge frequency, as well as the cgraph node/edge count.
95 */
96 
97 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
98 #define AUTO_PROFILE_VERSION 1
99 
100 namespace autofdo
101 {
102 
103 /* Represent a source location: (function_decl, lineno).  */
104 typedef std::pair<tree, unsigned> decl_lineno;
105 
106 /* Represent an inline stack. vector[0] is the leaf node.  */
107 typedef auto_vec<decl_lineno> inline_stack;
108 
109 /* String array that stores function names.  */
110 typedef auto_vec<char *> string_vector;
111 
112 /* Map from function name's index in string_table to target's
113    execution count.  */
114 typedef std::map<unsigned, gcov_type> icall_target_map;
115 
116 /* Set of gimple stmts. Used to track if the stmt has already been promoted
117    to direct call.  */
118 typedef std::set<gimple *> stmt_set;
119 
120 /* Represent count info of an inline stack.  */
121 struct count_info
122 {
123   /* Sampled count of the inline stack.  */
124   gcov_type count;
125 
126   /* Map from indirect call target to its sample count.  */
127   icall_target_map targets;
128 
129   /* Whether this inline stack is already used in annotation.
130 
131      Each inline stack should only be used to annotate IR once.
132      This will be enforced when instruction-level discriminator
133      is supported.  */
134   bool annotated;
135 };
136 
137 /* operator< for "const char *".  */
138 struct string_compare
139 {
140   bool operator()(const char *a, const char *b) const
141   {
142     return strcmp (a, b) < 0;
143   }
144 };
145 
146 /* Store a string array, indexed by string position in the array.  */
147 class string_table
148 {
149 public:
150   string_table ()
151   {}
152 
153   ~string_table ();
154 
155   /* For a given string, returns its index.  */
156   int get_index (const char *name) const;
157 
158   /* For a given decl, returns the index of the decl name.  */
159   int get_index_by_decl (tree decl) const;
160 
161   /* For a given index, returns the string.  */
162   const char *get_name (int index) const;
163 
164   /* Read profile, return TRUE on success.  */
165   bool read ();
166 
167 private:
168   typedef std::map<const char *, unsigned, string_compare> string_index_map;
169   string_vector vector_;
170   string_index_map map_;
171 };
172 
173 /* Profile of a function instance:
174      1. total_count of the function.
175      2. head_count (entry basic block count) of the function (only valid when
176         function is a top-level function_instance, i.e. it is the original copy
177         instead of the inlined copy).
178      3. map from source location (decl_lineno) to profile (count_info).
179      4. map from callsite to callee function_instance.  */
180 class function_instance
181 {
182 public:
183   typedef auto_vec<function_instance *> function_instance_stack;
184 
185   /* Read the profile and return a function_instance with head count as
186      HEAD_COUNT. Recursively read callsites to create nested function_instances
187      too. STACK is used to track the recursive creation process.  */
188   static function_instance *
189   read_function_instance (function_instance_stack *stack,
190                           gcov_type head_count);
191 
192   /* Recursively deallocate all callsites (nested function_instances).  */
193   ~function_instance ();
194 
195   /* Accessors.  */
196   int
197   name () const
198   {
199     return name_;
200   }
201   gcov_type
202   total_count () const
203   {
204     return total_count_;
205   }
206   gcov_type
207   head_count () const
208   {
209     return head_count_;
210   }
211 
212   /* Traverse callsites of the current function_instance to find one at the
213      location of LINENO and callee name represented in DECL.  */
214   function_instance *get_function_instance_by_decl (unsigned lineno,
215                                                     tree decl) const;
216 
217   /* Store the profile info for LOC in INFO. Return TRUE if profile info
218      is found.  */
219   bool get_count_info (location_t loc, count_info *info) const;
220 
221   /* Read the inlined indirect call target profile for STMT and store it in
222      MAP, return the total count for all inlined indirect calls.  */
223   gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
224 
225   /* Sum of counts that is used during annotation.  */
226   gcov_type total_annotated_count () const;
227 
228   /* Mark LOC as annotated.  */
229   void mark_annotated (location_t loc);
230 
231 private:
232   /* Callsite, represented as (decl_lineno, callee_function_name_index).  */
233   typedef std::pair<unsigned, unsigned> callsite;
234 
235   /* Map from callsite to callee function_instance.  */
236   typedef std::map<callsite, function_instance *> callsite_map;
237 
238   function_instance (unsigned name, gcov_type head_count)
239       : name_ (name), total_count_ (0), head_count_ (head_count)
240   {
241   }
242 
243   /* Map from source location (decl_lineno) to profile (count_info).  */
244   typedef std::map<unsigned, count_info> position_count_map;
245 
246   /* function_instance name index in the string_table.  */
247   unsigned name_;
248 
249   /* Total sample count.  */
250   gcov_type total_count_;
251 
252   /* Entry BB's sample count.  */
253   gcov_type head_count_;
254 
255   /* Map from callsite location to callee function_instance.  */
256   callsite_map callsites;
257 
258   /* Map from source location to count_info.  */
259   position_count_map pos_counts;
260 };
261 
262 /* Profile for all functions.  */
263 class autofdo_source_profile
264 {
265 public:
266   static autofdo_source_profile *
267   create ()
268   {
269     autofdo_source_profile *map = new autofdo_source_profile ();
270 
271     if (map->read ())
272       return map;
273     delete map;
274     return NULL;
275   }
276 
277   ~autofdo_source_profile ();
278 
279   /* For a given DECL, returns the top-level function_instance.  */
280   function_instance *get_function_instance_by_decl (tree decl) const;
281 
282   /* Find count_info for a given gimple STMT. If found, store the count_info
283      in INFO and return true; otherwise return false.  */
284   bool get_count_info (gimple *stmt, count_info *info) const;
285 
286   /* Find total count of the callee of EDGE.  */
287   gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
288 
289   /* Update value profile INFO for STMT from the inlined indirect callsite.
290      Return true if INFO is updated.  */
291   bool update_inlined_ind_target (gcall *stmt, count_info *info);
292 
293   /* Mark LOC as annotated.  */
294   void mark_annotated (location_t loc);
295 
296 private:
297   /* Map from function_instance name index (in string_table) to
298      function_instance.  */
299   typedef std::map<unsigned, function_instance *> name_function_instance_map;
300 
301   autofdo_source_profile () {}
302 
303   /* Read AutoFDO profile and returns TRUE on success.  */
304   bool read ();
305 
306   /* Return the function_instance in the profile that correspond to the
307      inline STACK.  */
308   function_instance *
309   get_function_instance_by_inline_stack (const inline_stack &stack) const;
310 
311   name_function_instance_map map_;
312 };
313 
314 /* Store the strings read from the profile data file.  */
315 static string_table *afdo_string_table;
316 
317 /* Store the AutoFDO source profile.  */
318 static autofdo_source_profile *afdo_source_profile;
319 
320 /* gcov_ctr_summary structure to store the profile_info.  */
321 static struct gcov_ctr_summary *afdo_profile_info;
322 
323 /* Helper functions.  */
324 
325 /* Return the original name of NAME: strip the suffix that starts
326    with '.' Caller is responsible for freeing RET.  */
327 
328 static char *
329 get_original_name (const char *name)
330 {
331   char *ret = xstrdup (name);
332   char *find = strchr (ret, '.');
333   if (find != NULL)
334     *find = 0;
335   return ret;
336 }
337 
338 /* Return the combined location, which is a 32bit integer in which
339    higher 16 bits stores the line offset of LOC to the start lineno
340    of DECL, The lower 16 bits stores the discriminator.  */
341 
342 static unsigned
343 get_combined_location (location_t loc, tree decl)
344 {
345   /* TODO: allow more bits for line and less bits for discriminator.  */
346   if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
347     warning_at (loc, OPT_Woverflow, "offset exceeds 16 bytes");
348   return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
349 }
350 
351 /* Return the function decl of a given lexical BLOCK.  */
352 
353 static tree
354 get_function_decl_from_block (tree block)
355 {
356   tree decl;
357 
358   if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) == UNKNOWN_LOCATION)
359     return NULL_TREE;
360 
361   for (decl = BLOCK_ABSTRACT_ORIGIN (block);
362        decl && (TREE_CODE (decl) == BLOCK);
363        decl = BLOCK_ABSTRACT_ORIGIN (decl))
364     if (TREE_CODE (decl) == FUNCTION_DECL)
365       break;
366   return decl;
367 }
368 
369 /* Store inline stack for STMT in STACK.  */
370 
371 static void
372 get_inline_stack (location_t locus, inline_stack *stack)
373 {
374   if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
375     return;
376 
377   tree block = LOCATION_BLOCK (locus);
378   if (block && TREE_CODE (block) == BLOCK)
379     {
380       int level = 0;
381       for (block = BLOCK_SUPERCONTEXT (block);
382            block && (TREE_CODE (block) == BLOCK);
383            block = BLOCK_SUPERCONTEXT (block))
384         {
385           location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
386           if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
387             continue;
388 
389           tree decl = get_function_decl_from_block (block);
390           stack->safe_push (
391               std::make_pair (decl, get_combined_location (locus, decl)));
392           locus = tmp_locus;
393           level++;
394         }
395     }
396   stack->safe_push (
397       std::make_pair (current_function_decl,
398                       get_combined_location (locus, current_function_decl)));
399 }
400 
401 /* Return STMT's combined location, which is a 32bit integer in which
402    higher 16 bits stores the line offset of LOC to the start lineno
403    of DECL, The lower 16 bits stores the discriminator.  */
404 
405 static unsigned
406 get_relative_location_for_stmt (gimple *stmt)
407 {
408   location_t locus = gimple_location (stmt);
409   if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
410     return UNKNOWN_LOCATION;
411 
412   for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
413        block = BLOCK_SUPERCONTEXT (block))
414     if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
415       return get_combined_location (locus,
416                                     get_function_decl_from_block (block));
417   return get_combined_location (locus, current_function_decl);
418 }
419 
420 /* Return true if BB contains indirect call.  */
421 
422 static bool
423 has_indirect_call (basic_block bb)
424 {
425   gimple_stmt_iterator gsi;
426 
427   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
428     {
429       gimple *stmt = gsi_stmt (gsi);
430       if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
431           && (gimple_call_fn (stmt) == NULL
432               || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
433         return true;
434     }
435   return false;
436 }
437 
438 /* Member functions for string_table.  */
439 
440 /* Deconstructor.  */
441 
442 string_table::~string_table ()
443 {
444   for (unsigned i = 0; i < vector_.length (); i++)
445     free (vector_[i]);
446 }
447 
448 
449 /* Return the index of a given function NAME. Return -1 if NAME is not
450    found in string table.  */
451 
452 int
453 string_table::get_index (const char *name) const
454 {
455   if (name == NULL)
456     return -1;
457   string_index_map::const_iterator iter = map_.find (name);
458   if (iter == map_.end ())
459     return -1;
460 
461   return iter->second;
462 }
463 
464 /* Return the index of a given function DECL. Return -1 if DECL is not
465    found in string table.  */
466 
467 int
468 string_table::get_index_by_decl (tree decl) const
469 {
470   char *name
471       = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
472   int ret = get_index (name);
473   free (name);
474   if (ret != -1)
475     return ret;
476   ret = get_index (lang_hooks.dwarf_name (decl, 0));
477   if (ret != -1)
478     return ret;
479   if (DECL_ABSTRACT_ORIGIN (decl))
480     return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
481 
482   return -1;
483 }
484 
485 /* Return the function name of a given INDEX.  */
486 
487 const char *
488 string_table::get_name (int index) const
489 {
490   gcc_assert (index > 0 && index < (int)vector_.length ());
491   return vector_[index];
492 }
493 
494 /* Read the string table. Return TRUE if reading is successful.  */
495 
496 bool
497 string_table::read ()
498 {
499   if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
500     return false;
501   /* Skip the length of the section.  */
502   gcov_read_unsigned ();
503   /* Read in the file name table.  */
504   unsigned string_num = gcov_read_unsigned ();
505   for (unsigned i = 0; i < string_num; i++)
506     {
507       vector_.safe_push (get_original_name (gcov_read_string ()));
508       map_[vector_.last ()] = i;
509     }
510   return true;
511 }
512 
513 /* Member functions for function_instance.  */
514 
515 function_instance::~function_instance ()
516 {
517   for (callsite_map::iterator iter = callsites.begin ();
518        iter != callsites.end (); ++iter)
519     delete iter->second;
520 }
521 
522 /* Traverse callsites of the current function_instance to find one at the
523    location of LINENO and callee name represented in DECL.  */
524 
525 function_instance *
526 function_instance::get_function_instance_by_decl (unsigned lineno,
527                                                   tree decl) const
528 {
529   int func_name_idx = afdo_string_table->get_index_by_decl (decl);
530   if (func_name_idx != -1)
531     {
532       callsite_map::const_iterator ret
533           = callsites.find (std::make_pair (lineno, func_name_idx));
534       if (ret != callsites.end ())
535         return ret->second;
536     }
537   func_name_idx
538       = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
539   if (func_name_idx != -1)
540     {
541       callsite_map::const_iterator ret
542           = callsites.find (std::make_pair (lineno, func_name_idx));
543       if (ret != callsites.end ())
544         return ret->second;
545     }
546   if (DECL_ABSTRACT_ORIGIN (decl))
547     return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
548 
549   return NULL;
550 }
551 
552 /* Store the profile info for LOC in INFO. Return TRUE if profile info
553    is found.  */
554 
555 bool
556 function_instance::get_count_info (location_t loc, count_info *info) const
557 {
558   position_count_map::const_iterator iter = pos_counts.find (loc);
559   if (iter == pos_counts.end ())
560     return false;
561   *info = iter->second;
562   return true;
563 }
564 
565 /* Mark LOC as annotated.  */
566 
567 void
568 function_instance::mark_annotated (location_t loc)
569 {
570   position_count_map::iterator iter = pos_counts.find (loc);
571   if (iter == pos_counts.end ())
572     return;
573   iter->second.annotated = true;
574 }
575 
576 /* Read the inlined indirect call target profile for STMT and store it in
577    MAP, return the total count for all inlined indirect calls.  */
578 
579 gcov_type
580 function_instance::find_icall_target_map (gcall *stmt,
581                                           icall_target_map *map) const
582 {
583   gcov_type ret = 0;
584   unsigned stmt_offset = get_relative_location_for_stmt (stmt);
585 
586   for (callsite_map::const_iterator iter = callsites.begin ();
587        iter != callsites.end (); ++iter)
588     {
589       unsigned callee = iter->second->name ();
590       /* Check if callsite location match the stmt.  */
591       if (iter->first.first != stmt_offset)
592         continue;
593       struct cgraph_node *node = cgraph_node::get_for_asmname (
594           get_identifier (afdo_string_table->get_name (callee)));
595       if (node == NULL)
596         continue;
597       if (!check_ic_target (stmt, node))
598         continue;
599       (*map)[callee] = iter->second->total_count ();
600       ret += iter->second->total_count ();
601     }
602   return ret;
603 }
604 
605 /* Read the profile and create a function_instance with head count as
606    HEAD_COUNT. Recursively read callsites to create nested function_instances
607    too. STACK is used to track the recursive creation process.  */
608 
609 /* function instance profile format:
610 
611    ENTRY_COUNT: 8 bytes
612    NAME_INDEX: 4 bytes
613    NUM_POS_COUNTS: 4 bytes
614    NUM_CALLSITES: 4 byte
615    POS_COUNT_1:
616      POS_1_OFFSET: 4 bytes
617      NUM_TARGETS: 4 bytes
618      COUNT: 8 bytes
619      TARGET_1:
620        VALUE_PROFILE_TYPE: 4 bytes
621        TARGET_IDX: 8 bytes
622        COUNT: 8 bytes
623      TARGET_2
624      ...
625      TARGET_n
626    POS_COUNT_2
627    ...
628    POS_COUNT_N
629    CALLSITE_1:
630      CALLSITE_1_OFFSET: 4 bytes
631      FUNCTION_INSTANCE_PROFILE (nested)
632    CALLSITE_2
633    ...
634    CALLSITE_n.  */
635 
636 function_instance *
637 function_instance::read_function_instance (function_instance_stack *stack,
638                                            gcov_type head_count)
639 {
640   unsigned name = gcov_read_unsigned ();
641   unsigned num_pos_counts = gcov_read_unsigned ();
642   unsigned num_callsites = gcov_read_unsigned ();
643   function_instance *s = new function_instance (name, head_count);
644   stack->safe_push (s);
645 
646   for (unsigned i = 0; i < num_pos_counts; i++)
647     {
648       unsigned offset = gcov_read_unsigned () & 0xffff0000;
649       unsigned num_targets = gcov_read_unsigned ();
650       gcov_type count = gcov_read_counter ();
651       s->pos_counts[offset].count = count;
652       for (unsigned j = 0; j < stack->length (); j++)
653         (*stack)[j]->total_count_ += count;
654       for (unsigned j = 0; j < num_targets; j++)
655         {
656           /* Only indirect call target histogram is supported now.  */
657           gcov_read_unsigned ();
658           gcov_type target_idx = gcov_read_counter ();
659           s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
660         }
661     }
662   for (unsigned i = 0; i < num_callsites; i++)
663     {
664       unsigned offset = gcov_read_unsigned ();
665       function_instance *callee_function_instance
666           = read_function_instance (stack, 0);
667       s->callsites[std::make_pair (offset, callee_function_instance->name ())]
668           = callee_function_instance;
669     }
670   stack->pop ();
671   return s;
672 }
673 
674 /* Sum of counts that is used during annotation.  */
675 
676 gcov_type
677 function_instance::total_annotated_count () const
678 {
679   gcov_type ret = 0;
680   for (callsite_map::const_iterator iter = callsites.begin ();
681        iter != callsites.end (); ++iter)
682     ret += iter->second->total_annotated_count ();
683   for (position_count_map::const_iterator iter = pos_counts.begin ();
684        iter != pos_counts.end (); ++iter)
685     if (iter->second.annotated)
686       ret += iter->second.count;
687   return ret;
688 }
689 
690 /* Member functions for autofdo_source_profile.  */
691 
692 autofdo_source_profile::~autofdo_source_profile ()
693 {
694   for (name_function_instance_map::const_iterator iter = map_.begin ();
695        iter != map_.end (); ++iter)
696     delete iter->second;
697 }
698 
699 /* For a given DECL, returns the top-level function_instance.  */
700 
701 function_instance *
702 autofdo_source_profile::get_function_instance_by_decl (tree decl) const
703 {
704   int index = afdo_string_table->get_index_by_decl (decl);
705   if (index == -1)
706     return NULL;
707   name_function_instance_map::const_iterator ret = map_.find (index);
708   return ret == map_.end () ? NULL : ret->second;
709 }
710 
711 /* Find count_info for a given gimple STMT. If found, store the count_info
712    in INFO and return true; otherwise return false.  */
713 
714 bool
715 autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
716 {
717   if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
718     return false;
719 
720   inline_stack stack;
721   get_inline_stack (gimple_location (stmt), &stack);
722   if (stack.length () == 0)
723     return false;
724   function_instance *s = get_function_instance_by_inline_stack (stack);
725   if (s == NULL)
726     return false;
727   return s->get_count_info (stack[0].second, info);
728 }
729 
730 /* Mark LOC as annotated.  */
731 
732 void
733 autofdo_source_profile::mark_annotated (location_t loc)
734 {
735   inline_stack stack;
736   get_inline_stack (loc, &stack);
737   if (stack.length () == 0)
738     return;
739   function_instance *s = get_function_instance_by_inline_stack (stack);
740   if (s == NULL)
741     return;
742   s->mark_annotated (stack[0].second);
743 }
744 
745 /* Update value profile INFO for STMT from the inlined indirect callsite.
746    Return true if INFO is updated.  */
747 
748 bool
749 autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
750                                                    count_info *info)
751 {
752   if (dump_file)
753     {
754       fprintf (dump_file, "Checking indirect call -> direct call ");
755       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
756     }
757 
758   if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
759     {
760       if (dump_file)
761 	fprintf (dump_file, " good locus\n");
762       return false;
763     }
764 
765   count_info old_info;
766   get_count_info (stmt, &old_info);
767   gcov_type total = 0;
768   for (icall_target_map::const_iterator iter = old_info.targets.begin ();
769        iter != old_info.targets.end (); ++iter)
770     total += iter->second;
771 
772   /* Program behavior changed, original promoted (and inlined) target is not
773      hot any more. Will avoid promote the original target.
774 
775      To check if original promoted target is still hot, we check the total
776      count of the unpromoted targets (stored in old_info). If it is no less
777      than half of the callsite count (stored in INFO), the original promoted
778      target is considered not hot any more.  */
779   if (total >= info->count / 2)
780     {
781       if (dump_file)
782 	fprintf (dump_file, " not hot anymore %ld >= %ld",
783 		 (long)total,
784 		 (long)info->count /2);
785       return false;
786     }
787 
788   inline_stack stack;
789   get_inline_stack (gimple_location (stmt), &stack);
790   if (stack.length () == 0)
791     {
792       if (dump_file)
793 	fprintf (dump_file, " no inline stack\n");
794       return false;
795     }
796   function_instance *s = get_function_instance_by_inline_stack (stack);
797   if (s == NULL)
798     {
799       if (dump_file)
800 	fprintf (dump_file, " function not found in inline stack\n");
801       return false;
802     }
803   icall_target_map map;
804   if (s->find_icall_target_map (stmt, &map) == 0)
805     {
806       if (dump_file)
807 	fprintf (dump_file, " no target map\n");
808       return false;
809     }
810   for (icall_target_map::const_iterator iter = map.begin ();
811        iter != map.end (); ++iter)
812     info->targets[iter->first] = iter->second;
813   if (dump_file)
814     fprintf (dump_file, " looks good\n");
815   return true;
816 }
817 
818 /* Find total count of the callee of EDGE.  */
819 
820 gcov_type
821 autofdo_source_profile::get_callsite_total_count (
822     struct cgraph_edge *edge) const
823 {
824   inline_stack stack;
825   stack.safe_push (std::make_pair (edge->callee->decl, 0));
826   get_inline_stack (gimple_location (edge->call_stmt), &stack);
827 
828   function_instance *s = get_function_instance_by_inline_stack (stack);
829   if (s == NULL
830       || afdo_string_table->get_index (IDENTIFIER_POINTER (
831              DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
832     return 0;
833 
834   return s->total_count ();
835 }
836 
837 /* Read AutoFDO profile and returns TRUE on success.  */
838 
839 /* source profile format:
840 
841    GCOV_TAG_AFDO_FUNCTION: 4 bytes
842    LENGTH: 4 bytes
843    NUM_FUNCTIONS: 4 bytes
844    FUNCTION_INSTANCE_1
845    FUNCTION_INSTANCE_2
846    ...
847    FUNCTION_INSTANCE_N.  */
848 
849 bool
850 autofdo_source_profile::read ()
851 {
852   if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
853     {
854       inform (0, "Not expected TAG.");
855       return false;
856     }
857 
858   /* Skip the length of the section.  */
859   gcov_read_unsigned ();
860 
861   /* Read in the function/callsite profile, and store it in local
862      data structure.  */
863   unsigned function_num = gcov_read_unsigned ();
864   for (unsigned i = 0; i < function_num; i++)
865     {
866       function_instance::function_instance_stack stack;
867       function_instance *s = function_instance::read_function_instance (
868           &stack, gcov_read_counter ());
869       afdo_profile_info->sum_all += s->total_count ();
870       map_[s->name ()] = s;
871     }
872   return true;
873 }
874 
875 /* Return the function_instance in the profile that correspond to the
876    inline STACK.  */
877 
878 function_instance *
879 autofdo_source_profile::get_function_instance_by_inline_stack (
880     const inline_stack &stack) const
881 {
882   name_function_instance_map::const_iterator iter = map_.find (
883       afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
884   if (iter == map_.end())
885     return NULL;
886   function_instance *s = iter->second;
887   for (unsigned i = stack.length() - 1; i > 0; i--)
888     {
889       s = s->get_function_instance_by_decl (
890           stack[i].second, stack[i - 1].first);
891       if (s == NULL)
892         return NULL;
893     }
894   return s;
895 }
896 
897 /* Module profile is only used by LIPO. Here we simply ignore it.  */
898 
899 static void
900 fake_read_autofdo_module_profile ()
901 {
902   /* Read in the module info.  */
903   gcov_read_unsigned ();
904 
905   /* Skip the length of the section.  */
906   gcov_read_unsigned ();
907 
908   /* Read in the file name table.  */
909   unsigned total_module_num = gcov_read_unsigned ();
910   gcc_assert (total_module_num == 0);
911 }
912 
913 /* Read data from profile data file.  */
914 
915 static void
916 read_profile (void)
917 {
918   if (gcov_open (auto_profile_file, 1) == 0)
919     {
920       error ("cannot open profile file %s", auto_profile_file);
921       return;
922     }
923 
924   if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
925     {
926       error ("AutoFDO profile magic number does not match");
927       return;
928     }
929 
930   /* Skip the version number.  */
931   unsigned version = gcov_read_unsigned ();
932   if (version != AUTO_PROFILE_VERSION)
933     {
934       error ("AutoFDO profile version %u does match %u",
935 	     version, AUTO_PROFILE_VERSION);
936       return;
937     }
938 
939   /* Skip the empty integer.  */
940   gcov_read_unsigned ();
941 
942   /* string_table.  */
943   afdo_string_table = new string_table ();
944   if (!afdo_string_table->read())
945     {
946       error ("cannot read string table from %s", auto_profile_file);
947       return;
948     }
949 
950   /* autofdo_source_profile.  */
951   afdo_source_profile = autofdo_source_profile::create ();
952   if (afdo_source_profile == NULL)
953     {
954       error ("cannot read function profile from %s", auto_profile_file);
955       return;
956     }
957 
958   /* autofdo_module_profile.  */
959   fake_read_autofdo_module_profile ();
960 
961   /* Read in the working set.  */
962   if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
963     {
964       error ("cannot read working set from %s", auto_profile_file);
965       return;
966     }
967 
968   /* Skip the length of the section.  */
969   gcov_read_unsigned ();
970   gcov_working_set_t set[128];
971   for (unsigned i = 0; i < 128; i++)
972     {
973       set[i].num_counters = gcov_read_unsigned ();
974       set[i].min_counter = gcov_read_counter ();
975     }
976   add_working_set (set);
977 }
978 
979 /* From AutoFDO profiles, find values inside STMT for that we want to measure
980    histograms for indirect-call optimization.
981 
982    This function is actually served for 2 purposes:
983      * before annotation, we need to mark histogram, promote and inline
984      * after annotation, we just need to mark, and let follow-up logic to
985        decide if it needs to promote and inline.  */
986 
987 static void
988 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
989                     bool transform)
990 {
991   gimple *gs = gsi_stmt (*gsi);
992   tree callee;
993 
994   if (map.size () == 0)
995     return;
996   gcall *stmt = dyn_cast <gcall *> (gs);
997   if ((!stmt) || gimple_call_fndecl (stmt) != NULL_TREE)
998     return;
999 
1000   callee = gimple_call_fn (stmt);
1001 
1002   histogram_value hist = gimple_alloc_histogram_value (
1003       cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
1004   hist->n_counters = 3;
1005   hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
1006   gimple_add_histogram_value (cfun, stmt, hist);
1007 
1008   gcov_type total = 0;
1009   icall_target_map::const_iterator max_iter = map.end ();
1010 
1011   for (icall_target_map::const_iterator iter = map.begin ();
1012        iter != map.end (); ++iter)
1013     {
1014       total += iter->second;
1015       if (max_iter == map.end () || max_iter->second < iter->second)
1016         max_iter = iter;
1017     }
1018 
1019   hist->hvalue.counters[0]
1020       = (unsigned long long)afdo_string_table->get_name (max_iter->first);
1021   hist->hvalue.counters[1] = max_iter->second;
1022   hist->hvalue.counters[2] = total;
1023 
1024   if (!transform)
1025     return;
1026 
1027   struct cgraph_edge *indirect_edge
1028       = cgraph_node::get (current_function_decl)->get_edge (stmt);
1029   struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
1030       get_identifier ((const char *) hist->hvalue.counters[0]));
1031 
1032   if (dump_file)
1033     {
1034       fprintf (dump_file, "Indirect call -> direct call ");
1035       print_generic_expr (dump_file, callee, TDF_SLIM);
1036       fprintf (dump_file, " => ");
1037       print_generic_expr (dump_file, direct_call->decl, TDF_SLIM);
1038     }
1039 
1040   if (direct_call == NULL || !check_ic_target (stmt, direct_call))
1041     {
1042       if (dump_file)
1043         fprintf (dump_file, " not transforming\n");
1044       return;
1045     }
1046   if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
1047     {
1048       if (dump_file)
1049         fprintf (dump_file, " no declaration\n");
1050       return;
1051     }
1052 
1053   if (dump_file)
1054     {
1055       fprintf (dump_file, " transformation on insn ");
1056       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1057       fprintf (dump_file, "\n");
1058     }
1059 
1060   struct cgraph_edge *new_edge
1061       = indirect_edge->make_speculative (direct_call, 0, 0);
1062   new_edge->redirect_call_stmt_to_callee ();
1063   gimple_remove_histogram_value (cfun, stmt, hist);
1064   inline_call (new_edge, true, NULL, NULL, false);
1065 }
1066 
1067 /* From AutoFDO profiles, find values inside STMT for that we want to measure
1068    histograms and adds them to list VALUES.  */
1069 
1070 static void
1071 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
1072           bool transform)
1073 {
1074   afdo_indirect_call (gsi, map, transform);
1075 }
1076 
1077 typedef std::set<basic_block> bb_set;
1078 typedef std::set<edge> edge_set;
1079 
1080 static bool
1081 is_bb_annotated (const basic_block bb, const bb_set &annotated)
1082 {
1083   return annotated.find (bb) != annotated.end ();
1084 }
1085 
1086 static void
1087 set_bb_annotated (basic_block bb, bb_set *annotated)
1088 {
1089   annotated->insert (bb);
1090 }
1091 
1092 static bool
1093 is_edge_annotated (const edge e, const edge_set &annotated)
1094 {
1095   return annotated.find (e) != annotated.end ();
1096 }
1097 
1098 static void
1099 set_edge_annotated (edge e, edge_set *annotated)
1100 {
1101   annotated->insert (e);
1102 }
1103 
1104 /* For a given BB, set its execution count. Attach value profile if a stmt
1105    is not in PROMOTED, because we only want to promote an indirect call once.
1106    Return TRUE if BB is annotated.  */
1107 
1108 static bool
1109 afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1110 {
1111   gimple_stmt_iterator gsi;
1112   edge e;
1113   edge_iterator ei;
1114   gcov_type max_count = 0;
1115   bool has_annotated = false;
1116 
1117   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1118     {
1119       count_info info;
1120       gimple *stmt = gsi_stmt (gsi);
1121       if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1122         continue;
1123       if (afdo_source_profile->get_count_info (stmt, &info))
1124         {
1125           if (info.count > max_count)
1126             max_count = info.count;
1127           has_annotated = true;
1128           if (info.targets.size () > 0
1129               && promoted.find (stmt) == promoted.end ())
1130             afdo_vpt (&gsi, info.targets, false);
1131         }
1132     }
1133 
1134   if (!has_annotated)
1135     return false;
1136 
1137   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1138     afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1139   for (gphi_iterator gpi = gsi_start_phis (bb);
1140        !gsi_end_p (gpi);
1141        gsi_next (&gpi))
1142     {
1143       gphi *phi = gpi.phi ();
1144       size_t i;
1145       for (i = 0; i < gimple_phi_num_args (phi); i++)
1146         afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1147     }
1148   FOR_EACH_EDGE (e, ei, bb->succs)
1149   afdo_source_profile->mark_annotated (e->goto_locus);
1150 
1151   bb->count = max_count;
1152   return true;
1153 }
1154 
1155 /* BB1 and BB2 are in an equivalent class iff:
1156    1. BB1 dominates BB2.
1157    2. BB2 post-dominates BB1.
1158    3. BB1 and BB2 are in the same loop nest.
1159    This function finds the equivalent class for each basic block, and
1160    stores a pointer to the first BB in its equivalent class. Meanwhile,
1161    set bb counts for the same equivalent class to be idenical. Update
1162    ANNOTATED_BB for the first BB in its equivalent class.  */
1163 
1164 static void
1165 afdo_find_equiv_class (bb_set *annotated_bb)
1166 {
1167   basic_block bb;
1168 
1169   FOR_ALL_BB_FN (bb, cfun)
1170   bb->aux = NULL;
1171 
1172   FOR_ALL_BB_FN (bb, cfun)
1173   {
1174     vec<basic_block> dom_bbs;
1175     basic_block bb1;
1176     int i;
1177 
1178     if (bb->aux != NULL)
1179       continue;
1180     bb->aux = bb;
1181     dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1182     FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1183       if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1184 	  && bb1->loop_father == bb->loop_father)
1185 	{
1186 	  bb1->aux = bb;
1187 	  if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1188 	    {
1189 	      bb->count = bb1->count;
1190 	      set_bb_annotated (bb, annotated_bb);
1191 	    }
1192 	}
1193     dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
1194     FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1195       if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1196 	  && bb1->loop_father == bb->loop_father)
1197 	{
1198 	  bb1->aux = bb;
1199 	  if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1200 	    {
1201 	      bb->count = bb1->count;
1202 	      set_bb_annotated (bb, annotated_bb);
1203 	    }
1204 	}
1205   }
1206 }
1207 
1208 /* If a basic block's count is known, and only one of its in/out edges' count
1209    is unknown, its count can be calculated. Meanwhile, if all of the in/out
1210    edges' counts are known, then the basic block's unknown count can also be
1211    calculated.
1212    IS_SUCC is true if out edges of a basic blocks are examined.
1213    Update ANNOTATED_BB and ANNOTATED_EDGE accordingly.
1214    Return TRUE if any basic block/edge count is changed.  */
1215 
1216 static bool
1217 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb,
1218                      edge_set *annotated_edge)
1219 {
1220   basic_block bb;
1221   bool changed = false;
1222 
1223   FOR_EACH_BB_FN (bb, cfun)
1224   {
1225     edge e, unknown_edge = NULL;
1226     edge_iterator ei;
1227     int num_unknown_edge = 0;
1228     gcov_type total_known_count = 0;
1229 
1230     FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1231       if (!is_edge_annotated (e, *annotated_edge))
1232 	num_unknown_edge++, unknown_edge = e;
1233       else
1234 	total_known_count += e->count;
1235 
1236     if (num_unknown_edge == 0)
1237       {
1238         if (total_known_count > bb->count)
1239           {
1240             bb->count = total_known_count;
1241             changed = true;
1242           }
1243         if (!is_bb_annotated (bb, *annotated_bb))
1244           {
1245             set_bb_annotated (bb, annotated_bb);
1246             changed = true;
1247           }
1248       }
1249     else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1250       {
1251         if (bb->count >= total_known_count)
1252           unknown_edge->count = bb->count - total_known_count;
1253         else
1254           unknown_edge->count = 0;
1255         set_edge_annotated (unknown_edge, annotated_edge);
1256         changed = true;
1257       }
1258   }
1259   return changed;
1260 }
1261 
1262 /* Special propagation for circuit expressions. Because GCC translates
1263    control flow into data flow for circuit expressions. E.g.
1264    BB1:
1265    if (a && b)
1266      BB2
1267    else
1268      BB3
1269 
1270    will be translated into:
1271 
1272    BB1:
1273      if (a)
1274        goto BB.t1
1275      else
1276        goto BB.t3
1277    BB.t1:
1278      if (b)
1279        goto BB.t2
1280      else
1281        goto BB.t3
1282    BB.t2:
1283      goto BB.t3
1284    BB.t3:
1285      tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1286      if (tmp)
1287        goto BB2
1288      else
1289        goto BB3
1290 
1291    In this case, we need to propagate through PHI to determine the edge
1292    count of BB1->BB.t1, BB.t1->BB.t2.
1293    Update ANNOTATED_EDGE accordingly.  */
1294 
1295 static void
1296 afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge)
1297 {
1298   basic_block bb;
1299   FOR_ALL_BB_FN (bb, cfun)
1300   {
1301     gimple *def_stmt;
1302     tree cmp_rhs, cmp_lhs;
1303     gimple *cmp_stmt = last_stmt (bb);
1304     edge e;
1305     edge_iterator ei;
1306 
1307     if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1308       continue;
1309     cmp_rhs = gimple_cond_rhs (cmp_stmt);
1310     cmp_lhs = gimple_cond_lhs (cmp_stmt);
1311     if (!TREE_CONSTANT (cmp_rhs)
1312         || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1313       continue;
1314     if (TREE_CODE (cmp_lhs) != SSA_NAME)
1315       continue;
1316     if (!is_bb_annotated (bb, annotated_bb))
1317       continue;
1318     def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1319     while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
1320            && gimple_assign_single_p (def_stmt)
1321            && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
1322       def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
1323     if (!def_stmt)
1324       continue;
1325     gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
1326     if (!phi_stmt)
1327       continue;
1328     FOR_EACH_EDGE (e, ei, bb->succs)
1329     {
1330       unsigned i, total = 0;
1331       edge only_one;
1332       bool check_value_one = (((integer_onep (cmp_rhs))
1333                                ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1334                               ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1335       if (!is_edge_annotated (e, *annotated_edge))
1336         continue;
1337       for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1338         {
1339           tree val = gimple_phi_arg_def (phi_stmt, i);
1340           edge ep = gimple_phi_arg_edge (phi_stmt, i);
1341 
1342           if (!TREE_CONSTANT (val)
1343               || !(integer_zerop (val) || integer_onep (val)))
1344             continue;
1345           if (check_value_one ^ integer_onep (val))
1346             continue;
1347           total++;
1348           only_one = ep;
1349           if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge))
1350             {
1351               ep->probability = 0;
1352               ep->count = 0;
1353               set_edge_annotated (ep, annotated_edge);
1354             }
1355         }
1356       if (total == 1 && !is_edge_annotated (only_one, *annotated_edge))
1357         {
1358           only_one->probability = e->probability;
1359           only_one->count = e->count;
1360           set_edge_annotated (only_one, annotated_edge);
1361         }
1362     }
1363   }
1364 }
1365 
1366 /* Propagate the basic block count and edge count on the control flow
1367    graph. We do the propagation iteratively until stablize.  */
1368 
1369 static void
1370 afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge)
1371 {
1372   basic_block bb;
1373   bool changed = true;
1374   int i = 0;
1375 
1376   FOR_ALL_BB_FN (bb, cfun)
1377   {
1378     bb->count = ((basic_block)bb->aux)->count;
1379     if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb))
1380       set_bb_annotated (bb, annotated_bb);
1381   }
1382 
1383   while (changed && i++ < 10)
1384     {
1385       changed = false;
1386 
1387       if (afdo_propagate_edge (true, annotated_bb, annotated_edge))
1388         changed = true;
1389       if (afdo_propagate_edge (false, annotated_bb, annotated_edge))
1390         changed = true;
1391       afdo_propagate_circuit (*annotated_bb, annotated_edge);
1392     }
1393 }
1394 
1395 /* Propagate counts on control flow graph and calculate branch
1396    probabilities.  */
1397 
1398 static void
1399 afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge)
1400 {
1401   basic_block bb;
1402   bool has_sample = false;
1403 
1404   FOR_EACH_BB_FN (bb, cfun)
1405   {
1406     if (bb->count > 0)
1407       {
1408 	has_sample = true;
1409 	break;
1410       }
1411   }
1412 
1413   if (!has_sample)
1414     return;
1415 
1416   calculate_dominance_info (CDI_POST_DOMINATORS);
1417   calculate_dominance_info (CDI_DOMINATORS);
1418   loop_optimizer_init (0);
1419 
1420   afdo_find_equiv_class (annotated_bb);
1421   afdo_propagate (annotated_bb, annotated_edge);
1422 
1423   FOR_EACH_BB_FN (bb, cfun)
1424   {
1425     edge e;
1426     edge_iterator ei;
1427     int num_unknown_succ = 0;
1428     gcov_type total_count = 0;
1429 
1430     FOR_EACH_EDGE (e, ei, bb->succs)
1431     {
1432       if (!is_edge_annotated (e, *annotated_edge))
1433         num_unknown_succ++;
1434       else
1435         total_count += e->count;
1436     }
1437     if (num_unknown_succ == 0 && total_count > 0)
1438       {
1439         FOR_EACH_EDGE (e, ei, bb->succs)
1440         e->probability = (double)e->count * REG_BR_PROB_BASE / total_count;
1441       }
1442   }
1443   FOR_ALL_BB_FN (bb, cfun)
1444   {
1445     edge e;
1446     edge_iterator ei;
1447 
1448     FOR_EACH_EDGE (e, ei, bb->succs)
1449       e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE;
1450     bb->aux = NULL;
1451   }
1452 
1453   loop_optimizer_finalize ();
1454   free_dominance_info (CDI_DOMINATORS);
1455   free_dominance_info (CDI_POST_DOMINATORS);
1456 }
1457 
1458 /* Perform value profile transformation using AutoFDO profile. Add the
1459    promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1460    indirect call promoted.  */
1461 
1462 static bool
1463 afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1464 {
1465   basic_block bb;
1466   if (afdo_source_profile->get_function_instance_by_decl (
1467           current_function_decl) == NULL)
1468     return false;
1469 
1470   compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1471 
1472   bool has_vpt = false;
1473   FOR_EACH_BB_FN (bb, cfun)
1474   {
1475     if (!has_indirect_call (bb))
1476       continue;
1477     gimple_stmt_iterator gsi;
1478 
1479     gcov_type bb_count = 0;
1480     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1481       {
1482         count_info info;
1483 	gimple *stmt = gsi_stmt (gsi);
1484         if (afdo_source_profile->get_count_info (stmt, &info))
1485           bb_count = MAX (bb_count, info.count);
1486       }
1487 
1488     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1489       {
1490         gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
1491         /* IC_promotion and early_inline_2 is done in multiple iterations.
1492            No need to promoted the stmt if its in promoted_stmts (means
1493            it is already been promoted in the previous iterations).  */
1494         if ((!stmt) || gimple_call_fn (stmt) == NULL
1495             || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1496             || promoted_stmts->find (stmt) != promoted_stmts->end ())
1497           continue;
1498 
1499         count_info info;
1500         afdo_source_profile->get_count_info (stmt, &info);
1501         info.count = bb_count;
1502         if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1503           {
1504             /* Promote the indirect call and update the promoted_stmts.  */
1505             promoted_stmts->insert (stmt);
1506             afdo_vpt (&gsi, info.targets, true);
1507             has_vpt = true;
1508           }
1509       }
1510   }
1511 
1512   if (has_vpt)
1513     {
1514       unsigned todo = optimize_inline_calls (current_function_decl);
1515       if (todo & TODO_update_ssa_any)
1516        update_ssa (TODO_update_ssa);
1517       return true;
1518     }
1519 
1520   return false;
1521 }
1522 
1523 /* Annotate auto profile to the control flow graph. Do not annotate value
1524    profile for stmts in PROMOTED_STMTS.  */
1525 
1526 static void
1527 afdo_annotate_cfg (const stmt_set &promoted_stmts)
1528 {
1529   basic_block bb;
1530   bb_set annotated_bb;
1531   edge_set annotated_edge;
1532   const function_instance *s
1533       = afdo_source_profile->get_function_instance_by_decl (
1534           current_function_decl);
1535 
1536   if (s == NULL)
1537     return;
1538   cgraph_node::get (current_function_decl)->count = s->head_count ();
1539   ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count ();
1540   gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1541 
1542   FOR_EACH_BB_FN (bb, cfun)
1543   {
1544     edge e;
1545     edge_iterator ei;
1546 
1547     bb->count = 0;
1548     FOR_EACH_EDGE (e, ei, bb->succs)
1549       e->count = 0;
1550 
1551     if (afdo_set_bb_count (bb, promoted_stmts))
1552       set_bb_annotated (bb, &annotated_bb);
1553     if (bb->count > max_count)
1554       max_count = bb->count;
1555   }
1556   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1557       > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1558     {
1559       ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1560           = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1561       set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1562     }
1563   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1564       > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1565     {
1566       EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1567           = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1568       set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1569     }
1570   afdo_source_profile->mark_annotated (
1571       DECL_SOURCE_LOCATION (current_function_decl));
1572   afdo_source_profile->mark_annotated (cfun->function_start_locus);
1573   afdo_source_profile->mark_annotated (cfun->function_end_locus);
1574   if (max_count > 0)
1575     {
1576       afdo_calculate_branch_prob (&annotated_bb, &annotated_edge);
1577       counts_to_freqs ();
1578       profile_status_for_fn (cfun) = PROFILE_READ;
1579     }
1580   if (flag_value_profile_transformations)
1581     {
1582       gimple_value_profile_transformations ();
1583       free_dominance_info (CDI_DOMINATORS);
1584       free_dominance_info (CDI_POST_DOMINATORS);
1585       update_ssa (TODO_update_ssa);
1586     }
1587 }
1588 
1589 /* Wrapper function to invoke early inliner.  */
1590 
1591 static void
1592 early_inline ()
1593 {
1594   compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1595   unsigned todo = early_inliner (cfun);
1596   if (todo & TODO_update_ssa_any)
1597     update_ssa (TODO_update_ssa);
1598 }
1599 
1600 /* Use AutoFDO profile to annoate the control flow graph.
1601    Return the todo flag.  */
1602 
1603 static unsigned int
1604 auto_profile (void)
1605 {
1606   struct cgraph_node *node;
1607 
1608   if (symtab->state == FINISHED)
1609     return 0;
1610 
1611   init_node_map (true);
1612   profile_info = autofdo::afdo_profile_info;
1613 
1614   FOR_EACH_FUNCTION (node)
1615   {
1616     if (!gimple_has_body_p (node->decl))
1617       continue;
1618 
1619     /* Don't profile functions produced for builtin stuff.  */
1620     if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1621       continue;
1622 
1623     push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1624 
1625     /* First do indirect call promotion and early inline to make the
1626        IR match the profiled binary before actual annotation.
1627 
1628        This is needed because an indirect call might have been promoted
1629        and inlined in the profiled binary. If we do not promote and
1630        inline these indirect calls before annotation, the profile for
1631        these promoted functions will be lost.
1632 
1633        e.g. foo() --indirect_call--> bar()
1634        In profiled binary, the callsite is promoted and inlined, making
1635        the profile look like:
1636 
1637        foo: {
1638          loc_foo_1: count_1
1639          bar@loc_foo_2: {
1640            loc_bar_1: count_2
1641            loc_bar_2: count_3
1642          }
1643        }
1644 
1645        Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1646        If we perform annotation on it, the profile inside bar@loc_foo2
1647        will be wasted.
1648 
1649        To avoid this, we promote loc_foo_2 and inline the promoted bar
1650        function before annotation, so the profile inside bar@loc_foo2
1651        will be useful.  */
1652     autofdo::stmt_set promoted_stmts;
1653     for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
1654       {
1655         if (!flag_value_profile_transformations
1656             || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1657           break;
1658         early_inline ();
1659       }
1660 
1661     early_inline ();
1662     autofdo::afdo_annotate_cfg (promoted_stmts);
1663     compute_function_frequency ();
1664 
1665     /* Local pure-const may imply need to fixup the cfg.  */
1666     if (execute_fixup_cfg () & TODO_cleanup_cfg)
1667       cleanup_tree_cfg ();
1668 
1669     free_dominance_info (CDI_DOMINATORS);
1670     free_dominance_info (CDI_POST_DOMINATORS);
1671     cgraph_edge::rebuild_edges ();
1672     compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1673     pop_cfun ();
1674   }
1675 
1676   return TODO_rebuild_cgraph_edges;
1677 }
1678 } /* namespace autofdo.  */
1679 
1680 /* Read the profile from the profile data file.  */
1681 
1682 void
1683 read_autofdo_file (void)
1684 {
1685   if (auto_profile_file == NULL)
1686     auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1687 
1688   autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc (
1689       1, sizeof (struct gcov_ctr_summary));
1690   autofdo::afdo_profile_info->runs = 1;
1691   autofdo::afdo_profile_info->sum_max = 0;
1692   autofdo::afdo_profile_info->sum_all = 0;
1693 
1694   /* Read the profile from the profile file.  */
1695   autofdo::read_profile ();
1696 }
1697 
1698 /* Free the resources.  */
1699 
1700 void
1701 end_auto_profile (void)
1702 {
1703   delete autofdo::afdo_source_profile;
1704   delete autofdo::afdo_string_table;
1705   profile_info = NULL;
1706 }
1707 
1708 /* Returns TRUE if EDGE is hot enough to be inlined early.  */
1709 
1710 bool
1711 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1712 {
1713   gcov_type count
1714       = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1715 
1716   if (count > 0)
1717     {
1718       bool is_hot;
1719       const struct gcov_ctr_summary *saved_profile_info = profile_info;
1720       /* At early inline stage, profile_info is not set yet. We need to
1721          temporarily set it to afdo_profile_info to calculate hotness.  */
1722       profile_info = autofdo::afdo_profile_info;
1723       is_hot = maybe_hot_count_p (NULL, count);
1724       profile_info = saved_profile_info;
1725       return is_hot;
1726     }
1727 
1728   return false;
1729 }
1730 
1731 namespace
1732 {
1733 
1734 const pass_data pass_data_ipa_auto_profile = {
1735   SIMPLE_IPA_PASS, "afdo", /* name */
1736   OPTGROUP_NONE,           /* optinfo_flags */
1737   TV_IPA_AUTOFDO,          /* tv_id */
1738   0,                       /* properties_required */
1739   0,                       /* properties_provided */
1740   0,                       /* properties_destroyed */
1741   0,                       /* todo_flags_start */
1742   0,                       /* todo_flags_finish */
1743 };
1744 
1745 class pass_ipa_auto_profile : public simple_ipa_opt_pass
1746 {
1747 public:
1748   pass_ipa_auto_profile (gcc::context *ctxt)
1749       : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1750   {
1751   }
1752 
1753   /* opt_pass methods: */
1754   virtual bool
1755   gate (function *)
1756   {
1757     return flag_auto_profile;
1758   }
1759   virtual unsigned int
1760   execute (function *)
1761   {
1762     return autofdo::auto_profile ();
1763   }
1764 }; // class pass_ipa_auto_profile
1765 
1766 } // anon namespace
1767 
1768 simple_ipa_opt_pass *
1769 make_pass_ipa_auto_profile (gcc::context *ctxt)
1770 {
1771   return new pass_ipa_auto_profile (ctxt);
1772 }
1773