xref: /netbsd-src/external/gpl3/gcc/dist/gcc/auto-profile.cc (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /* Read and annotate call graph profile from the auto profile data file.
2    Copyright (C) 2014-2022 Free Software Foundation, Inc.
3    Contributed by Dehao Chen (dehao@google.com)
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #define INCLUDE_MAP
23 #define INCLUDE_SET
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "predict.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "ssa.h"
33 #include "cgraph.h"
34 #include "gcov-io.h"
35 #include "diagnostic-core.h"
36 #include "profile.h"
37 #include "langhooks.h"
38 #include "cfgloop.h"
39 #include "tree-cfg.h"
40 #include "tree-cfgcleanup.h"
41 #include "tree-into-ssa.h"
42 #include "gimple-iterator.h"
43 #include "value-prof.h"
44 #include "symbol-summary.h"
45 #include "ipa-prop.h"
46 #include "ipa-fnsummary.h"
47 #include "ipa-inline.h"
48 #include "tree-inline.h"
49 #include "auto-profile.h"
50 #include "tree-pretty-print.h"
51 #include "gimple-pretty-print.h"
52 
53 /* The following routines implements AutoFDO optimization.
54 
55    This optimization uses sampling profiles to annotate basic block counts
56    and uses heuristics to estimate branch probabilities.
57 
58    There are three phases in AutoFDO:
59 
60    Phase 1: Read profile from the profile data file.
61      The following info is read from the profile datafile:
62         * string_table: a map between function name and its index.
63         * autofdo_source_profile: a map from function_instance name to
64           function_instance. This is represented as a forest of
65           function_instances.
66         * WorkingSet: a histogram of how many instructions are covered for a
67           given percentage of total cycles. This is describing the binary
68           level information (not source level). This info is used to help
69           decide if we want aggressive optimizations that could increase
70           code footprint (e.g. loop unroll etc.)
71      A function instance is an instance of function that could either be a
72      standalone symbol, or a clone of a function that is inlined into another
73      function.
74 
75    Phase 2: Early inline + value profile transformation.
76      Early inline uses autofdo_source_profile to find if a callsite is:
77         * inlined in the profiled binary.
78         * callee body is hot in the profiling run.
79      If both condition satisfies, early inline will inline the callsite
80      regardless of the code growth.
81      Phase 2 is an iterative process. During each iteration, we also check
82      if an indirect callsite is promoted and inlined in the profiling run.
83      If yes, vpt will happen to force promote it and in the next iteration,
84      einline will inline the promoted callsite in the next iteration.
85 
86    Phase 3: Annotate control flow graph.
87      AutoFDO uses a separate pass to:
88         * Annotate basic block count
89         * Estimate branch probability
90 
91    After the above 3 phases, all profile is readily annotated on the GCC IR.
92    AutoFDO tries to reuse all FDO infrastructure as much as possible to make
93    use of the profile. E.g. it uses existing mechanism to calculate the basic
94    block/edge frequency, as well as the cgraph node/edge count.
95 */
96 
97 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
98 #define AUTO_PROFILE_VERSION 2
99 
100 namespace autofdo
101 {
102 
103 /* Intermediate edge info used when propagating AutoFDO profile information.
104    We can't edge->count() directly since it's computed from edge's probability
105    while probability is yet not decided during propagation.  */
106 #define AFDO_EINFO(e)                     ((class edge_info *) e->aux)
107 class edge_info
108 {
109 public:
edge_info()110   edge_info () : count_ (profile_count::zero ().afdo ()), annotated_ (false) {}
is_annotated() const111   bool is_annotated () const { return annotated_; }
set_annotated()112   void set_annotated () { annotated_ = true; }
get_count() const113   profile_count get_count () const { return count_; }
set_count(profile_count count)114   void set_count (profile_count count) { count_ = count; }
115 private:
116   profile_count count_;
117   bool annotated_;
118 };
119 
120 /* Represent a source location: (function_decl, lineno).  */
121 typedef std::pair<tree, unsigned> decl_lineno;
122 
123 /* Represent an inline stack. vector[0] is the leaf node.  */
124 typedef auto_vec<decl_lineno> inline_stack;
125 
126 /* String array that stores function names.  */
127 typedef auto_vec<char *> string_vector;
128 
129 /* Map from function name's index in string_table to target's
130    execution count.  */
131 typedef std::map<unsigned, gcov_type> icall_target_map;
132 
133 /* Set of gimple stmts. Used to track if the stmt has already been promoted
134    to direct call.  */
135 typedef std::set<gimple *> stmt_set;
136 
137 /* Represent count info of an inline stack.  */
138 class count_info
139 {
140 public:
141   /* Sampled count of the inline stack.  */
142   gcov_type count;
143 
144   /* Map from indirect call target to its sample count.  */
145   icall_target_map targets;
146 
147   /* Whether this inline stack is already used in annotation.
148 
149      Each inline stack should only be used to annotate IR once.
150      This will be enforced when instruction-level discriminator
151      is supported.  */
152   bool annotated;
153 };
154 
155 /* operator< for "const char *".  */
156 struct string_compare
157 {
operator ()autofdo::string_compare158   bool operator()(const char *a, const char *b) const
159   {
160     return strcmp (a, b) < 0;
161   }
162 };
163 
164 /* Store a string array, indexed by string position in the array.  */
165 class string_table
166 {
167 public:
string_table()168   string_table ()
169   {}
170 
171   ~string_table ();
172 
173   /* For a given string, returns its index.  */
174   int get_index (const char *name) const;
175 
176   /* For a given decl, returns the index of the decl name.  */
177   int get_index_by_decl (tree decl) const;
178 
179   /* For a given index, returns the string.  */
180   const char *get_name (int index) const;
181 
182   /* Read profile, return TRUE on success.  */
183   bool read ();
184 
185 private:
186   typedef std::map<const char *, unsigned, string_compare> string_index_map;
187   string_vector vector_;
188   string_index_map map_;
189 };
190 
191 /* Profile of a function instance:
192      1. total_count of the function.
193      2. head_count (entry basic block count) of the function (only valid when
194         function is a top-level function_instance, i.e. it is the original copy
195         instead of the inlined copy).
196      3. map from source location (decl_lineno) to profile (count_info).
197      4. map from callsite to callee function_instance.  */
198 class function_instance
199 {
200 public:
201   typedef auto_vec<function_instance *> function_instance_stack;
202 
203   /* Read the profile and return a function_instance with head count as
204      HEAD_COUNT. Recursively read callsites to create nested function_instances
205      too. STACK is used to track the recursive creation process.  */
206   static function_instance *
207   read_function_instance (function_instance_stack *stack,
208                           gcov_type head_count);
209 
210   /* Recursively deallocate all callsites (nested function_instances).  */
211   ~function_instance ();
212 
213   /* Accessors.  */
214   int
name() const215   name () const
216   {
217     return name_;
218   }
219   gcov_type
total_count() const220   total_count () const
221   {
222     return total_count_;
223   }
224   gcov_type
head_count() const225   head_count () const
226   {
227     return head_count_;
228   }
229 
230   /* Traverse callsites of the current function_instance to find one at the
231      location of LINENO and callee name represented in DECL.  */
232   function_instance *get_function_instance_by_decl (unsigned lineno,
233                                                     tree decl) const;
234 
235   /* Store the profile info for LOC in INFO. Return TRUE if profile info
236      is found.  */
237   bool get_count_info (location_t loc, count_info *info) const;
238 
239   /* Read the inlined indirect call target profile for STMT and store it in
240      MAP, return the total count for all inlined indirect calls.  */
241   gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
242 
243   /* Sum of counts that is used during annotation.  */
244   gcov_type total_annotated_count () const;
245 
246   /* Mark LOC as annotated.  */
247   void mark_annotated (location_t loc);
248 
249 private:
250   /* Callsite, represented as (decl_lineno, callee_function_name_index).  */
251   typedef std::pair<unsigned, unsigned> callsite;
252 
253   /* Map from callsite to callee function_instance.  */
254   typedef std::map<callsite, function_instance *> callsite_map;
255 
function_instance(unsigned name,gcov_type head_count)256   function_instance (unsigned name, gcov_type head_count)
257       : name_ (name), total_count_ (0), head_count_ (head_count)
258   {
259   }
260 
261   /* Map from source location (decl_lineno) to profile (count_info).  */
262   typedef std::map<unsigned, count_info> position_count_map;
263 
264   /* function_instance name index in the string_table.  */
265   unsigned name_;
266 
267   /* Total sample count.  */
268   gcov_type total_count_;
269 
270   /* Entry BB's sample count.  */
271   gcov_type head_count_;
272 
273   /* Map from callsite location to callee function_instance.  */
274   callsite_map callsites;
275 
276   /* Map from source location to count_info.  */
277   position_count_map pos_counts;
278 };
279 
280 /* Profile for all functions.  */
281 class autofdo_source_profile
282 {
283 public:
284   static autofdo_source_profile *
create()285   create ()
286   {
287     autofdo_source_profile *map = new autofdo_source_profile ();
288 
289     if (map->read ())
290       return map;
291     delete map;
292     return NULL;
293   }
294 
295   ~autofdo_source_profile ();
296 
297   /* For a given DECL, returns the top-level function_instance.  */
298   function_instance *get_function_instance_by_decl (tree decl) const;
299 
300   /* Find count_info for a given gimple STMT. If found, store the count_info
301      in INFO and return true; otherwise return false.  */
302   bool get_count_info (gimple *stmt, count_info *info) const;
303 
304   /* Find total count of the callee of EDGE.  */
305   gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
306 
307   /* Update value profile INFO for STMT from the inlined indirect callsite.
308      Return true if INFO is updated.  */
309   bool update_inlined_ind_target (gcall *stmt, count_info *info);
310 
311   /* Mark LOC as annotated.  */
312   void mark_annotated (location_t loc);
313 
314 private:
315   /* Map from function_instance name index (in string_table) to
316      function_instance.  */
317   typedef std::map<unsigned, function_instance *> name_function_instance_map;
318 
autofdo_source_profile()319   autofdo_source_profile () {}
320 
321   /* Read AutoFDO profile and returns TRUE on success.  */
322   bool read ();
323 
324   /* Return the function_instance in the profile that correspond to the
325      inline STACK.  */
326   function_instance *
327   get_function_instance_by_inline_stack (const inline_stack &stack) const;
328 
329   name_function_instance_map map_;
330 };
331 
332 /* Store the strings read from the profile data file.  */
333 static string_table *afdo_string_table;
334 
335 /* Store the AutoFDO source profile.  */
336 static autofdo_source_profile *afdo_source_profile;
337 
338 /* gcov_summary structure to store the profile_info.  */
339 static gcov_summary *afdo_profile_info;
340 
341 /* Helper functions.  */
342 
343 /* Return the original name of NAME: strip the suffix that starts
344    with '.' Caller is responsible for freeing RET.  */
345 
346 static char *
get_original_name(const char * name)347 get_original_name (const char *name)
348 {
349   char *ret = xstrdup (name);
350   char *find = strchr (ret, '.');
351   if (find != NULL)
352     *find = 0;
353   return ret;
354 }
355 
356 /* Return the combined location, which is a 32bit integer in which
357    higher 16 bits stores the line offset of LOC to the start lineno
358    of DECL, The lower 16 bits stores the discriminator.  */
359 
360 static unsigned
get_combined_location(location_t loc,tree decl)361 get_combined_location (location_t loc, tree decl)
362 {
363   /* TODO: allow more bits for line and less bits for discriminator.  */
364   if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
365     warning_at (loc, OPT_Woverflow, "offset exceeds 16 bytes");
366   return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
367 }
368 
369 /* Return the function decl of a given lexical BLOCK.  */
370 
371 static tree
get_function_decl_from_block(tree block)372 get_function_decl_from_block (tree block)
373 {
374   if (!inlined_function_outer_scope_p (block))
375     return NULL_TREE;
376 
377   return BLOCK_ABSTRACT_ORIGIN (block);
378 }
379 
380 /* Store inline stack for STMT in STACK.  */
381 
382 static void
get_inline_stack(location_t locus,inline_stack * stack)383 get_inline_stack (location_t locus, inline_stack *stack)
384 {
385   if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
386     return;
387 
388   tree block = LOCATION_BLOCK (locus);
389   if (block && TREE_CODE (block) == BLOCK)
390     {
391       int level = 0;
392       for (block = BLOCK_SUPERCONTEXT (block);
393            block && (TREE_CODE (block) == BLOCK);
394            block = BLOCK_SUPERCONTEXT (block))
395         {
396           location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
397           if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
398             continue;
399 
400           tree decl = get_function_decl_from_block (block);
401           stack->safe_push (
402               std::make_pair (decl, get_combined_location (locus, decl)));
403           locus = tmp_locus;
404           level++;
405         }
406     }
407   stack->safe_push (
408       std::make_pair (current_function_decl,
409                       get_combined_location (locus, current_function_decl)));
410 }
411 
412 /* Return STMT's combined location, which is a 32bit integer in which
413    higher 16 bits stores the line offset of LOC to the start lineno
414    of DECL, The lower 16 bits stores the discriminator.  */
415 
416 static unsigned
get_relative_location_for_stmt(gimple * stmt)417 get_relative_location_for_stmt (gimple *stmt)
418 {
419   location_t locus = gimple_location (stmt);
420   if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
421     return UNKNOWN_LOCATION;
422 
423   for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
424        block = BLOCK_SUPERCONTEXT (block))
425     if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
426       return get_combined_location (locus,
427                                     get_function_decl_from_block (block));
428   return get_combined_location (locus, current_function_decl);
429 }
430 
431 /* Return true if BB contains indirect call.  */
432 
433 static bool
has_indirect_call(basic_block bb)434 has_indirect_call (basic_block bb)
435 {
436   gimple_stmt_iterator gsi;
437 
438   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
439     {
440       gimple *stmt = gsi_stmt (gsi);
441       if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
442           && (gimple_call_fn (stmt) == NULL
443               || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
444         return true;
445     }
446   return false;
447 }
448 
449 /* Member functions for string_table.  */
450 
451 /* Deconstructor.  */
452 
~string_table()453 string_table::~string_table ()
454 {
455   for (unsigned i = 0; i < vector_.length (); i++)
456     free (vector_[i]);
457 }
458 
459 
460 /* Return the index of a given function NAME. Return -1 if NAME is not
461    found in string table.  */
462 
463 int
get_index(const char * name) const464 string_table::get_index (const char *name) const
465 {
466   if (name == NULL)
467     return -1;
468   string_index_map::const_iterator iter = map_.find (name);
469   if (iter == map_.end ())
470     return -1;
471 
472   return iter->second;
473 }
474 
475 /* Return the index of a given function DECL. Return -1 if DECL is not
476    found in string table.  */
477 
478 int
get_index_by_decl(tree decl) const479 string_table::get_index_by_decl (tree decl) const
480 {
481   char *name
482       = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
483   int ret = get_index (name);
484   free (name);
485   if (ret != -1)
486     return ret;
487   ret = get_index (lang_hooks.dwarf_name (decl, 0));
488   if (ret != -1)
489     return ret;
490   if (DECL_FROM_INLINE (decl))
491     return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
492 
493   return -1;
494 }
495 
496 /* Return the function name of a given INDEX.  */
497 
498 const char *
get_name(int index) const499 string_table::get_name (int index) const
500 {
501   gcc_assert (index > 0 && index < (int)vector_.length ());
502   return vector_[index];
503 }
504 
505 /* Read the string table. Return TRUE if reading is successful.  */
506 
507 bool
read()508 string_table::read ()
509 {
510   if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
511     return false;
512   /* Skip the length of the section.  */
513   gcov_read_unsigned ();
514   /* Read in the file name table.  */
515   unsigned string_num = gcov_read_unsigned ();
516   for (unsigned i = 0; i < string_num; i++)
517     {
518       vector_.safe_push (get_original_name (gcov_read_string ()));
519       map_[vector_.last ()] = i;
520     }
521   return true;
522 }
523 
524 /* Member functions for function_instance.  */
525 
~function_instance()526 function_instance::~function_instance ()
527 {
528   for (callsite_map::iterator iter = callsites.begin ();
529        iter != callsites.end (); ++iter)
530     delete iter->second;
531 }
532 
533 /* Traverse callsites of the current function_instance to find one at the
534    location of LINENO and callee name represented in DECL.  */
535 
536 function_instance *
get_function_instance_by_decl(unsigned lineno,tree decl) const537 function_instance::get_function_instance_by_decl (unsigned lineno,
538                                                   tree decl) const
539 {
540   int func_name_idx = afdo_string_table->get_index_by_decl (decl);
541   if (func_name_idx != -1)
542     {
543       callsite_map::const_iterator ret
544           = callsites.find (std::make_pair (lineno, func_name_idx));
545       if (ret != callsites.end ())
546         return ret->second;
547     }
548   func_name_idx
549       = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
550   if (func_name_idx != -1)
551     {
552       callsite_map::const_iterator ret
553           = callsites.find (std::make_pair (lineno, func_name_idx));
554       if (ret != callsites.end ())
555         return ret->second;
556     }
557   if (DECL_FROM_INLINE (decl))
558     return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
559 
560   return NULL;
561 }
562 
563 /* Store the profile info for LOC in INFO. Return TRUE if profile info
564    is found.  */
565 
566 bool
get_count_info(location_t loc,count_info * info) const567 function_instance::get_count_info (location_t loc, count_info *info) const
568 {
569   position_count_map::const_iterator iter = pos_counts.find (loc);
570   if (iter == pos_counts.end ())
571     return false;
572   *info = iter->second;
573   return true;
574 }
575 
576 /* Mark LOC as annotated.  */
577 
578 void
mark_annotated(location_t loc)579 function_instance::mark_annotated (location_t loc)
580 {
581   position_count_map::iterator iter = pos_counts.find (loc);
582   if (iter == pos_counts.end ())
583     return;
584   iter->second.annotated = true;
585 }
586 
587 /* Read the inlined indirect call target profile for STMT and store it in
588    MAP, return the total count for all inlined indirect calls.  */
589 
590 gcov_type
find_icall_target_map(gcall * stmt,icall_target_map * map) const591 function_instance::find_icall_target_map (gcall *stmt,
592                                           icall_target_map *map) const
593 {
594   gcov_type ret = 0;
595   unsigned stmt_offset = get_relative_location_for_stmt (stmt);
596 
597   for (callsite_map::const_iterator iter = callsites.begin ();
598        iter != callsites.end (); ++iter)
599     {
600       unsigned callee = iter->second->name ();
601       /* Check if callsite location match the stmt.  */
602       if (iter->first.first != stmt_offset)
603         continue;
604       struct cgraph_node *node = cgraph_node::get_for_asmname (
605           get_identifier (afdo_string_table->get_name (callee)));
606       if (node == NULL)
607         continue;
608       (*map)[callee] = iter->second->total_count ();
609       ret += iter->second->total_count ();
610     }
611   return ret;
612 }
613 
614 /* Read the profile and create a function_instance with head count as
615    HEAD_COUNT. Recursively read callsites to create nested function_instances
616    too. STACK is used to track the recursive creation process.  */
617 
618 /* function instance profile format:
619 
620    ENTRY_COUNT: 8 bytes
621    NAME_INDEX: 4 bytes
622    NUM_POS_COUNTS: 4 bytes
623    NUM_CALLSITES: 4 byte
624    POS_COUNT_1:
625      POS_1_OFFSET: 4 bytes
626      NUM_TARGETS: 4 bytes
627      COUNT: 8 bytes
628      TARGET_1:
629        VALUE_PROFILE_TYPE: 4 bytes
630        TARGET_IDX: 8 bytes
631        COUNT: 8 bytes
632      TARGET_2
633      ...
634      TARGET_n
635    POS_COUNT_2
636    ...
637    POS_COUNT_N
638    CALLSITE_1:
639      CALLSITE_1_OFFSET: 4 bytes
640      FUNCTION_INSTANCE_PROFILE (nested)
641    CALLSITE_2
642    ...
643    CALLSITE_n.  */
644 
645 function_instance *
read_function_instance(function_instance_stack * stack,gcov_type head_count)646 function_instance::read_function_instance (function_instance_stack *stack,
647                                            gcov_type head_count)
648 {
649   unsigned name = gcov_read_unsigned ();
650   unsigned num_pos_counts = gcov_read_unsigned ();
651   unsigned num_callsites = gcov_read_unsigned ();
652   function_instance *s = new function_instance (name, head_count);
653   stack->safe_push (s);
654 
655   for (unsigned i = 0; i < num_pos_counts; i++)
656     {
657       unsigned offset = gcov_read_unsigned () & 0xffff0000;
658       unsigned num_targets = gcov_read_unsigned ();
659       gcov_type count = gcov_read_counter ();
660       s->pos_counts[offset].count = count;
661       for (unsigned j = 0; j < stack->length (); j++)
662         (*stack)[j]->total_count_ += count;
663       for (unsigned j = 0; j < num_targets; j++)
664         {
665           /* Only indirect call target histogram is supported now.  */
666           gcov_read_unsigned ();
667           gcov_type target_idx = gcov_read_counter ();
668           s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
669         }
670     }
671   for (unsigned i = 0; i < num_callsites; i++)
672     {
673       unsigned offset = gcov_read_unsigned ();
674       function_instance *callee_function_instance
675           = read_function_instance (stack, 0);
676       s->callsites[std::make_pair (offset, callee_function_instance->name ())]
677           = callee_function_instance;
678     }
679   stack->pop ();
680   return s;
681 }
682 
683 /* Sum of counts that is used during annotation.  */
684 
685 gcov_type
total_annotated_count() const686 function_instance::total_annotated_count () const
687 {
688   gcov_type ret = 0;
689   for (callsite_map::const_iterator iter = callsites.begin ();
690        iter != callsites.end (); ++iter)
691     ret += iter->second->total_annotated_count ();
692   for (position_count_map::const_iterator iter = pos_counts.begin ();
693        iter != pos_counts.end (); ++iter)
694     if (iter->second.annotated)
695       ret += iter->second.count;
696   return ret;
697 }
698 
699 /* Member functions for autofdo_source_profile.  */
700 
~autofdo_source_profile()701 autofdo_source_profile::~autofdo_source_profile ()
702 {
703   for (name_function_instance_map::const_iterator iter = map_.begin ();
704        iter != map_.end (); ++iter)
705     delete iter->second;
706 }
707 
708 /* For a given DECL, returns the top-level function_instance.  */
709 
710 function_instance *
get_function_instance_by_decl(tree decl) const711 autofdo_source_profile::get_function_instance_by_decl (tree decl) const
712 {
713   int index = afdo_string_table->get_index_by_decl (decl);
714   if (index == -1)
715     return NULL;
716   name_function_instance_map::const_iterator ret = map_.find (index);
717   return ret == map_.end () ? NULL : ret->second;
718 }
719 
720 /* Find count_info for a given gimple STMT. If found, store the count_info
721    in INFO and return true; otherwise return false.  */
722 
723 bool
get_count_info(gimple * stmt,count_info * info) const724 autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
725 {
726   if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
727     return false;
728 
729   inline_stack stack;
730   get_inline_stack (gimple_location (stmt), &stack);
731   if (stack.length () == 0)
732     return false;
733   function_instance *s = get_function_instance_by_inline_stack (stack);
734   if (s == NULL)
735     return false;
736   return s->get_count_info (stack[0].second, info);
737 }
738 
739 /* Mark LOC as annotated.  */
740 
741 void
mark_annotated(location_t loc)742 autofdo_source_profile::mark_annotated (location_t loc)
743 {
744   inline_stack stack;
745   get_inline_stack (loc, &stack);
746   if (stack.length () == 0)
747     return;
748   function_instance *s = get_function_instance_by_inline_stack (stack);
749   if (s == NULL)
750     return;
751   s->mark_annotated (stack[0].second);
752 }
753 
754 /* Update value profile INFO for STMT from the inlined indirect callsite.
755    Return true if INFO is updated.  */
756 
757 bool
update_inlined_ind_target(gcall * stmt,count_info * info)758 autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
759                                                    count_info *info)
760 {
761   if (dump_file)
762     {
763       fprintf (dump_file, "Checking indirect call -> direct call ");
764       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
765     }
766 
767   if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
768     {
769       if (dump_file)
770 	fprintf (dump_file, " good locus\n");
771       return false;
772     }
773 
774   count_info old_info;
775   get_count_info (stmt, &old_info);
776   gcov_type total = 0;
777   for (icall_target_map::const_iterator iter = old_info.targets.begin ();
778        iter != old_info.targets.end (); ++iter)
779     total += iter->second;
780 
781   /* Program behavior changed, original promoted (and inlined) target is not
782      hot any more. Will avoid promote the original target.
783 
784      To check if original promoted target is still hot, we check the total
785      count of the unpromoted targets (stored in TOTAL). If a callsite count
786      (stored in INFO) is smaller than half of the total count, the original
787      promoted target is considered not hot any more.  */
788   if (info->count < total / 2)
789     {
790       if (dump_file)
791 	fprintf (dump_file, " not hot anymore %ld < %ld",
792 		 (long)info->count,
793 		 (long)total /2);
794       return false;
795     }
796 
797   inline_stack stack;
798   get_inline_stack (gimple_location (stmt), &stack);
799   if (stack.length () == 0)
800     {
801       if (dump_file)
802 	fprintf (dump_file, " no inline stack\n");
803       return false;
804     }
805   function_instance *s = get_function_instance_by_inline_stack (stack);
806   if (s == NULL)
807     {
808       if (dump_file)
809 	fprintf (dump_file, " function not found in inline stack\n");
810       return false;
811     }
812   icall_target_map map;
813   if (s->find_icall_target_map (stmt, &map) == 0)
814     {
815       if (dump_file)
816 	fprintf (dump_file, " no target map\n");
817       return false;
818     }
819   for (icall_target_map::const_iterator iter = map.begin ();
820        iter != map.end (); ++iter)
821     info->targets[iter->first] = iter->second;
822   if (dump_file)
823     fprintf (dump_file, " looks good\n");
824   return true;
825 }
826 
827 /* Find total count of the callee of EDGE.  */
828 
829 gcov_type
get_callsite_total_count(struct cgraph_edge * edge) const830 autofdo_source_profile::get_callsite_total_count (
831     struct cgraph_edge *edge) const
832 {
833   inline_stack stack;
834   stack.safe_push (std::make_pair (edge->callee->decl, 0));
835   get_inline_stack (gimple_location (edge->call_stmt), &stack);
836 
837   function_instance *s = get_function_instance_by_inline_stack (stack);
838   if (s == NULL
839       || afdo_string_table->get_index (IDENTIFIER_POINTER (
840              DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
841     return 0;
842 
843   return s->total_count ();
844 }
845 
846 /* Read AutoFDO profile and returns TRUE on success.  */
847 
848 /* source profile format:
849 
850    GCOV_TAG_AFDO_FUNCTION: 4 bytes
851    LENGTH: 4 bytes
852    NUM_FUNCTIONS: 4 bytes
853    FUNCTION_INSTANCE_1
854    FUNCTION_INSTANCE_2
855    ...
856    FUNCTION_INSTANCE_N.  */
857 
858 bool
read()859 autofdo_source_profile::read ()
860 {
861   if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
862     {
863       inform (UNKNOWN_LOCATION, "Not expected TAG.");
864       return false;
865     }
866 
867   /* Skip the length of the section.  */
868   gcov_read_unsigned ();
869 
870   /* Read in the function/callsite profile, and store it in local
871      data structure.  */
872   unsigned function_num = gcov_read_unsigned ();
873   for (unsigned i = 0; i < function_num; i++)
874     {
875       function_instance::function_instance_stack stack;
876       function_instance *s = function_instance::read_function_instance (
877           &stack, gcov_read_counter ());
878       map_[s->name ()] = s;
879     }
880   return true;
881 }
882 
883 /* Return the function_instance in the profile that correspond to the
884    inline STACK.  */
885 
886 function_instance *
get_function_instance_by_inline_stack(const inline_stack & stack) const887 autofdo_source_profile::get_function_instance_by_inline_stack (
888     const inline_stack &stack) const
889 {
890   name_function_instance_map::const_iterator iter = map_.find (
891       afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
892   if (iter == map_.end())
893     return NULL;
894   function_instance *s = iter->second;
895   for (unsigned i = stack.length() - 1; i > 0; i--)
896     {
897       s = s->get_function_instance_by_decl (
898           stack[i].second, stack[i - 1].first);
899       if (s == NULL)
900         return NULL;
901     }
902   return s;
903 }
904 
905 /* Module profile is only used by LIPO. Here we simply ignore it.  */
906 
907 static void
fake_read_autofdo_module_profile()908 fake_read_autofdo_module_profile ()
909 {
910   /* Read in the module info.  */
911   gcov_read_unsigned ();
912 
913   /* Skip the length of the section.  */
914   gcov_read_unsigned ();
915 
916   /* Read in the file name table.  */
917   unsigned total_module_num = gcov_read_unsigned ();
918   gcc_assert (total_module_num == 0);
919 }
920 
921 /* Read data from profile data file.  */
922 
923 static void
read_profile(void)924 read_profile (void)
925 {
926   if (gcov_open (auto_profile_file, 1) == 0)
927     {
928       error ("cannot open profile file %s", auto_profile_file);
929       return;
930     }
931 
932   if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
933     {
934       error ("AutoFDO profile magic number does not match");
935       return;
936     }
937 
938   /* Skip the version number.  */
939   unsigned version = gcov_read_unsigned ();
940   if (version != AUTO_PROFILE_VERSION)
941     {
942       error ("AutoFDO profile version %u does not match %u",
943 	     version, AUTO_PROFILE_VERSION);
944       return;
945     }
946 
947   /* Skip the empty integer.  */
948   gcov_read_unsigned ();
949 
950   /* string_table.  */
951   afdo_string_table = new string_table ();
952   if (!afdo_string_table->read())
953     {
954       error ("cannot read string table from %s", auto_profile_file);
955       return;
956     }
957 
958   /* autofdo_source_profile.  */
959   afdo_source_profile = autofdo_source_profile::create ();
960   if (afdo_source_profile == NULL)
961     {
962       error ("cannot read function profile from %s", auto_profile_file);
963       return;
964     }
965 
966   /* autofdo_module_profile.  */
967   fake_read_autofdo_module_profile ();
968 }
969 
970 /* From AutoFDO profiles, find values inside STMT for that we want to measure
971    histograms for indirect-call optimization.
972 
973    This function is actually served for 2 purposes:
974      * before annotation, we need to mark histogram, promote and inline
975      * after annotation, we just need to mark, and let follow-up logic to
976        decide if it needs to promote and inline.  */
977 
978 static bool
afdo_indirect_call(gimple_stmt_iterator * gsi,const icall_target_map & map,bool transform)979 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
980                     bool transform)
981 {
982   gimple *gs = gsi_stmt (*gsi);
983   tree callee;
984 
985   if (map.size () == 0)
986     return false;
987   gcall *stmt = dyn_cast <gcall *> (gs);
988   if (!stmt
989       || gimple_call_internal_p (stmt)
990       || gimple_call_fndecl (stmt) != NULL_TREE)
991     return false;
992 
993   gcov_type total = 0;
994   icall_target_map::const_iterator max_iter = map.end ();
995 
996   for (icall_target_map::const_iterator iter = map.begin ();
997        iter != map.end (); ++iter)
998     {
999       total += iter->second;
1000       if (max_iter == map.end () || max_iter->second < iter->second)
1001         max_iter = iter;
1002     }
1003   struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
1004       get_identifier (afdo_string_table->get_name (max_iter->first)));
1005   if (direct_call == NULL || !direct_call->profile_id)
1006     return false;
1007 
1008   callee = gimple_call_fn (stmt);
1009 
1010   histogram_value hist = gimple_alloc_histogram_value (
1011       cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
1012   hist->n_counters = 4;
1013   hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
1014   gimple_add_histogram_value (cfun, stmt, hist);
1015 
1016   /* Total counter */
1017   hist->hvalue.counters[0] = total;
1018   /* Number of value/counter pairs */
1019   hist->hvalue.counters[1] = 1;
1020   /* Value */
1021   hist->hvalue.counters[2] = direct_call->profile_id;
1022   /* Counter */
1023   hist->hvalue.counters[3] = max_iter->second;
1024 
1025   if (!transform)
1026     return false;
1027 
1028   cgraph_node* current_function_node = cgraph_node::get (current_function_decl);
1029 
1030   /* If the direct call is a recursive call, don't promote it since
1031      we are not set up to inline recursive calls at this stage. */
1032   if (direct_call == current_function_node)
1033     return false;
1034 
1035   struct cgraph_edge *indirect_edge
1036       = current_function_node->get_edge (stmt);
1037 
1038   if (dump_file)
1039     {
1040       fprintf (dump_file, "Indirect call -> direct call ");
1041       print_generic_expr (dump_file, callee, TDF_SLIM);
1042       fprintf (dump_file, " => ");
1043       print_generic_expr (dump_file, direct_call->decl, TDF_SLIM);
1044     }
1045 
1046   if (direct_call == NULL)
1047     {
1048       if (dump_file)
1049         fprintf (dump_file, " not transforming\n");
1050       return false;
1051     }
1052   if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
1053     {
1054       if (dump_file)
1055         fprintf (dump_file, " no declaration\n");
1056       return false;
1057     }
1058 
1059   if (dump_file)
1060     {
1061       fprintf (dump_file, " transformation on insn ");
1062       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1063       fprintf (dump_file, "\n");
1064     }
1065 
1066   /* FIXME: Count should be initialized.  */
1067   struct cgraph_edge *new_edge
1068       = indirect_edge->make_speculative (direct_call,
1069 					 profile_count::uninitialized ());
1070   cgraph_edge::redirect_call_stmt_to_callee (new_edge);
1071   gimple_remove_histogram_value (cfun, stmt, hist);
1072   inline_call (new_edge, true, NULL, NULL, false);
1073   return true;
1074 }
1075 
1076 /* From AutoFDO profiles, find values inside STMT for that we want to measure
1077    histograms and adds them to list VALUES.  */
1078 
1079 static bool
afdo_vpt(gimple_stmt_iterator * gsi,const icall_target_map & map,bool transform)1080 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
1081           bool transform)
1082 {
1083   return afdo_indirect_call (gsi, map, transform);
1084 }
1085 
1086 typedef std::set<basic_block> bb_set;
1087 typedef std::set<edge> edge_set;
1088 
1089 static bool
is_bb_annotated(const basic_block bb,const bb_set & annotated)1090 is_bb_annotated (const basic_block bb, const bb_set &annotated)
1091 {
1092   return annotated.find (bb) != annotated.end ();
1093 }
1094 
1095 static void
set_bb_annotated(basic_block bb,bb_set * annotated)1096 set_bb_annotated (basic_block bb, bb_set *annotated)
1097 {
1098   annotated->insert (bb);
1099 }
1100 
1101 /* For a given BB, set its execution count. Attach value profile if a stmt
1102    is not in PROMOTED, because we only want to promote an indirect call once.
1103    Return TRUE if BB is annotated.  */
1104 
1105 static bool
afdo_set_bb_count(basic_block bb,const stmt_set & promoted)1106 afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1107 {
1108   gimple_stmt_iterator gsi;
1109   edge e;
1110   edge_iterator ei;
1111   gcov_type max_count = 0;
1112   bool has_annotated = false;
1113 
1114   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1115     {
1116       count_info info;
1117       gimple *stmt = gsi_stmt (gsi);
1118       if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1119         continue;
1120       if (afdo_source_profile->get_count_info (stmt, &info))
1121         {
1122           if (info.count > max_count)
1123             max_count = info.count;
1124           has_annotated = true;
1125           if (info.targets.size () > 0
1126               && promoted.find (stmt) == promoted.end ())
1127             afdo_vpt (&gsi, info.targets, false);
1128         }
1129     }
1130 
1131   if (!has_annotated)
1132     return false;
1133 
1134   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1135     afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1136   for (gphi_iterator gpi = gsi_start_phis (bb);
1137        !gsi_end_p (gpi);
1138        gsi_next (&gpi))
1139     {
1140       gphi *phi = gpi.phi ();
1141       size_t i;
1142       for (i = 0; i < gimple_phi_num_args (phi); i++)
1143         afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1144     }
1145   FOR_EACH_EDGE (e, ei, bb->succs)
1146   afdo_source_profile->mark_annotated (e->goto_locus);
1147 
1148   bb->count = profile_count::from_gcov_type (max_count).afdo ();
1149   return true;
1150 }
1151 
1152 /* BB1 and BB2 are in an equivalent class iff:
1153    1. BB1 dominates BB2.
1154    2. BB2 post-dominates BB1.
1155    3. BB1 and BB2 are in the same loop nest.
1156    This function finds the equivalent class for each basic block, and
1157    stores a pointer to the first BB in its equivalent class. Meanwhile,
1158    set bb counts for the same equivalent class to be idenical. Update
1159    ANNOTATED_BB for the first BB in its equivalent class.  */
1160 
1161 static void
afdo_find_equiv_class(bb_set * annotated_bb)1162 afdo_find_equiv_class (bb_set *annotated_bb)
1163 {
1164   basic_block bb;
1165 
1166   FOR_ALL_BB_FN (bb, cfun)
1167   bb->aux = NULL;
1168 
1169   FOR_ALL_BB_FN (bb, cfun)
1170   {
1171     if (bb->aux != NULL)
1172       continue;
1173     bb->aux = bb;
1174     for (basic_block bb1 : get_dominated_by (CDI_DOMINATORS, bb))
1175       if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1176 	  && bb1->loop_father == bb->loop_father)
1177 	{
1178 	  bb1->aux = bb;
1179 	  if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1180 	    {
1181 	      bb->count = bb1->count;
1182 	      set_bb_annotated (bb, annotated_bb);
1183 	    }
1184 	}
1185 
1186     for (basic_block bb1 : get_dominated_by (CDI_POST_DOMINATORS, bb))
1187       if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1188 	  && bb1->loop_father == bb->loop_father)
1189 	{
1190 	  bb1->aux = bb;
1191 	  if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1192 	    {
1193 	      bb->count = bb1->count;
1194 	      set_bb_annotated (bb, annotated_bb);
1195 	    }
1196 	}
1197   }
1198 }
1199 
1200 /* If a basic block's count is known, and only one of its in/out edges' count
1201    is unknown, its count can be calculated. Meanwhile, if all of the in/out
1202    edges' counts are known, then the basic block's unknown count can also be
1203    calculated. Also, if a block has a single predecessor or successor, the block's
1204    count can be propagated to that predecessor or successor.
1205    IS_SUCC is true if out edges of a basic blocks are examined.
1206    Update ANNOTATED_BB accordingly.
1207    Return TRUE if any basic block/edge count is changed.  */
1208 
1209 static bool
afdo_propagate_edge(bool is_succ,bb_set * annotated_bb)1210 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb)
1211 {
1212   basic_block bb;
1213   bool changed = false;
1214 
1215   FOR_EACH_BB_FN (bb, cfun)
1216   {
1217     edge e, unknown_edge = NULL;
1218     edge_iterator ei;
1219     int num_unknown_edge = 0;
1220     int num_edge = 0;
1221     profile_count total_known_count = profile_count::zero ().afdo ();
1222 
1223     FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1224       {
1225 	gcc_assert (AFDO_EINFO (e) != NULL);
1226 	if (! AFDO_EINFO (e)->is_annotated ())
1227 	  num_unknown_edge++, unknown_edge = e;
1228 	else
1229 	  total_known_count += AFDO_EINFO (e)->get_count ();
1230 	num_edge++;
1231       }
1232 
1233     /* Be careful not to annotate block with no successor in special cases.  */
1234     if (num_unknown_edge == 0 && total_known_count > bb->count)
1235       {
1236 	bb->count = total_known_count;
1237 	if (!is_bb_annotated (bb, *annotated_bb))
1238 	  set_bb_annotated (bb, annotated_bb);
1239 	changed = true;
1240       }
1241     else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1242       {
1243 	if (bb->count > total_known_count)
1244 	  {
1245 	      profile_count new_count = bb->count - total_known_count;
1246 	      AFDO_EINFO(unknown_edge)->set_count(new_count);
1247 	      if (num_edge == 1)
1248 		{
1249 		  basic_block succ_or_pred_bb = is_succ ? unknown_edge->dest : unknown_edge->src;
1250 		  if (new_count > succ_or_pred_bb->count)
1251 		    {
1252 		      succ_or_pred_bb->count = new_count;
1253 		      if (!is_bb_annotated (succ_or_pred_bb, *annotated_bb))
1254 			set_bb_annotated (succ_or_pred_bb, annotated_bb);
1255 		    }
1256 		}
1257 	   }
1258 	else
1259 	  AFDO_EINFO (unknown_edge)->set_count (profile_count::zero().afdo ());
1260 	AFDO_EINFO (unknown_edge)->set_annotated ();
1261 	changed = true;
1262       }
1263   }
1264   return changed;
1265 }
1266 
1267 /* Special propagation for circuit expressions. Because GCC translates
1268    control flow into data flow for circuit expressions. E.g.
1269    BB1:
1270    if (a && b)
1271      BB2
1272    else
1273      BB3
1274 
1275    will be translated into:
1276 
1277    BB1:
1278      if (a)
1279        goto BB.t1
1280      else
1281        goto BB.t3
1282    BB.t1:
1283      if (b)
1284        goto BB.t2
1285      else
1286        goto BB.t3
1287    BB.t2:
1288      goto BB.t3
1289    BB.t3:
1290      tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1291      if (tmp)
1292        goto BB2
1293      else
1294        goto BB3
1295 
1296    In this case, we need to propagate through PHI to determine the edge
1297    count of BB1->BB.t1, BB.t1->BB.t2.  */
1298 
1299 static void
afdo_propagate_circuit(const bb_set & annotated_bb)1300 afdo_propagate_circuit (const bb_set &annotated_bb)
1301 {
1302   basic_block bb;
1303   FOR_ALL_BB_FN (bb, cfun)
1304   {
1305     gimple *def_stmt;
1306     tree cmp_rhs, cmp_lhs;
1307     gimple *cmp_stmt = last_stmt (bb);
1308     edge e;
1309     edge_iterator ei;
1310 
1311     if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1312       continue;
1313     cmp_rhs = gimple_cond_rhs (cmp_stmt);
1314     cmp_lhs = gimple_cond_lhs (cmp_stmt);
1315     if (!TREE_CONSTANT (cmp_rhs)
1316         || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1317       continue;
1318     if (TREE_CODE (cmp_lhs) != SSA_NAME)
1319       continue;
1320     if (!is_bb_annotated (bb, annotated_bb))
1321       continue;
1322     def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1323     while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
1324            && gimple_assign_single_p (def_stmt)
1325            && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
1326       def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
1327     if (!def_stmt)
1328       continue;
1329     gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
1330     if (!phi_stmt)
1331       continue;
1332     FOR_EACH_EDGE (e, ei, bb->succs)
1333     {
1334       unsigned i, total = 0;
1335       edge only_one;
1336       bool check_value_one = (((integer_onep (cmp_rhs))
1337                                ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1338                               ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1339       if (! AFDO_EINFO (e)->is_annotated ())
1340         continue;
1341       for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1342         {
1343           tree val = gimple_phi_arg_def (phi_stmt, i);
1344           edge ep = gimple_phi_arg_edge (phi_stmt, i);
1345 
1346           if (!TREE_CONSTANT (val)
1347               || !(integer_zerop (val) || integer_onep (val)))
1348             continue;
1349           if (check_value_one ^ integer_onep (val))
1350             continue;
1351           total++;
1352           only_one = ep;
1353           if (! (AFDO_EINFO (e)->get_count ()).nonzero_p ()
1354 	      && ! AFDO_EINFO (ep)->is_annotated ())
1355 	    {
1356 	      AFDO_EINFO (ep)->set_count (profile_count::zero ().afdo ());
1357 	      AFDO_EINFO (ep)->set_annotated ();
1358 	    }
1359 	}
1360       if (total == 1 && ! AFDO_EINFO (only_one)->is_annotated ())
1361 	{
1362 	  AFDO_EINFO (only_one)->set_count (AFDO_EINFO (e)->get_count ());
1363 	  AFDO_EINFO (only_one)->set_annotated ();
1364 	}
1365     }
1366   }
1367 }
1368 
1369 /* Propagate the basic block count and edge count on the control flow
1370    graph. We do the propagation iteratively until stablize.  */
1371 
1372 static void
afdo_propagate(bb_set * annotated_bb)1373 afdo_propagate (bb_set *annotated_bb)
1374 {
1375   basic_block bb;
1376   bool changed = true;
1377   int i = 0;
1378 
1379   FOR_ALL_BB_FN (bb, cfun)
1380   {
1381     bb->count = ((basic_block)bb->aux)->count;
1382     if (is_bb_annotated ((basic_block)bb->aux, *annotated_bb))
1383       set_bb_annotated (bb, annotated_bb);
1384   }
1385 
1386   while (changed && i++ < 10)
1387     {
1388       changed = false;
1389 
1390       if (afdo_propagate_edge (true, annotated_bb))
1391         changed = true;
1392       if (afdo_propagate_edge (false, annotated_bb))
1393         changed = true;
1394       afdo_propagate_circuit (*annotated_bb);
1395     }
1396 }
1397 
1398 /* Propagate counts on control flow graph and calculate branch
1399    probabilities.  */
1400 
1401 static void
afdo_calculate_branch_prob(bb_set * annotated_bb)1402 afdo_calculate_branch_prob (bb_set *annotated_bb)
1403 {
1404   edge e;
1405   edge_iterator ei;
1406   basic_block bb;
1407 
1408   calculate_dominance_info (CDI_POST_DOMINATORS);
1409   calculate_dominance_info (CDI_DOMINATORS);
1410   loop_optimizer_init (0);
1411 
1412   FOR_ALL_BB_FN (bb, cfun)
1413     {
1414       gcc_assert (bb->aux == NULL);
1415       FOR_EACH_EDGE (e, ei, bb->succs)
1416 	{
1417 	  gcc_assert (e->aux == NULL);
1418 	  e->aux = new edge_info ();
1419 	}
1420     }
1421 
1422   afdo_find_equiv_class (annotated_bb);
1423   afdo_propagate (annotated_bb);
1424 
1425   FOR_EACH_BB_FN (bb, cfun)
1426   {
1427     int num_unknown_succ = 0;
1428     profile_count total_count = profile_count::zero ().afdo ();
1429 
1430     FOR_EACH_EDGE (e, ei, bb->succs)
1431     {
1432       gcc_assert (AFDO_EINFO (e) != NULL);
1433       if (! AFDO_EINFO (e)->is_annotated ())
1434         num_unknown_succ++;
1435       else
1436         total_count += AFDO_EINFO (e)->get_count ();
1437     }
1438     if (num_unknown_succ == 0 && total_count > profile_count::zero ())
1439       {
1440 	FOR_EACH_EDGE (e, ei, bb->succs)
1441 	  e->probability
1442 	    = AFDO_EINFO (e)->get_count ().probability_in (total_count);
1443       }
1444   }
1445   FOR_ALL_BB_FN (bb, cfun)
1446     {
1447       bb->aux = NULL;
1448       FOR_EACH_EDGE (e, ei, bb->succs)
1449 	if (AFDO_EINFO (e) != NULL)
1450 	  {
1451 	    delete AFDO_EINFO (e);
1452 	    e->aux = NULL;
1453 	  }
1454     }
1455 
1456   loop_optimizer_finalize ();
1457   free_dominance_info (CDI_DOMINATORS);
1458   free_dominance_info (CDI_POST_DOMINATORS);
1459 }
1460 
1461 /* Perform value profile transformation using AutoFDO profile. Add the
1462    promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1463    indirect call promoted.  */
1464 
1465 static bool
afdo_vpt_for_early_inline(stmt_set * promoted_stmts)1466 afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1467 {
1468   basic_block bb;
1469   if (afdo_source_profile->get_function_instance_by_decl (
1470           current_function_decl) == NULL)
1471     return false;
1472 
1473   compute_fn_summary (cgraph_node::get (current_function_decl), true);
1474 
1475   bool has_vpt = false;
1476   FOR_EACH_BB_FN (bb, cfun)
1477   {
1478     if (!has_indirect_call (bb))
1479       continue;
1480     gimple_stmt_iterator gsi;
1481 
1482     gcov_type bb_count = 0;
1483     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1484       {
1485         count_info info;
1486 	gimple *stmt = gsi_stmt (gsi);
1487         if (afdo_source_profile->get_count_info (stmt, &info))
1488           bb_count = MAX (bb_count, info.count);
1489       }
1490 
1491     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1492       {
1493         gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
1494         /* IC_promotion and early_inline_2 is done in multiple iterations.
1495            No need to promoted the stmt if its in promoted_stmts (means
1496            it is already been promoted in the previous iterations).  */
1497         if ((!stmt) || gimple_call_fn (stmt) == NULL
1498             || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1499             || promoted_stmts->find (stmt) != promoted_stmts->end ())
1500           continue;
1501 
1502         count_info info;
1503         afdo_source_profile->get_count_info (stmt, &info);
1504         info.count = bb_count;
1505         if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1506           {
1507             /* Promote the indirect call and update the promoted_stmts.  */
1508             promoted_stmts->insert (stmt);
1509             if (afdo_vpt (&gsi, info.targets, true))
1510               has_vpt = true;
1511           }
1512       }
1513   }
1514 
1515   if (has_vpt)
1516     {
1517       unsigned todo = optimize_inline_calls (current_function_decl);
1518       if (todo & TODO_update_ssa_any)
1519        update_ssa (TODO_update_ssa);
1520       return true;
1521     }
1522 
1523   return false;
1524 }
1525 
1526 /* Annotate auto profile to the control flow graph. Do not annotate value
1527    profile for stmts in PROMOTED_STMTS.  */
1528 
1529 static void
afdo_annotate_cfg(const stmt_set & promoted_stmts)1530 afdo_annotate_cfg (const stmt_set &promoted_stmts)
1531 {
1532   basic_block bb;
1533   bb_set annotated_bb;
1534   const function_instance *s
1535       = afdo_source_profile->get_function_instance_by_decl (
1536           current_function_decl);
1537 
1538   if (s == NULL)
1539     return;
1540   cgraph_node::get (current_function_decl)->count
1541      = profile_count::from_gcov_type (s->head_count ()).afdo ();
1542   ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1543      = profile_count::from_gcov_type (s->head_count ()).afdo ();
1544   EXIT_BLOCK_PTR_FOR_FN (cfun)->count = profile_count::zero ().afdo ();
1545   profile_count max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1546 
1547   FOR_EACH_BB_FN (bb, cfun)
1548     {
1549       /* As autoFDO uses sampling approach, we have to assume that all
1550 	 counters are zero when not seen by autoFDO.  */
1551       bb->count = profile_count::zero ().afdo ();
1552       if (afdo_set_bb_count (bb, promoted_stmts))
1553 	set_bb_annotated (bb, &annotated_bb);
1554       if (bb->count > max_count)
1555 	max_count = bb->count;
1556     }
1557   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1558       > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1559     {
1560       ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1561           = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1562       set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1563     }
1564   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1565       > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1566     {
1567       EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1568           = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1569       set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1570     }
1571   afdo_source_profile->mark_annotated (
1572       DECL_SOURCE_LOCATION (current_function_decl));
1573   afdo_source_profile->mark_annotated (cfun->function_start_locus);
1574   afdo_source_profile->mark_annotated (cfun->function_end_locus);
1575   if (max_count > profile_count::zero ())
1576     {
1577       /* Calculate, propagate count and probability information on CFG.  */
1578       afdo_calculate_branch_prob (&annotated_bb);
1579     }
1580   update_max_bb_count ();
1581   profile_status_for_fn (cfun) = PROFILE_READ;
1582   if (flag_value_profile_transformations)
1583     {
1584       gimple_value_profile_transformations ();
1585       free_dominance_info (CDI_DOMINATORS);
1586       free_dominance_info (CDI_POST_DOMINATORS);
1587       update_ssa (TODO_update_ssa);
1588     }
1589 }
1590 
1591 /* Wrapper function to invoke early inliner.  */
1592 
1593 static void
early_inline()1594 early_inline ()
1595 {
1596   compute_fn_summary (cgraph_node::get (current_function_decl), true);
1597   unsigned todo = early_inliner (cfun);
1598   if (todo & TODO_update_ssa_any)
1599     update_ssa (TODO_update_ssa);
1600 }
1601 
1602 /* Use AutoFDO profile to annoate the control flow graph.
1603    Return the todo flag.  */
1604 
1605 static unsigned int
auto_profile(void)1606 auto_profile (void)
1607 {
1608   struct cgraph_node *node;
1609 
1610   if (symtab->state == FINISHED)
1611     return 0;
1612 
1613   init_node_map (true);
1614   profile_info = autofdo::afdo_profile_info;
1615 
1616   FOR_EACH_FUNCTION (node)
1617   {
1618     if (!gimple_has_body_p (node->decl))
1619       continue;
1620 
1621     /* Don't profile functions produced for builtin stuff.  */
1622     if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1623       continue;
1624 
1625     push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1626 
1627     /* First do indirect call promotion and early inline to make the
1628        IR match the profiled binary before actual annotation.
1629 
1630        This is needed because an indirect call might have been promoted
1631        and inlined in the profiled binary. If we do not promote and
1632        inline these indirect calls before annotation, the profile for
1633        these promoted functions will be lost.
1634 
1635        e.g. foo() --indirect_call--> bar()
1636        In profiled binary, the callsite is promoted and inlined, making
1637        the profile look like:
1638 
1639        foo: {
1640          loc_foo_1: count_1
1641          bar@loc_foo_2: {
1642            loc_bar_1: count_2
1643            loc_bar_2: count_3
1644          }
1645        }
1646 
1647        Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1648        If we perform annotation on it, the profile inside bar@loc_foo2
1649        will be wasted.
1650 
1651        To avoid this, we promote loc_foo_2 and inline the promoted bar
1652        function before annotation, so the profile inside bar@loc_foo2
1653        will be useful.  */
1654     autofdo::stmt_set promoted_stmts;
1655     for (int i = 0; i < 10; i++)
1656       {
1657         if (!flag_value_profile_transformations
1658             || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1659           break;
1660         early_inline ();
1661       }
1662 
1663     early_inline ();
1664     autofdo::afdo_annotate_cfg (promoted_stmts);
1665     compute_function_frequency ();
1666 
1667     /* Local pure-const may imply need to fixup the cfg.  */
1668     if (execute_fixup_cfg () & TODO_cleanup_cfg)
1669       cleanup_tree_cfg ();
1670 
1671     free_dominance_info (CDI_DOMINATORS);
1672     free_dominance_info (CDI_POST_DOMINATORS);
1673     cgraph_edge::rebuild_edges ();
1674     compute_fn_summary (cgraph_node::get (current_function_decl), true);
1675     pop_cfun ();
1676   }
1677 
1678   return TODO_rebuild_cgraph_edges;
1679 }
1680 } /* namespace autofdo.  */
1681 
1682 /* Read the profile from the profile data file.  */
1683 
1684 void
read_autofdo_file(void)1685 read_autofdo_file (void)
1686 {
1687   if (auto_profile_file == NULL)
1688     auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1689 
1690   autofdo::afdo_profile_info = XNEW (gcov_summary);
1691   autofdo::afdo_profile_info->runs = 1;
1692   autofdo::afdo_profile_info->sum_max = 0;
1693 
1694   /* Read the profile from the profile file.  */
1695   autofdo::read_profile ();
1696 }
1697 
1698 /* Free the resources.  */
1699 
1700 void
end_auto_profile(void)1701 end_auto_profile (void)
1702 {
1703   delete autofdo::afdo_source_profile;
1704   delete autofdo::afdo_string_table;
1705   profile_info = NULL;
1706 }
1707 
1708 /* Returns TRUE if EDGE is hot enough to be inlined early.  */
1709 
1710 bool
afdo_callsite_hot_enough_for_early_inline(struct cgraph_edge * edge)1711 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1712 {
1713   gcov_type count
1714       = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1715 
1716   if (count > 0)
1717     {
1718       bool is_hot;
1719       profile_count pcount = profile_count::from_gcov_type (count).afdo ();
1720       gcov_summary *saved_profile_info = profile_info;
1721       /* At early inline stage, profile_info is not set yet. We need to
1722          temporarily set it to afdo_profile_info to calculate hotness.  */
1723       profile_info = autofdo::afdo_profile_info;
1724       is_hot = maybe_hot_count_p (NULL, pcount);
1725       profile_info = saved_profile_info;
1726       return is_hot;
1727     }
1728 
1729   return false;
1730 }
1731 
1732 namespace
1733 {
1734 
1735 const pass_data pass_data_ipa_auto_profile = {
1736   SIMPLE_IPA_PASS, "afdo", /* name */
1737   OPTGROUP_NONE,           /* optinfo_flags */
1738   TV_IPA_AUTOFDO,          /* tv_id */
1739   0,                       /* properties_required */
1740   0,                       /* properties_provided */
1741   0,                       /* properties_destroyed */
1742   0,                       /* todo_flags_start */
1743   0,                       /* todo_flags_finish */
1744 };
1745 
1746 class pass_ipa_auto_profile : public simple_ipa_opt_pass
1747 {
1748 public:
pass_ipa_auto_profile(gcc::context * ctxt)1749   pass_ipa_auto_profile (gcc::context *ctxt)
1750       : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1751   {
1752   }
1753 
1754   /* opt_pass methods: */
1755   virtual bool
gate(function *)1756   gate (function *)
1757   {
1758     return flag_auto_profile;
1759   }
1760   virtual unsigned int
execute(function *)1761   execute (function *)
1762   {
1763     return autofdo::auto_profile ();
1764   }
1765 }; // class pass_ipa_auto_profile
1766 
1767 } // anon namespace
1768 
1769 simple_ipa_opt_pass *
make_pass_ipa_auto_profile(gcc::context * ctxt)1770 make_pass_ipa_auto_profile (gcc::context *ctxt)
1771 {
1772   return new pass_ipa_auto_profile (ctxt);
1773 }
1774