xref: /dflybsd-src/contrib/gcc-8.0/gcc/auto-profile.c (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj /* Read and annotate call graph profile from the auto profile data file.
2*38fd1498Szrj    Copyright (C) 2014-2018 Free Software Foundation, Inc.
3*38fd1498Szrj    Contributed by Dehao Chen (dehao@google.com)
4*38fd1498Szrj 
5*38fd1498Szrj This file is part of GCC.
6*38fd1498Szrj 
7*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
8*38fd1498Szrj the terms of the GNU General Public License as published by the Free
9*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
10*38fd1498Szrj version.
11*38fd1498Szrj 
12*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
14*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15*38fd1498Szrj for more details.
16*38fd1498Szrj 
17*38fd1498Szrj You should have received a copy of the GNU General Public License
18*38fd1498Szrj along with GCC; see the file COPYING3.  If not see
19*38fd1498Szrj <http://www.gnu.org/licenses/>.  */
20*38fd1498Szrj 
21*38fd1498Szrj #include "config.h"
22*38fd1498Szrj #define INCLUDE_MAP
23*38fd1498Szrj #define INCLUDE_SET
24*38fd1498Szrj #include "system.h"
25*38fd1498Szrj #include "coretypes.h"
26*38fd1498Szrj #include "backend.h"
27*38fd1498Szrj #include "tree.h"
28*38fd1498Szrj #include "gimple.h"
29*38fd1498Szrj #include "predict.h"
30*38fd1498Szrj #include "alloc-pool.h"
31*38fd1498Szrj #include "tree-pass.h"
32*38fd1498Szrj #include "ssa.h"
33*38fd1498Szrj #include "cgraph.h"
34*38fd1498Szrj #include "gcov-io.h"
35*38fd1498Szrj #include "diagnostic-core.h"
36*38fd1498Szrj #include "profile.h"
37*38fd1498Szrj #include "langhooks.h"
38*38fd1498Szrj #include "cfgloop.h"
39*38fd1498Szrj #include "tree-cfg.h"
40*38fd1498Szrj #include "tree-cfgcleanup.h"
41*38fd1498Szrj #include "tree-into-ssa.h"
42*38fd1498Szrj #include "gimple-iterator.h"
43*38fd1498Szrj #include "value-prof.h"
44*38fd1498Szrj #include "params.h"
45*38fd1498Szrj #include "symbol-summary.h"
46*38fd1498Szrj #include "ipa-prop.h"
47*38fd1498Szrj #include "ipa-fnsummary.h"
48*38fd1498Szrj #include "ipa-inline.h"
49*38fd1498Szrj #include "tree-inline.h"
50*38fd1498Szrj #include "auto-profile.h"
51*38fd1498Szrj #include "tree-pretty-print.h"
52*38fd1498Szrj #include "gimple-pretty-print.h"
53*38fd1498Szrj 
54*38fd1498Szrj /* The following routines implements AutoFDO optimization.
55*38fd1498Szrj 
56*38fd1498Szrj    This optimization uses sampling profiles to annotate basic block counts
57*38fd1498Szrj    and uses heuristics to estimate branch probabilities.
58*38fd1498Szrj 
59*38fd1498Szrj    There are three phases in AutoFDO:
60*38fd1498Szrj 
61*38fd1498Szrj    Phase 1: Read profile from the profile data file.
62*38fd1498Szrj      The following info is read from the profile datafile:
63*38fd1498Szrj         * string_table: a map between function name and its index.
64*38fd1498Szrj         * autofdo_source_profile: a map from function_instance name to
65*38fd1498Szrj           function_instance. This is represented as a forest of
66*38fd1498Szrj           function_instances.
67*38fd1498Szrj         * WorkingSet: a histogram of how many instructions are covered for a
68*38fd1498Szrj           given percentage of total cycles. This is describing the binary
69*38fd1498Szrj           level information (not source level). This info is used to help
70*38fd1498Szrj           decide if we want aggressive optimizations that could increase
71*38fd1498Szrj           code footprint (e.g. loop unroll etc.)
72*38fd1498Szrj      A function instance is an instance of function that could either be a
73*38fd1498Szrj      standalone symbol, or a clone of a function that is inlined into another
74*38fd1498Szrj      function.
75*38fd1498Szrj 
76*38fd1498Szrj    Phase 2: Early inline + value profile transformation.
77*38fd1498Szrj      Early inline uses autofdo_source_profile to find if a callsite is:
78*38fd1498Szrj         * inlined in the profiled binary.
79*38fd1498Szrj         * callee body is hot in the profiling run.
80*38fd1498Szrj      If both condition satisfies, early inline will inline the callsite
81*38fd1498Szrj      regardless of the code growth.
82*38fd1498Szrj      Phase 2 is an iterative process. During each iteration, we also check
83*38fd1498Szrj      if an indirect callsite is promoted and inlined in the profiling run.
84*38fd1498Szrj      If yes, vpt will happen to force promote it and in the next iteration,
85*38fd1498Szrj      einline will inline the promoted callsite in the next iteration.
86*38fd1498Szrj 
87*38fd1498Szrj    Phase 3: Annotate control flow graph.
88*38fd1498Szrj      AutoFDO uses a separate pass to:
89*38fd1498Szrj         * Annotate basic block count
90*38fd1498Szrj         * Estimate branch probability
91*38fd1498Szrj 
92*38fd1498Szrj    After the above 3 phases, all profile is readily annotated on the GCC IR.
93*38fd1498Szrj    AutoFDO tries to reuse all FDO infrastructure as much as possible to make
94*38fd1498Szrj    use of the profile. E.g. it uses existing mechanism to calculate the basic
95*38fd1498Szrj    block/edge frequency, as well as the cgraph node/edge count.
96*38fd1498Szrj */
97*38fd1498Szrj 
98*38fd1498Szrj #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
99*38fd1498Szrj #define AUTO_PROFILE_VERSION 1
100*38fd1498Szrj 
101*38fd1498Szrj namespace autofdo
102*38fd1498Szrj {
103*38fd1498Szrj 
104*38fd1498Szrj /* Represent a source location: (function_decl, lineno).  */
105*38fd1498Szrj typedef std::pair<tree, unsigned> decl_lineno;
106*38fd1498Szrj 
107*38fd1498Szrj /* Represent an inline stack. vector[0] is the leaf node.  */
108*38fd1498Szrj typedef auto_vec<decl_lineno> inline_stack;
109*38fd1498Szrj 
110*38fd1498Szrj /* String array that stores function names.  */
111*38fd1498Szrj typedef auto_vec<char *> string_vector;
112*38fd1498Szrj 
113*38fd1498Szrj /* Map from function name's index in string_table to target's
114*38fd1498Szrj    execution count.  */
115*38fd1498Szrj typedef std::map<unsigned, gcov_type> icall_target_map;
116*38fd1498Szrj 
117*38fd1498Szrj /* Set of gimple stmts. Used to track if the stmt has already been promoted
118*38fd1498Szrj    to direct call.  */
119*38fd1498Szrj typedef std::set<gimple *> stmt_set;
120*38fd1498Szrj 
121*38fd1498Szrj /* Represent count info of an inline stack.  */
122*38fd1498Szrj struct count_info
123*38fd1498Szrj {
124*38fd1498Szrj   /* Sampled count of the inline stack.  */
125*38fd1498Szrj   gcov_type count;
126*38fd1498Szrj 
127*38fd1498Szrj   /* Map from indirect call target to its sample count.  */
128*38fd1498Szrj   icall_target_map targets;
129*38fd1498Szrj 
130*38fd1498Szrj   /* Whether this inline stack is already used in annotation.
131*38fd1498Szrj 
132*38fd1498Szrj      Each inline stack should only be used to annotate IR once.
133*38fd1498Szrj      This will be enforced when instruction-level discriminator
134*38fd1498Szrj      is supported.  */
135*38fd1498Szrj   bool annotated;
136*38fd1498Szrj };
137*38fd1498Szrj 
138*38fd1498Szrj /* operator< for "const char *".  */
139*38fd1498Szrj struct string_compare
140*38fd1498Szrj {
operatorstring_compare141*38fd1498Szrj   bool operator()(const char *a, const char *b) const
142*38fd1498Szrj   {
143*38fd1498Szrj     return strcmp (a, b) < 0;
144*38fd1498Szrj   }
145*38fd1498Szrj };
146*38fd1498Szrj 
147*38fd1498Szrj /* Store a string array, indexed by string position in the array.  */
148*38fd1498Szrj class string_table
149*38fd1498Szrj {
150*38fd1498Szrj public:
string_table()151*38fd1498Szrj   string_table ()
152*38fd1498Szrj   {}
153*38fd1498Szrj 
154*38fd1498Szrj   ~string_table ();
155*38fd1498Szrj 
156*38fd1498Szrj   /* For a given string, returns its index.  */
157*38fd1498Szrj   int get_index (const char *name) const;
158*38fd1498Szrj 
159*38fd1498Szrj   /* For a given decl, returns the index of the decl name.  */
160*38fd1498Szrj   int get_index_by_decl (tree decl) const;
161*38fd1498Szrj 
162*38fd1498Szrj   /* For a given index, returns the string.  */
163*38fd1498Szrj   const char *get_name (int index) const;
164*38fd1498Szrj 
165*38fd1498Szrj   /* Read profile, return TRUE on success.  */
166*38fd1498Szrj   bool read ();
167*38fd1498Szrj 
168*38fd1498Szrj private:
169*38fd1498Szrj   typedef std::map<const char *, unsigned, string_compare> string_index_map;
170*38fd1498Szrj   string_vector vector_;
171*38fd1498Szrj   string_index_map map_;
172*38fd1498Szrj };
173*38fd1498Szrj 
174*38fd1498Szrj /* Profile of a function instance:
175*38fd1498Szrj      1. total_count of the function.
176*38fd1498Szrj      2. head_count (entry basic block count) of the function (only valid when
177*38fd1498Szrj         function is a top-level function_instance, i.e. it is the original copy
178*38fd1498Szrj         instead of the inlined copy).
179*38fd1498Szrj      3. map from source location (decl_lineno) to profile (count_info).
180*38fd1498Szrj      4. map from callsite to callee function_instance.  */
181*38fd1498Szrj class function_instance
182*38fd1498Szrj {
183*38fd1498Szrj public:
184*38fd1498Szrj   typedef auto_vec<function_instance *> function_instance_stack;
185*38fd1498Szrj 
186*38fd1498Szrj   /* Read the profile and return a function_instance with head count as
187*38fd1498Szrj      HEAD_COUNT. Recursively read callsites to create nested function_instances
188*38fd1498Szrj      too. STACK is used to track the recursive creation process.  */
189*38fd1498Szrj   static function_instance *
190*38fd1498Szrj   read_function_instance (function_instance_stack *stack,
191*38fd1498Szrj                           gcov_type head_count);
192*38fd1498Szrj 
193*38fd1498Szrj   /* Recursively deallocate all callsites (nested function_instances).  */
194*38fd1498Szrj   ~function_instance ();
195*38fd1498Szrj 
196*38fd1498Szrj   /* Accessors.  */
197*38fd1498Szrj   int
name()198*38fd1498Szrj   name () const
199*38fd1498Szrj   {
200*38fd1498Szrj     return name_;
201*38fd1498Szrj   }
202*38fd1498Szrj   gcov_type
total_count()203*38fd1498Szrj   total_count () const
204*38fd1498Szrj   {
205*38fd1498Szrj     return total_count_;
206*38fd1498Szrj   }
207*38fd1498Szrj   gcov_type
head_count()208*38fd1498Szrj   head_count () const
209*38fd1498Szrj   {
210*38fd1498Szrj     return head_count_;
211*38fd1498Szrj   }
212*38fd1498Szrj 
213*38fd1498Szrj   /* Traverse callsites of the current function_instance to find one at the
214*38fd1498Szrj      location of LINENO and callee name represented in DECL.  */
215*38fd1498Szrj   function_instance *get_function_instance_by_decl (unsigned lineno,
216*38fd1498Szrj                                                     tree decl) const;
217*38fd1498Szrj 
218*38fd1498Szrj   /* Store the profile info for LOC in INFO. Return TRUE if profile info
219*38fd1498Szrj      is found.  */
220*38fd1498Szrj   bool get_count_info (location_t loc, count_info *info) const;
221*38fd1498Szrj 
222*38fd1498Szrj   /* Read the inlined indirect call target profile for STMT and store it in
223*38fd1498Szrj      MAP, return the total count for all inlined indirect calls.  */
224*38fd1498Szrj   gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
225*38fd1498Szrj 
226*38fd1498Szrj   /* Sum of counts that is used during annotation.  */
227*38fd1498Szrj   gcov_type total_annotated_count () const;
228*38fd1498Szrj 
229*38fd1498Szrj   /* Mark LOC as annotated.  */
230*38fd1498Szrj   void mark_annotated (location_t loc);
231*38fd1498Szrj 
232*38fd1498Szrj private:
233*38fd1498Szrj   /* Callsite, represented as (decl_lineno, callee_function_name_index).  */
234*38fd1498Szrj   typedef std::pair<unsigned, unsigned> callsite;
235*38fd1498Szrj 
236*38fd1498Szrj   /* Map from callsite to callee function_instance.  */
237*38fd1498Szrj   typedef std::map<callsite, function_instance *> callsite_map;
238*38fd1498Szrj 
function_instance(unsigned name,gcov_type head_count)239*38fd1498Szrj   function_instance (unsigned name, gcov_type head_count)
240*38fd1498Szrj       : name_ (name), total_count_ (0), head_count_ (head_count)
241*38fd1498Szrj   {
242*38fd1498Szrj   }
243*38fd1498Szrj 
244*38fd1498Szrj   /* Map from source location (decl_lineno) to profile (count_info).  */
245*38fd1498Szrj   typedef std::map<unsigned, count_info> position_count_map;
246*38fd1498Szrj 
247*38fd1498Szrj   /* function_instance name index in the string_table.  */
248*38fd1498Szrj   unsigned name_;
249*38fd1498Szrj 
250*38fd1498Szrj   /* Total sample count.  */
251*38fd1498Szrj   gcov_type total_count_;
252*38fd1498Szrj 
253*38fd1498Szrj   /* Entry BB's sample count.  */
254*38fd1498Szrj   gcov_type head_count_;
255*38fd1498Szrj 
256*38fd1498Szrj   /* Map from callsite location to callee function_instance.  */
257*38fd1498Szrj   callsite_map callsites;
258*38fd1498Szrj 
259*38fd1498Szrj   /* Map from source location to count_info.  */
260*38fd1498Szrj   position_count_map pos_counts;
261*38fd1498Szrj };
262*38fd1498Szrj 
263*38fd1498Szrj /* Profile for all functions.  */
264*38fd1498Szrj class autofdo_source_profile
265*38fd1498Szrj {
266*38fd1498Szrj public:
267*38fd1498Szrj   static autofdo_source_profile *
create()268*38fd1498Szrj   create ()
269*38fd1498Szrj   {
270*38fd1498Szrj     autofdo_source_profile *map = new autofdo_source_profile ();
271*38fd1498Szrj 
272*38fd1498Szrj     if (map->read ())
273*38fd1498Szrj       return map;
274*38fd1498Szrj     delete map;
275*38fd1498Szrj     return NULL;
276*38fd1498Szrj   }
277*38fd1498Szrj 
278*38fd1498Szrj   ~autofdo_source_profile ();
279*38fd1498Szrj 
280*38fd1498Szrj   /* For a given DECL, returns the top-level function_instance.  */
281*38fd1498Szrj   function_instance *get_function_instance_by_decl (tree decl) const;
282*38fd1498Szrj 
283*38fd1498Szrj   /* Find count_info for a given gimple STMT. If found, store the count_info
284*38fd1498Szrj      in INFO and return true; otherwise return false.  */
285*38fd1498Szrj   bool get_count_info (gimple *stmt, count_info *info) const;
286*38fd1498Szrj 
287*38fd1498Szrj   /* Find total count of the callee of EDGE.  */
288*38fd1498Szrj   gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
289*38fd1498Szrj 
290*38fd1498Szrj   /* Update value profile INFO for STMT from the inlined indirect callsite.
291*38fd1498Szrj      Return true if INFO is updated.  */
292*38fd1498Szrj   bool update_inlined_ind_target (gcall *stmt, count_info *info);
293*38fd1498Szrj 
294*38fd1498Szrj   /* Mark LOC as annotated.  */
295*38fd1498Szrj   void mark_annotated (location_t loc);
296*38fd1498Szrj 
297*38fd1498Szrj private:
298*38fd1498Szrj   /* Map from function_instance name index (in string_table) to
299*38fd1498Szrj      function_instance.  */
300*38fd1498Szrj   typedef std::map<unsigned, function_instance *> name_function_instance_map;
301*38fd1498Szrj 
autofdo_source_profile()302*38fd1498Szrj   autofdo_source_profile () {}
303*38fd1498Szrj 
304*38fd1498Szrj   /* Read AutoFDO profile and returns TRUE on success.  */
305*38fd1498Szrj   bool read ();
306*38fd1498Szrj 
307*38fd1498Szrj   /* Return the function_instance in the profile that correspond to the
308*38fd1498Szrj      inline STACK.  */
309*38fd1498Szrj   function_instance *
310*38fd1498Szrj   get_function_instance_by_inline_stack (const inline_stack &stack) const;
311*38fd1498Szrj 
312*38fd1498Szrj   name_function_instance_map map_;
313*38fd1498Szrj };
314*38fd1498Szrj 
315*38fd1498Szrj /* Store the strings read from the profile data file.  */
316*38fd1498Szrj static string_table *afdo_string_table;
317*38fd1498Szrj 
318*38fd1498Szrj /* Store the AutoFDO source profile.  */
319*38fd1498Szrj static autofdo_source_profile *afdo_source_profile;
320*38fd1498Szrj 
321*38fd1498Szrj /* gcov_ctr_summary structure to store the profile_info.  */
322*38fd1498Szrj static struct gcov_ctr_summary *afdo_profile_info;
323*38fd1498Szrj 
324*38fd1498Szrj /* Helper functions.  */
325*38fd1498Szrj 
326*38fd1498Szrj /* Return the original name of NAME: strip the suffix that starts
327*38fd1498Szrj    with '.' Caller is responsible for freeing RET.  */
328*38fd1498Szrj 
329*38fd1498Szrj static char *
get_original_name(const char * name)330*38fd1498Szrj get_original_name (const char *name)
331*38fd1498Szrj {
332*38fd1498Szrj   char *ret = xstrdup (name);
333*38fd1498Szrj   char *find = strchr (ret, '.');
334*38fd1498Szrj   if (find != NULL)
335*38fd1498Szrj     *find = 0;
336*38fd1498Szrj   return ret;
337*38fd1498Szrj }
338*38fd1498Szrj 
339*38fd1498Szrj /* Return the combined location, which is a 32bit integer in which
340*38fd1498Szrj    higher 16 bits stores the line offset of LOC to the start lineno
341*38fd1498Szrj    of DECL, The lower 16 bits stores the discriminator.  */
342*38fd1498Szrj 
343*38fd1498Szrj static unsigned
get_combined_location(location_t loc,tree decl)344*38fd1498Szrj get_combined_location (location_t loc, tree decl)
345*38fd1498Szrj {
346*38fd1498Szrj   /* TODO: allow more bits for line and less bits for discriminator.  */
347*38fd1498Szrj   if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
348*38fd1498Szrj     warning_at (loc, OPT_Woverflow, "offset exceeds 16 bytes");
349*38fd1498Szrj   return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
350*38fd1498Szrj }
351*38fd1498Szrj 
352*38fd1498Szrj /* Return the function decl of a given lexical BLOCK.  */
353*38fd1498Szrj 
354*38fd1498Szrj static tree
get_function_decl_from_block(tree block)355*38fd1498Szrj get_function_decl_from_block (tree block)
356*38fd1498Szrj {
357*38fd1498Szrj   tree decl;
358*38fd1498Szrj 
359*38fd1498Szrj   if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) == UNKNOWN_LOCATION)
360*38fd1498Szrj     return NULL_TREE;
361*38fd1498Szrj 
362*38fd1498Szrj   for (decl = BLOCK_ABSTRACT_ORIGIN (block);
363*38fd1498Szrj        decl && (TREE_CODE (decl) == BLOCK);
364*38fd1498Szrj        decl = BLOCK_ABSTRACT_ORIGIN (decl))
365*38fd1498Szrj     if (TREE_CODE (decl) == FUNCTION_DECL)
366*38fd1498Szrj       break;
367*38fd1498Szrj   return decl;
368*38fd1498Szrj }
369*38fd1498Szrj 
370*38fd1498Szrj /* Store inline stack for STMT in STACK.  */
371*38fd1498Szrj 
372*38fd1498Szrj static void
get_inline_stack(location_t locus,inline_stack * stack)373*38fd1498Szrj get_inline_stack (location_t locus, inline_stack *stack)
374*38fd1498Szrj {
375*38fd1498Szrj   if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
376*38fd1498Szrj     return;
377*38fd1498Szrj 
378*38fd1498Szrj   tree block = LOCATION_BLOCK (locus);
379*38fd1498Szrj   if (block && TREE_CODE (block) == BLOCK)
380*38fd1498Szrj     {
381*38fd1498Szrj       int level = 0;
382*38fd1498Szrj       for (block = BLOCK_SUPERCONTEXT (block);
383*38fd1498Szrj            block && (TREE_CODE (block) == BLOCK);
384*38fd1498Szrj            block = BLOCK_SUPERCONTEXT (block))
385*38fd1498Szrj         {
386*38fd1498Szrj           location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
387*38fd1498Szrj           if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
388*38fd1498Szrj             continue;
389*38fd1498Szrj 
390*38fd1498Szrj           tree decl = get_function_decl_from_block (block);
391*38fd1498Szrj           stack->safe_push (
392*38fd1498Szrj               std::make_pair (decl, get_combined_location (locus, decl)));
393*38fd1498Szrj           locus = tmp_locus;
394*38fd1498Szrj           level++;
395*38fd1498Szrj         }
396*38fd1498Szrj     }
397*38fd1498Szrj   stack->safe_push (
398*38fd1498Szrj       std::make_pair (current_function_decl,
399*38fd1498Szrj                       get_combined_location (locus, current_function_decl)));
400*38fd1498Szrj }
401*38fd1498Szrj 
402*38fd1498Szrj /* Return STMT's combined location, which is a 32bit integer in which
403*38fd1498Szrj    higher 16 bits stores the line offset of LOC to the start lineno
404*38fd1498Szrj    of DECL, The lower 16 bits stores the discriminator.  */
405*38fd1498Szrj 
406*38fd1498Szrj static unsigned
get_relative_location_for_stmt(gimple * stmt)407*38fd1498Szrj get_relative_location_for_stmt (gimple *stmt)
408*38fd1498Szrj {
409*38fd1498Szrj   location_t locus = gimple_location (stmt);
410*38fd1498Szrj   if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
411*38fd1498Szrj     return UNKNOWN_LOCATION;
412*38fd1498Szrj 
413*38fd1498Szrj   for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
414*38fd1498Szrj        block = BLOCK_SUPERCONTEXT (block))
415*38fd1498Szrj     if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
416*38fd1498Szrj       return get_combined_location (locus,
417*38fd1498Szrj                                     get_function_decl_from_block (block));
418*38fd1498Szrj   return get_combined_location (locus, current_function_decl);
419*38fd1498Szrj }
420*38fd1498Szrj 
421*38fd1498Szrj /* Return true if BB contains indirect call.  */
422*38fd1498Szrj 
423*38fd1498Szrj static bool
has_indirect_call(basic_block bb)424*38fd1498Szrj has_indirect_call (basic_block bb)
425*38fd1498Szrj {
426*38fd1498Szrj   gimple_stmt_iterator gsi;
427*38fd1498Szrj 
428*38fd1498Szrj   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
429*38fd1498Szrj     {
430*38fd1498Szrj       gimple *stmt = gsi_stmt (gsi);
431*38fd1498Szrj       if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
432*38fd1498Szrj           && (gimple_call_fn (stmt) == NULL
433*38fd1498Szrj               || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
434*38fd1498Szrj         return true;
435*38fd1498Szrj     }
436*38fd1498Szrj   return false;
437*38fd1498Szrj }
438*38fd1498Szrj 
439*38fd1498Szrj /* Member functions for string_table.  */
440*38fd1498Szrj 
441*38fd1498Szrj /* Deconstructor.  */
442*38fd1498Szrj 
~string_table()443*38fd1498Szrj string_table::~string_table ()
444*38fd1498Szrj {
445*38fd1498Szrj   for (unsigned i = 0; i < vector_.length (); i++)
446*38fd1498Szrj     free (vector_[i]);
447*38fd1498Szrj }
448*38fd1498Szrj 
449*38fd1498Szrj 
450*38fd1498Szrj /* Return the index of a given function NAME. Return -1 if NAME is not
451*38fd1498Szrj    found in string table.  */
452*38fd1498Szrj 
453*38fd1498Szrj int
get_index(const char * name)454*38fd1498Szrj string_table::get_index (const char *name) const
455*38fd1498Szrj {
456*38fd1498Szrj   if (name == NULL)
457*38fd1498Szrj     return -1;
458*38fd1498Szrj   string_index_map::const_iterator iter = map_.find (name);
459*38fd1498Szrj   if (iter == map_.end ())
460*38fd1498Szrj     return -1;
461*38fd1498Szrj 
462*38fd1498Szrj   return iter->second;
463*38fd1498Szrj }
464*38fd1498Szrj 
465*38fd1498Szrj /* Return the index of a given function DECL. Return -1 if DECL is not
466*38fd1498Szrj    found in string table.  */
467*38fd1498Szrj 
468*38fd1498Szrj int
get_index_by_decl(tree decl)469*38fd1498Szrj string_table::get_index_by_decl (tree decl) const
470*38fd1498Szrj {
471*38fd1498Szrj   char *name
472*38fd1498Szrj       = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
473*38fd1498Szrj   int ret = get_index (name);
474*38fd1498Szrj   free (name);
475*38fd1498Szrj   if (ret != -1)
476*38fd1498Szrj     return ret;
477*38fd1498Szrj   ret = get_index (lang_hooks.dwarf_name (decl, 0));
478*38fd1498Szrj   if (ret != -1)
479*38fd1498Szrj     return ret;
480*38fd1498Szrj   if (DECL_ABSTRACT_ORIGIN (decl) && DECL_ABSTRACT_ORIGIN (decl) != decl)
481*38fd1498Szrj     return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
482*38fd1498Szrj 
483*38fd1498Szrj   return -1;
484*38fd1498Szrj }
485*38fd1498Szrj 
486*38fd1498Szrj /* Return the function name of a given INDEX.  */
487*38fd1498Szrj 
488*38fd1498Szrj const char *
get_name(int index)489*38fd1498Szrj string_table::get_name (int index) const
490*38fd1498Szrj {
491*38fd1498Szrj   gcc_assert (index > 0 && index < (int)vector_.length ());
492*38fd1498Szrj   return vector_[index];
493*38fd1498Szrj }
494*38fd1498Szrj 
495*38fd1498Szrj /* Read the string table. Return TRUE if reading is successful.  */
496*38fd1498Szrj 
497*38fd1498Szrj bool
read()498*38fd1498Szrj string_table::read ()
499*38fd1498Szrj {
500*38fd1498Szrj   if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
501*38fd1498Szrj     return false;
502*38fd1498Szrj   /* Skip the length of the section.  */
503*38fd1498Szrj   gcov_read_unsigned ();
504*38fd1498Szrj   /* Read in the file name table.  */
505*38fd1498Szrj   unsigned string_num = gcov_read_unsigned ();
506*38fd1498Szrj   for (unsigned i = 0; i < string_num; i++)
507*38fd1498Szrj     {
508*38fd1498Szrj       vector_.safe_push (get_original_name (gcov_read_string ()));
509*38fd1498Szrj       map_[vector_.last ()] = i;
510*38fd1498Szrj     }
511*38fd1498Szrj   return true;
512*38fd1498Szrj }
513*38fd1498Szrj 
514*38fd1498Szrj /* Member functions for function_instance.  */
515*38fd1498Szrj 
~function_instance()516*38fd1498Szrj function_instance::~function_instance ()
517*38fd1498Szrj {
518*38fd1498Szrj   for (callsite_map::iterator iter = callsites.begin ();
519*38fd1498Szrj        iter != callsites.end (); ++iter)
520*38fd1498Szrj     delete iter->second;
521*38fd1498Szrj }
522*38fd1498Szrj 
523*38fd1498Szrj /* Traverse callsites of the current function_instance to find one at the
524*38fd1498Szrj    location of LINENO and callee name represented in DECL.  */
525*38fd1498Szrj 
526*38fd1498Szrj function_instance *
get_function_instance_by_decl(unsigned lineno,tree decl)527*38fd1498Szrj function_instance::get_function_instance_by_decl (unsigned lineno,
528*38fd1498Szrj                                                   tree decl) const
529*38fd1498Szrj {
530*38fd1498Szrj   int func_name_idx = afdo_string_table->get_index_by_decl (decl);
531*38fd1498Szrj   if (func_name_idx != -1)
532*38fd1498Szrj     {
533*38fd1498Szrj       callsite_map::const_iterator ret
534*38fd1498Szrj           = callsites.find (std::make_pair (lineno, func_name_idx));
535*38fd1498Szrj       if (ret != callsites.end ())
536*38fd1498Szrj         return ret->second;
537*38fd1498Szrj     }
538*38fd1498Szrj   func_name_idx
539*38fd1498Szrj       = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
540*38fd1498Szrj   if (func_name_idx != -1)
541*38fd1498Szrj     {
542*38fd1498Szrj       callsite_map::const_iterator ret
543*38fd1498Szrj           = callsites.find (std::make_pair (lineno, func_name_idx));
544*38fd1498Szrj       if (ret != callsites.end ())
545*38fd1498Szrj         return ret->second;
546*38fd1498Szrj     }
547*38fd1498Szrj   if (DECL_ABSTRACT_ORIGIN (decl))
548*38fd1498Szrj     return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
549*38fd1498Szrj 
550*38fd1498Szrj   return NULL;
551*38fd1498Szrj }
552*38fd1498Szrj 
553*38fd1498Szrj /* Store the profile info for LOC in INFO. Return TRUE if profile info
554*38fd1498Szrj    is found.  */
555*38fd1498Szrj 
556*38fd1498Szrj bool
get_count_info(location_t loc,count_info * info)557*38fd1498Szrj function_instance::get_count_info (location_t loc, count_info *info) const
558*38fd1498Szrj {
559*38fd1498Szrj   position_count_map::const_iterator iter = pos_counts.find (loc);
560*38fd1498Szrj   if (iter == pos_counts.end ())
561*38fd1498Szrj     return false;
562*38fd1498Szrj   *info = iter->second;
563*38fd1498Szrj   return true;
564*38fd1498Szrj }
565*38fd1498Szrj 
566*38fd1498Szrj /* Mark LOC as annotated.  */
567*38fd1498Szrj 
568*38fd1498Szrj void
mark_annotated(location_t loc)569*38fd1498Szrj function_instance::mark_annotated (location_t loc)
570*38fd1498Szrj {
571*38fd1498Szrj   position_count_map::iterator iter = pos_counts.find (loc);
572*38fd1498Szrj   if (iter == pos_counts.end ())
573*38fd1498Szrj     return;
574*38fd1498Szrj   iter->second.annotated = true;
575*38fd1498Szrj }
576*38fd1498Szrj 
577*38fd1498Szrj /* Read the inlined indirect call target profile for STMT and store it in
578*38fd1498Szrj    MAP, return the total count for all inlined indirect calls.  */
579*38fd1498Szrj 
580*38fd1498Szrj gcov_type
find_icall_target_map(gcall * stmt,icall_target_map * map)581*38fd1498Szrj function_instance::find_icall_target_map (gcall *stmt,
582*38fd1498Szrj                                           icall_target_map *map) const
583*38fd1498Szrj {
584*38fd1498Szrj   gcov_type ret = 0;
585*38fd1498Szrj   unsigned stmt_offset = get_relative_location_for_stmt (stmt);
586*38fd1498Szrj 
587*38fd1498Szrj   for (callsite_map::const_iterator iter = callsites.begin ();
588*38fd1498Szrj        iter != callsites.end (); ++iter)
589*38fd1498Szrj     {
590*38fd1498Szrj       unsigned callee = iter->second->name ();
591*38fd1498Szrj       /* Check if callsite location match the stmt.  */
592*38fd1498Szrj       if (iter->first.first != stmt_offset)
593*38fd1498Szrj         continue;
594*38fd1498Szrj       struct cgraph_node *node = cgraph_node::get_for_asmname (
595*38fd1498Szrj           get_identifier (afdo_string_table->get_name (callee)));
596*38fd1498Szrj       if (node == NULL)
597*38fd1498Szrj         continue;
598*38fd1498Szrj       if (!check_ic_target (stmt, node))
599*38fd1498Szrj         continue;
600*38fd1498Szrj       (*map)[callee] = iter->second->total_count ();
601*38fd1498Szrj       ret += iter->second->total_count ();
602*38fd1498Szrj     }
603*38fd1498Szrj   return ret;
604*38fd1498Szrj }
605*38fd1498Szrj 
606*38fd1498Szrj /* Read the profile and create a function_instance with head count as
607*38fd1498Szrj    HEAD_COUNT. Recursively read callsites to create nested function_instances
608*38fd1498Szrj    too. STACK is used to track the recursive creation process.  */
609*38fd1498Szrj 
610*38fd1498Szrj /* function instance profile format:
611*38fd1498Szrj 
612*38fd1498Szrj    ENTRY_COUNT: 8 bytes
613*38fd1498Szrj    NAME_INDEX: 4 bytes
614*38fd1498Szrj    NUM_POS_COUNTS: 4 bytes
615*38fd1498Szrj    NUM_CALLSITES: 4 byte
616*38fd1498Szrj    POS_COUNT_1:
617*38fd1498Szrj      POS_1_OFFSET: 4 bytes
618*38fd1498Szrj      NUM_TARGETS: 4 bytes
619*38fd1498Szrj      COUNT: 8 bytes
620*38fd1498Szrj      TARGET_1:
621*38fd1498Szrj        VALUE_PROFILE_TYPE: 4 bytes
622*38fd1498Szrj        TARGET_IDX: 8 bytes
623*38fd1498Szrj        COUNT: 8 bytes
624*38fd1498Szrj      TARGET_2
625*38fd1498Szrj      ...
626*38fd1498Szrj      TARGET_n
627*38fd1498Szrj    POS_COUNT_2
628*38fd1498Szrj    ...
629*38fd1498Szrj    POS_COUNT_N
630*38fd1498Szrj    CALLSITE_1:
631*38fd1498Szrj      CALLSITE_1_OFFSET: 4 bytes
632*38fd1498Szrj      FUNCTION_INSTANCE_PROFILE (nested)
633*38fd1498Szrj    CALLSITE_2
634*38fd1498Szrj    ...
635*38fd1498Szrj    CALLSITE_n.  */
636*38fd1498Szrj 
637*38fd1498Szrj function_instance *
read_function_instance(function_instance_stack * stack,gcov_type head_count)638*38fd1498Szrj function_instance::read_function_instance (function_instance_stack *stack,
639*38fd1498Szrj                                            gcov_type head_count)
640*38fd1498Szrj {
641*38fd1498Szrj   unsigned name = gcov_read_unsigned ();
642*38fd1498Szrj   unsigned num_pos_counts = gcov_read_unsigned ();
643*38fd1498Szrj   unsigned num_callsites = gcov_read_unsigned ();
644*38fd1498Szrj   function_instance *s = new function_instance (name, head_count);
645*38fd1498Szrj   stack->safe_push (s);
646*38fd1498Szrj 
647*38fd1498Szrj   for (unsigned i = 0; i < num_pos_counts; i++)
648*38fd1498Szrj     {
649*38fd1498Szrj       unsigned offset = gcov_read_unsigned () & 0xffff0000;
650*38fd1498Szrj       unsigned num_targets = gcov_read_unsigned ();
651*38fd1498Szrj       gcov_type count = gcov_read_counter ();
652*38fd1498Szrj       s->pos_counts[offset].count = count;
653*38fd1498Szrj       for (unsigned j = 0; j < stack->length (); j++)
654*38fd1498Szrj         (*stack)[j]->total_count_ += count;
655*38fd1498Szrj       for (unsigned j = 0; j < num_targets; j++)
656*38fd1498Szrj         {
657*38fd1498Szrj           /* Only indirect call target histogram is supported now.  */
658*38fd1498Szrj           gcov_read_unsigned ();
659*38fd1498Szrj           gcov_type target_idx = gcov_read_counter ();
660*38fd1498Szrj           s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
661*38fd1498Szrj         }
662*38fd1498Szrj     }
663*38fd1498Szrj   for (unsigned i = 0; i < num_callsites; i++)
664*38fd1498Szrj     {
665*38fd1498Szrj       unsigned offset = gcov_read_unsigned ();
666*38fd1498Szrj       function_instance *callee_function_instance
667*38fd1498Szrj           = read_function_instance (stack, 0);
668*38fd1498Szrj       s->callsites[std::make_pair (offset, callee_function_instance->name ())]
669*38fd1498Szrj           = callee_function_instance;
670*38fd1498Szrj     }
671*38fd1498Szrj   stack->pop ();
672*38fd1498Szrj   return s;
673*38fd1498Szrj }
674*38fd1498Szrj 
675*38fd1498Szrj /* Sum of counts that is used during annotation.  */
676*38fd1498Szrj 
677*38fd1498Szrj gcov_type
total_annotated_count()678*38fd1498Szrj function_instance::total_annotated_count () const
679*38fd1498Szrj {
680*38fd1498Szrj   gcov_type ret = 0;
681*38fd1498Szrj   for (callsite_map::const_iterator iter = callsites.begin ();
682*38fd1498Szrj        iter != callsites.end (); ++iter)
683*38fd1498Szrj     ret += iter->second->total_annotated_count ();
684*38fd1498Szrj   for (position_count_map::const_iterator iter = pos_counts.begin ();
685*38fd1498Szrj        iter != pos_counts.end (); ++iter)
686*38fd1498Szrj     if (iter->second.annotated)
687*38fd1498Szrj       ret += iter->second.count;
688*38fd1498Szrj   return ret;
689*38fd1498Szrj }
690*38fd1498Szrj 
691*38fd1498Szrj /* Member functions for autofdo_source_profile.  */
692*38fd1498Szrj 
~autofdo_source_profile()693*38fd1498Szrj autofdo_source_profile::~autofdo_source_profile ()
694*38fd1498Szrj {
695*38fd1498Szrj   for (name_function_instance_map::const_iterator iter = map_.begin ();
696*38fd1498Szrj        iter != map_.end (); ++iter)
697*38fd1498Szrj     delete iter->second;
698*38fd1498Szrj }
699*38fd1498Szrj 
700*38fd1498Szrj /* For a given DECL, returns the top-level function_instance.  */
701*38fd1498Szrj 
702*38fd1498Szrj function_instance *
get_function_instance_by_decl(tree decl)703*38fd1498Szrj autofdo_source_profile::get_function_instance_by_decl (tree decl) const
704*38fd1498Szrj {
705*38fd1498Szrj   int index = afdo_string_table->get_index_by_decl (decl);
706*38fd1498Szrj   if (index == -1)
707*38fd1498Szrj     return NULL;
708*38fd1498Szrj   name_function_instance_map::const_iterator ret = map_.find (index);
709*38fd1498Szrj   return ret == map_.end () ? NULL : ret->second;
710*38fd1498Szrj }
711*38fd1498Szrj 
712*38fd1498Szrj /* Find count_info for a given gimple STMT. If found, store the count_info
713*38fd1498Szrj    in INFO and return true; otherwise return false.  */
714*38fd1498Szrj 
715*38fd1498Szrj bool
get_count_info(gimple * stmt,count_info * info)716*38fd1498Szrj autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
717*38fd1498Szrj {
718*38fd1498Szrj   if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
719*38fd1498Szrj     return false;
720*38fd1498Szrj 
721*38fd1498Szrj   inline_stack stack;
722*38fd1498Szrj   get_inline_stack (gimple_location (stmt), &stack);
723*38fd1498Szrj   if (stack.length () == 0)
724*38fd1498Szrj     return false;
725*38fd1498Szrj   function_instance *s = get_function_instance_by_inline_stack (stack);
726*38fd1498Szrj   if (s == NULL)
727*38fd1498Szrj     return false;
728*38fd1498Szrj   return s->get_count_info (stack[0].second, info);
729*38fd1498Szrj }
730*38fd1498Szrj 
731*38fd1498Szrj /* Mark LOC as annotated.  */
732*38fd1498Szrj 
733*38fd1498Szrj void
mark_annotated(location_t loc)734*38fd1498Szrj autofdo_source_profile::mark_annotated (location_t loc)
735*38fd1498Szrj {
736*38fd1498Szrj   inline_stack stack;
737*38fd1498Szrj   get_inline_stack (loc, &stack);
738*38fd1498Szrj   if (stack.length () == 0)
739*38fd1498Szrj     return;
740*38fd1498Szrj   function_instance *s = get_function_instance_by_inline_stack (stack);
741*38fd1498Szrj   if (s == NULL)
742*38fd1498Szrj     return;
743*38fd1498Szrj   s->mark_annotated (stack[0].second);
744*38fd1498Szrj }
745*38fd1498Szrj 
746*38fd1498Szrj /* Update value profile INFO for STMT from the inlined indirect callsite.
747*38fd1498Szrj    Return true if INFO is updated.  */
748*38fd1498Szrj 
749*38fd1498Szrj bool
update_inlined_ind_target(gcall * stmt,count_info * info)750*38fd1498Szrj autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
751*38fd1498Szrj                                                    count_info *info)
752*38fd1498Szrj {
753*38fd1498Szrj   if (dump_file)
754*38fd1498Szrj     {
755*38fd1498Szrj       fprintf (dump_file, "Checking indirect call -> direct call ");
756*38fd1498Szrj       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
757*38fd1498Szrj     }
758*38fd1498Szrj 
759*38fd1498Szrj   if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
760*38fd1498Szrj     {
761*38fd1498Szrj       if (dump_file)
762*38fd1498Szrj 	fprintf (dump_file, " good locus\n");
763*38fd1498Szrj       return false;
764*38fd1498Szrj     }
765*38fd1498Szrj 
766*38fd1498Szrj   count_info old_info;
767*38fd1498Szrj   get_count_info (stmt, &old_info);
768*38fd1498Szrj   gcov_type total = 0;
769*38fd1498Szrj   for (icall_target_map::const_iterator iter = old_info.targets.begin ();
770*38fd1498Szrj        iter != old_info.targets.end (); ++iter)
771*38fd1498Szrj     total += iter->second;
772*38fd1498Szrj 
773*38fd1498Szrj   /* Program behavior changed, original promoted (and inlined) target is not
774*38fd1498Szrj      hot any more. Will avoid promote the original target.
775*38fd1498Szrj 
776*38fd1498Szrj      To check if original promoted target is still hot, we check the total
777*38fd1498Szrj      count of the unpromoted targets (stored in TOTAL). If a callsite count
778*38fd1498Szrj      (stored in INFO) is smaller than half of the total count, the original
779*38fd1498Szrj      promoted target is considered not hot any more.  */
780*38fd1498Szrj   if (info->count < total / 2)
781*38fd1498Szrj     {
782*38fd1498Szrj       if (dump_file)
783*38fd1498Szrj 	fprintf (dump_file, " not hot anymore %ld < %ld",
784*38fd1498Szrj 		 (long)info->count,
785*38fd1498Szrj 		 (long)total /2);
786*38fd1498Szrj       return false;
787*38fd1498Szrj     }
788*38fd1498Szrj 
789*38fd1498Szrj   inline_stack stack;
790*38fd1498Szrj   get_inline_stack (gimple_location (stmt), &stack);
791*38fd1498Szrj   if (stack.length () == 0)
792*38fd1498Szrj     {
793*38fd1498Szrj       if (dump_file)
794*38fd1498Szrj 	fprintf (dump_file, " no inline stack\n");
795*38fd1498Szrj       return false;
796*38fd1498Szrj     }
797*38fd1498Szrj   function_instance *s = get_function_instance_by_inline_stack (stack);
798*38fd1498Szrj   if (s == NULL)
799*38fd1498Szrj     {
800*38fd1498Szrj       if (dump_file)
801*38fd1498Szrj 	fprintf (dump_file, " function not found in inline stack\n");
802*38fd1498Szrj       return false;
803*38fd1498Szrj     }
804*38fd1498Szrj   icall_target_map map;
805*38fd1498Szrj   if (s->find_icall_target_map (stmt, &map) == 0)
806*38fd1498Szrj     {
807*38fd1498Szrj       if (dump_file)
808*38fd1498Szrj 	fprintf (dump_file, " no target map\n");
809*38fd1498Szrj       return false;
810*38fd1498Szrj     }
811*38fd1498Szrj   for (icall_target_map::const_iterator iter = map.begin ();
812*38fd1498Szrj        iter != map.end (); ++iter)
813*38fd1498Szrj     info->targets[iter->first] = iter->second;
814*38fd1498Szrj   if (dump_file)
815*38fd1498Szrj     fprintf (dump_file, " looks good\n");
816*38fd1498Szrj   return true;
817*38fd1498Szrj }
818*38fd1498Szrj 
819*38fd1498Szrj /* Find total count of the callee of EDGE.  */
820*38fd1498Szrj 
821*38fd1498Szrj gcov_type
get_callsite_total_count(struct cgraph_edge * edge)822*38fd1498Szrj autofdo_source_profile::get_callsite_total_count (
823*38fd1498Szrj     struct cgraph_edge *edge) const
824*38fd1498Szrj {
825*38fd1498Szrj   inline_stack stack;
826*38fd1498Szrj   stack.safe_push (std::make_pair (edge->callee->decl, 0));
827*38fd1498Szrj   get_inline_stack (gimple_location (edge->call_stmt), &stack);
828*38fd1498Szrj 
829*38fd1498Szrj   function_instance *s = get_function_instance_by_inline_stack (stack);
830*38fd1498Szrj   if (s == NULL
831*38fd1498Szrj       || afdo_string_table->get_index (IDENTIFIER_POINTER (
832*38fd1498Szrj              DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
833*38fd1498Szrj     return 0;
834*38fd1498Szrj 
835*38fd1498Szrj   return s->total_count ();
836*38fd1498Szrj }
837*38fd1498Szrj 
838*38fd1498Szrj /* Read AutoFDO profile and returns TRUE on success.  */
839*38fd1498Szrj 
840*38fd1498Szrj /* source profile format:
841*38fd1498Szrj 
842*38fd1498Szrj    GCOV_TAG_AFDO_FUNCTION: 4 bytes
843*38fd1498Szrj    LENGTH: 4 bytes
844*38fd1498Szrj    NUM_FUNCTIONS: 4 bytes
845*38fd1498Szrj    FUNCTION_INSTANCE_1
846*38fd1498Szrj    FUNCTION_INSTANCE_2
847*38fd1498Szrj    ...
848*38fd1498Szrj    FUNCTION_INSTANCE_N.  */
849*38fd1498Szrj 
850*38fd1498Szrj bool
read()851*38fd1498Szrj autofdo_source_profile::read ()
852*38fd1498Szrj {
853*38fd1498Szrj   if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
854*38fd1498Szrj     {
855*38fd1498Szrj       inform (UNKNOWN_LOCATION, "Not expected TAG.");
856*38fd1498Szrj       return false;
857*38fd1498Szrj     }
858*38fd1498Szrj 
859*38fd1498Szrj   /* Skip the length of the section.  */
860*38fd1498Szrj   gcov_read_unsigned ();
861*38fd1498Szrj 
862*38fd1498Szrj   /* Read in the function/callsite profile, and store it in local
863*38fd1498Szrj      data structure.  */
864*38fd1498Szrj   unsigned function_num = gcov_read_unsigned ();
865*38fd1498Szrj   for (unsigned i = 0; i < function_num; i++)
866*38fd1498Szrj     {
867*38fd1498Szrj       function_instance::function_instance_stack stack;
868*38fd1498Szrj       function_instance *s = function_instance::read_function_instance (
869*38fd1498Szrj           &stack, gcov_read_counter ());
870*38fd1498Szrj       afdo_profile_info->sum_all += s->total_count ();
871*38fd1498Szrj       map_[s->name ()] = s;
872*38fd1498Szrj     }
873*38fd1498Szrj   return true;
874*38fd1498Szrj }
875*38fd1498Szrj 
876*38fd1498Szrj /* Return the function_instance in the profile that correspond to the
877*38fd1498Szrj    inline STACK.  */
878*38fd1498Szrj 
879*38fd1498Szrj function_instance *
get_function_instance_by_inline_stack(const inline_stack & stack)880*38fd1498Szrj autofdo_source_profile::get_function_instance_by_inline_stack (
881*38fd1498Szrj     const inline_stack &stack) const
882*38fd1498Szrj {
883*38fd1498Szrj   name_function_instance_map::const_iterator iter = map_.find (
884*38fd1498Szrj       afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
885*38fd1498Szrj   if (iter == map_.end())
886*38fd1498Szrj     return NULL;
887*38fd1498Szrj   function_instance *s = iter->second;
888*38fd1498Szrj   for (unsigned i = stack.length() - 1; i > 0; i--)
889*38fd1498Szrj     {
890*38fd1498Szrj       s = s->get_function_instance_by_decl (
891*38fd1498Szrj           stack[i].second, stack[i - 1].first);
892*38fd1498Szrj       if (s == NULL)
893*38fd1498Szrj         return NULL;
894*38fd1498Szrj     }
895*38fd1498Szrj   return s;
896*38fd1498Szrj }
897*38fd1498Szrj 
898*38fd1498Szrj /* Module profile is only used by LIPO. Here we simply ignore it.  */
899*38fd1498Szrj 
900*38fd1498Szrj static void
fake_read_autofdo_module_profile()901*38fd1498Szrj fake_read_autofdo_module_profile ()
902*38fd1498Szrj {
903*38fd1498Szrj   /* Read in the module info.  */
904*38fd1498Szrj   gcov_read_unsigned ();
905*38fd1498Szrj 
906*38fd1498Szrj   /* Skip the length of the section.  */
907*38fd1498Szrj   gcov_read_unsigned ();
908*38fd1498Szrj 
909*38fd1498Szrj   /* Read in the file name table.  */
910*38fd1498Szrj   unsigned total_module_num = gcov_read_unsigned ();
911*38fd1498Szrj   gcc_assert (total_module_num == 0);
912*38fd1498Szrj }
913*38fd1498Szrj 
914*38fd1498Szrj /* Read data from profile data file.  */
915*38fd1498Szrj 
916*38fd1498Szrj static void
read_profile(void)917*38fd1498Szrj read_profile (void)
918*38fd1498Szrj {
919*38fd1498Szrj   if (gcov_open (auto_profile_file, 1) == 0)
920*38fd1498Szrj     {
921*38fd1498Szrj       error ("cannot open profile file %s", auto_profile_file);
922*38fd1498Szrj       return;
923*38fd1498Szrj     }
924*38fd1498Szrj 
925*38fd1498Szrj   if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
926*38fd1498Szrj     {
927*38fd1498Szrj       error ("AutoFDO profile magic number does not match");
928*38fd1498Szrj       return;
929*38fd1498Szrj     }
930*38fd1498Szrj 
931*38fd1498Szrj   /* Skip the version number.  */
932*38fd1498Szrj   unsigned version = gcov_read_unsigned ();
933*38fd1498Szrj   if (version != AUTO_PROFILE_VERSION)
934*38fd1498Szrj     {
935*38fd1498Szrj       error ("AutoFDO profile version %u does match %u",
936*38fd1498Szrj 	     version, AUTO_PROFILE_VERSION);
937*38fd1498Szrj       return;
938*38fd1498Szrj     }
939*38fd1498Szrj 
940*38fd1498Szrj   /* Skip the empty integer.  */
941*38fd1498Szrj   gcov_read_unsigned ();
942*38fd1498Szrj 
943*38fd1498Szrj   /* string_table.  */
944*38fd1498Szrj   afdo_string_table = new string_table ();
945*38fd1498Szrj   if (!afdo_string_table->read())
946*38fd1498Szrj     {
947*38fd1498Szrj       error ("cannot read string table from %s", auto_profile_file);
948*38fd1498Szrj       return;
949*38fd1498Szrj     }
950*38fd1498Szrj 
951*38fd1498Szrj   /* autofdo_source_profile.  */
952*38fd1498Szrj   afdo_source_profile = autofdo_source_profile::create ();
953*38fd1498Szrj   if (afdo_source_profile == NULL)
954*38fd1498Szrj     {
955*38fd1498Szrj       error ("cannot read function profile from %s", auto_profile_file);
956*38fd1498Szrj       return;
957*38fd1498Szrj     }
958*38fd1498Szrj 
959*38fd1498Szrj   /* autofdo_module_profile.  */
960*38fd1498Szrj   fake_read_autofdo_module_profile ();
961*38fd1498Szrj 
962*38fd1498Szrj   /* Read in the working set.  */
963*38fd1498Szrj   if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
964*38fd1498Szrj     {
965*38fd1498Szrj       error ("cannot read working set from %s", auto_profile_file);
966*38fd1498Szrj       return;
967*38fd1498Szrj     }
968*38fd1498Szrj 
969*38fd1498Szrj   /* Skip the length of the section.  */
970*38fd1498Szrj   gcov_read_unsigned ();
971*38fd1498Szrj   gcov_working_set_t set[128];
972*38fd1498Szrj   for (unsigned i = 0; i < 128; i++)
973*38fd1498Szrj     {
974*38fd1498Szrj       set[i].num_counters = gcov_read_unsigned ();
975*38fd1498Szrj       set[i].min_counter = gcov_read_counter ();
976*38fd1498Szrj     }
977*38fd1498Szrj   add_working_set (set);
978*38fd1498Szrj }
979*38fd1498Szrj 
980*38fd1498Szrj /* From AutoFDO profiles, find values inside STMT for that we want to measure
981*38fd1498Szrj    histograms for indirect-call optimization.
982*38fd1498Szrj 
983*38fd1498Szrj    This function is actually served for 2 purposes:
984*38fd1498Szrj      * before annotation, we need to mark histogram, promote and inline
985*38fd1498Szrj      * after annotation, we just need to mark, and let follow-up logic to
986*38fd1498Szrj        decide if it needs to promote and inline.  */
987*38fd1498Szrj 
988*38fd1498Szrj static void
afdo_indirect_call(gimple_stmt_iterator * gsi,const icall_target_map & map,bool transform)989*38fd1498Szrj afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
990*38fd1498Szrj                     bool transform)
991*38fd1498Szrj {
992*38fd1498Szrj   gimple *gs = gsi_stmt (*gsi);
993*38fd1498Szrj   tree callee;
994*38fd1498Szrj 
995*38fd1498Szrj   if (map.size () == 0)
996*38fd1498Szrj     return;
997*38fd1498Szrj   gcall *stmt = dyn_cast <gcall *> (gs);
998*38fd1498Szrj   if ((!stmt) || gimple_call_fndecl (stmt) != NULL_TREE)
999*38fd1498Szrj     return;
1000*38fd1498Szrj 
1001*38fd1498Szrj   callee = gimple_call_fn (stmt);
1002*38fd1498Szrj 
1003*38fd1498Szrj   histogram_value hist = gimple_alloc_histogram_value (
1004*38fd1498Szrj       cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
1005*38fd1498Szrj   hist->n_counters = 3;
1006*38fd1498Szrj   hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
1007*38fd1498Szrj   gimple_add_histogram_value (cfun, stmt, hist);
1008*38fd1498Szrj 
1009*38fd1498Szrj   gcov_type total = 0;
1010*38fd1498Szrj   icall_target_map::const_iterator max_iter = map.end ();
1011*38fd1498Szrj 
1012*38fd1498Szrj   for (icall_target_map::const_iterator iter = map.begin ();
1013*38fd1498Szrj        iter != map.end (); ++iter)
1014*38fd1498Szrj     {
1015*38fd1498Szrj       total += iter->second;
1016*38fd1498Szrj       if (max_iter == map.end () || max_iter->second < iter->second)
1017*38fd1498Szrj         max_iter = iter;
1018*38fd1498Szrj     }
1019*38fd1498Szrj 
1020*38fd1498Szrj   hist->hvalue.counters[0]
1021*38fd1498Szrj       = (unsigned long long)afdo_string_table->get_name (max_iter->first);
1022*38fd1498Szrj   hist->hvalue.counters[1] = max_iter->second;
1023*38fd1498Szrj   hist->hvalue.counters[2] = total;
1024*38fd1498Szrj 
1025*38fd1498Szrj   if (!transform)
1026*38fd1498Szrj     return;
1027*38fd1498Szrj 
1028*38fd1498Szrj   struct cgraph_edge *indirect_edge
1029*38fd1498Szrj       = cgraph_node::get (current_function_decl)->get_edge (stmt);
1030*38fd1498Szrj   struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
1031*38fd1498Szrj       get_identifier ((const char *) hist->hvalue.counters[0]));
1032*38fd1498Szrj 
1033*38fd1498Szrj   if (dump_file)
1034*38fd1498Szrj     {
1035*38fd1498Szrj       fprintf (dump_file, "Indirect call -> direct call ");
1036*38fd1498Szrj       print_generic_expr (dump_file, callee, TDF_SLIM);
1037*38fd1498Szrj       fprintf (dump_file, " => ");
1038*38fd1498Szrj       print_generic_expr (dump_file, direct_call->decl, TDF_SLIM);
1039*38fd1498Szrj     }
1040*38fd1498Szrj 
1041*38fd1498Szrj   if (direct_call == NULL || !check_ic_target (stmt, direct_call))
1042*38fd1498Szrj     {
1043*38fd1498Szrj       if (dump_file)
1044*38fd1498Szrj         fprintf (dump_file, " not transforming\n");
1045*38fd1498Szrj       return;
1046*38fd1498Szrj     }
1047*38fd1498Szrj   if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
1048*38fd1498Szrj     {
1049*38fd1498Szrj       if (dump_file)
1050*38fd1498Szrj         fprintf (dump_file, " no declaration\n");
1051*38fd1498Szrj       return;
1052*38fd1498Szrj     }
1053*38fd1498Szrj 
1054*38fd1498Szrj   if (dump_file)
1055*38fd1498Szrj     {
1056*38fd1498Szrj       fprintf (dump_file, " transformation on insn ");
1057*38fd1498Szrj       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1058*38fd1498Szrj       fprintf (dump_file, "\n");
1059*38fd1498Szrj     }
1060*38fd1498Szrj 
1061*38fd1498Szrj   /* FIXME: Count should be initialized.  */
1062*38fd1498Szrj   struct cgraph_edge *new_edge
1063*38fd1498Szrj       = indirect_edge->make_speculative (direct_call,
1064*38fd1498Szrj 					 profile_count::uninitialized ());
1065*38fd1498Szrj   new_edge->redirect_call_stmt_to_callee ();
1066*38fd1498Szrj   gimple_remove_histogram_value (cfun, stmt, hist);
1067*38fd1498Szrj   inline_call (new_edge, true, NULL, NULL, false);
1068*38fd1498Szrj }
1069*38fd1498Szrj 
1070*38fd1498Szrj /* From AutoFDO profiles, find values inside STMT for that we want to measure
1071*38fd1498Szrj    histograms and adds them to list VALUES.  */
1072*38fd1498Szrj 
1073*38fd1498Szrj static void
afdo_vpt(gimple_stmt_iterator * gsi,const icall_target_map & map,bool transform)1074*38fd1498Szrj afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
1075*38fd1498Szrj           bool transform)
1076*38fd1498Szrj {
1077*38fd1498Szrj   afdo_indirect_call (gsi, map, transform);
1078*38fd1498Szrj }
1079*38fd1498Szrj 
1080*38fd1498Szrj typedef std::set<basic_block> bb_set;
1081*38fd1498Szrj typedef std::set<edge> edge_set;
1082*38fd1498Szrj 
1083*38fd1498Szrj static bool
is_bb_annotated(const basic_block bb,const bb_set & annotated)1084*38fd1498Szrj is_bb_annotated (const basic_block bb, const bb_set &annotated)
1085*38fd1498Szrj {
1086*38fd1498Szrj   return annotated.find (bb) != annotated.end ();
1087*38fd1498Szrj }
1088*38fd1498Szrj 
1089*38fd1498Szrj static void
set_bb_annotated(basic_block bb,bb_set * annotated)1090*38fd1498Szrj set_bb_annotated (basic_block bb, bb_set *annotated)
1091*38fd1498Szrj {
1092*38fd1498Szrj   annotated->insert (bb);
1093*38fd1498Szrj }
1094*38fd1498Szrj 
1095*38fd1498Szrj static bool
is_edge_annotated(const edge e,const edge_set & annotated)1096*38fd1498Szrj is_edge_annotated (const edge e, const edge_set &annotated)
1097*38fd1498Szrj {
1098*38fd1498Szrj   return annotated.find (e) != annotated.end ();
1099*38fd1498Szrj }
1100*38fd1498Szrj 
1101*38fd1498Szrj static void
set_edge_annotated(edge e,edge_set * annotated)1102*38fd1498Szrj set_edge_annotated (edge e, edge_set *annotated)
1103*38fd1498Szrj {
1104*38fd1498Szrj   annotated->insert (e);
1105*38fd1498Szrj }
1106*38fd1498Szrj 
1107*38fd1498Szrj /* For a given BB, set its execution count. Attach value profile if a stmt
1108*38fd1498Szrj    is not in PROMOTED, because we only want to promote an indirect call once.
1109*38fd1498Szrj    Return TRUE if BB is annotated.  */
1110*38fd1498Szrj 
1111*38fd1498Szrj static bool
afdo_set_bb_count(basic_block bb,const stmt_set & promoted)1112*38fd1498Szrj afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1113*38fd1498Szrj {
1114*38fd1498Szrj   gimple_stmt_iterator gsi;
1115*38fd1498Szrj   edge e;
1116*38fd1498Szrj   edge_iterator ei;
1117*38fd1498Szrj   gcov_type max_count = 0;
1118*38fd1498Szrj   bool has_annotated = false;
1119*38fd1498Szrj 
1120*38fd1498Szrj   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1121*38fd1498Szrj     {
1122*38fd1498Szrj       count_info info;
1123*38fd1498Szrj       gimple *stmt = gsi_stmt (gsi);
1124*38fd1498Szrj       if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1125*38fd1498Szrj         continue;
1126*38fd1498Szrj       if (afdo_source_profile->get_count_info (stmt, &info))
1127*38fd1498Szrj         {
1128*38fd1498Szrj           if (info.count > max_count)
1129*38fd1498Szrj             max_count = info.count;
1130*38fd1498Szrj           has_annotated = true;
1131*38fd1498Szrj           if (info.targets.size () > 0
1132*38fd1498Szrj               && promoted.find (stmt) == promoted.end ())
1133*38fd1498Szrj             afdo_vpt (&gsi, info.targets, false);
1134*38fd1498Szrj         }
1135*38fd1498Szrj     }
1136*38fd1498Szrj 
1137*38fd1498Szrj   if (!has_annotated)
1138*38fd1498Szrj     return false;
1139*38fd1498Szrj 
1140*38fd1498Szrj   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1141*38fd1498Szrj     afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1142*38fd1498Szrj   for (gphi_iterator gpi = gsi_start_phis (bb);
1143*38fd1498Szrj        !gsi_end_p (gpi);
1144*38fd1498Szrj        gsi_next (&gpi))
1145*38fd1498Szrj     {
1146*38fd1498Szrj       gphi *phi = gpi.phi ();
1147*38fd1498Szrj       size_t i;
1148*38fd1498Szrj       for (i = 0; i < gimple_phi_num_args (phi); i++)
1149*38fd1498Szrj         afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1150*38fd1498Szrj     }
1151*38fd1498Szrj   FOR_EACH_EDGE (e, ei, bb->succs)
1152*38fd1498Szrj   afdo_source_profile->mark_annotated (e->goto_locus);
1153*38fd1498Szrj 
1154*38fd1498Szrj   bb->count = profile_count::from_gcov_type (max_count).afdo ();
1155*38fd1498Szrj   return true;
1156*38fd1498Szrj }
1157*38fd1498Szrj 
1158*38fd1498Szrj /* BB1 and BB2 are in an equivalent class iff:
1159*38fd1498Szrj    1. BB1 dominates BB2.
1160*38fd1498Szrj    2. BB2 post-dominates BB1.
1161*38fd1498Szrj    3. BB1 and BB2 are in the same loop nest.
1162*38fd1498Szrj    This function finds the equivalent class for each basic block, and
1163*38fd1498Szrj    stores a pointer to the first BB in its equivalent class. Meanwhile,
1164*38fd1498Szrj    set bb counts for the same equivalent class to be idenical. Update
1165*38fd1498Szrj    ANNOTATED_BB for the first BB in its equivalent class.  */
1166*38fd1498Szrj 
1167*38fd1498Szrj static void
afdo_find_equiv_class(bb_set * annotated_bb)1168*38fd1498Szrj afdo_find_equiv_class (bb_set *annotated_bb)
1169*38fd1498Szrj {
1170*38fd1498Szrj   basic_block bb;
1171*38fd1498Szrj 
1172*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
1173*38fd1498Szrj   bb->aux = NULL;
1174*38fd1498Szrj 
1175*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
1176*38fd1498Szrj   {
1177*38fd1498Szrj     vec<basic_block> dom_bbs;
1178*38fd1498Szrj     basic_block bb1;
1179*38fd1498Szrj     int i;
1180*38fd1498Szrj 
1181*38fd1498Szrj     if (bb->aux != NULL)
1182*38fd1498Szrj       continue;
1183*38fd1498Szrj     bb->aux = bb;
1184*38fd1498Szrj     dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1185*38fd1498Szrj     FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1186*38fd1498Szrj       if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1187*38fd1498Szrj 	  && bb1->loop_father == bb->loop_father)
1188*38fd1498Szrj 	{
1189*38fd1498Szrj 	  bb1->aux = bb;
1190*38fd1498Szrj 	  if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1191*38fd1498Szrj 	    {
1192*38fd1498Szrj 	      bb->count = bb1->count;
1193*38fd1498Szrj 	      set_bb_annotated (bb, annotated_bb);
1194*38fd1498Szrj 	    }
1195*38fd1498Szrj 	}
1196*38fd1498Szrj     dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
1197*38fd1498Szrj     FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1198*38fd1498Szrj       if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1199*38fd1498Szrj 	  && bb1->loop_father == bb->loop_father)
1200*38fd1498Szrj 	{
1201*38fd1498Szrj 	  bb1->aux = bb;
1202*38fd1498Szrj 	  if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1203*38fd1498Szrj 	    {
1204*38fd1498Szrj 	      bb->count = bb1->count;
1205*38fd1498Szrj 	      set_bb_annotated (bb, annotated_bb);
1206*38fd1498Szrj 	    }
1207*38fd1498Szrj 	}
1208*38fd1498Szrj   }
1209*38fd1498Szrj }
1210*38fd1498Szrj 
1211*38fd1498Szrj /* If a basic block's count is known, and only one of its in/out edges' count
1212*38fd1498Szrj    is unknown, its count can be calculated. Meanwhile, if all of the in/out
1213*38fd1498Szrj    edges' counts are known, then the basic block's unknown count can also be
1214*38fd1498Szrj    calculated.
1215*38fd1498Szrj    IS_SUCC is true if out edges of a basic blocks are examined.
1216*38fd1498Szrj    Update ANNOTATED_BB and ANNOTATED_EDGE accordingly.
1217*38fd1498Szrj    Return TRUE if any basic block/edge count is changed.  */
1218*38fd1498Szrj 
1219*38fd1498Szrj static bool
afdo_propagate_edge(bool is_succ,bb_set * annotated_bb,edge_set * annotated_edge)1220*38fd1498Szrj afdo_propagate_edge (bool is_succ, bb_set *annotated_bb,
1221*38fd1498Szrj                      edge_set *annotated_edge)
1222*38fd1498Szrj {
1223*38fd1498Szrj   basic_block bb;
1224*38fd1498Szrj   bool changed = false;
1225*38fd1498Szrj 
1226*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
1227*38fd1498Szrj   {
1228*38fd1498Szrj     edge e, unknown_edge = NULL;
1229*38fd1498Szrj     edge_iterator ei;
1230*38fd1498Szrj     int num_unknown_edge = 0;
1231*38fd1498Szrj     profile_count total_known_count = profile_count::zero ().afdo ();
1232*38fd1498Szrj 
1233*38fd1498Szrj     FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1234*38fd1498Szrj       if (!is_edge_annotated (e, *annotated_edge))
1235*38fd1498Szrj 	num_unknown_edge++, unknown_edge = e;
1236*38fd1498Szrj       else
1237*38fd1498Szrj 	total_known_count += e->count ();
1238*38fd1498Szrj 
1239*38fd1498Szrj     if (num_unknown_edge == 0)
1240*38fd1498Szrj       {
1241*38fd1498Szrj         if (total_known_count > bb->count)
1242*38fd1498Szrj           {
1243*38fd1498Szrj             bb->count = total_known_count;
1244*38fd1498Szrj             changed = true;
1245*38fd1498Szrj           }
1246*38fd1498Szrj         if (!is_bb_annotated (bb, *annotated_bb))
1247*38fd1498Szrj           {
1248*38fd1498Szrj             set_bb_annotated (bb, annotated_bb);
1249*38fd1498Szrj             changed = true;
1250*38fd1498Szrj           }
1251*38fd1498Szrj       }
1252*38fd1498Szrj     else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1253*38fd1498Szrj       {
1254*38fd1498Szrj         unknown_edge->probability
1255*38fd1498Szrj 	  = total_known_count.probability_in (bb->count);
1256*38fd1498Szrj         set_edge_annotated (unknown_edge, annotated_edge);
1257*38fd1498Szrj         changed = true;
1258*38fd1498Szrj       }
1259*38fd1498Szrj   }
1260*38fd1498Szrj   return changed;
1261*38fd1498Szrj }
1262*38fd1498Szrj 
1263*38fd1498Szrj /* Special propagation for circuit expressions. Because GCC translates
1264*38fd1498Szrj    control flow into data flow for circuit expressions. E.g.
1265*38fd1498Szrj    BB1:
1266*38fd1498Szrj    if (a && b)
1267*38fd1498Szrj      BB2
1268*38fd1498Szrj    else
1269*38fd1498Szrj      BB3
1270*38fd1498Szrj 
1271*38fd1498Szrj    will be translated into:
1272*38fd1498Szrj 
1273*38fd1498Szrj    BB1:
1274*38fd1498Szrj      if (a)
1275*38fd1498Szrj        goto BB.t1
1276*38fd1498Szrj      else
1277*38fd1498Szrj        goto BB.t3
1278*38fd1498Szrj    BB.t1:
1279*38fd1498Szrj      if (b)
1280*38fd1498Szrj        goto BB.t2
1281*38fd1498Szrj      else
1282*38fd1498Szrj        goto BB.t3
1283*38fd1498Szrj    BB.t2:
1284*38fd1498Szrj      goto BB.t3
1285*38fd1498Szrj    BB.t3:
1286*38fd1498Szrj      tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1287*38fd1498Szrj      if (tmp)
1288*38fd1498Szrj        goto BB2
1289*38fd1498Szrj      else
1290*38fd1498Szrj        goto BB3
1291*38fd1498Szrj 
1292*38fd1498Szrj    In this case, we need to propagate through PHI to determine the edge
1293*38fd1498Szrj    count of BB1->BB.t1, BB.t1->BB.t2.
1294*38fd1498Szrj    Update ANNOTATED_EDGE accordingly.  */
1295*38fd1498Szrj 
1296*38fd1498Szrj static void
afdo_propagate_circuit(const bb_set & annotated_bb,edge_set * annotated_edge)1297*38fd1498Szrj afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge)
1298*38fd1498Szrj {
1299*38fd1498Szrj   basic_block bb;
1300*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
1301*38fd1498Szrj   {
1302*38fd1498Szrj     gimple *def_stmt;
1303*38fd1498Szrj     tree cmp_rhs, cmp_lhs;
1304*38fd1498Szrj     gimple *cmp_stmt = last_stmt (bb);
1305*38fd1498Szrj     edge e;
1306*38fd1498Szrj     edge_iterator ei;
1307*38fd1498Szrj 
1308*38fd1498Szrj     if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1309*38fd1498Szrj       continue;
1310*38fd1498Szrj     cmp_rhs = gimple_cond_rhs (cmp_stmt);
1311*38fd1498Szrj     cmp_lhs = gimple_cond_lhs (cmp_stmt);
1312*38fd1498Szrj     if (!TREE_CONSTANT (cmp_rhs)
1313*38fd1498Szrj         || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1314*38fd1498Szrj       continue;
1315*38fd1498Szrj     if (TREE_CODE (cmp_lhs) != SSA_NAME)
1316*38fd1498Szrj       continue;
1317*38fd1498Szrj     if (!is_bb_annotated (bb, annotated_bb))
1318*38fd1498Szrj       continue;
1319*38fd1498Szrj     def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1320*38fd1498Szrj     while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
1321*38fd1498Szrj            && gimple_assign_single_p (def_stmt)
1322*38fd1498Szrj            && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
1323*38fd1498Szrj       def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
1324*38fd1498Szrj     if (!def_stmt)
1325*38fd1498Szrj       continue;
1326*38fd1498Szrj     gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
1327*38fd1498Szrj     if (!phi_stmt)
1328*38fd1498Szrj       continue;
1329*38fd1498Szrj     FOR_EACH_EDGE (e, ei, bb->succs)
1330*38fd1498Szrj     {
1331*38fd1498Szrj       unsigned i, total = 0;
1332*38fd1498Szrj       edge only_one;
1333*38fd1498Szrj       bool check_value_one = (((integer_onep (cmp_rhs))
1334*38fd1498Szrj                                ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1335*38fd1498Szrj                               ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1336*38fd1498Szrj       if (!is_edge_annotated (e, *annotated_edge))
1337*38fd1498Szrj         continue;
1338*38fd1498Szrj       for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1339*38fd1498Szrj         {
1340*38fd1498Szrj           tree val = gimple_phi_arg_def (phi_stmt, i);
1341*38fd1498Szrj           edge ep = gimple_phi_arg_edge (phi_stmt, i);
1342*38fd1498Szrj 
1343*38fd1498Szrj           if (!TREE_CONSTANT (val)
1344*38fd1498Szrj               || !(integer_zerop (val) || integer_onep (val)))
1345*38fd1498Szrj             continue;
1346*38fd1498Szrj           if (check_value_one ^ integer_onep (val))
1347*38fd1498Szrj             continue;
1348*38fd1498Szrj           total++;
1349*38fd1498Szrj           only_one = ep;
1350*38fd1498Szrj           if (!e->probability.initialized_p ()
1351*38fd1498Szrj 	      && !is_edge_annotated (ep, *annotated_edge))
1352*38fd1498Szrj             {
1353*38fd1498Szrj               ep->probability = profile_probability::never ().afdo ();
1354*38fd1498Szrj               set_edge_annotated (ep, annotated_edge);
1355*38fd1498Szrj             }
1356*38fd1498Szrj         }
1357*38fd1498Szrj       if (total == 1 && !is_edge_annotated (only_one, *annotated_edge))
1358*38fd1498Szrj         {
1359*38fd1498Szrj           only_one->probability = e->probability;
1360*38fd1498Szrj           set_edge_annotated (only_one, annotated_edge);
1361*38fd1498Szrj         }
1362*38fd1498Szrj     }
1363*38fd1498Szrj   }
1364*38fd1498Szrj }
1365*38fd1498Szrj 
1366*38fd1498Szrj /* Propagate the basic block count and edge count on the control flow
1367*38fd1498Szrj    graph. We do the propagation iteratively until stablize.  */
1368*38fd1498Szrj 
1369*38fd1498Szrj static void
afdo_propagate(bb_set * annotated_bb,edge_set * annotated_edge)1370*38fd1498Szrj afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge)
1371*38fd1498Szrj {
1372*38fd1498Szrj   basic_block bb;
1373*38fd1498Szrj   bool changed = true;
1374*38fd1498Szrj   int i = 0;
1375*38fd1498Szrj 
1376*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
1377*38fd1498Szrj   {
1378*38fd1498Szrj     bb->count = ((basic_block)bb->aux)->count;
1379*38fd1498Szrj     if (is_bb_annotated ((basic_block)bb->aux, *annotated_bb))
1380*38fd1498Szrj       set_bb_annotated (bb, annotated_bb);
1381*38fd1498Szrj   }
1382*38fd1498Szrj 
1383*38fd1498Szrj   while (changed && i++ < 10)
1384*38fd1498Szrj     {
1385*38fd1498Szrj       changed = false;
1386*38fd1498Szrj 
1387*38fd1498Szrj       if (afdo_propagate_edge (true, annotated_bb, annotated_edge))
1388*38fd1498Szrj         changed = true;
1389*38fd1498Szrj       if (afdo_propagate_edge (false, annotated_bb, annotated_edge))
1390*38fd1498Szrj         changed = true;
1391*38fd1498Szrj       afdo_propagate_circuit (*annotated_bb, annotated_edge);
1392*38fd1498Szrj     }
1393*38fd1498Szrj }
1394*38fd1498Szrj 
1395*38fd1498Szrj /* Propagate counts on control flow graph and calculate branch
1396*38fd1498Szrj    probabilities.  */
1397*38fd1498Szrj 
1398*38fd1498Szrj static void
afdo_calculate_branch_prob(bb_set * annotated_bb,edge_set * annotated_edge)1399*38fd1498Szrj afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge)
1400*38fd1498Szrj {
1401*38fd1498Szrj   basic_block bb;
1402*38fd1498Szrj   bool has_sample = false;
1403*38fd1498Szrj 
1404*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
1405*38fd1498Szrj   {
1406*38fd1498Szrj     if (bb->count > profile_count::zero ())
1407*38fd1498Szrj       {
1408*38fd1498Szrj 	has_sample = true;
1409*38fd1498Szrj 	break;
1410*38fd1498Szrj       }
1411*38fd1498Szrj   }
1412*38fd1498Szrj 
1413*38fd1498Szrj   if (!has_sample)
1414*38fd1498Szrj     return;
1415*38fd1498Szrj 
1416*38fd1498Szrj   calculate_dominance_info (CDI_POST_DOMINATORS);
1417*38fd1498Szrj   calculate_dominance_info (CDI_DOMINATORS);
1418*38fd1498Szrj   loop_optimizer_init (0);
1419*38fd1498Szrj 
1420*38fd1498Szrj   afdo_find_equiv_class (annotated_bb);
1421*38fd1498Szrj   afdo_propagate (annotated_bb, annotated_edge);
1422*38fd1498Szrj 
1423*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
1424*38fd1498Szrj   {
1425*38fd1498Szrj     edge e;
1426*38fd1498Szrj     edge_iterator ei;
1427*38fd1498Szrj     int num_unknown_succ = 0;
1428*38fd1498Szrj     profile_count total_count = profile_count::zero ();
1429*38fd1498Szrj 
1430*38fd1498Szrj     FOR_EACH_EDGE (e, ei, bb->succs)
1431*38fd1498Szrj     {
1432*38fd1498Szrj       if (!is_edge_annotated (e, *annotated_edge))
1433*38fd1498Szrj         num_unknown_succ++;
1434*38fd1498Szrj       else
1435*38fd1498Szrj         total_count += e->count ();
1436*38fd1498Szrj     }
1437*38fd1498Szrj     if (num_unknown_succ == 0 && total_count > profile_count::zero ())
1438*38fd1498Szrj       {
1439*38fd1498Szrj         FOR_EACH_EDGE (e, ei, bb->succs)
1440*38fd1498Szrj           e->probability = e->count ().probability_in (total_count);
1441*38fd1498Szrj       }
1442*38fd1498Szrj   }
1443*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
1444*38fd1498Szrj     bb->aux = NULL;
1445*38fd1498Szrj 
1446*38fd1498Szrj   loop_optimizer_finalize ();
1447*38fd1498Szrj   free_dominance_info (CDI_DOMINATORS);
1448*38fd1498Szrj   free_dominance_info (CDI_POST_DOMINATORS);
1449*38fd1498Szrj }
1450*38fd1498Szrj 
1451*38fd1498Szrj /* Perform value profile transformation using AutoFDO profile. Add the
1452*38fd1498Szrj    promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1453*38fd1498Szrj    indirect call promoted.  */
1454*38fd1498Szrj 
1455*38fd1498Szrj static bool
afdo_vpt_for_early_inline(stmt_set * promoted_stmts)1456*38fd1498Szrj afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1457*38fd1498Szrj {
1458*38fd1498Szrj   basic_block bb;
1459*38fd1498Szrj   if (afdo_source_profile->get_function_instance_by_decl (
1460*38fd1498Szrj           current_function_decl) == NULL)
1461*38fd1498Szrj     return false;
1462*38fd1498Szrj 
1463*38fd1498Szrj   compute_fn_summary (cgraph_node::get (current_function_decl), true);
1464*38fd1498Szrj 
1465*38fd1498Szrj   bool has_vpt = false;
1466*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
1467*38fd1498Szrj   {
1468*38fd1498Szrj     if (!has_indirect_call (bb))
1469*38fd1498Szrj       continue;
1470*38fd1498Szrj     gimple_stmt_iterator gsi;
1471*38fd1498Szrj 
1472*38fd1498Szrj     gcov_type bb_count = 0;
1473*38fd1498Szrj     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1474*38fd1498Szrj       {
1475*38fd1498Szrj         count_info info;
1476*38fd1498Szrj 	gimple *stmt = gsi_stmt (gsi);
1477*38fd1498Szrj         if (afdo_source_profile->get_count_info (stmt, &info))
1478*38fd1498Szrj           bb_count = MAX (bb_count, info.count);
1479*38fd1498Szrj       }
1480*38fd1498Szrj 
1481*38fd1498Szrj     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1482*38fd1498Szrj       {
1483*38fd1498Szrj         gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
1484*38fd1498Szrj         /* IC_promotion and early_inline_2 is done in multiple iterations.
1485*38fd1498Szrj            No need to promoted the stmt if its in promoted_stmts (means
1486*38fd1498Szrj            it is already been promoted in the previous iterations).  */
1487*38fd1498Szrj         if ((!stmt) || gimple_call_fn (stmt) == NULL
1488*38fd1498Szrj             || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1489*38fd1498Szrj             || promoted_stmts->find (stmt) != promoted_stmts->end ())
1490*38fd1498Szrj           continue;
1491*38fd1498Szrj 
1492*38fd1498Szrj         count_info info;
1493*38fd1498Szrj         afdo_source_profile->get_count_info (stmt, &info);
1494*38fd1498Szrj         info.count = bb_count;
1495*38fd1498Szrj         if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1496*38fd1498Szrj           {
1497*38fd1498Szrj             /* Promote the indirect call and update the promoted_stmts.  */
1498*38fd1498Szrj             promoted_stmts->insert (stmt);
1499*38fd1498Szrj             afdo_vpt (&gsi, info.targets, true);
1500*38fd1498Szrj             has_vpt = true;
1501*38fd1498Szrj           }
1502*38fd1498Szrj       }
1503*38fd1498Szrj   }
1504*38fd1498Szrj 
1505*38fd1498Szrj   if (has_vpt)
1506*38fd1498Szrj     {
1507*38fd1498Szrj       unsigned todo = optimize_inline_calls (current_function_decl);
1508*38fd1498Szrj       if (todo & TODO_update_ssa_any)
1509*38fd1498Szrj        update_ssa (TODO_update_ssa);
1510*38fd1498Szrj       return true;
1511*38fd1498Szrj     }
1512*38fd1498Szrj 
1513*38fd1498Szrj   return false;
1514*38fd1498Szrj }
1515*38fd1498Szrj 
1516*38fd1498Szrj /* Annotate auto profile to the control flow graph. Do not annotate value
1517*38fd1498Szrj    profile for stmts in PROMOTED_STMTS.  */
1518*38fd1498Szrj 
1519*38fd1498Szrj static void
afdo_annotate_cfg(const stmt_set & promoted_stmts)1520*38fd1498Szrj afdo_annotate_cfg (const stmt_set &promoted_stmts)
1521*38fd1498Szrj {
1522*38fd1498Szrj   basic_block bb;
1523*38fd1498Szrj   bb_set annotated_bb;
1524*38fd1498Szrj   edge_set annotated_edge;
1525*38fd1498Szrj   const function_instance *s
1526*38fd1498Szrj       = afdo_source_profile->get_function_instance_by_decl (
1527*38fd1498Szrj           current_function_decl);
1528*38fd1498Szrj 
1529*38fd1498Szrj   if (s == NULL)
1530*38fd1498Szrj     return;
1531*38fd1498Szrj   cgraph_node::get (current_function_decl)->count
1532*38fd1498Szrj      = profile_count::from_gcov_type (s->head_count ()).afdo ();
1533*38fd1498Szrj   ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1534*38fd1498Szrj      = profile_count::from_gcov_type (s->head_count ()).afdo ();
1535*38fd1498Szrj   profile_count max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1536*38fd1498Szrj 
1537*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
1538*38fd1498Szrj   {
1539*38fd1498Szrj     edge e;
1540*38fd1498Szrj     edge_iterator ei;
1541*38fd1498Szrj 
1542*38fd1498Szrj     /* As autoFDO uses sampling approach, we have to assume that all
1543*38fd1498Szrj        counters are zero when not seen by autoFDO.  */
1544*38fd1498Szrj     bb->count = profile_count::zero ().afdo ();
1545*38fd1498Szrj     FOR_EACH_EDGE (e, ei, bb->succs)
1546*38fd1498Szrj       e->probability = profile_probability::uninitialized ();
1547*38fd1498Szrj 
1548*38fd1498Szrj     if (afdo_set_bb_count (bb, promoted_stmts))
1549*38fd1498Szrj       set_bb_annotated (bb, &annotated_bb);
1550*38fd1498Szrj     if (bb->count > max_count)
1551*38fd1498Szrj       max_count = bb->count;
1552*38fd1498Szrj   }
1553*38fd1498Szrj   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1554*38fd1498Szrj       > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1555*38fd1498Szrj     {
1556*38fd1498Szrj       ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1557*38fd1498Szrj           = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1558*38fd1498Szrj       set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1559*38fd1498Szrj     }
1560*38fd1498Szrj   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1561*38fd1498Szrj       > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1562*38fd1498Szrj     {
1563*38fd1498Szrj       EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1564*38fd1498Szrj           = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1565*38fd1498Szrj       set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1566*38fd1498Szrj     }
1567*38fd1498Szrj   afdo_source_profile->mark_annotated (
1568*38fd1498Szrj       DECL_SOURCE_LOCATION (current_function_decl));
1569*38fd1498Szrj   afdo_source_profile->mark_annotated (cfun->function_start_locus);
1570*38fd1498Szrj   afdo_source_profile->mark_annotated (cfun->function_end_locus);
1571*38fd1498Szrj   if (max_count > profile_count::zero ())
1572*38fd1498Szrj     {
1573*38fd1498Szrj       afdo_calculate_branch_prob (&annotated_bb, &annotated_edge);
1574*38fd1498Szrj       update_max_bb_count ();
1575*38fd1498Szrj       profile_status_for_fn (cfun) = PROFILE_READ;
1576*38fd1498Szrj     }
1577*38fd1498Szrj   if (flag_value_profile_transformations)
1578*38fd1498Szrj     {
1579*38fd1498Szrj       gimple_value_profile_transformations ();
1580*38fd1498Szrj       free_dominance_info (CDI_DOMINATORS);
1581*38fd1498Szrj       free_dominance_info (CDI_POST_DOMINATORS);
1582*38fd1498Szrj       update_ssa (TODO_update_ssa);
1583*38fd1498Szrj     }
1584*38fd1498Szrj }
1585*38fd1498Szrj 
1586*38fd1498Szrj /* Wrapper function to invoke early inliner.  */
1587*38fd1498Szrj 
1588*38fd1498Szrj static void
early_inline()1589*38fd1498Szrj early_inline ()
1590*38fd1498Szrj {
1591*38fd1498Szrj   compute_fn_summary (cgraph_node::get (current_function_decl), true);
1592*38fd1498Szrj   unsigned todo = early_inliner (cfun);
1593*38fd1498Szrj   if (todo & TODO_update_ssa_any)
1594*38fd1498Szrj     update_ssa (TODO_update_ssa);
1595*38fd1498Szrj }
1596*38fd1498Szrj 
1597*38fd1498Szrj /* Use AutoFDO profile to annoate the control flow graph.
1598*38fd1498Szrj    Return the todo flag.  */
1599*38fd1498Szrj 
1600*38fd1498Szrj static unsigned int
auto_profile(void)1601*38fd1498Szrj auto_profile (void)
1602*38fd1498Szrj {
1603*38fd1498Szrj   struct cgraph_node *node;
1604*38fd1498Szrj 
1605*38fd1498Szrj   if (symtab->state == FINISHED)
1606*38fd1498Szrj     return 0;
1607*38fd1498Szrj 
1608*38fd1498Szrj   init_node_map (true);
1609*38fd1498Szrj   profile_info = autofdo::afdo_profile_info;
1610*38fd1498Szrj 
1611*38fd1498Szrj   FOR_EACH_FUNCTION (node)
1612*38fd1498Szrj   {
1613*38fd1498Szrj     if (!gimple_has_body_p (node->decl))
1614*38fd1498Szrj       continue;
1615*38fd1498Szrj 
1616*38fd1498Szrj     /* Don't profile functions produced for builtin stuff.  */
1617*38fd1498Szrj     if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1618*38fd1498Szrj       continue;
1619*38fd1498Szrj 
1620*38fd1498Szrj     push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1621*38fd1498Szrj 
1622*38fd1498Szrj     /* First do indirect call promotion and early inline to make the
1623*38fd1498Szrj        IR match the profiled binary before actual annotation.
1624*38fd1498Szrj 
1625*38fd1498Szrj        This is needed because an indirect call might have been promoted
1626*38fd1498Szrj        and inlined in the profiled binary. If we do not promote and
1627*38fd1498Szrj        inline these indirect calls before annotation, the profile for
1628*38fd1498Szrj        these promoted functions will be lost.
1629*38fd1498Szrj 
1630*38fd1498Szrj        e.g. foo() --indirect_call--> bar()
1631*38fd1498Szrj        In profiled binary, the callsite is promoted and inlined, making
1632*38fd1498Szrj        the profile look like:
1633*38fd1498Szrj 
1634*38fd1498Szrj        foo: {
1635*38fd1498Szrj          loc_foo_1: count_1
1636*38fd1498Szrj          bar@loc_foo_2: {
1637*38fd1498Szrj            loc_bar_1: count_2
1638*38fd1498Szrj            loc_bar_2: count_3
1639*38fd1498Szrj          }
1640*38fd1498Szrj        }
1641*38fd1498Szrj 
1642*38fd1498Szrj        Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1643*38fd1498Szrj        If we perform annotation on it, the profile inside bar@loc_foo2
1644*38fd1498Szrj        will be wasted.
1645*38fd1498Szrj 
1646*38fd1498Szrj        To avoid this, we promote loc_foo_2 and inline the promoted bar
1647*38fd1498Szrj        function before annotation, so the profile inside bar@loc_foo2
1648*38fd1498Szrj        will be useful.  */
1649*38fd1498Szrj     autofdo::stmt_set promoted_stmts;
1650*38fd1498Szrj     for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
1651*38fd1498Szrj       {
1652*38fd1498Szrj         if (!flag_value_profile_transformations
1653*38fd1498Szrj             || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1654*38fd1498Szrj           break;
1655*38fd1498Szrj         early_inline ();
1656*38fd1498Szrj       }
1657*38fd1498Szrj 
1658*38fd1498Szrj     early_inline ();
1659*38fd1498Szrj     autofdo::afdo_annotate_cfg (promoted_stmts);
1660*38fd1498Szrj     compute_function_frequency ();
1661*38fd1498Szrj 
1662*38fd1498Szrj     /* Local pure-const may imply need to fixup the cfg.  */
1663*38fd1498Szrj     if (execute_fixup_cfg () & TODO_cleanup_cfg)
1664*38fd1498Szrj       cleanup_tree_cfg ();
1665*38fd1498Szrj 
1666*38fd1498Szrj     free_dominance_info (CDI_DOMINATORS);
1667*38fd1498Szrj     free_dominance_info (CDI_POST_DOMINATORS);
1668*38fd1498Szrj     cgraph_edge::rebuild_edges ();
1669*38fd1498Szrj     compute_fn_summary (cgraph_node::get (current_function_decl), true);
1670*38fd1498Szrj     pop_cfun ();
1671*38fd1498Szrj   }
1672*38fd1498Szrj 
1673*38fd1498Szrj   return TODO_rebuild_cgraph_edges;
1674*38fd1498Szrj }
1675*38fd1498Szrj } /* namespace autofdo.  */
1676*38fd1498Szrj 
1677*38fd1498Szrj /* Read the profile from the profile data file.  */
1678*38fd1498Szrj 
1679*38fd1498Szrj void
read_autofdo_file(void)1680*38fd1498Szrj read_autofdo_file (void)
1681*38fd1498Szrj {
1682*38fd1498Szrj   if (auto_profile_file == NULL)
1683*38fd1498Szrj     auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1684*38fd1498Szrj 
1685*38fd1498Szrj   autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc (
1686*38fd1498Szrj       1, sizeof (struct gcov_ctr_summary));
1687*38fd1498Szrj   autofdo::afdo_profile_info->runs = 1;
1688*38fd1498Szrj   autofdo::afdo_profile_info->sum_max = 0;
1689*38fd1498Szrj   autofdo::afdo_profile_info->sum_all = 0;
1690*38fd1498Szrj 
1691*38fd1498Szrj   /* Read the profile from the profile file.  */
1692*38fd1498Szrj   autofdo::read_profile ();
1693*38fd1498Szrj }
1694*38fd1498Szrj 
1695*38fd1498Szrj /* Free the resources.  */
1696*38fd1498Szrj 
1697*38fd1498Szrj void
end_auto_profile(void)1698*38fd1498Szrj end_auto_profile (void)
1699*38fd1498Szrj {
1700*38fd1498Szrj   delete autofdo::afdo_source_profile;
1701*38fd1498Szrj   delete autofdo::afdo_string_table;
1702*38fd1498Szrj   profile_info = NULL;
1703*38fd1498Szrj }
1704*38fd1498Szrj 
1705*38fd1498Szrj /* Returns TRUE if EDGE is hot enough to be inlined early.  */
1706*38fd1498Szrj 
1707*38fd1498Szrj bool
afdo_callsite_hot_enough_for_early_inline(struct cgraph_edge * edge)1708*38fd1498Szrj afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1709*38fd1498Szrj {
1710*38fd1498Szrj   gcov_type count
1711*38fd1498Szrj       = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1712*38fd1498Szrj 
1713*38fd1498Szrj   if (count > 0)
1714*38fd1498Szrj     {
1715*38fd1498Szrj       bool is_hot;
1716*38fd1498Szrj       const struct gcov_ctr_summary *saved_profile_info = profile_info;
1717*38fd1498Szrj       /* At early inline stage, profile_info is not set yet. We need to
1718*38fd1498Szrj          temporarily set it to afdo_profile_info to calculate hotness.  */
1719*38fd1498Szrj       profile_info = autofdo::afdo_profile_info;
1720*38fd1498Szrj       is_hot = maybe_hot_count_p (NULL, profile_count::from_gcov_type (count));
1721*38fd1498Szrj       profile_info = saved_profile_info;
1722*38fd1498Szrj       return is_hot;
1723*38fd1498Szrj     }
1724*38fd1498Szrj 
1725*38fd1498Szrj   return false;
1726*38fd1498Szrj }
1727*38fd1498Szrj 
1728*38fd1498Szrj namespace
1729*38fd1498Szrj {
1730*38fd1498Szrj 
1731*38fd1498Szrj const pass_data pass_data_ipa_auto_profile = {
1732*38fd1498Szrj   SIMPLE_IPA_PASS, "afdo", /* name */
1733*38fd1498Szrj   OPTGROUP_NONE,           /* optinfo_flags */
1734*38fd1498Szrj   TV_IPA_AUTOFDO,          /* tv_id */
1735*38fd1498Szrj   0,                       /* properties_required */
1736*38fd1498Szrj   0,                       /* properties_provided */
1737*38fd1498Szrj   0,                       /* properties_destroyed */
1738*38fd1498Szrj   0,                       /* todo_flags_start */
1739*38fd1498Szrj   0,                       /* todo_flags_finish */
1740*38fd1498Szrj };
1741*38fd1498Szrj 
1742*38fd1498Szrj class pass_ipa_auto_profile : public simple_ipa_opt_pass
1743*38fd1498Szrj {
1744*38fd1498Szrj public:
pass_ipa_auto_profile(gcc::context * ctxt)1745*38fd1498Szrj   pass_ipa_auto_profile (gcc::context *ctxt)
1746*38fd1498Szrj       : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1747*38fd1498Szrj   {
1748*38fd1498Szrj   }
1749*38fd1498Szrj 
1750*38fd1498Szrj   /* opt_pass methods: */
1751*38fd1498Szrj   virtual bool
gate(function *)1752*38fd1498Szrj   gate (function *)
1753*38fd1498Szrj   {
1754*38fd1498Szrj     return flag_auto_profile;
1755*38fd1498Szrj   }
1756*38fd1498Szrj   virtual unsigned int
execute(function *)1757*38fd1498Szrj   execute (function *)
1758*38fd1498Szrj   {
1759*38fd1498Szrj     return autofdo::auto_profile ();
1760*38fd1498Szrj   }
1761*38fd1498Szrj }; // class pass_ipa_auto_profile
1762*38fd1498Szrj 
1763*38fd1498Szrj } // anon namespace
1764*38fd1498Szrj 
1765*38fd1498Szrj simple_ipa_opt_pass *
make_pass_ipa_auto_profile(gcc::context * ctxt)1766*38fd1498Szrj make_pass_ipa_auto_profile (gcc::context *ctxt)
1767*38fd1498Szrj {
1768*38fd1498Szrj   return new pass_ipa_auto_profile (ctxt);
1769*38fd1498Szrj }
1770