xref: /dflybsd-src/contrib/gcc-8.0/gcc/predict.c (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj /* Branch prediction routines for the GNU compiler.
2*38fd1498Szrj    Copyright (C) 2000-2018 Free Software Foundation, Inc.
3*38fd1498Szrj 
4*38fd1498Szrj This file is part of GCC.
5*38fd1498Szrj 
6*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
7*38fd1498Szrj the terms of the GNU General Public License as published by the Free
8*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
9*38fd1498Szrj version.
10*38fd1498Szrj 
11*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
13*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14*38fd1498Szrj for more details.
15*38fd1498Szrj 
16*38fd1498Szrj You should have received a copy of the GNU General Public License
17*38fd1498Szrj along with GCC; see the file COPYING3.  If not see
18*38fd1498Szrj <http://www.gnu.org/licenses/>.  */
19*38fd1498Szrj 
20*38fd1498Szrj /* References:
21*38fd1498Szrj 
22*38fd1498Szrj    [1] "Branch Prediction for Free"
23*38fd1498Szrj        Ball and Larus; PLDI '93.
24*38fd1498Szrj    [2] "Static Branch Frequency and Program Profile Analysis"
25*38fd1498Szrj        Wu and Larus; MICRO-27.
26*38fd1498Szrj    [3] "Corpus-based Static Branch Prediction"
27*38fd1498Szrj        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
28*38fd1498Szrj 
29*38fd1498Szrj 
30*38fd1498Szrj #include "config.h"
31*38fd1498Szrj #include "system.h"
32*38fd1498Szrj #include "coretypes.h"
33*38fd1498Szrj #include "backend.h"
34*38fd1498Szrj #include "rtl.h"
35*38fd1498Szrj #include "tree.h"
36*38fd1498Szrj #include "gimple.h"
37*38fd1498Szrj #include "cfghooks.h"
38*38fd1498Szrj #include "tree-pass.h"
39*38fd1498Szrj #include "ssa.h"
40*38fd1498Szrj #include "memmodel.h"
41*38fd1498Szrj #include "emit-rtl.h"
42*38fd1498Szrj #include "cgraph.h"
43*38fd1498Szrj #include "coverage.h"
44*38fd1498Szrj #include "diagnostic-core.h"
45*38fd1498Szrj #include "gimple-predict.h"
46*38fd1498Szrj #include "fold-const.h"
47*38fd1498Szrj #include "calls.h"
48*38fd1498Szrj #include "cfganal.h"
49*38fd1498Szrj #include "profile.h"
50*38fd1498Szrj #include "sreal.h"
51*38fd1498Szrj #include "params.h"
52*38fd1498Szrj #include "cfgloop.h"
53*38fd1498Szrj #include "gimple-iterator.h"
54*38fd1498Szrj #include "tree-cfg.h"
55*38fd1498Szrj #include "tree-ssa-loop-niter.h"
56*38fd1498Szrj #include "tree-ssa-loop.h"
57*38fd1498Szrj #include "tree-scalar-evolution.h"
58*38fd1498Szrj #include "ipa-utils.h"
59*38fd1498Szrj #include "gimple-pretty-print.h"
60*38fd1498Szrj #include "selftest.h"
61*38fd1498Szrj #include "cfgrtl.h"
62*38fd1498Szrj #include "stringpool.h"
63*38fd1498Szrj #include "attribs.h"
64*38fd1498Szrj 
65*38fd1498Szrj /* Enum with reasons why a predictor is ignored.  */
66*38fd1498Szrj 
67*38fd1498Szrj enum predictor_reason
68*38fd1498Szrj {
69*38fd1498Szrj   REASON_NONE,
70*38fd1498Szrj   REASON_IGNORED,
71*38fd1498Szrj   REASON_SINGLE_EDGE_DUPLICATE,
72*38fd1498Szrj   REASON_EDGE_PAIR_DUPLICATE
73*38fd1498Szrj };
74*38fd1498Szrj 
75*38fd1498Szrj /* String messages for the aforementioned enum.  */
76*38fd1498Szrj 
77*38fd1498Szrj static const char *reason_messages[] = {"", " (ignored)",
78*38fd1498Szrj     " (single edge duplicate)", " (edge pair duplicate)"};
79*38fd1498Szrj 
80*38fd1498Szrj /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
81*38fd1498Szrj 		   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
82*38fd1498Szrj static sreal real_almost_one, real_br_prob_base,
83*38fd1498Szrj 	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;
84*38fd1498Szrj 
85*38fd1498Szrj static void combine_predictions_for_insn (rtx_insn *, basic_block);
86*38fd1498Szrj static void dump_prediction (FILE *, enum br_predictor, int, basic_block,
87*38fd1498Szrj 			     enum predictor_reason, edge);
88*38fd1498Szrj static void predict_paths_leading_to (basic_block, enum br_predictor,
89*38fd1498Szrj 				      enum prediction,
90*38fd1498Szrj 				      struct loop *in_loop = NULL);
91*38fd1498Szrj static void predict_paths_leading_to_edge (edge, enum br_predictor,
92*38fd1498Szrj 					   enum prediction,
93*38fd1498Szrj 					   struct loop *in_loop = NULL);
94*38fd1498Szrj static bool can_predict_insn_p (const rtx_insn *);
95*38fd1498Szrj 
96*38fd1498Szrj /* Information we hold about each branch predictor.
97*38fd1498Szrj    Filled using information from predict.def.  */
98*38fd1498Szrj 
99*38fd1498Szrj struct predictor_info
100*38fd1498Szrj {
101*38fd1498Szrj   const char *const name;	/* Name used in the debugging dumps.  */
102*38fd1498Szrj   const int hitrate;		/* Expected hitrate used by
103*38fd1498Szrj 				   predict_insn_def call.  */
104*38fd1498Szrj   const int flags;
105*38fd1498Szrj };
106*38fd1498Szrj 
107*38fd1498Szrj /* Use given predictor without Dempster-Shaffer theory if it matches
108*38fd1498Szrj    using first_match heuristics.  */
109*38fd1498Szrj #define PRED_FLAG_FIRST_MATCH 1
110*38fd1498Szrj 
111*38fd1498Szrj /* Recompute hitrate in percent to our representation.  */
112*38fd1498Szrj 
113*38fd1498Szrj #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
114*38fd1498Szrj 
115*38fd1498Szrj #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
116*38fd1498Szrj static const struct predictor_info predictor_info[]= {
117*38fd1498Szrj #include "predict.def"
118*38fd1498Szrj 
119*38fd1498Szrj   /* Upper bound on predictors.  */
120*38fd1498Szrj   {NULL, 0, 0}
121*38fd1498Szrj };
122*38fd1498Szrj #undef DEF_PREDICTOR
123*38fd1498Szrj 
124*38fd1498Szrj static gcov_type min_count = -1;
125*38fd1498Szrj 
126*38fd1498Szrj /* Determine the threshold for hot BB counts.  */
127*38fd1498Szrj 
128*38fd1498Szrj gcov_type
get_hot_bb_threshold()129*38fd1498Szrj get_hot_bb_threshold ()
130*38fd1498Szrj {
131*38fd1498Szrj   gcov_working_set_t *ws;
132*38fd1498Szrj   if (min_count == -1)
133*38fd1498Szrj     {
134*38fd1498Szrj       ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
135*38fd1498Szrj       gcc_assert (ws);
136*38fd1498Szrj       min_count = ws->min_counter;
137*38fd1498Szrj     }
138*38fd1498Szrj   return min_count;
139*38fd1498Szrj }
140*38fd1498Szrj 
141*38fd1498Szrj /* Set the threshold for hot BB counts.  */
142*38fd1498Szrj 
143*38fd1498Szrj void
set_hot_bb_threshold(gcov_type min)144*38fd1498Szrj set_hot_bb_threshold (gcov_type min)
145*38fd1498Szrj {
146*38fd1498Szrj   min_count = min;
147*38fd1498Szrj }
148*38fd1498Szrj 
149*38fd1498Szrj /* Return TRUE if frequency FREQ is considered to be hot.  */
150*38fd1498Szrj 
151*38fd1498Szrj bool
maybe_hot_count_p(struct function * fun,profile_count count)152*38fd1498Szrj maybe_hot_count_p (struct function *fun, profile_count count)
153*38fd1498Szrj {
154*38fd1498Szrj   if (!count.initialized_p ())
155*38fd1498Szrj     return true;
156*38fd1498Szrj   if (count.ipa () == profile_count::zero ())
157*38fd1498Szrj     return false;
158*38fd1498Szrj   if (!count.ipa_p ())
159*38fd1498Szrj     {
160*38fd1498Szrj       struct cgraph_node *node = cgraph_node::get (fun->decl);
161*38fd1498Szrj       if (!profile_info || profile_status_for_fn (fun) != PROFILE_READ)
162*38fd1498Szrj 	{
163*38fd1498Szrj 	  if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
164*38fd1498Szrj 	    return false;
165*38fd1498Szrj 	  if (node->frequency == NODE_FREQUENCY_HOT)
166*38fd1498Szrj 	    return true;
167*38fd1498Szrj 	}
168*38fd1498Szrj       if (profile_status_for_fn (fun) == PROFILE_ABSENT)
169*38fd1498Szrj 	return true;
170*38fd1498Szrj       if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
171*38fd1498Szrj 	  && count < (ENTRY_BLOCK_PTR_FOR_FN (fun)->count.apply_scale (2, 3)))
172*38fd1498Szrj 	return false;
173*38fd1498Szrj       if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
174*38fd1498Szrj 	return false;
175*38fd1498Szrj       if (count.apply_scale (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION), 1)
176*38fd1498Szrj 	  < ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
177*38fd1498Szrj 	return false;
178*38fd1498Szrj       return true;
179*38fd1498Szrj     }
180*38fd1498Szrj   /* Code executed at most once is not hot.  */
181*38fd1498Szrj   if (count <= MAX (profile_info ? profile_info->runs : 1, 1))
182*38fd1498Szrj     return false;
183*38fd1498Szrj   return (count.to_gcov_type () >= get_hot_bb_threshold ());
184*38fd1498Szrj }
185*38fd1498Szrj 
186*38fd1498Szrj /* Return true in case BB can be CPU intensive and should be optimized
187*38fd1498Szrj    for maximal performance.  */
188*38fd1498Szrj 
189*38fd1498Szrj bool
maybe_hot_bb_p(struct function * fun,const_basic_block bb)190*38fd1498Szrj maybe_hot_bb_p (struct function *fun, const_basic_block bb)
191*38fd1498Szrj {
192*38fd1498Szrj   gcc_checking_assert (fun);
193*38fd1498Szrj   return maybe_hot_count_p (fun, bb->count);
194*38fd1498Szrj }
195*38fd1498Szrj 
196*38fd1498Szrj /* Return true in case BB can be CPU intensive and should be optimized
197*38fd1498Szrj    for maximal performance.  */
198*38fd1498Szrj 
199*38fd1498Szrj bool
maybe_hot_edge_p(edge e)200*38fd1498Szrj maybe_hot_edge_p (edge e)
201*38fd1498Szrj {
202*38fd1498Szrj   return maybe_hot_count_p (cfun, e->count ());
203*38fd1498Szrj }
204*38fd1498Szrj 
205*38fd1498Szrj /* Return true if profile COUNT and FREQUENCY, or function FUN static
206*38fd1498Szrj    node frequency reflects never being executed.  */
207*38fd1498Szrj 
208*38fd1498Szrj static bool
probably_never_executed(struct function * fun,profile_count count)209*38fd1498Szrj probably_never_executed (struct function *fun,
210*38fd1498Szrj                          profile_count count)
211*38fd1498Szrj {
212*38fd1498Szrj   gcc_checking_assert (fun);
213*38fd1498Szrj   if (count.ipa () == profile_count::zero ())
214*38fd1498Szrj     return true;
215*38fd1498Szrj   /* Do not trust adjusted counts.  This will make us to drop int cold section
216*38fd1498Szrj      code with low execution count as a result of inlining. These low counts
217*38fd1498Szrj      are not safe even with read profile and may lead us to dropping
218*38fd1498Szrj      code which actually gets executed into cold section of binary that is not
219*38fd1498Szrj      desirable.  */
220*38fd1498Szrj   if (count.precise_p () && profile_status_for_fn (fun) == PROFILE_READ)
221*38fd1498Szrj     {
222*38fd1498Szrj       int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
223*38fd1498Szrj       if (count.apply_scale (unlikely_count_fraction, 1) >= profile_info->runs)
224*38fd1498Szrj 	return false;
225*38fd1498Szrj       return true;
226*38fd1498Szrj     }
227*38fd1498Szrj   if ((!profile_info || profile_status_for_fn (fun) != PROFILE_READ)
228*38fd1498Szrj       && (cgraph_node::get (fun->decl)->frequency
229*38fd1498Szrj 	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
230*38fd1498Szrj     return true;
231*38fd1498Szrj   return false;
232*38fd1498Szrj }
233*38fd1498Szrj 
234*38fd1498Szrj 
235*38fd1498Szrj /* Return true in case BB is probably never executed.  */
236*38fd1498Szrj 
237*38fd1498Szrj bool
probably_never_executed_bb_p(struct function * fun,const_basic_block bb)238*38fd1498Szrj probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
239*38fd1498Szrj {
240*38fd1498Szrj   return probably_never_executed (fun, bb->count);
241*38fd1498Szrj }
242*38fd1498Szrj 
243*38fd1498Szrj 
244*38fd1498Szrj /* Return true if E is unlikely executed for obvious reasons.  */
245*38fd1498Szrj 
246*38fd1498Szrj static bool
unlikely_executed_edge_p(edge e)247*38fd1498Szrj unlikely_executed_edge_p (edge e)
248*38fd1498Szrj {
249*38fd1498Szrj   return (e->count () == profile_count::zero ()
250*38fd1498Szrj 	  || e->probability == profile_probability::never ())
251*38fd1498Szrj 	 || (e->flags & (EDGE_EH | EDGE_FAKE));
252*38fd1498Szrj }
253*38fd1498Szrj 
254*38fd1498Szrj /* Return true in case edge E is probably never executed.  */
255*38fd1498Szrj 
256*38fd1498Szrj bool
probably_never_executed_edge_p(struct function * fun,edge e)257*38fd1498Szrj probably_never_executed_edge_p (struct function *fun, edge e)
258*38fd1498Szrj {
259*38fd1498Szrj   if (unlikely_executed_edge_p (e))
260*38fd1498Szrj     return true;
261*38fd1498Szrj   return probably_never_executed (fun, e->count ());
262*38fd1498Szrj }
263*38fd1498Szrj 
264*38fd1498Szrj /* Return true when current function should always be optimized for size.  */
265*38fd1498Szrj 
266*38fd1498Szrj bool
optimize_function_for_size_p(struct function * fun)267*38fd1498Szrj optimize_function_for_size_p (struct function *fun)
268*38fd1498Szrj {
269*38fd1498Szrj   if (!fun || !fun->decl)
270*38fd1498Szrj     return optimize_size;
271*38fd1498Szrj   cgraph_node *n = cgraph_node::get (fun->decl);
272*38fd1498Szrj   return n && n->optimize_for_size_p ();
273*38fd1498Szrj }
274*38fd1498Szrj 
275*38fd1498Szrj /* Return true when current function should always be optimized for speed.  */
276*38fd1498Szrj 
277*38fd1498Szrj bool
optimize_function_for_speed_p(struct function * fun)278*38fd1498Szrj optimize_function_for_speed_p (struct function *fun)
279*38fd1498Szrj {
280*38fd1498Szrj   return !optimize_function_for_size_p (fun);
281*38fd1498Szrj }
282*38fd1498Szrj 
283*38fd1498Szrj /* Return the optimization type that should be used for the function FUN.  */
284*38fd1498Szrj 
285*38fd1498Szrj optimization_type
function_optimization_type(struct function * fun)286*38fd1498Szrj function_optimization_type (struct function *fun)
287*38fd1498Szrj {
288*38fd1498Szrj   return (optimize_function_for_speed_p (fun)
289*38fd1498Szrj 	  ? OPTIMIZE_FOR_SPEED
290*38fd1498Szrj 	  : OPTIMIZE_FOR_SIZE);
291*38fd1498Szrj }
292*38fd1498Szrj 
293*38fd1498Szrj /* Return TRUE when BB should be optimized for size.  */
294*38fd1498Szrj 
295*38fd1498Szrj bool
optimize_bb_for_size_p(const_basic_block bb)296*38fd1498Szrj optimize_bb_for_size_p (const_basic_block bb)
297*38fd1498Szrj {
298*38fd1498Szrj   return (optimize_function_for_size_p (cfun)
299*38fd1498Szrj 	  || (bb && !maybe_hot_bb_p (cfun, bb)));
300*38fd1498Szrj }
301*38fd1498Szrj 
302*38fd1498Szrj /* Return TRUE when BB should be optimized for speed.  */
303*38fd1498Szrj 
304*38fd1498Szrj bool
optimize_bb_for_speed_p(const_basic_block bb)305*38fd1498Szrj optimize_bb_for_speed_p (const_basic_block bb)
306*38fd1498Szrj {
307*38fd1498Szrj   return !optimize_bb_for_size_p (bb);
308*38fd1498Szrj }
309*38fd1498Szrj 
310*38fd1498Szrj /* Return the optimization type that should be used for block BB.  */
311*38fd1498Szrj 
312*38fd1498Szrj optimization_type
bb_optimization_type(const_basic_block bb)313*38fd1498Szrj bb_optimization_type (const_basic_block bb)
314*38fd1498Szrj {
315*38fd1498Szrj   return (optimize_bb_for_speed_p (bb)
316*38fd1498Szrj 	  ? OPTIMIZE_FOR_SPEED
317*38fd1498Szrj 	  : OPTIMIZE_FOR_SIZE);
318*38fd1498Szrj }
319*38fd1498Szrj 
320*38fd1498Szrj /* Return TRUE when BB should be optimized for size.  */
321*38fd1498Szrj 
322*38fd1498Szrj bool
optimize_edge_for_size_p(edge e)323*38fd1498Szrj optimize_edge_for_size_p (edge e)
324*38fd1498Szrj {
325*38fd1498Szrj   return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
326*38fd1498Szrj }
327*38fd1498Szrj 
328*38fd1498Szrj /* Return TRUE when BB should be optimized for speed.  */
329*38fd1498Szrj 
330*38fd1498Szrj bool
optimize_edge_for_speed_p(edge e)331*38fd1498Szrj optimize_edge_for_speed_p (edge e)
332*38fd1498Szrj {
333*38fd1498Szrj   return !optimize_edge_for_size_p (e);
334*38fd1498Szrj }
335*38fd1498Szrj 
336*38fd1498Szrj /* Return TRUE when BB should be optimized for size.  */
337*38fd1498Szrj 
338*38fd1498Szrj bool
optimize_insn_for_size_p(void)339*38fd1498Szrj optimize_insn_for_size_p (void)
340*38fd1498Szrj {
341*38fd1498Szrj   return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
342*38fd1498Szrj }
343*38fd1498Szrj 
344*38fd1498Szrj /* Return TRUE when BB should be optimized for speed.  */
345*38fd1498Szrj 
346*38fd1498Szrj bool
optimize_insn_for_speed_p(void)347*38fd1498Szrj optimize_insn_for_speed_p (void)
348*38fd1498Szrj {
349*38fd1498Szrj   return !optimize_insn_for_size_p ();
350*38fd1498Szrj }
351*38fd1498Szrj 
352*38fd1498Szrj /* Return TRUE when LOOP should be optimized for size.  */
353*38fd1498Szrj 
354*38fd1498Szrj bool
optimize_loop_for_size_p(struct loop * loop)355*38fd1498Szrj optimize_loop_for_size_p (struct loop *loop)
356*38fd1498Szrj {
357*38fd1498Szrj   return optimize_bb_for_size_p (loop->header);
358*38fd1498Szrj }
359*38fd1498Szrj 
360*38fd1498Szrj /* Return TRUE when LOOP should be optimized for speed.  */
361*38fd1498Szrj 
362*38fd1498Szrj bool
optimize_loop_for_speed_p(struct loop * loop)363*38fd1498Szrj optimize_loop_for_speed_p (struct loop *loop)
364*38fd1498Szrj {
365*38fd1498Szrj   return optimize_bb_for_speed_p (loop->header);
366*38fd1498Szrj }
367*38fd1498Szrj 
368*38fd1498Szrj /* Return TRUE when LOOP nest should be optimized for speed.  */
369*38fd1498Szrj 
370*38fd1498Szrj bool
optimize_loop_nest_for_speed_p(struct loop * loop)371*38fd1498Szrj optimize_loop_nest_for_speed_p (struct loop *loop)
372*38fd1498Szrj {
373*38fd1498Szrj   struct loop *l = loop;
374*38fd1498Szrj   if (optimize_loop_for_speed_p (loop))
375*38fd1498Szrj     return true;
376*38fd1498Szrj   l = loop->inner;
377*38fd1498Szrj   while (l && l != loop)
378*38fd1498Szrj     {
379*38fd1498Szrj       if (optimize_loop_for_speed_p (l))
380*38fd1498Szrj         return true;
381*38fd1498Szrj       if (l->inner)
382*38fd1498Szrj         l = l->inner;
383*38fd1498Szrj       else if (l->next)
384*38fd1498Szrj         l = l->next;
385*38fd1498Szrj       else
386*38fd1498Szrj         {
387*38fd1498Szrj 	  while (l != loop && !l->next)
388*38fd1498Szrj 	    l = loop_outer (l);
389*38fd1498Szrj 	  if (l != loop)
390*38fd1498Szrj 	    l = l->next;
391*38fd1498Szrj 	}
392*38fd1498Szrj     }
393*38fd1498Szrj   return false;
394*38fd1498Szrj }
395*38fd1498Szrj 
396*38fd1498Szrj /* Return TRUE when LOOP nest should be optimized for size.  */
397*38fd1498Szrj 
398*38fd1498Szrj bool
optimize_loop_nest_for_size_p(struct loop * loop)399*38fd1498Szrj optimize_loop_nest_for_size_p (struct loop *loop)
400*38fd1498Szrj {
401*38fd1498Szrj   return !optimize_loop_nest_for_speed_p (loop);
402*38fd1498Szrj }
403*38fd1498Szrj 
404*38fd1498Szrj /* Return true when edge E is likely to be well predictable by branch
405*38fd1498Szrj    predictor.  */
406*38fd1498Szrj 
407*38fd1498Szrj bool
predictable_edge_p(edge e)408*38fd1498Szrj predictable_edge_p (edge e)
409*38fd1498Szrj {
410*38fd1498Szrj   if (!e->probability.initialized_p ())
411*38fd1498Szrj     return false;
412*38fd1498Szrj   if ((e->probability.to_reg_br_prob_base ()
413*38fd1498Szrj        <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
414*38fd1498Szrj       || (REG_BR_PROB_BASE - e->probability.to_reg_br_prob_base ()
415*38fd1498Szrj           <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
416*38fd1498Szrj     return true;
417*38fd1498Szrj   return false;
418*38fd1498Szrj }
419*38fd1498Szrj 
420*38fd1498Szrj 
421*38fd1498Szrj /* Set RTL expansion for BB profile.  */
422*38fd1498Szrj 
423*38fd1498Szrj void
rtl_profile_for_bb(basic_block bb)424*38fd1498Szrj rtl_profile_for_bb (basic_block bb)
425*38fd1498Szrj {
426*38fd1498Szrj   crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
427*38fd1498Szrj }
428*38fd1498Szrj 
429*38fd1498Szrj /* Set RTL expansion for edge profile.  */
430*38fd1498Szrj 
431*38fd1498Szrj void
rtl_profile_for_edge(edge e)432*38fd1498Szrj rtl_profile_for_edge (edge e)
433*38fd1498Szrj {
434*38fd1498Szrj   crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
435*38fd1498Szrj }
436*38fd1498Szrj 
437*38fd1498Szrj /* Set RTL expansion to default mode (i.e. when profile info is not known).  */
438*38fd1498Szrj void
default_rtl_profile(void)439*38fd1498Szrj default_rtl_profile (void)
440*38fd1498Szrj {
441*38fd1498Szrj   crtl->maybe_hot_insn_p = true;
442*38fd1498Szrj }
443*38fd1498Szrj 
444*38fd1498Szrj /* Return true if the one of outgoing edges is already predicted by
445*38fd1498Szrj    PREDICTOR.  */
446*38fd1498Szrj 
447*38fd1498Szrj bool
rtl_predicted_by_p(const_basic_block bb,enum br_predictor predictor)448*38fd1498Szrj rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
449*38fd1498Szrj {
450*38fd1498Szrj   rtx note;
451*38fd1498Szrj   if (!INSN_P (BB_END (bb)))
452*38fd1498Szrj     return false;
453*38fd1498Szrj   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
454*38fd1498Szrj     if (REG_NOTE_KIND (note) == REG_BR_PRED
455*38fd1498Szrj 	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
456*38fd1498Szrj       return true;
457*38fd1498Szrj   return false;
458*38fd1498Szrj }
459*38fd1498Szrj 
460*38fd1498Szrj /*  Structure representing predictions in tree level. */
461*38fd1498Szrj 
462*38fd1498Szrj struct edge_prediction {
463*38fd1498Szrj     struct edge_prediction *ep_next;
464*38fd1498Szrj     edge ep_edge;
465*38fd1498Szrj     enum br_predictor ep_predictor;
466*38fd1498Szrj     int ep_probability;
467*38fd1498Szrj };
468*38fd1498Szrj 
469*38fd1498Szrj /* This map contains for a basic block the list of predictions for the
470*38fd1498Szrj    outgoing edges.  */
471*38fd1498Szrj 
472*38fd1498Szrj static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
473*38fd1498Szrj 
474*38fd1498Szrj /* Return true if the one of outgoing edges is already predicted by
475*38fd1498Szrj    PREDICTOR.  */
476*38fd1498Szrj 
477*38fd1498Szrj bool
gimple_predicted_by_p(const_basic_block bb,enum br_predictor predictor)478*38fd1498Szrj gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
479*38fd1498Szrj {
480*38fd1498Szrj   struct edge_prediction *i;
481*38fd1498Szrj   edge_prediction **preds = bb_predictions->get (bb);
482*38fd1498Szrj 
483*38fd1498Szrj   if (!preds)
484*38fd1498Szrj     return false;
485*38fd1498Szrj 
486*38fd1498Szrj   for (i = *preds; i; i = i->ep_next)
487*38fd1498Szrj     if (i->ep_predictor == predictor)
488*38fd1498Szrj       return true;
489*38fd1498Szrj   return false;
490*38fd1498Szrj }
491*38fd1498Szrj 
492*38fd1498Szrj /* Return true if the one of outgoing edges is already predicted by
493*38fd1498Szrj    PREDICTOR for edge E predicted as TAKEN.  */
494*38fd1498Szrj 
495*38fd1498Szrj bool
edge_predicted_by_p(edge e,enum br_predictor predictor,bool taken)496*38fd1498Szrj edge_predicted_by_p (edge e, enum br_predictor predictor, bool taken)
497*38fd1498Szrj {
498*38fd1498Szrj   struct edge_prediction *i;
499*38fd1498Szrj   basic_block bb = e->src;
500*38fd1498Szrj   edge_prediction **preds = bb_predictions->get (bb);
501*38fd1498Szrj   if (!preds)
502*38fd1498Szrj     return false;
503*38fd1498Szrj 
504*38fd1498Szrj   int probability = predictor_info[(int) predictor].hitrate;
505*38fd1498Szrj 
506*38fd1498Szrj   if (taken != TAKEN)
507*38fd1498Szrj     probability = REG_BR_PROB_BASE - probability;
508*38fd1498Szrj 
509*38fd1498Szrj   for (i = *preds; i; i = i->ep_next)
510*38fd1498Szrj     if (i->ep_predictor == predictor
511*38fd1498Szrj 	&& i->ep_edge == e
512*38fd1498Szrj 	&& i->ep_probability == probability)
513*38fd1498Szrj       return true;
514*38fd1498Szrj   return false;
515*38fd1498Szrj }
516*38fd1498Szrj 
517*38fd1498Szrj /* Same predicate as above, working on edges.  */
518*38fd1498Szrj bool
edge_probability_reliable_p(const_edge e)519*38fd1498Szrj edge_probability_reliable_p (const_edge e)
520*38fd1498Szrj {
521*38fd1498Szrj   return e->probability.probably_reliable_p ();
522*38fd1498Szrj }
523*38fd1498Szrj 
524*38fd1498Szrj /* Same predicate as edge_probability_reliable_p, working on notes.  */
525*38fd1498Szrj bool
br_prob_note_reliable_p(const_rtx note)526*38fd1498Szrj br_prob_note_reliable_p (const_rtx note)
527*38fd1498Szrj {
528*38fd1498Szrj   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
529*38fd1498Szrj   return profile_probability::from_reg_br_prob_note
530*38fd1498Szrj 		 (XINT (note, 0)).probably_reliable_p ();
531*38fd1498Szrj }
532*38fd1498Szrj 
533*38fd1498Szrj static void
predict_insn(rtx_insn * insn,enum br_predictor predictor,int probability)534*38fd1498Szrj predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
535*38fd1498Szrj {
536*38fd1498Szrj   gcc_assert (any_condjump_p (insn));
537*38fd1498Szrj   if (!flag_guess_branch_prob)
538*38fd1498Szrj     return;
539*38fd1498Szrj 
540*38fd1498Szrj   add_reg_note (insn, REG_BR_PRED,
541*38fd1498Szrj 		gen_rtx_CONCAT (VOIDmode,
542*38fd1498Szrj 				GEN_INT ((int) predictor),
543*38fd1498Szrj 				GEN_INT ((int) probability)));
544*38fd1498Szrj }
545*38fd1498Szrj 
546*38fd1498Szrj /* Predict insn by given predictor.  */
547*38fd1498Szrj 
548*38fd1498Szrj void
predict_insn_def(rtx_insn * insn,enum br_predictor predictor,enum prediction taken)549*38fd1498Szrj predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
550*38fd1498Szrj 		  enum prediction taken)
551*38fd1498Szrj {
552*38fd1498Szrj    int probability = predictor_info[(int) predictor].hitrate;
553*38fd1498Szrj    gcc_assert (probability != PROB_UNINITIALIZED);
554*38fd1498Szrj 
555*38fd1498Szrj    if (taken != TAKEN)
556*38fd1498Szrj      probability = REG_BR_PROB_BASE - probability;
557*38fd1498Szrj 
558*38fd1498Szrj    predict_insn (insn, predictor, probability);
559*38fd1498Szrj }
560*38fd1498Szrj 
561*38fd1498Szrj /* Predict edge E with given probability if possible.  */
562*38fd1498Szrj 
563*38fd1498Szrj void
rtl_predict_edge(edge e,enum br_predictor predictor,int probability)564*38fd1498Szrj rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
565*38fd1498Szrj {
566*38fd1498Szrj   rtx_insn *last_insn;
567*38fd1498Szrj   last_insn = BB_END (e->src);
568*38fd1498Szrj 
569*38fd1498Szrj   /* We can store the branch prediction information only about
570*38fd1498Szrj      conditional jumps.  */
571*38fd1498Szrj   if (!any_condjump_p (last_insn))
572*38fd1498Szrj     return;
573*38fd1498Szrj 
574*38fd1498Szrj   /* We always store probability of branching.  */
575*38fd1498Szrj   if (e->flags & EDGE_FALLTHRU)
576*38fd1498Szrj     probability = REG_BR_PROB_BASE - probability;
577*38fd1498Szrj 
578*38fd1498Szrj   predict_insn (last_insn, predictor, probability);
579*38fd1498Szrj }
580*38fd1498Szrj 
581*38fd1498Szrj /* Predict edge E with the given PROBABILITY.  */
582*38fd1498Szrj void
gimple_predict_edge(edge e,enum br_predictor predictor,int probability)583*38fd1498Szrj gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
584*38fd1498Szrj {
585*38fd1498Szrj   if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
586*38fd1498Szrj       && EDGE_COUNT (e->src->succs) > 1
587*38fd1498Szrj       && flag_guess_branch_prob
588*38fd1498Szrj       && optimize)
589*38fd1498Szrj     {
590*38fd1498Szrj       struct edge_prediction *i = XNEW (struct edge_prediction);
591*38fd1498Szrj       edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
592*38fd1498Szrj 
593*38fd1498Szrj       i->ep_next = preds;
594*38fd1498Szrj       preds = i;
595*38fd1498Szrj       i->ep_probability = probability;
596*38fd1498Szrj       i->ep_predictor = predictor;
597*38fd1498Szrj       i->ep_edge = e;
598*38fd1498Szrj     }
599*38fd1498Szrj }
600*38fd1498Szrj 
601*38fd1498Szrj /* Filter edge predictions PREDS by a function FILTER.  DATA are passed
602*38fd1498Szrj    to the filter function.  */
603*38fd1498Szrj 
604*38fd1498Szrj void
filter_predictions(edge_prediction ** preds,bool (* filter)(edge_prediction *,void *),void * data)605*38fd1498Szrj filter_predictions (edge_prediction **preds,
606*38fd1498Szrj 		    bool (*filter) (edge_prediction *, void *), void *data)
607*38fd1498Szrj {
608*38fd1498Szrj   if (!bb_predictions)
609*38fd1498Szrj     return;
610*38fd1498Szrj 
611*38fd1498Szrj   if (preds)
612*38fd1498Szrj     {
613*38fd1498Szrj       struct edge_prediction **prediction = preds;
614*38fd1498Szrj       struct edge_prediction *next;
615*38fd1498Szrj 
616*38fd1498Szrj       while (*prediction)
617*38fd1498Szrj 	{
618*38fd1498Szrj 	  if ((*filter) (*prediction, data))
619*38fd1498Szrj 	    prediction = &((*prediction)->ep_next);
620*38fd1498Szrj 	  else
621*38fd1498Szrj 	    {
622*38fd1498Szrj 	      next = (*prediction)->ep_next;
623*38fd1498Szrj 	      free (*prediction);
624*38fd1498Szrj 	      *prediction = next;
625*38fd1498Szrj 	    }
626*38fd1498Szrj 	}
627*38fd1498Szrj     }
628*38fd1498Szrj }
629*38fd1498Szrj 
630*38fd1498Szrj /* Filter function predicate that returns true for a edge predicate P
631*38fd1498Szrj    if its edge is equal to DATA.  */
632*38fd1498Szrj 
633*38fd1498Szrj bool
equal_edge_p(edge_prediction * p,void * data)634*38fd1498Szrj equal_edge_p (edge_prediction *p, void *data)
635*38fd1498Szrj {
636*38fd1498Szrj   return p->ep_edge == (edge)data;
637*38fd1498Szrj }
638*38fd1498Szrj 
639*38fd1498Szrj /* Remove all predictions on given basic block that are attached
640*38fd1498Szrj    to edge E.  */
641*38fd1498Szrj void
remove_predictions_associated_with_edge(edge e)642*38fd1498Szrj remove_predictions_associated_with_edge (edge e)
643*38fd1498Szrj {
644*38fd1498Szrj   if (!bb_predictions)
645*38fd1498Szrj     return;
646*38fd1498Szrj 
647*38fd1498Szrj   edge_prediction **preds = bb_predictions->get (e->src);
648*38fd1498Szrj   filter_predictions (preds, equal_edge_p, e);
649*38fd1498Szrj }
650*38fd1498Szrj 
651*38fd1498Szrj /* Clears the list of predictions stored for BB.  */
652*38fd1498Szrj 
653*38fd1498Szrj static void
clear_bb_predictions(basic_block bb)654*38fd1498Szrj clear_bb_predictions (basic_block bb)
655*38fd1498Szrj {
656*38fd1498Szrj   edge_prediction **preds = bb_predictions->get (bb);
657*38fd1498Szrj   struct edge_prediction *pred, *next;
658*38fd1498Szrj 
659*38fd1498Szrj   if (!preds)
660*38fd1498Szrj     return;
661*38fd1498Szrj 
662*38fd1498Szrj   for (pred = *preds; pred; pred = next)
663*38fd1498Szrj     {
664*38fd1498Szrj       next = pred->ep_next;
665*38fd1498Szrj       free (pred);
666*38fd1498Szrj     }
667*38fd1498Szrj   *preds = NULL;
668*38fd1498Szrj }
669*38fd1498Szrj 
670*38fd1498Szrj /* Return true when we can store prediction on insn INSN.
671*38fd1498Szrj    At the moment we represent predictions only on conditional
672*38fd1498Szrj    jumps, not at computed jump or other complicated cases.  */
673*38fd1498Szrj static bool
can_predict_insn_p(const rtx_insn * insn)674*38fd1498Szrj can_predict_insn_p (const rtx_insn *insn)
675*38fd1498Szrj {
676*38fd1498Szrj   return (JUMP_P (insn)
677*38fd1498Szrj 	  && any_condjump_p (insn)
678*38fd1498Szrj 	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
679*38fd1498Szrj }
680*38fd1498Szrj 
681*38fd1498Szrj /* Predict edge E by given predictor if possible.  */
682*38fd1498Szrj 
683*38fd1498Szrj void
predict_edge_def(edge e,enum br_predictor predictor,enum prediction taken)684*38fd1498Szrj predict_edge_def (edge e, enum br_predictor predictor,
685*38fd1498Szrj 		  enum prediction taken)
686*38fd1498Szrj {
687*38fd1498Szrj    int probability = predictor_info[(int) predictor].hitrate;
688*38fd1498Szrj 
689*38fd1498Szrj    if (taken != TAKEN)
690*38fd1498Szrj      probability = REG_BR_PROB_BASE - probability;
691*38fd1498Szrj 
692*38fd1498Szrj    predict_edge (e, predictor, probability);
693*38fd1498Szrj }
694*38fd1498Szrj 
695*38fd1498Szrj /* Invert all branch predictions or probability notes in the INSN.  This needs
696*38fd1498Szrj    to be done each time we invert the condition used by the jump.  */
697*38fd1498Szrj 
698*38fd1498Szrj void
invert_br_probabilities(rtx insn)699*38fd1498Szrj invert_br_probabilities (rtx insn)
700*38fd1498Szrj {
701*38fd1498Szrj   rtx note;
702*38fd1498Szrj 
703*38fd1498Szrj   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
704*38fd1498Szrj     if (REG_NOTE_KIND (note) == REG_BR_PROB)
705*38fd1498Szrj       XINT (note, 0) = profile_probability::from_reg_br_prob_note
706*38fd1498Szrj 			 (XINT (note, 0)).invert ().to_reg_br_prob_note ();
707*38fd1498Szrj     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
708*38fd1498Szrj       XEXP (XEXP (note, 0), 1)
709*38fd1498Szrj 	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
710*38fd1498Szrj }
711*38fd1498Szrj 
712*38fd1498Szrj /* Dump information about the branch prediction to the output file.  */
713*38fd1498Szrj 
714*38fd1498Szrj static void
715*38fd1498Szrj dump_prediction (FILE *file, enum br_predictor predictor, int probability,
716*38fd1498Szrj 		 basic_block bb, enum predictor_reason reason = REASON_NONE,
717*38fd1498Szrj 		 edge ep_edge = NULL)
718*38fd1498Szrj {
719*38fd1498Szrj   edge e = ep_edge;
720*38fd1498Szrj   edge_iterator ei;
721*38fd1498Szrj 
722*38fd1498Szrj   if (!file)
723*38fd1498Szrj     return;
724*38fd1498Szrj 
725*38fd1498Szrj   if (e == NULL)
726*38fd1498Szrj     FOR_EACH_EDGE (e, ei, bb->succs)
727*38fd1498Szrj       if (! (e->flags & EDGE_FALLTHRU))
728*38fd1498Szrj 	break;
729*38fd1498Szrj 
730*38fd1498Szrj   char edge_info_str[128];
731*38fd1498Szrj   if (ep_edge)
732*38fd1498Szrj     sprintf (edge_info_str, " of edge %d->%d", ep_edge->src->index,
733*38fd1498Szrj 	     ep_edge->dest->index);
734*38fd1498Szrj   else
735*38fd1498Szrj     edge_info_str[0] = '\0';
736*38fd1498Szrj 
737*38fd1498Szrj   fprintf (file, "  %s heuristics%s%s: %.1f%%",
738*38fd1498Szrj 	   predictor_info[predictor].name,
739*38fd1498Szrj 	   edge_info_str, reason_messages[reason],
740*38fd1498Szrj 	   probability * 100.0 / REG_BR_PROB_BASE);
741*38fd1498Szrj 
742*38fd1498Szrj   if (bb->count.initialized_p ())
743*38fd1498Szrj     {
744*38fd1498Szrj       fprintf (file, "  exec ");
745*38fd1498Szrj       bb->count.dump (file);
746*38fd1498Szrj       if (e)
747*38fd1498Szrj 	{
748*38fd1498Szrj 	  fprintf (file, " hit ");
749*38fd1498Szrj 	  e->count ().dump (file);
750*38fd1498Szrj 	  fprintf (file, " (%.1f%%)", e->count ().to_gcov_type() * 100.0
751*38fd1498Szrj 		   / bb->count.to_gcov_type ());
752*38fd1498Szrj 	}
753*38fd1498Szrj     }
754*38fd1498Szrj 
755*38fd1498Szrj   fprintf (file, "\n");
756*38fd1498Szrj 
757*38fd1498Szrj   /* Print output that be easily read by analyze_brprob.py script. We are
758*38fd1498Szrj      interested only in counts that are read from GCDA files.  */
759*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS)
760*38fd1498Szrj       && bb->count.precise_p ()
761*38fd1498Szrj       && reason == REASON_NONE)
762*38fd1498Szrj     {
763*38fd1498Szrj       gcc_assert (e->count ().precise_p ());
764*38fd1498Szrj       fprintf (file, ";;heuristics;%s;%" PRId64 ";%" PRId64 ";%.1f;\n",
765*38fd1498Szrj 	       predictor_info[predictor].name,
766*38fd1498Szrj 	       bb->count.to_gcov_type (), e->count ().to_gcov_type (),
767*38fd1498Szrj 	       probability * 100.0 / REG_BR_PROB_BASE);
768*38fd1498Szrj     }
769*38fd1498Szrj }
770*38fd1498Szrj 
771*38fd1498Szrj /* Return true if STMT is known to be unlikely executed.  */
772*38fd1498Szrj 
773*38fd1498Szrj static bool
unlikely_executed_stmt_p(gimple * stmt)774*38fd1498Szrj unlikely_executed_stmt_p (gimple *stmt)
775*38fd1498Szrj {
776*38fd1498Szrj   if (!is_gimple_call (stmt))
777*38fd1498Szrj     return false;
778*38fd1498Szrj   /* NORETURN attribute alone is not strong enough: exit() may be quite
779*38fd1498Szrj      likely executed once during program run.  */
780*38fd1498Szrj   if (gimple_call_fntype (stmt)
781*38fd1498Szrj       && lookup_attribute ("cold",
782*38fd1498Szrj 			   TYPE_ATTRIBUTES (gimple_call_fntype (stmt)))
783*38fd1498Szrj       && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)))
784*38fd1498Szrj     return true;
785*38fd1498Szrj   tree decl = gimple_call_fndecl (stmt);
786*38fd1498Szrj   if (!decl)
787*38fd1498Szrj     return false;
788*38fd1498Szrj   if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl))
789*38fd1498Szrj       && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)))
790*38fd1498Szrj     return true;
791*38fd1498Szrj 
792*38fd1498Szrj   cgraph_node *n = cgraph_node::get (decl);
793*38fd1498Szrj   if (!n)
794*38fd1498Szrj     return false;
795*38fd1498Szrj 
796*38fd1498Szrj   availability avail;
797*38fd1498Szrj   n = n->ultimate_alias_target (&avail);
798*38fd1498Szrj   if (avail < AVAIL_AVAILABLE)
799*38fd1498Szrj     return false;
800*38fd1498Szrj   if (!n->analyzed
801*38fd1498Szrj       || n->decl == current_function_decl)
802*38fd1498Szrj     return false;
803*38fd1498Szrj   return n->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED;
804*38fd1498Szrj }
805*38fd1498Szrj 
806*38fd1498Szrj /* Return true if BB is unlikely executed.  */
807*38fd1498Szrj 
808*38fd1498Szrj static bool
unlikely_executed_bb_p(basic_block bb)809*38fd1498Szrj unlikely_executed_bb_p (basic_block bb)
810*38fd1498Szrj {
811*38fd1498Szrj   if (bb->count == profile_count::zero ())
812*38fd1498Szrj     return true;
813*38fd1498Szrj   if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun) || bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
814*38fd1498Szrj     return false;
815*38fd1498Szrj   for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
816*38fd1498Szrj        !gsi_end_p (gsi); gsi_next (&gsi))
817*38fd1498Szrj     {
818*38fd1498Szrj       if (unlikely_executed_stmt_p (gsi_stmt (gsi)))
819*38fd1498Szrj         return true;
820*38fd1498Szrj       if (stmt_can_terminate_bb_p (gsi_stmt (gsi)))
821*38fd1498Szrj 	return false;
822*38fd1498Szrj     }
823*38fd1498Szrj   return false;
824*38fd1498Szrj }
825*38fd1498Szrj 
826*38fd1498Szrj /* We can not predict the probabilities of outgoing edges of bb.  Set them
827*38fd1498Szrj    evenly and hope for the best.  If UNLIKELY_EDGES is not null, distribute
828*38fd1498Szrj    even probability for all edges not mentioned in the set.  These edges
829*38fd1498Szrj    are given PROB_VERY_UNLIKELY probability.  */
830*38fd1498Szrj 
831*38fd1498Szrj static void
832*38fd1498Szrj set_even_probabilities (basic_block bb,
833*38fd1498Szrj 			hash_set<edge> *unlikely_edges = NULL)
834*38fd1498Szrj {
835*38fd1498Szrj   unsigned nedges = 0, unlikely_count = 0;
836*38fd1498Szrj   edge e = NULL;
837*38fd1498Szrj   edge_iterator ei;
838*38fd1498Szrj   profile_probability all = profile_probability::always ();
839*38fd1498Szrj 
840*38fd1498Szrj   FOR_EACH_EDGE (e, ei, bb->succs)
841*38fd1498Szrj     if (e->probability.initialized_p ())
842*38fd1498Szrj       all -= e->probability;
843*38fd1498Szrj     else if (!unlikely_executed_edge_p (e))
844*38fd1498Szrj       {
845*38fd1498Szrj         nedges ++;
846*38fd1498Szrj         if (unlikely_edges != NULL && unlikely_edges->contains (e))
847*38fd1498Szrj 	  {
848*38fd1498Szrj 	    all -= profile_probability::very_unlikely ();
849*38fd1498Szrj 	    unlikely_count++;
850*38fd1498Szrj 	  }
851*38fd1498Szrj       }
852*38fd1498Szrj 
853*38fd1498Szrj   /* Make the distribution even if all edges are unlikely.  */
854*38fd1498Szrj   if (unlikely_count == nedges)
855*38fd1498Szrj     {
856*38fd1498Szrj       unlikely_edges = NULL;
857*38fd1498Szrj       unlikely_count = 0;
858*38fd1498Szrj     }
859*38fd1498Szrj 
860*38fd1498Szrj   unsigned c = nedges - unlikely_count;
861*38fd1498Szrj 
862*38fd1498Szrj   FOR_EACH_EDGE (e, ei, bb->succs)
863*38fd1498Szrj     if (e->probability.initialized_p ())
864*38fd1498Szrj       ;
865*38fd1498Szrj     else if (!unlikely_executed_edge_p (e))
866*38fd1498Szrj       {
867*38fd1498Szrj 	if (unlikely_edges != NULL && unlikely_edges->contains (e))
868*38fd1498Szrj 	  e->probability = profile_probability::very_unlikely ();
869*38fd1498Szrj 	else
870*38fd1498Szrj 	  e->probability = all.apply_scale (1, c).guessed ();
871*38fd1498Szrj       }
872*38fd1498Szrj     else
873*38fd1498Szrj       e->probability = profile_probability::never ();
874*38fd1498Szrj }
875*38fd1498Szrj 
876*38fd1498Szrj /* Add REG_BR_PROB note to JUMP with PROB.  */
877*38fd1498Szrj 
878*38fd1498Szrj void
add_reg_br_prob_note(rtx_insn * jump,profile_probability prob)879*38fd1498Szrj add_reg_br_prob_note (rtx_insn *jump, profile_probability prob)
880*38fd1498Szrj {
881*38fd1498Szrj   gcc_checking_assert (JUMP_P (jump) && !find_reg_note (jump, REG_BR_PROB, 0));
882*38fd1498Szrj   add_int_reg_note (jump, REG_BR_PROB, prob.to_reg_br_prob_note ());
883*38fd1498Szrj }
884*38fd1498Szrj 
885*38fd1498Szrj /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
886*38fd1498Szrj    note if not already present.  Remove now useless REG_BR_PRED notes.  */
887*38fd1498Szrj 
888*38fd1498Szrj static void
combine_predictions_for_insn(rtx_insn * insn,basic_block bb)889*38fd1498Szrj combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
890*38fd1498Szrj {
891*38fd1498Szrj   rtx prob_note;
892*38fd1498Szrj   rtx *pnote;
893*38fd1498Szrj   rtx note;
894*38fd1498Szrj   int best_probability = PROB_EVEN;
895*38fd1498Szrj   enum br_predictor best_predictor = END_PREDICTORS;
896*38fd1498Szrj   int combined_probability = REG_BR_PROB_BASE / 2;
897*38fd1498Szrj   int d;
898*38fd1498Szrj   bool first_match = false;
899*38fd1498Szrj   bool found = false;
900*38fd1498Szrj 
901*38fd1498Szrj   if (!can_predict_insn_p (insn))
902*38fd1498Szrj     {
903*38fd1498Szrj       set_even_probabilities (bb);
904*38fd1498Szrj       return;
905*38fd1498Szrj     }
906*38fd1498Szrj 
907*38fd1498Szrj   prob_note = find_reg_note (insn, REG_BR_PROB, 0);
908*38fd1498Szrj   pnote = &REG_NOTES (insn);
909*38fd1498Szrj   if (dump_file)
910*38fd1498Szrj     fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
911*38fd1498Szrj 	     bb->index);
912*38fd1498Szrj 
913*38fd1498Szrj   /* We implement "first match" heuristics and use probability guessed
914*38fd1498Szrj      by predictor with smallest index.  */
915*38fd1498Szrj   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
916*38fd1498Szrj     if (REG_NOTE_KIND (note) == REG_BR_PRED)
917*38fd1498Szrj       {
918*38fd1498Szrj 	enum br_predictor predictor = ((enum br_predictor)
919*38fd1498Szrj 				       INTVAL (XEXP (XEXP (note, 0), 0)));
920*38fd1498Szrj 	int probability = INTVAL (XEXP (XEXP (note, 0), 1));
921*38fd1498Szrj 
922*38fd1498Szrj 	found = true;
923*38fd1498Szrj 	if (best_predictor > predictor
924*38fd1498Szrj 	    && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
925*38fd1498Szrj 	  best_probability = probability, best_predictor = predictor;
926*38fd1498Szrj 
927*38fd1498Szrj 	d = (combined_probability * probability
928*38fd1498Szrj 	     + (REG_BR_PROB_BASE - combined_probability)
929*38fd1498Szrj 	     * (REG_BR_PROB_BASE - probability));
930*38fd1498Szrj 
931*38fd1498Szrj 	/* Use FP math to avoid overflows of 32bit integers.  */
932*38fd1498Szrj 	if (d == 0)
933*38fd1498Szrj 	  /* If one probability is 0% and one 100%, avoid division by zero.  */
934*38fd1498Szrj 	  combined_probability = REG_BR_PROB_BASE / 2;
935*38fd1498Szrj 	else
936*38fd1498Szrj 	  combined_probability = (((double) combined_probability) * probability
937*38fd1498Szrj 				  * REG_BR_PROB_BASE / d + 0.5);
938*38fd1498Szrj       }
939*38fd1498Szrj 
940*38fd1498Szrj   /* Decide which heuristic to use.  In case we didn't match anything,
941*38fd1498Szrj      use no_prediction heuristic, in case we did match, use either
942*38fd1498Szrj      first match or Dempster-Shaffer theory depending on the flags.  */
943*38fd1498Szrj 
944*38fd1498Szrj   if (best_predictor != END_PREDICTORS)
945*38fd1498Szrj     first_match = true;
946*38fd1498Szrj 
947*38fd1498Szrj   if (!found)
948*38fd1498Szrj     dump_prediction (dump_file, PRED_NO_PREDICTION,
949*38fd1498Szrj 		     combined_probability, bb);
950*38fd1498Szrj   else
951*38fd1498Szrj     {
952*38fd1498Szrj       if (!first_match)
953*38fd1498Szrj 	dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
954*38fd1498Szrj 			 bb, !first_match ? REASON_NONE : REASON_IGNORED);
955*38fd1498Szrj       else
956*38fd1498Szrj 	dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
957*38fd1498Szrj 			 bb, first_match ? REASON_NONE : REASON_IGNORED);
958*38fd1498Szrj     }
959*38fd1498Szrj 
960*38fd1498Szrj   if (first_match)
961*38fd1498Szrj     combined_probability = best_probability;
962*38fd1498Szrj   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);
963*38fd1498Szrj 
964*38fd1498Szrj   while (*pnote)
965*38fd1498Szrj     {
966*38fd1498Szrj       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
967*38fd1498Szrj 	{
968*38fd1498Szrj 	  enum br_predictor predictor = ((enum br_predictor)
969*38fd1498Szrj 					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
970*38fd1498Szrj 	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
971*38fd1498Szrj 
972*38fd1498Szrj 	  dump_prediction (dump_file, predictor, probability, bb,
973*38fd1498Szrj 			   (!first_match || best_predictor == predictor)
974*38fd1498Szrj 			   ? REASON_NONE : REASON_IGNORED);
975*38fd1498Szrj 	  *pnote = XEXP (*pnote, 1);
976*38fd1498Szrj 	}
977*38fd1498Szrj       else
978*38fd1498Szrj 	pnote = &XEXP (*pnote, 1);
979*38fd1498Szrj     }
980*38fd1498Szrj 
981*38fd1498Szrj   if (!prob_note)
982*38fd1498Szrj     {
983*38fd1498Szrj       profile_probability p
984*38fd1498Szrj 	 = profile_probability::from_reg_br_prob_base (combined_probability);
985*38fd1498Szrj       add_reg_br_prob_note (insn, p);
986*38fd1498Szrj 
987*38fd1498Szrj       /* Save the prediction into CFG in case we are seeing non-degenerated
988*38fd1498Szrj 	 conditional jump.  */
989*38fd1498Szrj       if (!single_succ_p (bb))
990*38fd1498Szrj 	{
991*38fd1498Szrj 	  BRANCH_EDGE (bb)->probability = p;
992*38fd1498Szrj 	  FALLTHRU_EDGE (bb)->probability
993*38fd1498Szrj 	    = BRANCH_EDGE (bb)->probability.invert ();
994*38fd1498Szrj 	}
995*38fd1498Szrj     }
996*38fd1498Szrj   else if (!single_succ_p (bb))
997*38fd1498Szrj     {
998*38fd1498Szrj       profile_probability prob = profile_probability::from_reg_br_prob_note
999*38fd1498Szrj 					(XINT (prob_note, 0));
1000*38fd1498Szrj 
1001*38fd1498Szrj       BRANCH_EDGE (bb)->probability = prob;
1002*38fd1498Szrj       FALLTHRU_EDGE (bb)->probability = prob.invert ();
1003*38fd1498Szrj     }
1004*38fd1498Szrj   else
1005*38fd1498Szrj     single_succ_edge (bb)->probability = profile_probability::always ();
1006*38fd1498Szrj }
1007*38fd1498Szrj 
1008*38fd1498Szrj /* Edge prediction hash traits.  */
1009*38fd1498Szrj 
1010*38fd1498Szrj struct predictor_hash: pointer_hash <edge_prediction>
1011*38fd1498Szrj {
1012*38fd1498Szrj 
1013*38fd1498Szrj   static inline hashval_t hash (const edge_prediction *);
1014*38fd1498Szrj   static inline bool equal (const edge_prediction *, const edge_prediction *);
1015*38fd1498Szrj };
1016*38fd1498Szrj 
1017*38fd1498Szrj /* Calculate hash value of an edge prediction P based on predictor and
1018*38fd1498Szrj    normalized probability.  */
1019*38fd1498Szrj 
1020*38fd1498Szrj inline hashval_t
hash(const edge_prediction * p)1021*38fd1498Szrj predictor_hash::hash (const edge_prediction *p)
1022*38fd1498Szrj {
1023*38fd1498Szrj   inchash::hash hstate;
1024*38fd1498Szrj   hstate.add_int (p->ep_predictor);
1025*38fd1498Szrj 
1026*38fd1498Szrj   int prob = p->ep_probability;
1027*38fd1498Szrj   if (prob > REG_BR_PROB_BASE / 2)
1028*38fd1498Szrj     prob = REG_BR_PROB_BASE - prob;
1029*38fd1498Szrj 
1030*38fd1498Szrj   hstate.add_int (prob);
1031*38fd1498Szrj 
1032*38fd1498Szrj   return hstate.end ();
1033*38fd1498Szrj }
1034*38fd1498Szrj 
1035*38fd1498Szrj /* Return true whether edge predictions P1 and P2 use the same predictor and
1036*38fd1498Szrj    have equal (or opposed probability).  */
1037*38fd1498Szrj 
1038*38fd1498Szrj inline bool
equal(const edge_prediction * p1,const edge_prediction * p2)1039*38fd1498Szrj predictor_hash::equal (const edge_prediction *p1, const edge_prediction *p2)
1040*38fd1498Szrj {
1041*38fd1498Szrj   return (p1->ep_predictor == p2->ep_predictor
1042*38fd1498Szrj 	  && (p1->ep_probability == p2->ep_probability
1043*38fd1498Szrj 	      || p1->ep_probability == REG_BR_PROB_BASE - p2->ep_probability));
1044*38fd1498Szrj }
1045*38fd1498Szrj 
1046*38fd1498Szrj struct predictor_hash_traits: predictor_hash,
1047*38fd1498Szrj   typed_noop_remove <edge_prediction *> {};
1048*38fd1498Szrj 
1049*38fd1498Szrj /* Return true if edge prediction P is not in DATA hash set.  */
1050*38fd1498Szrj 
1051*38fd1498Szrj static bool
not_removed_prediction_p(edge_prediction * p,void * data)1052*38fd1498Szrj not_removed_prediction_p (edge_prediction *p, void *data)
1053*38fd1498Szrj {
1054*38fd1498Szrj   hash_set<edge_prediction *> *remove = (hash_set<edge_prediction *> *) data;
1055*38fd1498Szrj   return !remove->contains (p);
1056*38fd1498Szrj }
1057*38fd1498Szrj 
1058*38fd1498Szrj /* Prune predictions for a basic block BB.  Currently we do following
1059*38fd1498Szrj    clean-up steps:
1060*38fd1498Szrj 
1061*38fd1498Szrj    1) remove duplicate prediction that is guessed with the same probability
1062*38fd1498Szrj       (different than 1/2) to both edge
1063*38fd1498Szrj    2) remove duplicates for a prediction that belongs with the same probability
1064*38fd1498Szrj       to a single edge
1065*38fd1498Szrj 
1066*38fd1498Szrj   */
1067*38fd1498Szrj 
1068*38fd1498Szrj static void
prune_predictions_for_bb(basic_block bb)1069*38fd1498Szrj prune_predictions_for_bb (basic_block bb)
1070*38fd1498Szrj {
1071*38fd1498Szrj   edge_prediction **preds = bb_predictions->get (bb);
1072*38fd1498Szrj 
1073*38fd1498Szrj   if (preds)
1074*38fd1498Szrj     {
1075*38fd1498Szrj       hash_table <predictor_hash_traits> s (13);
1076*38fd1498Szrj       hash_set <edge_prediction *> remove;
1077*38fd1498Szrj 
1078*38fd1498Szrj       /* Step 1: identify predictors that should be removed.  */
1079*38fd1498Szrj       for (edge_prediction *pred = *preds; pred; pred = pred->ep_next)
1080*38fd1498Szrj 	{
1081*38fd1498Szrj 	  edge_prediction *existing = s.find (pred);
1082*38fd1498Szrj 	  if (existing)
1083*38fd1498Szrj 	    {
1084*38fd1498Szrj 	      if (pred->ep_edge == existing->ep_edge
1085*38fd1498Szrj 		  && pred->ep_probability == existing->ep_probability)
1086*38fd1498Szrj 		{
1087*38fd1498Szrj 		  /* Remove a duplicate predictor.  */
1088*38fd1498Szrj 		  dump_prediction (dump_file, pred->ep_predictor,
1089*38fd1498Szrj 				   pred->ep_probability, bb,
1090*38fd1498Szrj 				   REASON_SINGLE_EDGE_DUPLICATE, pred->ep_edge);
1091*38fd1498Szrj 
1092*38fd1498Szrj 		  remove.add (pred);
1093*38fd1498Szrj 		}
1094*38fd1498Szrj 	      else if (pred->ep_edge != existing->ep_edge
1095*38fd1498Szrj 		       && pred->ep_probability == existing->ep_probability
1096*38fd1498Szrj 		       && pred->ep_probability != REG_BR_PROB_BASE / 2)
1097*38fd1498Szrj 		{
1098*38fd1498Szrj 		  /* Remove both predictors as they predict the same
1099*38fd1498Szrj 		     for both edges.  */
1100*38fd1498Szrj 		  dump_prediction (dump_file, existing->ep_predictor,
1101*38fd1498Szrj 				   pred->ep_probability, bb,
1102*38fd1498Szrj 				   REASON_EDGE_PAIR_DUPLICATE,
1103*38fd1498Szrj 				   existing->ep_edge);
1104*38fd1498Szrj 		  dump_prediction (dump_file, pred->ep_predictor,
1105*38fd1498Szrj 				   pred->ep_probability, bb,
1106*38fd1498Szrj 				   REASON_EDGE_PAIR_DUPLICATE,
1107*38fd1498Szrj 				   pred->ep_edge);
1108*38fd1498Szrj 
1109*38fd1498Szrj 		  remove.add (existing);
1110*38fd1498Szrj 		  remove.add (pred);
1111*38fd1498Szrj 		}
1112*38fd1498Szrj 	    }
1113*38fd1498Szrj 
1114*38fd1498Szrj 	  edge_prediction **slot2 = s.find_slot (pred, INSERT);
1115*38fd1498Szrj 	  *slot2 = pred;
1116*38fd1498Szrj 	}
1117*38fd1498Szrj 
1118*38fd1498Szrj       /* Step 2: Remove predictors.  */
1119*38fd1498Szrj       filter_predictions (preds, not_removed_prediction_p, &remove);
1120*38fd1498Szrj     }
1121*38fd1498Szrj }
1122*38fd1498Szrj 
1123*38fd1498Szrj /* Combine predictions into single probability and store them into CFG.
1124*38fd1498Szrj    Remove now useless prediction entries.
1125*38fd1498Szrj    If DRY_RUN is set, only produce dumps and do not modify profile.  */
1126*38fd1498Szrj 
1127*38fd1498Szrj static void
combine_predictions_for_bb(basic_block bb,bool dry_run)1128*38fd1498Szrj combine_predictions_for_bb (basic_block bb, bool dry_run)
1129*38fd1498Szrj {
1130*38fd1498Szrj   int best_probability = PROB_EVEN;
1131*38fd1498Szrj   enum br_predictor best_predictor = END_PREDICTORS;
1132*38fd1498Szrj   int combined_probability = REG_BR_PROB_BASE / 2;
1133*38fd1498Szrj   int d;
1134*38fd1498Szrj   bool first_match = false;
1135*38fd1498Szrj   bool found = false;
1136*38fd1498Szrj   struct edge_prediction *pred;
1137*38fd1498Szrj   int nedges = 0;
1138*38fd1498Szrj   edge e, first = NULL, second = NULL;
1139*38fd1498Szrj   edge_iterator ei;
1140*38fd1498Szrj   int nzero = 0;
1141*38fd1498Szrj   int nunknown = 0;
1142*38fd1498Szrj 
1143*38fd1498Szrj   FOR_EACH_EDGE (e, ei, bb->succs)
1144*38fd1498Szrj     {
1145*38fd1498Szrj       if (!unlikely_executed_edge_p (e))
1146*38fd1498Szrj         {
1147*38fd1498Szrj 	  nedges ++;
1148*38fd1498Szrj 	  if (first && !second)
1149*38fd1498Szrj 	    second = e;
1150*38fd1498Szrj 	  if (!first)
1151*38fd1498Szrj 	    first = e;
1152*38fd1498Szrj         }
1153*38fd1498Szrj       else if (!e->probability.initialized_p ())
1154*38fd1498Szrj         e->probability = profile_probability::never ();
1155*38fd1498Szrj      if (!e->probability.initialized_p ())
1156*38fd1498Szrj         nunknown++;
1157*38fd1498Szrj      else if (e->probability == profile_probability::never ())
1158*38fd1498Szrj 	nzero++;
1159*38fd1498Szrj     }
1160*38fd1498Szrj 
1161*38fd1498Szrj   /* When there is no successor or only one choice, prediction is easy.
1162*38fd1498Szrj 
1163*38fd1498Szrj      When we have a basic block with more than 2 successors, the situation
1164*38fd1498Szrj      is more complicated as DS theory cannot be used literally.
1165*38fd1498Szrj      More precisely, let's assume we predicted edge e1 with probability p1,
1166*38fd1498Szrj      thus: m1({b1}) = p1.  As we're going to combine more than 2 edges, we
1167*38fd1498Szrj      need to find probability of e.g. m1({b2}), which we don't know.
1168*38fd1498Szrj      The only approximation is to equally distribute 1-p1 to all edges
1169*38fd1498Szrj      different from b1.
1170*38fd1498Szrj 
1171*38fd1498Szrj      According to numbers we've got from SPEC2006 benchark, there's only
1172*38fd1498Szrj      one interesting reliable predictor (noreturn call), which can be
1173*38fd1498Szrj      handled with a bit easier approach.  */
1174*38fd1498Szrj   if (nedges != 2)
1175*38fd1498Szrj     {
1176*38fd1498Szrj       hash_set<edge> unlikely_edges (4);
1177*38fd1498Szrj 
1178*38fd1498Szrj       /* Identify all edges that have a probability close to very unlikely.
1179*38fd1498Szrj 	 Doing the approach for very unlikely doesn't worth for doing as
1180*38fd1498Szrj 	 there's no such probability in SPEC2006 benchmark.  */
1181*38fd1498Szrj       edge_prediction **preds = bb_predictions->get (bb);
1182*38fd1498Szrj       if (preds)
1183*38fd1498Szrj 	for (pred = *preds; pred; pred = pred->ep_next)
1184*38fd1498Szrj 	  if (pred->ep_probability <= PROB_VERY_UNLIKELY)
1185*38fd1498Szrj 	    unlikely_edges.add (pred->ep_edge);
1186*38fd1498Szrj 
1187*38fd1498Szrj       if (!dry_run)
1188*38fd1498Szrj 	set_even_probabilities (bb, &unlikely_edges);
1189*38fd1498Szrj       clear_bb_predictions (bb);
1190*38fd1498Szrj       if (dump_file)
1191*38fd1498Szrj 	{
1192*38fd1498Szrj 	  fprintf (dump_file, "Predictions for bb %i\n", bb->index);
1193*38fd1498Szrj 	  if (unlikely_edges.elements () == 0)
1194*38fd1498Szrj 	    fprintf (dump_file,
1195*38fd1498Szrj 		     "%i edges in bb %i predicted to even probabilities\n",
1196*38fd1498Szrj 		     nedges, bb->index);
1197*38fd1498Szrj 	  else
1198*38fd1498Szrj 	    {
1199*38fd1498Szrj 	      fprintf (dump_file,
1200*38fd1498Szrj 		       "%i edges in bb %i predicted with some unlikely edges\n",
1201*38fd1498Szrj 		       nedges, bb->index);
1202*38fd1498Szrj 	      FOR_EACH_EDGE (e, ei, bb->succs)
1203*38fd1498Szrj 		if (!unlikely_executed_edge_p (e))
1204*38fd1498Szrj 		  dump_prediction (dump_file, PRED_COMBINED,
1205*38fd1498Szrj 		   e->probability.to_reg_br_prob_base (), bb, REASON_NONE, e);
1206*38fd1498Szrj 	    }
1207*38fd1498Szrj 	}
1208*38fd1498Szrj       return;
1209*38fd1498Szrj     }
1210*38fd1498Szrj 
1211*38fd1498Szrj   if (dump_file)
1212*38fd1498Szrj     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
1213*38fd1498Szrj 
1214*38fd1498Szrj   prune_predictions_for_bb (bb);
1215*38fd1498Szrj 
1216*38fd1498Szrj   edge_prediction **preds = bb_predictions->get (bb);
1217*38fd1498Szrj 
1218*38fd1498Szrj   if (preds)
1219*38fd1498Szrj     {
1220*38fd1498Szrj       /* We implement "first match" heuristics and use probability guessed
1221*38fd1498Szrj 	 by predictor with smallest index.  */
1222*38fd1498Szrj       for (pred = *preds; pred; pred = pred->ep_next)
1223*38fd1498Szrj 	{
1224*38fd1498Szrj 	  enum br_predictor predictor = pred->ep_predictor;
1225*38fd1498Szrj 	  int probability = pred->ep_probability;
1226*38fd1498Szrj 
1227*38fd1498Szrj 	  if (pred->ep_edge != first)
1228*38fd1498Szrj 	    probability = REG_BR_PROB_BASE - probability;
1229*38fd1498Szrj 
1230*38fd1498Szrj 	  found = true;
1231*38fd1498Szrj 	  /* First match heuristics would be widly confused if we predicted
1232*38fd1498Szrj 	     both directions.  */
1233*38fd1498Szrj 	  if (best_predictor > predictor
1234*38fd1498Szrj 	    && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
1235*38fd1498Szrj 	    {
1236*38fd1498Szrj               struct edge_prediction *pred2;
1237*38fd1498Szrj 	      int prob = probability;
1238*38fd1498Szrj 
1239*38fd1498Szrj 	      for (pred2 = (struct edge_prediction *) *preds;
1240*38fd1498Szrj 		   pred2; pred2 = pred2->ep_next)
1241*38fd1498Szrj 	       if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
1242*38fd1498Szrj 	         {
1243*38fd1498Szrj 		   int probability2 = pred2->ep_probability;
1244*38fd1498Szrj 
1245*38fd1498Szrj 		   if (pred2->ep_edge != first)
1246*38fd1498Szrj 		     probability2 = REG_BR_PROB_BASE - probability2;
1247*38fd1498Szrj 
1248*38fd1498Szrj 		   if ((probability < REG_BR_PROB_BASE / 2) !=
1249*38fd1498Szrj 		       (probability2 < REG_BR_PROB_BASE / 2))
1250*38fd1498Szrj 		     break;
1251*38fd1498Szrj 
1252*38fd1498Szrj 		   /* If the same predictor later gave better result, go for it! */
1253*38fd1498Szrj 		   if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
1254*38fd1498Szrj 		       || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
1255*38fd1498Szrj 		     prob = probability2;
1256*38fd1498Szrj 		 }
1257*38fd1498Szrj 	      if (!pred2)
1258*38fd1498Szrj 	        best_probability = prob, best_predictor = predictor;
1259*38fd1498Szrj 	    }
1260*38fd1498Szrj 
1261*38fd1498Szrj 	  d = (combined_probability * probability
1262*38fd1498Szrj 	       + (REG_BR_PROB_BASE - combined_probability)
1263*38fd1498Szrj 	       * (REG_BR_PROB_BASE - probability));
1264*38fd1498Szrj 
1265*38fd1498Szrj 	  /* Use FP math to avoid overflows of 32bit integers.  */
1266*38fd1498Szrj 	  if (d == 0)
1267*38fd1498Szrj 	    /* If one probability is 0% and one 100%, avoid division by zero.  */
1268*38fd1498Szrj 	    combined_probability = REG_BR_PROB_BASE / 2;
1269*38fd1498Szrj 	  else
1270*38fd1498Szrj 	    combined_probability = (((double) combined_probability)
1271*38fd1498Szrj 				    * probability
1272*38fd1498Szrj 		    		    * REG_BR_PROB_BASE / d + 0.5);
1273*38fd1498Szrj 	}
1274*38fd1498Szrj     }
1275*38fd1498Szrj 
1276*38fd1498Szrj   /* Decide which heuristic to use.  In case we didn't match anything,
1277*38fd1498Szrj      use no_prediction heuristic, in case we did match, use either
1278*38fd1498Szrj      first match or Dempster-Shaffer theory depending on the flags.  */
1279*38fd1498Szrj 
1280*38fd1498Szrj   if (best_predictor != END_PREDICTORS)
1281*38fd1498Szrj     first_match = true;
1282*38fd1498Szrj 
1283*38fd1498Szrj   if (!found)
1284*38fd1498Szrj     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb);
1285*38fd1498Szrj   else
1286*38fd1498Szrj     {
1287*38fd1498Szrj       if (!first_match)
1288*38fd1498Szrj 	dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
1289*38fd1498Szrj 			 !first_match ? REASON_NONE : REASON_IGNORED);
1290*38fd1498Szrj       else
1291*38fd1498Szrj 	dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
1292*38fd1498Szrj 			 first_match ? REASON_NONE : REASON_IGNORED);
1293*38fd1498Szrj     }
1294*38fd1498Szrj 
1295*38fd1498Szrj   if (first_match)
1296*38fd1498Szrj     combined_probability = best_probability;
1297*38fd1498Szrj   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);
1298*38fd1498Szrj 
1299*38fd1498Szrj   if (preds)
1300*38fd1498Szrj     {
1301*38fd1498Szrj       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
1302*38fd1498Szrj 	{
1303*38fd1498Szrj 	  enum br_predictor predictor = pred->ep_predictor;
1304*38fd1498Szrj 	  int probability = pred->ep_probability;
1305*38fd1498Szrj 
1306*38fd1498Szrj 	  dump_prediction (dump_file, predictor, probability, bb,
1307*38fd1498Szrj 			   (!first_match || best_predictor == predictor)
1308*38fd1498Szrj 			   ? REASON_NONE : REASON_IGNORED, pred->ep_edge);
1309*38fd1498Szrj 	}
1310*38fd1498Szrj     }
1311*38fd1498Szrj   clear_bb_predictions (bb);
1312*38fd1498Szrj 
1313*38fd1498Szrj 
1314*38fd1498Szrj   /* If we have only one successor which is unknown, we can compute missing
1315*38fd1498Szrj      probablity.  */
1316*38fd1498Szrj   if (nunknown == 1)
1317*38fd1498Szrj     {
1318*38fd1498Szrj       profile_probability prob = profile_probability::always ();
1319*38fd1498Szrj       edge missing = NULL;
1320*38fd1498Szrj 
1321*38fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->succs)
1322*38fd1498Szrj 	if (e->probability.initialized_p ())
1323*38fd1498Szrj 	  prob -= e->probability;
1324*38fd1498Szrj 	else if (missing == NULL)
1325*38fd1498Szrj 	  missing = e;
1326*38fd1498Szrj 	else
1327*38fd1498Szrj 	  gcc_unreachable ();
1328*38fd1498Szrj        missing->probability = prob;
1329*38fd1498Szrj     }
1330*38fd1498Szrj   /* If nothing is unknown, we have nothing to update.  */
1331*38fd1498Szrj   else if (!nunknown && nzero != (int)EDGE_COUNT (bb->succs))
1332*38fd1498Szrj     ;
1333*38fd1498Szrj   else if (!dry_run)
1334*38fd1498Szrj     {
1335*38fd1498Szrj       first->probability
1336*38fd1498Szrj 	 = profile_probability::from_reg_br_prob_base (combined_probability);
1337*38fd1498Szrj       second->probability = first->probability.invert ();
1338*38fd1498Szrj     }
1339*38fd1498Szrj }
1340*38fd1498Szrj 
1341*38fd1498Szrj /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1342*38fd1498Szrj    Return the SSA_NAME if the condition satisfies, NULL otherwise.
1343*38fd1498Szrj 
1344*38fd1498Szrj    T1 and T2 should be one of the following cases:
1345*38fd1498Szrj      1. T1 is SSA_NAME, T2 is NULL
1346*38fd1498Szrj      2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1347*38fd1498Szrj      3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1348*38fd1498Szrj 
1349*38fd1498Szrj static tree
strips_small_constant(tree t1,tree t2)1350*38fd1498Szrj strips_small_constant (tree t1, tree t2)
1351*38fd1498Szrj {
1352*38fd1498Szrj   tree ret = NULL;
1353*38fd1498Szrj   int value = 0;
1354*38fd1498Szrj 
1355*38fd1498Szrj   if (!t1)
1356*38fd1498Szrj     return NULL;
1357*38fd1498Szrj   else if (TREE_CODE (t1) == SSA_NAME)
1358*38fd1498Szrj     ret = t1;
1359*38fd1498Szrj   else if (tree_fits_shwi_p (t1))
1360*38fd1498Szrj     value = tree_to_shwi (t1);
1361*38fd1498Szrj   else
1362*38fd1498Szrj     return NULL;
1363*38fd1498Szrj 
1364*38fd1498Szrj   if (!t2)
1365*38fd1498Szrj     return ret;
1366*38fd1498Szrj   else if (tree_fits_shwi_p (t2))
1367*38fd1498Szrj     value = tree_to_shwi (t2);
1368*38fd1498Szrj   else if (TREE_CODE (t2) == SSA_NAME)
1369*38fd1498Szrj     {
1370*38fd1498Szrj       if (ret)
1371*38fd1498Szrj         return NULL;
1372*38fd1498Szrj       else
1373*38fd1498Szrj         ret = t2;
1374*38fd1498Szrj     }
1375*38fd1498Szrj 
1376*38fd1498Szrj   if (value <= 4 && value >= -4)
1377*38fd1498Szrj     return ret;
1378*38fd1498Szrj   else
1379*38fd1498Szrj     return NULL;
1380*38fd1498Szrj }
1381*38fd1498Szrj 
1382*38fd1498Szrj /* Return the SSA_NAME in T or T's operands.
1383*38fd1498Szrj    Return NULL if SSA_NAME cannot be found.  */
1384*38fd1498Szrj 
1385*38fd1498Szrj static tree
get_base_value(tree t)1386*38fd1498Szrj get_base_value (tree t)
1387*38fd1498Szrj {
1388*38fd1498Szrj   if (TREE_CODE (t) == SSA_NAME)
1389*38fd1498Szrj     return t;
1390*38fd1498Szrj 
1391*38fd1498Szrj   if (!BINARY_CLASS_P (t))
1392*38fd1498Szrj     return NULL;
1393*38fd1498Szrj 
1394*38fd1498Szrj   switch (TREE_OPERAND_LENGTH (t))
1395*38fd1498Szrj     {
1396*38fd1498Szrj     case 1:
1397*38fd1498Szrj       return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1398*38fd1498Szrj     case 2:
1399*38fd1498Szrj       return strips_small_constant (TREE_OPERAND (t, 0),
1400*38fd1498Szrj 				    TREE_OPERAND (t, 1));
1401*38fd1498Szrj     default:
1402*38fd1498Szrj       return NULL;
1403*38fd1498Szrj     }
1404*38fd1498Szrj }
1405*38fd1498Szrj 
1406*38fd1498Szrj /* Check the compare STMT in LOOP. If it compares an induction
1407*38fd1498Szrj    variable to a loop invariant, return true, and save
1408*38fd1498Szrj    LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1409*38fd1498Szrj    Otherwise return false and set LOOP_INVAIANT to NULL.  */
1410*38fd1498Szrj 
1411*38fd1498Szrj static bool
is_comparison_with_loop_invariant_p(gcond * stmt,struct loop * loop,tree * loop_invariant,enum tree_code * compare_code,tree * loop_step,tree * loop_iv_base)1412*38fd1498Szrj is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
1413*38fd1498Szrj 				     tree *loop_invariant,
1414*38fd1498Szrj 				     enum tree_code *compare_code,
1415*38fd1498Szrj 				     tree *loop_step,
1416*38fd1498Szrj 				     tree *loop_iv_base)
1417*38fd1498Szrj {
1418*38fd1498Szrj   tree op0, op1, bound, base;
1419*38fd1498Szrj   affine_iv iv0, iv1;
1420*38fd1498Szrj   enum tree_code code;
1421*38fd1498Szrj   tree step;
1422*38fd1498Szrj 
1423*38fd1498Szrj   code = gimple_cond_code (stmt);
1424*38fd1498Szrj   *loop_invariant = NULL;
1425*38fd1498Szrj 
1426*38fd1498Szrj   switch (code)
1427*38fd1498Szrj     {
1428*38fd1498Szrj     case GT_EXPR:
1429*38fd1498Szrj     case GE_EXPR:
1430*38fd1498Szrj     case NE_EXPR:
1431*38fd1498Szrj     case LT_EXPR:
1432*38fd1498Szrj     case LE_EXPR:
1433*38fd1498Szrj     case EQ_EXPR:
1434*38fd1498Szrj       break;
1435*38fd1498Szrj 
1436*38fd1498Szrj     default:
1437*38fd1498Szrj       return false;
1438*38fd1498Szrj     }
1439*38fd1498Szrj 
1440*38fd1498Szrj   op0 = gimple_cond_lhs (stmt);
1441*38fd1498Szrj   op1 = gimple_cond_rhs (stmt);
1442*38fd1498Szrj 
1443*38fd1498Szrj   if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1444*38fd1498Szrj        || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1445*38fd1498Szrj     return false;
1446*38fd1498Szrj   if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1447*38fd1498Szrj     return false;
1448*38fd1498Szrj   if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1449*38fd1498Szrj     return false;
1450*38fd1498Szrj   if (TREE_CODE (iv0.step) != INTEGER_CST
1451*38fd1498Szrj       || TREE_CODE (iv1.step) != INTEGER_CST)
1452*38fd1498Szrj     return false;
1453*38fd1498Szrj   if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1454*38fd1498Szrj       || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1455*38fd1498Szrj     return false;
1456*38fd1498Szrj 
1457*38fd1498Szrj   if (integer_zerop (iv0.step))
1458*38fd1498Szrj     {
1459*38fd1498Szrj       if (code != NE_EXPR && code != EQ_EXPR)
1460*38fd1498Szrj 	code = invert_tree_comparison (code, false);
1461*38fd1498Szrj       bound = iv0.base;
1462*38fd1498Szrj       base = iv1.base;
1463*38fd1498Szrj       if (tree_fits_shwi_p (iv1.step))
1464*38fd1498Szrj 	step = iv1.step;
1465*38fd1498Szrj       else
1466*38fd1498Szrj 	return false;
1467*38fd1498Szrj     }
1468*38fd1498Szrj   else
1469*38fd1498Szrj     {
1470*38fd1498Szrj       bound = iv1.base;
1471*38fd1498Szrj       base = iv0.base;
1472*38fd1498Szrj       if (tree_fits_shwi_p (iv0.step))
1473*38fd1498Szrj 	step = iv0.step;
1474*38fd1498Szrj       else
1475*38fd1498Szrj 	return false;
1476*38fd1498Szrj     }
1477*38fd1498Szrj 
1478*38fd1498Szrj   if (TREE_CODE (bound) != INTEGER_CST)
1479*38fd1498Szrj     bound = get_base_value (bound);
1480*38fd1498Szrj   if (!bound)
1481*38fd1498Szrj     return false;
1482*38fd1498Szrj   if (TREE_CODE (base) != INTEGER_CST)
1483*38fd1498Szrj     base = get_base_value (base);
1484*38fd1498Szrj   if (!base)
1485*38fd1498Szrj     return false;
1486*38fd1498Szrj 
1487*38fd1498Szrj   *loop_invariant = bound;
1488*38fd1498Szrj   *compare_code = code;
1489*38fd1498Szrj   *loop_step = step;
1490*38fd1498Szrj   *loop_iv_base = base;
1491*38fd1498Szrj   return true;
1492*38fd1498Szrj }
1493*38fd1498Szrj 
1494*38fd1498Szrj /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1495*38fd1498Szrj 
1496*38fd1498Szrj static bool
expr_coherent_p(tree t1,tree t2)1497*38fd1498Szrj expr_coherent_p (tree t1, tree t2)
1498*38fd1498Szrj {
1499*38fd1498Szrj   gimple *stmt;
1500*38fd1498Szrj   tree ssa_name_1 = NULL;
1501*38fd1498Szrj   tree ssa_name_2 = NULL;
1502*38fd1498Szrj 
1503*38fd1498Szrj   gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1504*38fd1498Szrj   gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1505*38fd1498Szrj 
1506*38fd1498Szrj   if (t1 == t2)
1507*38fd1498Szrj     return true;
1508*38fd1498Szrj 
1509*38fd1498Szrj   if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1510*38fd1498Szrj     return true;
1511*38fd1498Szrj   if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1512*38fd1498Szrj     return false;
1513*38fd1498Szrj 
1514*38fd1498Szrj   /* Check to see if t1 is expressed/defined with t2.  */
1515*38fd1498Szrj   stmt = SSA_NAME_DEF_STMT (t1);
1516*38fd1498Szrj   gcc_assert (stmt != NULL);
1517*38fd1498Szrj   if (is_gimple_assign (stmt))
1518*38fd1498Szrj     {
1519*38fd1498Szrj       ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1520*38fd1498Szrj       if (ssa_name_1 && ssa_name_1 == t2)
1521*38fd1498Szrj 	return true;
1522*38fd1498Szrj     }
1523*38fd1498Szrj 
1524*38fd1498Szrj   /* Check to see if t2 is expressed/defined with t1.  */
1525*38fd1498Szrj   stmt = SSA_NAME_DEF_STMT (t2);
1526*38fd1498Szrj   gcc_assert (stmt != NULL);
1527*38fd1498Szrj   if (is_gimple_assign (stmt))
1528*38fd1498Szrj     {
1529*38fd1498Szrj       ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1530*38fd1498Szrj       if (ssa_name_2 && ssa_name_2 == t1)
1531*38fd1498Szrj 	return true;
1532*38fd1498Szrj     }
1533*38fd1498Szrj 
1534*38fd1498Szrj   /* Compare if t1 and t2's def_stmts are identical.  */
1535*38fd1498Szrj   if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1536*38fd1498Szrj     return true;
1537*38fd1498Szrj   else
1538*38fd1498Szrj     return false;
1539*38fd1498Szrj }
1540*38fd1498Szrj 
1541*38fd1498Szrj /* Return true if E is predicted by one of loop heuristics.  */
1542*38fd1498Szrj 
1543*38fd1498Szrj static bool
predicted_by_loop_heuristics_p(basic_block bb)1544*38fd1498Szrj predicted_by_loop_heuristics_p (basic_block bb)
1545*38fd1498Szrj {
1546*38fd1498Szrj   struct edge_prediction *i;
1547*38fd1498Szrj   edge_prediction **preds = bb_predictions->get (bb);
1548*38fd1498Szrj 
1549*38fd1498Szrj   if (!preds)
1550*38fd1498Szrj     return false;
1551*38fd1498Szrj 
1552*38fd1498Szrj   for (i = *preds; i; i = i->ep_next)
1553*38fd1498Szrj     if (i->ep_predictor == PRED_LOOP_ITERATIONS_GUESSED
1554*38fd1498Szrj 	|| i->ep_predictor == PRED_LOOP_ITERATIONS_MAX
1555*38fd1498Szrj 	|| i->ep_predictor == PRED_LOOP_ITERATIONS
1556*38fd1498Szrj 	|| i->ep_predictor == PRED_LOOP_EXIT
1557*38fd1498Szrj 	|| i->ep_predictor == PRED_LOOP_EXIT_WITH_RECURSION
1558*38fd1498Szrj 	|| i->ep_predictor == PRED_LOOP_EXTRA_EXIT)
1559*38fd1498Szrj       return true;
1560*38fd1498Szrj   return false;
1561*38fd1498Szrj }
1562*38fd1498Szrj 
1563*38fd1498Szrj /* Predict branch probability of BB when BB contains a branch that compares
1564*38fd1498Szrj    an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1565*38fd1498Szrj    loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1566*38fd1498Szrj 
1567*38fd1498Szrj    E.g.
1568*38fd1498Szrj      for (int i = 0; i < bound; i++) {
1569*38fd1498Szrj        if (i < bound - 2)
1570*38fd1498Szrj 	 computation_1();
1571*38fd1498Szrj        else
1572*38fd1498Szrj 	 computation_2();
1573*38fd1498Szrj      }
1574*38fd1498Szrj 
1575*38fd1498Szrj   In this loop, we will predict the branch inside the loop to be taken.  */
1576*38fd1498Szrj 
1577*38fd1498Szrj static void
predict_iv_comparison(struct loop * loop,basic_block bb,tree loop_bound_var,tree loop_iv_base_var,enum tree_code loop_bound_code,int loop_bound_step)1578*38fd1498Szrj predict_iv_comparison (struct loop *loop, basic_block bb,
1579*38fd1498Szrj 		       tree loop_bound_var,
1580*38fd1498Szrj 		       tree loop_iv_base_var,
1581*38fd1498Szrj 		       enum tree_code loop_bound_code,
1582*38fd1498Szrj 		       int loop_bound_step)
1583*38fd1498Szrj {
1584*38fd1498Szrj   gimple *stmt;
1585*38fd1498Szrj   tree compare_var, compare_base;
1586*38fd1498Szrj   enum tree_code compare_code;
1587*38fd1498Szrj   tree compare_step_var;
1588*38fd1498Szrj   edge then_edge;
1589*38fd1498Szrj   edge_iterator ei;
1590*38fd1498Szrj 
1591*38fd1498Szrj   if (predicted_by_loop_heuristics_p (bb))
1592*38fd1498Szrj     return;
1593*38fd1498Szrj 
1594*38fd1498Szrj   stmt = last_stmt (bb);
1595*38fd1498Szrj   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1596*38fd1498Szrj     return;
1597*38fd1498Szrj   if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
1598*38fd1498Szrj 					    loop, &compare_var,
1599*38fd1498Szrj 					    &compare_code,
1600*38fd1498Szrj 					    &compare_step_var,
1601*38fd1498Szrj 					    &compare_base))
1602*38fd1498Szrj     return;
1603*38fd1498Szrj 
1604*38fd1498Szrj   /* Find the taken edge.  */
1605*38fd1498Szrj   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1606*38fd1498Szrj     if (then_edge->flags & EDGE_TRUE_VALUE)
1607*38fd1498Szrj       break;
1608*38fd1498Szrj 
1609*38fd1498Szrj   /* When comparing an IV to a loop invariant, NE is more likely to be
1610*38fd1498Szrj      taken while EQ is more likely to be not-taken.  */
1611*38fd1498Szrj   if (compare_code == NE_EXPR)
1612*38fd1498Szrj     {
1613*38fd1498Szrj       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1614*38fd1498Szrj       return;
1615*38fd1498Szrj     }
1616*38fd1498Szrj   else if (compare_code == EQ_EXPR)
1617*38fd1498Szrj     {
1618*38fd1498Szrj       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1619*38fd1498Szrj       return;
1620*38fd1498Szrj     }
1621*38fd1498Szrj 
1622*38fd1498Szrj   if (!expr_coherent_p (loop_iv_base_var, compare_base))
1623*38fd1498Szrj     return;
1624*38fd1498Szrj 
1625*38fd1498Szrj   /* If loop bound, base and compare bound are all constants, we can
1626*38fd1498Szrj      calculate the probability directly.  */
1627*38fd1498Szrj   if (tree_fits_shwi_p (loop_bound_var)
1628*38fd1498Szrj       && tree_fits_shwi_p (compare_var)
1629*38fd1498Szrj       && tree_fits_shwi_p (compare_base))
1630*38fd1498Szrj     {
1631*38fd1498Szrj       int probability;
1632*38fd1498Szrj       bool overflow, overall_overflow = false;
1633*38fd1498Szrj       widest_int compare_count, tem;
1634*38fd1498Szrj 
1635*38fd1498Szrj       /* (loop_bound - base) / compare_step */
1636*38fd1498Szrj       tem = wi::sub (wi::to_widest (loop_bound_var),
1637*38fd1498Szrj 		     wi::to_widest (compare_base), SIGNED, &overflow);
1638*38fd1498Szrj       overall_overflow |= overflow;
1639*38fd1498Szrj       widest_int loop_count = wi::div_trunc (tem,
1640*38fd1498Szrj 					     wi::to_widest (compare_step_var),
1641*38fd1498Szrj 					     SIGNED, &overflow);
1642*38fd1498Szrj       overall_overflow |= overflow;
1643*38fd1498Szrj 
1644*38fd1498Szrj       if (!wi::neg_p (wi::to_widest (compare_step_var))
1645*38fd1498Szrj           ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1646*38fd1498Szrj 	{
1647*38fd1498Szrj 	  /* (loop_bound - compare_bound) / compare_step */
1648*38fd1498Szrj 	  tem = wi::sub (wi::to_widest (loop_bound_var),
1649*38fd1498Szrj 			 wi::to_widest (compare_var), SIGNED, &overflow);
1650*38fd1498Szrj 	  overall_overflow |= overflow;
1651*38fd1498Szrj 	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1652*38fd1498Szrj 					 SIGNED, &overflow);
1653*38fd1498Szrj 	  overall_overflow |= overflow;
1654*38fd1498Szrj 	}
1655*38fd1498Szrj       else
1656*38fd1498Szrj         {
1657*38fd1498Szrj 	  /* (compare_bound - base) / compare_step */
1658*38fd1498Szrj 	  tem = wi::sub (wi::to_widest (compare_var),
1659*38fd1498Szrj 			 wi::to_widest (compare_base), SIGNED, &overflow);
1660*38fd1498Szrj 	  overall_overflow |= overflow;
1661*38fd1498Szrj           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1662*38fd1498Szrj 					 SIGNED, &overflow);
1663*38fd1498Szrj 	  overall_overflow |= overflow;
1664*38fd1498Szrj 	}
1665*38fd1498Szrj       if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1666*38fd1498Szrj 	++compare_count;
1667*38fd1498Szrj       if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1668*38fd1498Szrj 	++loop_count;
1669*38fd1498Szrj       if (wi::neg_p (compare_count))
1670*38fd1498Szrj         compare_count = 0;
1671*38fd1498Szrj       if (wi::neg_p (loop_count))
1672*38fd1498Szrj         loop_count = 0;
1673*38fd1498Szrj       if (loop_count == 0)
1674*38fd1498Szrj 	probability = 0;
1675*38fd1498Szrj       else if (wi::cmps (compare_count, loop_count) == 1)
1676*38fd1498Szrj 	probability = REG_BR_PROB_BASE;
1677*38fd1498Szrj       else
1678*38fd1498Szrj         {
1679*38fd1498Szrj 	  tem = compare_count * REG_BR_PROB_BASE;
1680*38fd1498Szrj 	  tem = wi::udiv_trunc (tem, loop_count);
1681*38fd1498Szrj 	  probability = tem.to_uhwi ();
1682*38fd1498Szrj 	}
1683*38fd1498Szrj 
1684*38fd1498Szrj       /* FIXME: The branch prediction seems broken. It has only 20% hitrate.  */
1685*38fd1498Szrj       if (!overall_overflow)
1686*38fd1498Szrj         predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1687*38fd1498Szrj 
1688*38fd1498Szrj       return;
1689*38fd1498Szrj     }
1690*38fd1498Szrj 
1691*38fd1498Szrj   if (expr_coherent_p (loop_bound_var, compare_var))
1692*38fd1498Szrj     {
1693*38fd1498Szrj       if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1694*38fd1498Szrj 	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1695*38fd1498Szrj 	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1696*38fd1498Szrj       else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1697*38fd1498Szrj 	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1698*38fd1498Szrj 	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1699*38fd1498Szrj       else if (loop_bound_code == NE_EXPR)
1700*38fd1498Szrj 	{
1701*38fd1498Szrj 	  /* If the loop backedge condition is "(i != bound)", we do
1702*38fd1498Szrj 	     the comparison based on the step of IV:
1703*38fd1498Szrj 	     * step < 0 : backedge condition is like (i > bound)
1704*38fd1498Szrj 	     * step > 0 : backedge condition is like (i < bound)  */
1705*38fd1498Szrj 	  gcc_assert (loop_bound_step != 0);
1706*38fd1498Szrj 	  if (loop_bound_step > 0
1707*38fd1498Szrj 	      && (compare_code == LT_EXPR
1708*38fd1498Szrj 		  || compare_code == LE_EXPR))
1709*38fd1498Szrj 	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1710*38fd1498Szrj 	  else if (loop_bound_step < 0
1711*38fd1498Szrj 		   && (compare_code == GT_EXPR
1712*38fd1498Szrj 		       || compare_code == GE_EXPR))
1713*38fd1498Szrj 	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1714*38fd1498Szrj 	  else
1715*38fd1498Szrj 	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1716*38fd1498Szrj 	}
1717*38fd1498Szrj       else
1718*38fd1498Szrj 	/* The branch is predicted not-taken if loop_bound_code is
1719*38fd1498Szrj 	   opposite with compare_code.  */
1720*38fd1498Szrj 	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1721*38fd1498Szrj     }
1722*38fd1498Szrj   else if (expr_coherent_p (loop_iv_base_var, compare_var))
1723*38fd1498Szrj     {
1724*38fd1498Szrj       /* For cases like:
1725*38fd1498Szrj 	   for (i = s; i < h; i++)
1726*38fd1498Szrj 	     if (i > s + 2) ....
1727*38fd1498Szrj 	 The branch should be predicted taken.  */
1728*38fd1498Szrj       if (loop_bound_step > 0
1729*38fd1498Szrj 	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1730*38fd1498Szrj 	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1731*38fd1498Szrj       else if (loop_bound_step < 0
1732*38fd1498Szrj 	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1733*38fd1498Szrj 	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1734*38fd1498Szrj       else
1735*38fd1498Szrj 	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1736*38fd1498Szrj     }
1737*38fd1498Szrj }
1738*38fd1498Szrj 
1739*38fd1498Szrj /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1740*38fd1498Szrj    exits are resulted from short-circuit conditions that will generate an
1741*38fd1498Szrj    if_tmp. E.g.:
1742*38fd1498Szrj 
1743*38fd1498Szrj    if (foo() || global > 10)
1744*38fd1498Szrj      break;
1745*38fd1498Szrj 
1746*38fd1498Szrj    This will be translated into:
1747*38fd1498Szrj 
1748*38fd1498Szrj    BB3:
1749*38fd1498Szrj      loop header...
1750*38fd1498Szrj    BB4:
1751*38fd1498Szrj      if foo() goto BB6 else goto BB5
1752*38fd1498Szrj    BB5:
1753*38fd1498Szrj      if global > 10 goto BB6 else goto BB7
1754*38fd1498Szrj    BB6:
1755*38fd1498Szrj      goto BB7
1756*38fd1498Szrj    BB7:
1757*38fd1498Szrj      iftmp = (PHI 0(BB5), 1(BB6))
1758*38fd1498Szrj      if iftmp == 1 goto BB8 else goto BB3
1759*38fd1498Szrj    BB8:
1760*38fd1498Szrj      outside of the loop...
1761*38fd1498Szrj 
1762*38fd1498Szrj    The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1763*38fd1498Szrj    From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1764*38fd1498Szrj    exits. This function takes BB7->BB8 as input, and finds out the extra loop
1765*38fd1498Szrj    exits to predict them using PRED_LOOP_EXTRA_EXIT.  */
1766*38fd1498Szrj 
1767*38fd1498Szrj static void
predict_extra_loop_exits(edge exit_edge)1768*38fd1498Szrj predict_extra_loop_exits (edge exit_edge)
1769*38fd1498Szrj {
1770*38fd1498Szrj   unsigned i;
1771*38fd1498Szrj   bool check_value_one;
1772*38fd1498Szrj   gimple *lhs_def_stmt;
1773*38fd1498Szrj   gphi *phi_stmt;
1774*38fd1498Szrj   tree cmp_rhs, cmp_lhs;
1775*38fd1498Szrj   gimple *last;
1776*38fd1498Szrj   gcond *cmp_stmt;
1777*38fd1498Szrj 
1778*38fd1498Szrj   last = last_stmt (exit_edge->src);
1779*38fd1498Szrj   if (!last)
1780*38fd1498Szrj     return;
1781*38fd1498Szrj   cmp_stmt = dyn_cast <gcond *> (last);
1782*38fd1498Szrj   if (!cmp_stmt)
1783*38fd1498Szrj     return;
1784*38fd1498Szrj 
1785*38fd1498Szrj   cmp_rhs = gimple_cond_rhs (cmp_stmt);
1786*38fd1498Szrj   cmp_lhs = gimple_cond_lhs (cmp_stmt);
1787*38fd1498Szrj   if (!TREE_CONSTANT (cmp_rhs)
1788*38fd1498Szrj       || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1789*38fd1498Szrj     return;
1790*38fd1498Szrj   if (TREE_CODE (cmp_lhs) != SSA_NAME)
1791*38fd1498Szrj     return;
1792*38fd1498Szrj 
1793*38fd1498Szrj   /* If check_value_one is true, only the phi_args with value '1' will lead
1794*38fd1498Szrj      to loop exit. Otherwise, only the phi_args with value '0' will lead to
1795*38fd1498Szrj      loop exit.  */
1796*38fd1498Szrj   check_value_one = (((integer_onep (cmp_rhs))
1797*38fd1498Szrj 		    ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1798*38fd1498Szrj 		    ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1799*38fd1498Szrj 
1800*38fd1498Szrj   lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1801*38fd1498Szrj   if (!lhs_def_stmt)
1802*38fd1498Szrj     return;
1803*38fd1498Szrj 
1804*38fd1498Szrj   phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1805*38fd1498Szrj   if (!phi_stmt)
1806*38fd1498Szrj     return;
1807*38fd1498Szrj 
1808*38fd1498Szrj   for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1809*38fd1498Szrj     {
1810*38fd1498Szrj       edge e1;
1811*38fd1498Szrj       edge_iterator ei;
1812*38fd1498Szrj       tree val = gimple_phi_arg_def (phi_stmt, i);
1813*38fd1498Szrj       edge e = gimple_phi_arg_edge (phi_stmt, i);
1814*38fd1498Szrj 
1815*38fd1498Szrj       if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1816*38fd1498Szrj 	continue;
1817*38fd1498Szrj       if ((check_value_one ^ integer_onep (val)) == 1)
1818*38fd1498Szrj 	continue;
1819*38fd1498Szrj       if (EDGE_COUNT (e->src->succs) != 1)
1820*38fd1498Szrj 	{
1821*38fd1498Szrj 	  predict_paths_leading_to_edge (e, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
1822*38fd1498Szrj 	  continue;
1823*38fd1498Szrj 	}
1824*38fd1498Szrj 
1825*38fd1498Szrj       FOR_EACH_EDGE (e1, ei, e->src->preds)
1826*38fd1498Szrj 	predict_paths_leading_to_edge (e1, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
1827*38fd1498Szrj     }
1828*38fd1498Szrj }
1829*38fd1498Szrj 
1830*38fd1498Szrj 
1831*38fd1498Szrj /* Predict edge probabilities by exploiting loop structure.  */
1832*38fd1498Szrj 
1833*38fd1498Szrj static void
predict_loops(void)1834*38fd1498Szrj predict_loops (void)
1835*38fd1498Szrj {
1836*38fd1498Szrj   struct loop *loop;
1837*38fd1498Szrj   basic_block bb;
1838*38fd1498Szrj   hash_set <struct loop *> with_recursion(10);
1839*38fd1498Szrj 
1840*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
1841*38fd1498Szrj     {
1842*38fd1498Szrj       gimple_stmt_iterator gsi;
1843*38fd1498Szrj       tree decl;
1844*38fd1498Szrj 
1845*38fd1498Szrj       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1846*38fd1498Szrj 	if (is_gimple_call (gsi_stmt (gsi))
1847*38fd1498Szrj 	    && (decl = gimple_call_fndecl (gsi_stmt (gsi))) != NULL
1848*38fd1498Szrj 	    && recursive_call_p (current_function_decl, decl))
1849*38fd1498Szrj 	  {
1850*38fd1498Szrj 	    loop = bb->loop_father;
1851*38fd1498Szrj 	    while (loop && !with_recursion.add (loop))
1852*38fd1498Szrj 	      loop = loop_outer (loop);
1853*38fd1498Szrj 	  }
1854*38fd1498Szrj     }
1855*38fd1498Szrj 
1856*38fd1498Szrj   /* Try to predict out blocks in a loop that are not part of a
1857*38fd1498Szrj      natural loop.  */
1858*38fd1498Szrj   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1859*38fd1498Szrj     {
1860*38fd1498Szrj       basic_block bb, *bbs;
1861*38fd1498Szrj       unsigned j, n_exits = 0;
1862*38fd1498Szrj       vec<edge> exits;
1863*38fd1498Szrj       struct tree_niter_desc niter_desc;
1864*38fd1498Szrj       edge ex;
1865*38fd1498Szrj       struct nb_iter_bound *nb_iter;
1866*38fd1498Szrj       enum tree_code loop_bound_code = ERROR_MARK;
1867*38fd1498Szrj       tree loop_bound_step = NULL;
1868*38fd1498Szrj       tree loop_bound_var = NULL;
1869*38fd1498Szrj       tree loop_iv_base = NULL;
1870*38fd1498Szrj       gcond *stmt = NULL;
1871*38fd1498Szrj       bool recursion = with_recursion.contains (loop);
1872*38fd1498Szrj 
1873*38fd1498Szrj       exits = get_loop_exit_edges (loop);
1874*38fd1498Szrj       FOR_EACH_VEC_ELT (exits, j, ex)
1875*38fd1498Szrj 	if (!unlikely_executed_edge_p (ex) && !(ex->flags & EDGE_ABNORMAL_CALL))
1876*38fd1498Szrj 	  n_exits ++;
1877*38fd1498Szrj       if (!n_exits)
1878*38fd1498Szrj 	{
1879*38fd1498Szrj           exits.release ();
1880*38fd1498Szrj 	  continue;
1881*38fd1498Szrj 	}
1882*38fd1498Szrj 
1883*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
1884*38fd1498Szrj 	fprintf (dump_file, "Predicting loop %i%s with %i exits.\n",
1885*38fd1498Szrj 		 loop->num, recursion ? " (with recursion)":"", n_exits);
1886*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS)
1887*38fd1498Szrj 	  && max_loop_iterations_int (loop) >= 0)
1888*38fd1498Szrj 	{
1889*38fd1498Szrj 	  fprintf (dump_file,
1890*38fd1498Szrj 		   "Loop %d iterates at most %i times.\n", loop->num,
1891*38fd1498Szrj 		   (int)max_loop_iterations_int (loop));
1892*38fd1498Szrj 	}
1893*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS)
1894*38fd1498Szrj 	  && likely_max_loop_iterations_int (loop) >= 0)
1895*38fd1498Szrj 	{
1896*38fd1498Szrj 	  fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1897*38fd1498Szrj 		   loop->num, (int)likely_max_loop_iterations_int (loop));
1898*38fd1498Szrj 	}
1899*38fd1498Szrj 
1900*38fd1498Szrj       FOR_EACH_VEC_ELT (exits, j, ex)
1901*38fd1498Szrj 	{
1902*38fd1498Szrj 	  tree niter = NULL;
1903*38fd1498Szrj 	  HOST_WIDE_INT nitercst;
1904*38fd1498Szrj 	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1905*38fd1498Szrj 	  int probability;
1906*38fd1498Szrj 	  enum br_predictor predictor;
1907*38fd1498Szrj 	  widest_int nit;
1908*38fd1498Szrj 
1909*38fd1498Szrj 	  if (unlikely_executed_edge_p (ex)
1910*38fd1498Szrj 	      || (ex->flags & EDGE_ABNORMAL_CALL))
1911*38fd1498Szrj 	    continue;
1912*38fd1498Szrj 	  /* Loop heuristics do not expect exit conditional to be inside
1913*38fd1498Szrj 	     inner loop.  We predict from innermost to outermost loop.  */
1914*38fd1498Szrj 	  if (predicted_by_loop_heuristics_p (ex->src))
1915*38fd1498Szrj 	    {
1916*38fd1498Szrj 	      if (dump_file && (dump_flags & TDF_DETAILS))
1917*38fd1498Szrj 		fprintf (dump_file, "Skipping exit %i->%i because "
1918*38fd1498Szrj 			 "it is already predicted.\n",
1919*38fd1498Szrj 			 ex->src->index, ex->dest->index);
1920*38fd1498Szrj 	      continue;
1921*38fd1498Szrj 	    }
1922*38fd1498Szrj 	  predict_extra_loop_exits (ex);
1923*38fd1498Szrj 
1924*38fd1498Szrj 	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1925*38fd1498Szrj 	    niter = niter_desc.niter;
1926*38fd1498Szrj 	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1927*38fd1498Szrj 	    niter = loop_niter_by_eval (loop, ex);
1928*38fd1498Szrj 	  if (dump_file && (dump_flags & TDF_DETAILS)
1929*38fd1498Szrj 	      && TREE_CODE (niter) == INTEGER_CST)
1930*38fd1498Szrj 	    {
1931*38fd1498Szrj 	      fprintf (dump_file, "Exit %i->%i %d iterates ",
1932*38fd1498Szrj 		       ex->src->index, ex->dest->index,
1933*38fd1498Szrj 		       loop->num);
1934*38fd1498Szrj 	      print_generic_expr (dump_file, niter, TDF_SLIM);
1935*38fd1498Szrj 	      fprintf (dump_file, " times.\n");
1936*38fd1498Szrj 	    }
1937*38fd1498Szrj 
1938*38fd1498Szrj 	  if (TREE_CODE (niter) == INTEGER_CST)
1939*38fd1498Szrj 	    {
1940*38fd1498Szrj 	      if (tree_fits_uhwi_p (niter)
1941*38fd1498Szrj 		  && max
1942*38fd1498Szrj 		  && compare_tree_int (niter, max - 1) == -1)
1943*38fd1498Szrj 		nitercst = tree_to_uhwi (niter) + 1;
1944*38fd1498Szrj 	      else
1945*38fd1498Szrj 		nitercst = max;
1946*38fd1498Szrj 	      predictor = PRED_LOOP_ITERATIONS;
1947*38fd1498Szrj 	    }
1948*38fd1498Szrj 	  /* If we have just one exit and we can derive some information about
1949*38fd1498Szrj 	     the number of iterations of the loop from the statements inside
1950*38fd1498Szrj 	     the loop, use it to predict this exit.  */
1951*38fd1498Szrj 	  else if (n_exits == 1
1952*38fd1498Szrj 		   && estimated_stmt_executions (loop, &nit))
1953*38fd1498Szrj 	    {
1954*38fd1498Szrj 	      if (wi::gtu_p (nit, max))
1955*38fd1498Szrj 		nitercst = max;
1956*38fd1498Szrj 	      else
1957*38fd1498Szrj 		nitercst = nit.to_shwi ();
1958*38fd1498Szrj 	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
1959*38fd1498Szrj 	    }
1960*38fd1498Szrj 	  /* If we have likely upper bound, trust it for very small iteration
1961*38fd1498Szrj 	     counts.  Such loops would otherwise get mispredicted by standard
1962*38fd1498Szrj 	     LOOP_EXIT heuristics.  */
1963*38fd1498Szrj 	  else if (n_exits == 1
1964*38fd1498Szrj 		   && likely_max_stmt_executions (loop, &nit)
1965*38fd1498Szrj 		   && wi::ltu_p (nit,
1966*38fd1498Szrj 				 RDIV (REG_BR_PROB_BASE,
1967*38fd1498Szrj 				       REG_BR_PROB_BASE
1968*38fd1498Szrj 					 - predictor_info
1969*38fd1498Szrj 						 [recursion
1970*38fd1498Szrj 						  ? PRED_LOOP_EXIT_WITH_RECURSION
1971*38fd1498Szrj 						  : PRED_LOOP_EXIT].hitrate)))
1972*38fd1498Szrj 	    {
1973*38fd1498Szrj 	      nitercst = nit.to_shwi ();
1974*38fd1498Szrj 	      predictor = PRED_LOOP_ITERATIONS_MAX;
1975*38fd1498Szrj 	    }
1976*38fd1498Szrj 	  else
1977*38fd1498Szrj 	    {
1978*38fd1498Szrj 	      if (dump_file && (dump_flags & TDF_DETAILS))
1979*38fd1498Szrj 		fprintf (dump_file, "Nothing known about exit %i->%i.\n",
1980*38fd1498Szrj 			 ex->src->index, ex->dest->index);
1981*38fd1498Szrj 	      continue;
1982*38fd1498Szrj 	    }
1983*38fd1498Szrj 
1984*38fd1498Szrj 	  if (dump_file && (dump_flags & TDF_DETAILS))
1985*38fd1498Szrj 	    fprintf (dump_file, "Recording prediction to %i iterations by %s.\n",
1986*38fd1498Szrj 		     (int)nitercst, predictor_info[predictor].name);
1987*38fd1498Szrj 	  /* If the prediction for number of iterations is zero, do not
1988*38fd1498Szrj 	     predict the exit edges.  */
1989*38fd1498Szrj 	  if (nitercst == 0)
1990*38fd1498Szrj 	    continue;
1991*38fd1498Szrj 
1992*38fd1498Szrj 	  probability = RDIV (REG_BR_PROB_BASE, nitercst);
1993*38fd1498Szrj 	  predict_edge (ex, predictor, probability);
1994*38fd1498Szrj 	}
1995*38fd1498Szrj       exits.release ();
1996*38fd1498Szrj 
1997*38fd1498Szrj       /* Find information about loop bound variables.  */
1998*38fd1498Szrj       for (nb_iter = loop->bounds; nb_iter;
1999*38fd1498Szrj 	   nb_iter = nb_iter->next)
2000*38fd1498Szrj 	if (nb_iter->stmt
2001*38fd1498Szrj 	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
2002*38fd1498Szrj 	  {
2003*38fd1498Szrj 	    stmt = as_a <gcond *> (nb_iter->stmt);
2004*38fd1498Szrj 	    break;
2005*38fd1498Szrj 	  }
2006*38fd1498Szrj       if (!stmt && last_stmt (loop->header)
2007*38fd1498Szrj 	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
2008*38fd1498Szrj 	stmt = as_a <gcond *> (last_stmt (loop->header));
2009*38fd1498Szrj       if (stmt)
2010*38fd1498Szrj 	is_comparison_with_loop_invariant_p (stmt, loop,
2011*38fd1498Szrj 					     &loop_bound_var,
2012*38fd1498Szrj 					     &loop_bound_code,
2013*38fd1498Szrj 					     &loop_bound_step,
2014*38fd1498Szrj 					     &loop_iv_base);
2015*38fd1498Szrj 
2016*38fd1498Szrj       bbs = get_loop_body (loop);
2017*38fd1498Szrj 
2018*38fd1498Szrj       for (j = 0; j < loop->num_nodes; j++)
2019*38fd1498Szrj 	{
2020*38fd1498Szrj 	  edge e;
2021*38fd1498Szrj 	  edge_iterator ei;
2022*38fd1498Szrj 
2023*38fd1498Szrj 	  bb = bbs[j];
2024*38fd1498Szrj 
2025*38fd1498Szrj 	  /* Bypass loop heuristics on continue statement.  These
2026*38fd1498Szrj 	     statements construct loops via "non-loop" constructs
2027*38fd1498Szrj 	     in the source language and are better to be handled
2028*38fd1498Szrj 	     separately.  */
2029*38fd1498Szrj 	  if (predicted_by_p (bb, PRED_CONTINUE))
2030*38fd1498Szrj 	    {
2031*38fd1498Szrj 	      if (dump_file && (dump_flags & TDF_DETAILS))
2032*38fd1498Szrj 		fprintf (dump_file, "BB %i predicted by continue.\n",
2033*38fd1498Szrj 			 bb->index);
2034*38fd1498Szrj 	      continue;
2035*38fd1498Szrj 	    }
2036*38fd1498Szrj 
2037*38fd1498Szrj 	  /* If we already used more reliable loop exit predictors, do not
2038*38fd1498Szrj 	     bother with PRED_LOOP_EXIT.  */
2039*38fd1498Szrj 	  if (!predicted_by_loop_heuristics_p (bb))
2040*38fd1498Szrj 	    {
2041*38fd1498Szrj 	      /* For loop with many exits we don't want to predict all exits
2042*38fd1498Szrj 	         with the pretty large probability, because if all exits are
2043*38fd1498Szrj 		 considered in row, the loop would be predicted to iterate
2044*38fd1498Szrj 		 almost never.  The code to divide probability by number of
2045*38fd1498Szrj 		 exits is very rough.  It should compute the number of exits
2046*38fd1498Szrj 		 taken in each patch through function (not the overall number
2047*38fd1498Szrj 		 of exits that might be a lot higher for loops with wide switch
2048*38fd1498Szrj 		 statements in them) and compute n-th square root.
2049*38fd1498Szrj 
2050*38fd1498Szrj 		 We limit the minimal probability by 2% to avoid
2051*38fd1498Szrj 		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
2052*38fd1498Szrj 		 as this was causing regression in perl benchmark containing such
2053*38fd1498Szrj 		 a wide loop.  */
2054*38fd1498Szrj 
2055*38fd1498Szrj 	      int probability = ((REG_BR_PROB_BASE
2056*38fd1498Szrj 		                  - predictor_info
2057*38fd1498Szrj 				     [recursion
2058*38fd1498Szrj 				      ? PRED_LOOP_EXIT_WITH_RECURSION
2059*38fd1498Szrj 				      : PRED_LOOP_EXIT].hitrate)
2060*38fd1498Szrj 				 / n_exits);
2061*38fd1498Szrj 	      if (probability < HITRATE (2))
2062*38fd1498Szrj 		probability = HITRATE (2);
2063*38fd1498Szrj 	      FOR_EACH_EDGE (e, ei, bb->succs)
2064*38fd1498Szrj 		if (e->dest->index < NUM_FIXED_BLOCKS
2065*38fd1498Szrj 		    || !flow_bb_inside_loop_p (loop, e->dest))
2066*38fd1498Szrj 		  {
2067*38fd1498Szrj 		    if (dump_file && (dump_flags & TDF_DETAILS))
2068*38fd1498Szrj 		      fprintf (dump_file,
2069*38fd1498Szrj 			       "Predicting exit %i->%i with prob %i.\n",
2070*38fd1498Szrj 			       e->src->index, e->dest->index, probability);
2071*38fd1498Szrj 		    predict_edge (e,
2072*38fd1498Szrj 				  recursion ? PRED_LOOP_EXIT_WITH_RECURSION
2073*38fd1498Szrj 			          : PRED_LOOP_EXIT, probability);
2074*38fd1498Szrj 		  }
2075*38fd1498Szrj 	    }
2076*38fd1498Szrj 	  if (loop_bound_var)
2077*38fd1498Szrj 	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
2078*38fd1498Szrj 				   loop_bound_code,
2079*38fd1498Szrj 				   tree_to_shwi (loop_bound_step));
2080*38fd1498Szrj 	}
2081*38fd1498Szrj 
2082*38fd1498Szrj       /* In the following code
2083*38fd1498Szrj 	 for (loop1)
2084*38fd1498Szrj 	   if (cond)
2085*38fd1498Szrj 	     for (loop2)
2086*38fd1498Szrj 	       body;
2087*38fd1498Szrj 	 guess that cond is unlikely.  */
2088*38fd1498Szrj       if (loop_outer (loop)->num)
2089*38fd1498Szrj 	{
2090*38fd1498Szrj 	  basic_block bb = NULL;
2091*38fd1498Szrj 	  edge preheader_edge = loop_preheader_edge (loop);
2092*38fd1498Szrj 
2093*38fd1498Szrj 	  if (single_pred_p (preheader_edge->src)
2094*38fd1498Szrj 	      && single_succ_p (preheader_edge->src))
2095*38fd1498Szrj 	    preheader_edge = single_pred_edge (preheader_edge->src);
2096*38fd1498Szrj 
2097*38fd1498Szrj 	  gimple *stmt = last_stmt (preheader_edge->src);
2098*38fd1498Szrj 	  /* Pattern match fortran loop preheader:
2099*38fd1498Szrj 	     _16 = BUILTIN_EXPECT (_15, 1, PRED_FORTRAN_LOOP_PREHEADER);
2100*38fd1498Szrj 	     _17 = (logical(kind=4)) _16;
2101*38fd1498Szrj 	     if (_17 != 0)
2102*38fd1498Szrj 	       goto <bb 11>;
2103*38fd1498Szrj 	     else
2104*38fd1498Szrj 	       goto <bb 13>;
2105*38fd1498Szrj 
2106*38fd1498Szrj 	     Loop guard branch prediction says nothing about duplicated loop
2107*38fd1498Szrj 	     headers produced by fortran frontend and in this case we want
2108*38fd1498Szrj 	     to predict paths leading to this preheader.  */
2109*38fd1498Szrj 
2110*38fd1498Szrj 	  if (stmt
2111*38fd1498Szrj 	      && gimple_code (stmt) == GIMPLE_COND
2112*38fd1498Szrj 	      && gimple_cond_code (stmt) == NE_EXPR
2113*38fd1498Szrj 	      && TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME
2114*38fd1498Szrj 	      && integer_zerop (gimple_cond_rhs (stmt)))
2115*38fd1498Szrj 	     {
2116*38fd1498Szrj 	       gimple *call_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt));
2117*38fd1498Szrj 	       if (gimple_code (call_stmt) == GIMPLE_ASSIGN
2118*38fd1498Szrj 		   && gimple_expr_code (call_stmt) == NOP_EXPR
2119*38fd1498Szrj 		   && TREE_CODE (gimple_assign_rhs1 (call_stmt)) == SSA_NAME)
2120*38fd1498Szrj 		 call_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (call_stmt));
2121*38fd1498Szrj 	       if (gimple_call_internal_p (call_stmt, IFN_BUILTIN_EXPECT)
2122*38fd1498Szrj 		   && TREE_CODE (gimple_call_arg (call_stmt, 2)) == INTEGER_CST
2123*38fd1498Szrj 		   && tree_fits_uhwi_p (gimple_call_arg (call_stmt, 2))
2124*38fd1498Szrj 		   && tree_to_uhwi (gimple_call_arg (call_stmt, 2))
2125*38fd1498Szrj 			== PRED_FORTRAN_LOOP_PREHEADER)
2126*38fd1498Szrj 		 bb = preheader_edge->src;
2127*38fd1498Szrj 	     }
2128*38fd1498Szrj 	  if (!bb)
2129*38fd1498Szrj 	    {
2130*38fd1498Szrj 	      if (!dominated_by_p (CDI_DOMINATORS,
2131*38fd1498Szrj 				   loop_outer (loop)->latch, loop->header))
2132*38fd1498Szrj 		predict_paths_leading_to_edge (loop_preheader_edge (loop),
2133*38fd1498Szrj 					       recursion
2134*38fd1498Szrj 					       ? PRED_LOOP_GUARD_WITH_RECURSION
2135*38fd1498Szrj 					       : PRED_LOOP_GUARD,
2136*38fd1498Szrj 					       NOT_TAKEN,
2137*38fd1498Szrj 					       loop_outer (loop));
2138*38fd1498Szrj 	    }
2139*38fd1498Szrj 	  else
2140*38fd1498Szrj 	    {
2141*38fd1498Szrj 	      if (!dominated_by_p (CDI_DOMINATORS,
2142*38fd1498Szrj 				   loop_outer (loop)->latch, bb))
2143*38fd1498Szrj 		predict_paths_leading_to (bb,
2144*38fd1498Szrj 					  recursion
2145*38fd1498Szrj 					  ? PRED_LOOP_GUARD_WITH_RECURSION
2146*38fd1498Szrj 					  : PRED_LOOP_GUARD,
2147*38fd1498Szrj 					  NOT_TAKEN,
2148*38fd1498Szrj 					  loop_outer (loop));
2149*38fd1498Szrj 	    }
2150*38fd1498Szrj 	}
2151*38fd1498Szrj 
2152*38fd1498Szrj       /* Free basic blocks from get_loop_body.  */
2153*38fd1498Szrj       free (bbs);
2154*38fd1498Szrj     }
2155*38fd1498Szrj }
2156*38fd1498Szrj 
2157*38fd1498Szrj /* Attempt to predict probabilities of BB outgoing edges using local
2158*38fd1498Szrj    properties.  */
2159*38fd1498Szrj static void
bb_estimate_probability_locally(basic_block bb)2160*38fd1498Szrj bb_estimate_probability_locally (basic_block bb)
2161*38fd1498Szrj {
2162*38fd1498Szrj   rtx_insn *last_insn = BB_END (bb);
2163*38fd1498Szrj   rtx cond;
2164*38fd1498Szrj 
2165*38fd1498Szrj   if (! can_predict_insn_p (last_insn))
2166*38fd1498Szrj     return;
2167*38fd1498Szrj   cond = get_condition (last_insn, NULL, false, false);
2168*38fd1498Szrj   if (! cond)
2169*38fd1498Szrj     return;
2170*38fd1498Szrj 
2171*38fd1498Szrj   /* Try "pointer heuristic."
2172*38fd1498Szrj      A comparison ptr == 0 is predicted as false.
2173*38fd1498Szrj      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
2174*38fd1498Szrj   if (COMPARISON_P (cond)
2175*38fd1498Szrj       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
2176*38fd1498Szrj 	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
2177*38fd1498Szrj     {
2178*38fd1498Szrj       if (GET_CODE (cond) == EQ)
2179*38fd1498Szrj 	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
2180*38fd1498Szrj       else if (GET_CODE (cond) == NE)
2181*38fd1498Szrj 	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
2182*38fd1498Szrj     }
2183*38fd1498Szrj   else
2184*38fd1498Szrj 
2185*38fd1498Szrj   /* Try "opcode heuristic."
2186*38fd1498Szrj      EQ tests are usually false and NE tests are usually true. Also,
2187*38fd1498Szrj      most quantities are positive, so we can make the appropriate guesses
2188*38fd1498Szrj      about signed comparisons against zero.  */
2189*38fd1498Szrj     switch (GET_CODE (cond))
2190*38fd1498Szrj       {
2191*38fd1498Szrj       case CONST_INT:
2192*38fd1498Szrj 	/* Unconditional branch.  */
2193*38fd1498Szrj 	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
2194*38fd1498Szrj 			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
2195*38fd1498Szrj 	break;
2196*38fd1498Szrj 
2197*38fd1498Szrj       case EQ:
2198*38fd1498Szrj       case UNEQ:
2199*38fd1498Szrj 	/* Floating point comparisons appears to behave in a very
2200*38fd1498Szrj 	   unpredictable way because of special role of = tests in
2201*38fd1498Szrj 	   FP code.  */
2202*38fd1498Szrj 	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
2203*38fd1498Szrj 	  ;
2204*38fd1498Szrj 	/* Comparisons with 0 are often used for booleans and there is
2205*38fd1498Szrj 	   nothing useful to predict about them.  */
2206*38fd1498Szrj 	else if (XEXP (cond, 1) == const0_rtx
2207*38fd1498Szrj 		 || XEXP (cond, 0) == const0_rtx)
2208*38fd1498Szrj 	  ;
2209*38fd1498Szrj 	else
2210*38fd1498Szrj 	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
2211*38fd1498Szrj 	break;
2212*38fd1498Szrj 
2213*38fd1498Szrj       case NE:
2214*38fd1498Szrj       case LTGT:
2215*38fd1498Szrj 	/* Floating point comparisons appears to behave in a very
2216*38fd1498Szrj 	   unpredictable way because of special role of = tests in
2217*38fd1498Szrj 	   FP code.  */
2218*38fd1498Szrj 	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
2219*38fd1498Szrj 	  ;
2220*38fd1498Szrj 	/* Comparisons with 0 are often used for booleans and there is
2221*38fd1498Szrj 	   nothing useful to predict about them.  */
2222*38fd1498Szrj 	else if (XEXP (cond, 1) == const0_rtx
2223*38fd1498Szrj 		 || XEXP (cond, 0) == const0_rtx)
2224*38fd1498Szrj 	  ;
2225*38fd1498Szrj 	else
2226*38fd1498Szrj 	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
2227*38fd1498Szrj 	break;
2228*38fd1498Szrj 
2229*38fd1498Szrj       case ORDERED:
2230*38fd1498Szrj 	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
2231*38fd1498Szrj 	break;
2232*38fd1498Szrj 
2233*38fd1498Szrj       case UNORDERED:
2234*38fd1498Szrj 	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
2235*38fd1498Szrj 	break;
2236*38fd1498Szrj 
2237*38fd1498Szrj       case LE:
2238*38fd1498Szrj       case LT:
2239*38fd1498Szrj 	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
2240*38fd1498Szrj 	    || XEXP (cond, 1) == constm1_rtx)
2241*38fd1498Szrj 	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
2242*38fd1498Szrj 	break;
2243*38fd1498Szrj 
2244*38fd1498Szrj       case GE:
2245*38fd1498Szrj       case GT:
2246*38fd1498Szrj 	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
2247*38fd1498Szrj 	    || XEXP (cond, 1) == constm1_rtx)
2248*38fd1498Szrj 	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
2249*38fd1498Szrj 	break;
2250*38fd1498Szrj 
2251*38fd1498Szrj       default:
2252*38fd1498Szrj 	break;
2253*38fd1498Szrj       }
2254*38fd1498Szrj }
2255*38fd1498Szrj 
2256*38fd1498Szrj /* Set edge->probability for each successor edge of BB.  */
2257*38fd1498Szrj void
guess_outgoing_edge_probabilities(basic_block bb)2258*38fd1498Szrj guess_outgoing_edge_probabilities (basic_block bb)
2259*38fd1498Szrj {
2260*38fd1498Szrj   bb_estimate_probability_locally (bb);
2261*38fd1498Szrj   combine_predictions_for_insn (BB_END (bb), bb);
2262*38fd1498Szrj }
2263*38fd1498Szrj 
2264*38fd1498Szrj static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
2265*38fd1498Szrj 
2266*38fd1498Szrj /* Helper function for expr_expected_value.  */
2267*38fd1498Szrj 
2268*38fd1498Szrj static tree
expr_expected_value_1(tree type,tree op0,enum tree_code code,tree op1,bitmap visited,enum br_predictor * predictor)2269*38fd1498Szrj expr_expected_value_1 (tree type, tree op0, enum tree_code code,
2270*38fd1498Szrj 		       tree op1, bitmap visited, enum br_predictor *predictor)
2271*38fd1498Szrj {
2272*38fd1498Szrj   gimple *def;
2273*38fd1498Szrj 
2274*38fd1498Szrj   if (predictor)
2275*38fd1498Szrj     *predictor = PRED_UNCONDITIONAL;
2276*38fd1498Szrj 
2277*38fd1498Szrj   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
2278*38fd1498Szrj     {
2279*38fd1498Szrj       if (TREE_CONSTANT (op0))
2280*38fd1498Szrj 	return op0;
2281*38fd1498Szrj 
2282*38fd1498Szrj       if (code == IMAGPART_EXPR)
2283*38fd1498Szrj 	{
2284*38fd1498Szrj 	  if (TREE_CODE (TREE_OPERAND (op0, 0)) == SSA_NAME)
2285*38fd1498Szrj 	    {
2286*38fd1498Szrj 	      def = SSA_NAME_DEF_STMT (TREE_OPERAND (op0, 0));
2287*38fd1498Szrj 	      if (is_gimple_call (def)
2288*38fd1498Szrj 		  && gimple_call_internal_p (def)
2289*38fd1498Szrj 		  && (gimple_call_internal_fn (def)
2290*38fd1498Szrj 		      == IFN_ATOMIC_COMPARE_EXCHANGE))
2291*38fd1498Szrj 		{
2292*38fd1498Szrj 		  /* Assume that any given atomic operation has low contention,
2293*38fd1498Szrj 		     and thus the compare-and-swap operation succeeds.  */
2294*38fd1498Szrj 		  if (predictor)
2295*38fd1498Szrj 		    *predictor = PRED_COMPARE_AND_SWAP;
2296*38fd1498Szrj 		  return build_one_cst (TREE_TYPE (op0));
2297*38fd1498Szrj 		}
2298*38fd1498Szrj 	    }
2299*38fd1498Szrj 	}
2300*38fd1498Szrj 
2301*38fd1498Szrj       if (code != SSA_NAME)
2302*38fd1498Szrj 	return NULL_TREE;
2303*38fd1498Szrj 
2304*38fd1498Szrj       def = SSA_NAME_DEF_STMT (op0);
2305*38fd1498Szrj 
2306*38fd1498Szrj       /* If we were already here, break the infinite cycle.  */
2307*38fd1498Szrj       if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
2308*38fd1498Szrj 	return NULL;
2309*38fd1498Szrj 
2310*38fd1498Szrj       if (gimple_code (def) == GIMPLE_PHI)
2311*38fd1498Szrj 	{
2312*38fd1498Szrj 	  /* All the arguments of the PHI node must have the same constant
2313*38fd1498Szrj 	     length.  */
2314*38fd1498Szrj 	  int i, n = gimple_phi_num_args (def);
2315*38fd1498Szrj 	  tree val = NULL, new_val;
2316*38fd1498Szrj 
2317*38fd1498Szrj 	  for (i = 0; i < n; i++)
2318*38fd1498Szrj 	    {
2319*38fd1498Szrj 	      tree arg = PHI_ARG_DEF (def, i);
2320*38fd1498Szrj 	      enum br_predictor predictor2;
2321*38fd1498Szrj 
2322*38fd1498Szrj 	      /* If this PHI has itself as an argument, we cannot
2323*38fd1498Szrj 		 determine the string length of this argument.  However,
2324*38fd1498Szrj 		 if we can find an expected constant value for the other
2325*38fd1498Szrj 		 PHI args then we can still be sure that this is
2326*38fd1498Szrj 		 likely a constant.  So be optimistic and just
2327*38fd1498Szrj 		 continue with the next argument.  */
2328*38fd1498Szrj 	      if (arg == PHI_RESULT (def))
2329*38fd1498Szrj 		continue;
2330*38fd1498Szrj 
2331*38fd1498Szrj 	      new_val = expr_expected_value (arg, visited, &predictor2);
2332*38fd1498Szrj 
2333*38fd1498Szrj 	      /* It is difficult to combine value predictors.  Simply assume
2334*38fd1498Szrj 		 that later predictor is weaker and take its prediction.  */
2335*38fd1498Szrj 	      if (predictor && *predictor < predictor2)
2336*38fd1498Szrj 		*predictor = predictor2;
2337*38fd1498Szrj 	      if (!new_val)
2338*38fd1498Szrj 		return NULL;
2339*38fd1498Szrj 	      if (!val)
2340*38fd1498Szrj 		val = new_val;
2341*38fd1498Szrj 	      else if (!operand_equal_p (val, new_val, false))
2342*38fd1498Szrj 		return NULL;
2343*38fd1498Szrj 	    }
2344*38fd1498Szrj 	  return val;
2345*38fd1498Szrj 	}
2346*38fd1498Szrj       if (is_gimple_assign (def))
2347*38fd1498Szrj 	{
2348*38fd1498Szrj 	  if (gimple_assign_lhs (def) != op0)
2349*38fd1498Szrj 	    return NULL;
2350*38fd1498Szrj 
2351*38fd1498Szrj 	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
2352*38fd1498Szrj 					gimple_assign_rhs1 (def),
2353*38fd1498Szrj 					gimple_assign_rhs_code (def),
2354*38fd1498Szrj 					gimple_assign_rhs2 (def),
2355*38fd1498Szrj 					visited, predictor);
2356*38fd1498Szrj 	}
2357*38fd1498Szrj 
2358*38fd1498Szrj       if (is_gimple_call (def))
2359*38fd1498Szrj 	{
2360*38fd1498Szrj 	  tree decl = gimple_call_fndecl (def);
2361*38fd1498Szrj 	  if (!decl)
2362*38fd1498Szrj 	    {
2363*38fd1498Szrj 	      if (gimple_call_internal_p (def)
2364*38fd1498Szrj 		  && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
2365*38fd1498Szrj 		{
2366*38fd1498Szrj 		  gcc_assert (gimple_call_num_args (def) == 3);
2367*38fd1498Szrj 		  tree val = gimple_call_arg (def, 0);
2368*38fd1498Szrj 		  if (TREE_CONSTANT (val))
2369*38fd1498Szrj 		    return val;
2370*38fd1498Szrj 		  if (predictor)
2371*38fd1498Szrj 		    {
2372*38fd1498Szrj 		      tree val2 = gimple_call_arg (def, 2);
2373*38fd1498Szrj 		      gcc_assert (TREE_CODE (val2) == INTEGER_CST
2374*38fd1498Szrj 				  && tree_fits_uhwi_p (val2)
2375*38fd1498Szrj 				  && tree_to_uhwi (val2) < END_PREDICTORS);
2376*38fd1498Szrj 		      *predictor = (enum br_predictor) tree_to_uhwi (val2);
2377*38fd1498Szrj 		    }
2378*38fd1498Szrj 		  return gimple_call_arg (def, 1);
2379*38fd1498Szrj 		}
2380*38fd1498Szrj 	      return NULL;
2381*38fd1498Szrj 	    }
2382*38fd1498Szrj 	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
2383*38fd1498Szrj 	    switch (DECL_FUNCTION_CODE (decl))
2384*38fd1498Szrj 	      {
2385*38fd1498Szrj 	      case BUILT_IN_EXPECT:
2386*38fd1498Szrj 		{
2387*38fd1498Szrj 		  tree val;
2388*38fd1498Szrj 		  if (gimple_call_num_args (def) != 2)
2389*38fd1498Szrj 		    return NULL;
2390*38fd1498Szrj 		  val = gimple_call_arg (def, 0);
2391*38fd1498Szrj 		  if (TREE_CONSTANT (val))
2392*38fd1498Szrj 		    return val;
2393*38fd1498Szrj 		  if (predictor)
2394*38fd1498Szrj 		    *predictor = PRED_BUILTIN_EXPECT;
2395*38fd1498Szrj 		  return gimple_call_arg (def, 1);
2396*38fd1498Szrj 		}
2397*38fd1498Szrj 
2398*38fd1498Szrj 	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
2399*38fd1498Szrj 	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
2400*38fd1498Szrj 	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
2401*38fd1498Szrj 	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
2402*38fd1498Szrj 	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
2403*38fd1498Szrj 	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
2404*38fd1498Szrj 	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
2405*38fd1498Szrj 	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
2406*38fd1498Szrj 	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
2407*38fd1498Szrj 	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
2408*38fd1498Szrj 	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
2409*38fd1498Szrj 	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
2410*38fd1498Szrj 	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
2411*38fd1498Szrj 		/* Assume that any given atomic operation has low contention,
2412*38fd1498Szrj 		   and thus the compare-and-swap operation succeeds.  */
2413*38fd1498Szrj 		if (predictor)
2414*38fd1498Szrj 		  *predictor = PRED_COMPARE_AND_SWAP;
2415*38fd1498Szrj 		return boolean_true_node;
2416*38fd1498Szrj 	      default:
2417*38fd1498Szrj 		break;
2418*38fd1498Szrj 	    }
2419*38fd1498Szrj 	}
2420*38fd1498Szrj 
2421*38fd1498Szrj       return NULL;
2422*38fd1498Szrj     }
2423*38fd1498Szrj 
2424*38fd1498Szrj   if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
2425*38fd1498Szrj     {
2426*38fd1498Szrj       tree res;
2427*38fd1498Szrj       enum br_predictor predictor2;
2428*38fd1498Szrj       op0 = expr_expected_value (op0, visited, predictor);
2429*38fd1498Szrj       if (!op0)
2430*38fd1498Szrj 	return NULL;
2431*38fd1498Szrj       op1 = expr_expected_value (op1, visited, &predictor2);
2432*38fd1498Szrj       if (predictor && *predictor < predictor2)
2433*38fd1498Szrj 	*predictor = predictor2;
2434*38fd1498Szrj       if (!op1)
2435*38fd1498Szrj 	return NULL;
2436*38fd1498Szrj       res = fold_build2 (code, type, op0, op1);
2437*38fd1498Szrj       if (TREE_CONSTANT (res))
2438*38fd1498Szrj 	return res;
2439*38fd1498Szrj       return NULL;
2440*38fd1498Szrj     }
2441*38fd1498Szrj   if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
2442*38fd1498Szrj     {
2443*38fd1498Szrj       tree res;
2444*38fd1498Szrj       op0 = expr_expected_value (op0, visited, predictor);
2445*38fd1498Szrj       if (!op0)
2446*38fd1498Szrj 	return NULL;
2447*38fd1498Szrj       res = fold_build1 (code, type, op0);
2448*38fd1498Szrj       if (TREE_CONSTANT (res))
2449*38fd1498Szrj 	return res;
2450*38fd1498Szrj       return NULL;
2451*38fd1498Szrj     }
2452*38fd1498Szrj   return NULL;
2453*38fd1498Szrj }
2454*38fd1498Szrj 
2455*38fd1498Szrj /* Return constant EXPR will likely have at execution time, NULL if unknown.
2456*38fd1498Szrj    The function is used by builtin_expect branch predictor so the evidence
2457*38fd1498Szrj    must come from this construct and additional possible constant folding.
2458*38fd1498Szrj 
2459*38fd1498Szrj    We may want to implement more involved value guess (such as value range
2460*38fd1498Szrj    propagation based prediction), but such tricks shall go to new
2461*38fd1498Szrj    implementation.  */
2462*38fd1498Szrj 
2463*38fd1498Szrj static tree
expr_expected_value(tree expr,bitmap visited,enum br_predictor * predictor)2464*38fd1498Szrj expr_expected_value (tree expr, bitmap visited,
2465*38fd1498Szrj 		     enum br_predictor *predictor)
2466*38fd1498Szrj {
2467*38fd1498Szrj   enum tree_code code;
2468*38fd1498Szrj   tree op0, op1;
2469*38fd1498Szrj 
2470*38fd1498Szrj   if (TREE_CONSTANT (expr))
2471*38fd1498Szrj     {
2472*38fd1498Szrj       if (predictor)
2473*38fd1498Szrj 	*predictor = PRED_UNCONDITIONAL;
2474*38fd1498Szrj       return expr;
2475*38fd1498Szrj     }
2476*38fd1498Szrj 
2477*38fd1498Szrj   extract_ops_from_tree (expr, &code, &op0, &op1);
2478*38fd1498Szrj   return expr_expected_value_1 (TREE_TYPE (expr),
2479*38fd1498Szrj 				op0, code, op1, visited, predictor);
2480*38fd1498Szrj }
2481*38fd1498Szrj 
2482*38fd1498Szrj /* Predict using opcode of the last statement in basic block.  */
2483*38fd1498Szrj static void
tree_predict_by_opcode(basic_block bb)2484*38fd1498Szrj tree_predict_by_opcode (basic_block bb)
2485*38fd1498Szrj {
2486*38fd1498Szrj   gimple *stmt = last_stmt (bb);
2487*38fd1498Szrj   edge then_edge;
2488*38fd1498Szrj   tree op0, op1;
2489*38fd1498Szrj   tree type;
2490*38fd1498Szrj   tree val;
2491*38fd1498Szrj   enum tree_code cmp;
2492*38fd1498Szrj   edge_iterator ei;
2493*38fd1498Szrj   enum br_predictor predictor;
2494*38fd1498Szrj 
2495*38fd1498Szrj   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
2496*38fd1498Szrj     return;
2497*38fd1498Szrj   FOR_EACH_EDGE (then_edge, ei, bb->succs)
2498*38fd1498Szrj     if (then_edge->flags & EDGE_TRUE_VALUE)
2499*38fd1498Szrj       break;
2500*38fd1498Szrj   op0 = gimple_cond_lhs (stmt);
2501*38fd1498Szrj   op1 = gimple_cond_rhs (stmt);
2502*38fd1498Szrj   cmp = gimple_cond_code (stmt);
2503*38fd1498Szrj   type = TREE_TYPE (op0);
2504*38fd1498Szrj   val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, auto_bitmap (),
2505*38fd1498Szrj 			       &predictor);
2506*38fd1498Szrj   if (val && TREE_CODE (val) == INTEGER_CST)
2507*38fd1498Szrj     {
2508*38fd1498Szrj       if (predictor == PRED_BUILTIN_EXPECT)
2509*38fd1498Szrj 	{
2510*38fd1498Szrj 	  int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2511*38fd1498Szrj 
2512*38fd1498Szrj 	  gcc_assert (percent >= 0 && percent <= 100);
2513*38fd1498Szrj 	  if (integer_zerop (val))
2514*38fd1498Szrj 	    percent = 100 - percent;
2515*38fd1498Szrj 	  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2516*38fd1498Szrj 	}
2517*38fd1498Szrj       else
2518*38fd1498Szrj 	predict_edge_def (then_edge, predictor,
2519*38fd1498Szrj 			  integer_zerop (val) ? NOT_TAKEN : TAKEN);
2520*38fd1498Szrj     }
2521*38fd1498Szrj   /* Try "pointer heuristic."
2522*38fd1498Szrj      A comparison ptr == 0 is predicted as false.
2523*38fd1498Szrj      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
2524*38fd1498Szrj   if (POINTER_TYPE_P (type))
2525*38fd1498Szrj     {
2526*38fd1498Szrj       if (cmp == EQ_EXPR)
2527*38fd1498Szrj 	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2528*38fd1498Szrj       else if (cmp == NE_EXPR)
2529*38fd1498Szrj 	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2530*38fd1498Szrj     }
2531*38fd1498Szrj   else
2532*38fd1498Szrj 
2533*38fd1498Szrj   /* Try "opcode heuristic."
2534*38fd1498Szrj      EQ tests are usually false and NE tests are usually true. Also,
2535*38fd1498Szrj      most quantities are positive, so we can make the appropriate guesses
2536*38fd1498Szrj      about signed comparisons against zero.  */
2537*38fd1498Szrj     switch (cmp)
2538*38fd1498Szrj       {
2539*38fd1498Szrj       case EQ_EXPR:
2540*38fd1498Szrj       case UNEQ_EXPR:
2541*38fd1498Szrj 	/* Floating point comparisons appears to behave in a very
2542*38fd1498Szrj 	   unpredictable way because of special role of = tests in
2543*38fd1498Szrj 	   FP code.  */
2544*38fd1498Szrj 	if (FLOAT_TYPE_P (type))
2545*38fd1498Szrj 	  ;
2546*38fd1498Szrj 	/* Comparisons with 0 are often used for booleans and there is
2547*38fd1498Szrj 	   nothing useful to predict about them.  */
2548*38fd1498Szrj 	else if (integer_zerop (op0) || integer_zerop (op1))
2549*38fd1498Szrj 	  ;
2550*38fd1498Szrj 	else
2551*38fd1498Szrj 	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2552*38fd1498Szrj 	break;
2553*38fd1498Szrj 
2554*38fd1498Szrj       case NE_EXPR:
2555*38fd1498Szrj       case LTGT_EXPR:
2556*38fd1498Szrj 	/* Floating point comparisons appears to behave in a very
2557*38fd1498Szrj 	   unpredictable way because of special role of = tests in
2558*38fd1498Szrj 	   FP code.  */
2559*38fd1498Szrj 	if (FLOAT_TYPE_P (type))
2560*38fd1498Szrj 	  ;
2561*38fd1498Szrj 	/* Comparisons with 0 are often used for booleans and there is
2562*38fd1498Szrj 	   nothing useful to predict about them.  */
2563*38fd1498Szrj 	else if (integer_zerop (op0)
2564*38fd1498Szrj 		 || integer_zerop (op1))
2565*38fd1498Szrj 	  ;
2566*38fd1498Szrj 	else
2567*38fd1498Szrj 	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2568*38fd1498Szrj 	break;
2569*38fd1498Szrj 
2570*38fd1498Szrj       case ORDERED_EXPR:
2571*38fd1498Szrj 	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2572*38fd1498Szrj 	break;
2573*38fd1498Szrj 
2574*38fd1498Szrj       case UNORDERED_EXPR:
2575*38fd1498Szrj 	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2576*38fd1498Szrj 	break;
2577*38fd1498Szrj 
2578*38fd1498Szrj       case LE_EXPR:
2579*38fd1498Szrj       case LT_EXPR:
2580*38fd1498Szrj 	if (integer_zerop (op1)
2581*38fd1498Szrj 	    || integer_onep (op1)
2582*38fd1498Szrj 	    || integer_all_onesp (op1)
2583*38fd1498Szrj 	    || real_zerop (op1)
2584*38fd1498Szrj 	    || real_onep (op1)
2585*38fd1498Szrj 	    || real_minus_onep (op1))
2586*38fd1498Szrj 	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2587*38fd1498Szrj 	break;
2588*38fd1498Szrj 
2589*38fd1498Szrj       case GE_EXPR:
2590*38fd1498Szrj       case GT_EXPR:
2591*38fd1498Szrj 	if (integer_zerop (op1)
2592*38fd1498Szrj 	    || integer_onep (op1)
2593*38fd1498Szrj 	    || integer_all_onesp (op1)
2594*38fd1498Szrj 	    || real_zerop (op1)
2595*38fd1498Szrj 	    || real_onep (op1)
2596*38fd1498Szrj 	    || real_minus_onep (op1))
2597*38fd1498Szrj 	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2598*38fd1498Szrj 	break;
2599*38fd1498Szrj 
2600*38fd1498Szrj       default:
2601*38fd1498Szrj 	break;
2602*38fd1498Szrj       }
2603*38fd1498Szrj }
2604*38fd1498Szrj 
2605*38fd1498Szrj /* Returns TRUE if the STMT is exit(0) like statement. */
2606*38fd1498Szrj 
2607*38fd1498Szrj static bool
is_exit_with_zero_arg(const gimple * stmt)2608*38fd1498Szrj is_exit_with_zero_arg (const gimple *stmt)
2609*38fd1498Szrj {
2610*38fd1498Szrj   /* This is not exit, _exit or _Exit. */
2611*38fd1498Szrj   if (!gimple_call_builtin_p (stmt, BUILT_IN_EXIT)
2612*38fd1498Szrj       && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT)
2613*38fd1498Szrj       && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT2))
2614*38fd1498Szrj     return false;
2615*38fd1498Szrj 
2616*38fd1498Szrj   /* Argument is an interger zero. */
2617*38fd1498Szrj   return integer_zerop (gimple_call_arg (stmt, 0));
2618*38fd1498Szrj }
2619*38fd1498Szrj 
2620*38fd1498Szrj /* Try to guess whether the value of return means error code.  */
2621*38fd1498Szrj 
2622*38fd1498Szrj static enum br_predictor
return_prediction(tree val,enum prediction * prediction)2623*38fd1498Szrj return_prediction (tree val, enum prediction *prediction)
2624*38fd1498Szrj {
2625*38fd1498Szrj   /* VOID.  */
2626*38fd1498Szrj   if (!val)
2627*38fd1498Szrj     return PRED_NO_PREDICTION;
2628*38fd1498Szrj   /* Different heuristics for pointers and scalars.  */
2629*38fd1498Szrj   if (POINTER_TYPE_P (TREE_TYPE (val)))
2630*38fd1498Szrj     {
2631*38fd1498Szrj       /* NULL is usually not returned.  */
2632*38fd1498Szrj       if (integer_zerop (val))
2633*38fd1498Szrj 	{
2634*38fd1498Szrj 	  *prediction = NOT_TAKEN;
2635*38fd1498Szrj 	  return PRED_NULL_RETURN;
2636*38fd1498Szrj 	}
2637*38fd1498Szrj     }
2638*38fd1498Szrj   else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2639*38fd1498Szrj     {
2640*38fd1498Szrj       /* Negative return values are often used to indicate
2641*38fd1498Szrj          errors.  */
2642*38fd1498Szrj       if (TREE_CODE (val) == INTEGER_CST
2643*38fd1498Szrj 	  && tree_int_cst_sgn (val) < 0)
2644*38fd1498Szrj 	{
2645*38fd1498Szrj 	  *prediction = NOT_TAKEN;
2646*38fd1498Szrj 	  return PRED_NEGATIVE_RETURN;
2647*38fd1498Szrj 	}
2648*38fd1498Szrj       /* Constant return values seems to be commonly taken.
2649*38fd1498Szrj          Zero/one often represent booleans so exclude them from the
2650*38fd1498Szrj 	 heuristics.  */
2651*38fd1498Szrj       if (TREE_CONSTANT (val)
2652*38fd1498Szrj 	  && (!integer_zerop (val) && !integer_onep (val)))
2653*38fd1498Szrj 	{
2654*38fd1498Szrj 	  *prediction = NOT_TAKEN;
2655*38fd1498Szrj 	  return PRED_CONST_RETURN;
2656*38fd1498Szrj 	}
2657*38fd1498Szrj     }
2658*38fd1498Szrj   return PRED_NO_PREDICTION;
2659*38fd1498Szrj }
2660*38fd1498Szrj 
2661*38fd1498Szrj /* Return zero if phi result could have values other than -1, 0 or 1,
2662*38fd1498Szrj    otherwise return a bitmask, with bits 0, 1 and 2 set if -1, 0 and 1
2663*38fd1498Szrj    values are used or likely.  */
2664*38fd1498Szrj 
2665*38fd1498Szrj static int
zero_one_minusone(gphi * phi,int limit)2666*38fd1498Szrj zero_one_minusone (gphi *phi, int limit)
2667*38fd1498Szrj {
2668*38fd1498Szrj   int phi_num_args = gimple_phi_num_args (phi);
2669*38fd1498Szrj   int ret = 0;
2670*38fd1498Szrj   for (int i = 0; i < phi_num_args; i++)
2671*38fd1498Szrj     {
2672*38fd1498Szrj       tree t = PHI_ARG_DEF (phi, i);
2673*38fd1498Szrj       if (TREE_CODE (t) != INTEGER_CST)
2674*38fd1498Szrj 	continue;
2675*38fd1498Szrj       wide_int w = wi::to_wide (t);
2676*38fd1498Szrj       if (w == -1)
2677*38fd1498Szrj 	ret |= 1;
2678*38fd1498Szrj       else if (w == 0)
2679*38fd1498Szrj 	ret |= 2;
2680*38fd1498Szrj       else if (w == 1)
2681*38fd1498Szrj 	ret |= 4;
2682*38fd1498Szrj       else
2683*38fd1498Szrj 	return 0;
2684*38fd1498Szrj     }
2685*38fd1498Szrj   for (int i = 0; i < phi_num_args; i++)
2686*38fd1498Szrj     {
2687*38fd1498Szrj       tree t = PHI_ARG_DEF (phi, i);
2688*38fd1498Szrj       if (TREE_CODE (t) == INTEGER_CST)
2689*38fd1498Szrj 	continue;
2690*38fd1498Szrj       if (TREE_CODE (t) != SSA_NAME)
2691*38fd1498Szrj 	return 0;
2692*38fd1498Szrj       gimple *g = SSA_NAME_DEF_STMT (t);
2693*38fd1498Szrj       if (gimple_code (g) == GIMPLE_PHI && limit > 0)
2694*38fd1498Szrj 	if (int r = zero_one_minusone (as_a <gphi *> (g), limit - 1))
2695*38fd1498Szrj 	  {
2696*38fd1498Szrj 	    ret |= r;
2697*38fd1498Szrj 	    continue;
2698*38fd1498Szrj 	  }
2699*38fd1498Szrj       if (!is_gimple_assign (g))
2700*38fd1498Szrj 	return 0;
2701*38fd1498Szrj       if (gimple_assign_cast_p (g))
2702*38fd1498Szrj 	{
2703*38fd1498Szrj 	  tree rhs1 = gimple_assign_rhs1 (g);
2704*38fd1498Szrj 	  if (TREE_CODE (rhs1) != SSA_NAME
2705*38fd1498Szrj 	      || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
2706*38fd1498Szrj 	      || TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2707*38fd1498Szrj 	      || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2708*38fd1498Szrj 	    return 0;
2709*38fd1498Szrj 	  ret |= (2 | 4);
2710*38fd1498Szrj 	  continue;
2711*38fd1498Szrj 	}
2712*38fd1498Szrj       if (TREE_CODE_CLASS (gimple_assign_rhs_code (g)) != tcc_comparison)
2713*38fd1498Szrj 	return 0;
2714*38fd1498Szrj       ret |= (2 | 4);
2715*38fd1498Szrj     }
2716*38fd1498Szrj   return ret;
2717*38fd1498Szrj }
2718*38fd1498Szrj 
2719*38fd1498Szrj /* Find the basic block with return expression and look up for possible
2720*38fd1498Szrj    return value trying to apply RETURN_PREDICTION heuristics.  */
2721*38fd1498Szrj static void
apply_return_prediction(void)2722*38fd1498Szrj apply_return_prediction (void)
2723*38fd1498Szrj {
2724*38fd1498Szrj   greturn *return_stmt = NULL;
2725*38fd1498Szrj   tree return_val;
2726*38fd1498Szrj   edge e;
2727*38fd1498Szrj   gphi *phi;
2728*38fd1498Szrj   int phi_num_args, i;
2729*38fd1498Szrj   enum br_predictor pred;
2730*38fd1498Szrj   enum prediction direction;
2731*38fd1498Szrj   edge_iterator ei;
2732*38fd1498Szrj 
2733*38fd1498Szrj   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2734*38fd1498Szrj     {
2735*38fd1498Szrj       gimple *last = last_stmt (e->src);
2736*38fd1498Szrj       if (last
2737*38fd1498Szrj 	  && gimple_code (last) == GIMPLE_RETURN)
2738*38fd1498Szrj 	{
2739*38fd1498Szrj 	  return_stmt = as_a <greturn *> (last);
2740*38fd1498Szrj 	  break;
2741*38fd1498Szrj 	}
2742*38fd1498Szrj     }
2743*38fd1498Szrj   if (!e)
2744*38fd1498Szrj     return;
2745*38fd1498Szrj   return_val = gimple_return_retval (return_stmt);
2746*38fd1498Szrj   if (!return_val)
2747*38fd1498Szrj     return;
2748*38fd1498Szrj   if (TREE_CODE (return_val) != SSA_NAME
2749*38fd1498Szrj       || !SSA_NAME_DEF_STMT (return_val)
2750*38fd1498Szrj       || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2751*38fd1498Szrj     return;
2752*38fd1498Szrj   phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2753*38fd1498Szrj   phi_num_args = gimple_phi_num_args (phi);
2754*38fd1498Szrj   pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2755*38fd1498Szrj 
2756*38fd1498Szrj   /* Avoid the case where the function returns -1, 0 and 1 values and
2757*38fd1498Szrj      nothing else.  Those could be qsort etc. comparison functions
2758*38fd1498Szrj      where the negative return isn't less probable than positive.
2759*38fd1498Szrj      For this require that the function returns at least -1 or 1
2760*38fd1498Szrj      or -1 and a boolean value or comparison result, so that functions
2761*38fd1498Szrj      returning just -1 and 0 are treated as if -1 represents error value.  */
2762*38fd1498Szrj   if (INTEGRAL_TYPE_P (TREE_TYPE (return_val))
2763*38fd1498Szrj       && !TYPE_UNSIGNED (TREE_TYPE (return_val))
2764*38fd1498Szrj       && TYPE_PRECISION (TREE_TYPE (return_val)) > 1)
2765*38fd1498Szrj     if (int r = zero_one_minusone (phi, 3))
2766*38fd1498Szrj       if ((r & (1 | 4)) == (1 | 4))
2767*38fd1498Szrj 	return;
2768*38fd1498Szrj 
2769*38fd1498Szrj   /* Avoid the degenerate case where all return values form the function
2770*38fd1498Szrj      belongs to same category (ie they are all positive constants)
2771*38fd1498Szrj      so we can hardly say something about them.  */
2772*38fd1498Szrj   for (i = 1; i < phi_num_args; i++)
2773*38fd1498Szrj     if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2774*38fd1498Szrj       break;
2775*38fd1498Szrj   if (i != phi_num_args)
2776*38fd1498Szrj     for (i = 0; i < phi_num_args; i++)
2777*38fd1498Szrj       {
2778*38fd1498Szrj 	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2779*38fd1498Szrj 	if (pred != PRED_NO_PREDICTION)
2780*38fd1498Szrj 	  predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2781*38fd1498Szrj 				         direction);
2782*38fd1498Szrj       }
2783*38fd1498Szrj }
2784*38fd1498Szrj 
2785*38fd1498Szrj /* Look for basic block that contains unlikely to happen events
2786*38fd1498Szrj    (such as noreturn calls) and mark all paths leading to execution
2787*38fd1498Szrj    of this basic blocks as unlikely.  */
2788*38fd1498Szrj 
2789*38fd1498Szrj static void
tree_bb_level_predictions(void)2790*38fd1498Szrj tree_bb_level_predictions (void)
2791*38fd1498Szrj {
2792*38fd1498Szrj   basic_block bb;
2793*38fd1498Szrj   bool has_return_edges = false;
2794*38fd1498Szrj   edge e;
2795*38fd1498Szrj   edge_iterator ei;
2796*38fd1498Szrj 
2797*38fd1498Szrj   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2798*38fd1498Szrj     if (!unlikely_executed_edge_p (e) && !(e->flags & EDGE_ABNORMAL_CALL))
2799*38fd1498Szrj       {
2800*38fd1498Szrj         has_return_edges = true;
2801*38fd1498Szrj 	break;
2802*38fd1498Szrj       }
2803*38fd1498Szrj 
2804*38fd1498Szrj   apply_return_prediction ();
2805*38fd1498Szrj 
2806*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
2807*38fd1498Szrj     {
2808*38fd1498Szrj       gimple_stmt_iterator gsi;
2809*38fd1498Szrj 
2810*38fd1498Szrj       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2811*38fd1498Szrj 	{
2812*38fd1498Szrj 	  gimple *stmt = gsi_stmt (gsi);
2813*38fd1498Szrj 	  tree decl;
2814*38fd1498Szrj 
2815*38fd1498Szrj 	  if (is_gimple_call (stmt))
2816*38fd1498Szrj 	    {
2817*38fd1498Szrj 	      if (gimple_call_noreturn_p (stmt)
2818*38fd1498Szrj 		  && has_return_edges
2819*38fd1498Szrj 		  && !is_exit_with_zero_arg (stmt))
2820*38fd1498Szrj 		predict_paths_leading_to (bb, PRED_NORETURN,
2821*38fd1498Szrj 					  NOT_TAKEN);
2822*38fd1498Szrj 	      decl = gimple_call_fndecl (stmt);
2823*38fd1498Szrj 	      if (decl
2824*38fd1498Szrj 		  && lookup_attribute ("cold",
2825*38fd1498Szrj 				       DECL_ATTRIBUTES (decl)))
2826*38fd1498Szrj 		predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2827*38fd1498Szrj 					  NOT_TAKEN);
2828*38fd1498Szrj 	      if (decl && recursive_call_p (current_function_decl, decl))
2829*38fd1498Szrj 		predict_paths_leading_to (bb, PRED_RECURSIVE_CALL,
2830*38fd1498Szrj 					  NOT_TAKEN);
2831*38fd1498Szrj 	    }
2832*38fd1498Szrj 	  else if (gimple_code (stmt) == GIMPLE_PREDICT)
2833*38fd1498Szrj 	    {
2834*38fd1498Szrj 	      predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2835*38fd1498Szrj 					gimple_predict_outcome (stmt));
2836*38fd1498Szrj 	      /* Keep GIMPLE_PREDICT around so early inlining will propagate
2837*38fd1498Szrj 	         hints to callers.  */
2838*38fd1498Szrj 	    }
2839*38fd1498Szrj 	}
2840*38fd1498Szrj     }
2841*38fd1498Szrj }
2842*38fd1498Szrj 
2843*38fd1498Szrj /* Callback for hash_map::traverse, asserts that the pointer map is
2844*38fd1498Szrj    empty.  */
2845*38fd1498Szrj 
2846*38fd1498Szrj bool
assert_is_empty(const_basic_block const &,edge_prediction * const & value,void *)2847*38fd1498Szrj assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2848*38fd1498Szrj 		 void *)
2849*38fd1498Szrj {
2850*38fd1498Szrj   gcc_assert (!value);
2851*38fd1498Szrj   return false;
2852*38fd1498Szrj }
2853*38fd1498Szrj 
2854*38fd1498Szrj /* Predict branch probabilities and estimate profile for basic block BB.
2855*38fd1498Szrj    When LOCAL_ONLY is set do not use any global properties of CFG.  */
2856*38fd1498Szrj 
2857*38fd1498Szrj static void
tree_estimate_probability_bb(basic_block bb,bool local_only)2858*38fd1498Szrj tree_estimate_probability_bb (basic_block bb, bool local_only)
2859*38fd1498Szrj {
2860*38fd1498Szrj   edge e;
2861*38fd1498Szrj   edge_iterator ei;
2862*38fd1498Szrj 
2863*38fd1498Szrj   FOR_EACH_EDGE (e, ei, bb->succs)
2864*38fd1498Szrj     {
2865*38fd1498Szrj       /* Look for block we are guarding (ie we dominate it,
2866*38fd1498Szrj 	 but it doesn't postdominate us).  */
2867*38fd1498Szrj       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2868*38fd1498Szrj 	  && !local_only
2869*38fd1498Szrj 	  && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2870*38fd1498Szrj 	  && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2871*38fd1498Szrj 	{
2872*38fd1498Szrj 	  gimple_stmt_iterator bi;
2873*38fd1498Szrj 
2874*38fd1498Szrj 	  /* The call heuristic claims that a guarded function call
2875*38fd1498Szrj 	     is improbable.  This is because such calls are often used
2876*38fd1498Szrj 	     to signal exceptional situations such as printing error
2877*38fd1498Szrj 	     messages.  */
2878*38fd1498Szrj 	  for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2879*38fd1498Szrj 	       gsi_next (&bi))
2880*38fd1498Szrj 	    {
2881*38fd1498Szrj 	      gimple *stmt = gsi_stmt (bi);
2882*38fd1498Szrj 	      if (is_gimple_call (stmt)
2883*38fd1498Szrj 		  && !gimple_inexpensive_call_p (as_a <gcall *>  (stmt))
2884*38fd1498Szrj 		  /* Constant and pure calls are hardly used to signalize
2885*38fd1498Szrj 		     something exceptional.  */
2886*38fd1498Szrj 		  && gimple_has_side_effects (stmt))
2887*38fd1498Szrj 		{
2888*38fd1498Szrj 		  if (gimple_call_fndecl (stmt))
2889*38fd1498Szrj 		    predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2890*38fd1498Szrj 		  else if (virtual_method_call_p (gimple_call_fn (stmt)))
2891*38fd1498Szrj 		    predict_edge_def (e, PRED_POLYMORPHIC_CALL, NOT_TAKEN);
2892*38fd1498Szrj 		  else
2893*38fd1498Szrj 		    predict_edge_def (e, PRED_INDIR_CALL, TAKEN);
2894*38fd1498Szrj 		  break;
2895*38fd1498Szrj 		}
2896*38fd1498Szrj 	    }
2897*38fd1498Szrj 	}
2898*38fd1498Szrj     }
2899*38fd1498Szrj   tree_predict_by_opcode (bb);
2900*38fd1498Szrj }
2901*38fd1498Szrj 
2902*38fd1498Szrj /* Predict branch probabilities and estimate profile of the tree CFG.
2903*38fd1498Szrj    This function can be called from the loop optimizers to recompute
2904*38fd1498Szrj    the profile information.
2905*38fd1498Szrj    If DRY_RUN is set, do not modify CFG and only produce dump files.  */
2906*38fd1498Szrj 
2907*38fd1498Szrj void
tree_estimate_probability(bool dry_run)2908*38fd1498Szrj tree_estimate_probability (bool dry_run)
2909*38fd1498Szrj {
2910*38fd1498Szrj   basic_block bb;
2911*38fd1498Szrj 
2912*38fd1498Szrj   add_noreturn_fake_exit_edges ();
2913*38fd1498Szrj   connect_infinite_loops_to_exit ();
2914*38fd1498Szrj   /* We use loop_niter_by_eval, which requires that the loops have
2915*38fd1498Szrj      preheaders.  */
2916*38fd1498Szrj   create_preheaders (CP_SIMPLE_PREHEADERS);
2917*38fd1498Szrj   calculate_dominance_info (CDI_POST_DOMINATORS);
2918*38fd1498Szrj 
2919*38fd1498Szrj   bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2920*38fd1498Szrj   tree_bb_level_predictions ();
2921*38fd1498Szrj   record_loop_exits ();
2922*38fd1498Szrj 
2923*38fd1498Szrj   if (number_of_loops (cfun) > 1)
2924*38fd1498Szrj     predict_loops ();
2925*38fd1498Szrj 
2926*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
2927*38fd1498Szrj     tree_estimate_probability_bb (bb, false);
2928*38fd1498Szrj 
2929*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
2930*38fd1498Szrj     combine_predictions_for_bb (bb, dry_run);
2931*38fd1498Szrj 
2932*38fd1498Szrj   if (flag_checking)
2933*38fd1498Szrj     bb_predictions->traverse<void *, assert_is_empty> (NULL);
2934*38fd1498Szrj 
2935*38fd1498Szrj   delete bb_predictions;
2936*38fd1498Szrj   bb_predictions = NULL;
2937*38fd1498Szrj 
2938*38fd1498Szrj   if (!dry_run)
2939*38fd1498Szrj     estimate_bb_frequencies (false);
2940*38fd1498Szrj   free_dominance_info (CDI_POST_DOMINATORS);
2941*38fd1498Szrj   remove_fake_exit_edges ();
2942*38fd1498Szrj }
2943*38fd1498Szrj 
2944*38fd1498Szrj /* Set edge->probability for each successor edge of BB.  */
2945*38fd1498Szrj void
tree_guess_outgoing_edge_probabilities(basic_block bb)2946*38fd1498Szrj tree_guess_outgoing_edge_probabilities (basic_block bb)
2947*38fd1498Szrj {
2948*38fd1498Szrj   bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2949*38fd1498Szrj   tree_estimate_probability_bb (bb, true);
2950*38fd1498Szrj   combine_predictions_for_bb (bb, false);
2951*38fd1498Szrj   if (flag_checking)
2952*38fd1498Szrj     bb_predictions->traverse<void *, assert_is_empty> (NULL);
2953*38fd1498Szrj   delete bb_predictions;
2954*38fd1498Szrj   bb_predictions = NULL;
2955*38fd1498Szrj }
2956*38fd1498Szrj 
2957*38fd1498Szrj /* Predict edges to successors of CUR whose sources are not postdominated by
2958*38fd1498Szrj    BB by PRED and recurse to all postdominators.  */
2959*38fd1498Szrj 
2960*38fd1498Szrj static void
2961*38fd1498Szrj predict_paths_for_bb (basic_block cur, basic_block bb,
2962*38fd1498Szrj 		      enum br_predictor pred,
2963*38fd1498Szrj 		      enum prediction taken,
2964*38fd1498Szrj 		      bitmap visited, struct loop *in_loop = NULL)
2965*38fd1498Szrj {
2966*38fd1498Szrj   edge e;
2967*38fd1498Szrj   edge_iterator ei;
2968*38fd1498Szrj   basic_block son;
2969*38fd1498Szrj 
2970*38fd1498Szrj   /* If we exited the loop or CUR is unconditional in the loop, there is
2971*38fd1498Szrj      nothing to do.  */
2972*38fd1498Szrj   if (in_loop
2973*38fd1498Szrj       && (!flow_bb_inside_loop_p (in_loop, cur)
2974*38fd1498Szrj 	  || dominated_by_p (CDI_DOMINATORS, in_loop->latch, cur)))
2975*38fd1498Szrj     return;
2976*38fd1498Szrj 
2977*38fd1498Szrj   /* We are looking for all edges forming edge cut induced by
2978*38fd1498Szrj      set of all blocks postdominated by BB.  */
2979*38fd1498Szrj   FOR_EACH_EDGE (e, ei, cur->preds)
2980*38fd1498Szrj     if (e->src->index >= NUM_FIXED_BLOCKS
2981*38fd1498Szrj 	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2982*38fd1498Szrj     {
2983*38fd1498Szrj       edge e2;
2984*38fd1498Szrj       edge_iterator ei2;
2985*38fd1498Szrj       bool found = false;
2986*38fd1498Szrj 
2987*38fd1498Szrj       /* Ignore fake edges and eh, we predict them as not taken anyway.  */
2988*38fd1498Szrj       if (unlikely_executed_edge_p (e))
2989*38fd1498Szrj 	continue;
2990*38fd1498Szrj       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2991*38fd1498Szrj 
2992*38fd1498Szrj       /* See if there is an edge from e->src that is not abnormal
2993*38fd1498Szrj 	 and does not lead to BB and does not exit the loop.  */
2994*38fd1498Szrj       FOR_EACH_EDGE (e2, ei2, e->src->succs)
2995*38fd1498Szrj 	if (e2 != e
2996*38fd1498Szrj 	    && !unlikely_executed_edge_p (e2)
2997*38fd1498Szrj 	    && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb)
2998*38fd1498Szrj 	    && (!in_loop || !loop_exit_edge_p (in_loop, e2)))
2999*38fd1498Szrj 	  {
3000*38fd1498Szrj 	    found = true;
3001*38fd1498Szrj 	    break;
3002*38fd1498Szrj 	  }
3003*38fd1498Szrj 
3004*38fd1498Szrj       /* If there is non-abnormal path leaving e->src, predict edge
3005*38fd1498Szrj 	 using predictor.  Otherwise we need to look for paths
3006*38fd1498Szrj 	 leading to e->src.
3007*38fd1498Szrj 
3008*38fd1498Szrj 	 The second may lead to infinite loop in the case we are predicitng
3009*38fd1498Szrj 	 regions that are only reachable by abnormal edges.  We simply
3010*38fd1498Szrj 	 prevent visiting given BB twice.  */
3011*38fd1498Szrj       if (found)
3012*38fd1498Szrj 	{
3013*38fd1498Szrj 	  if (!edge_predicted_by_p (e, pred, taken))
3014*38fd1498Szrj             predict_edge_def (e, pred, taken);
3015*38fd1498Szrj 	}
3016*38fd1498Szrj       else if (bitmap_set_bit (visited, e->src->index))
3017*38fd1498Szrj 	predict_paths_for_bb (e->src, e->src, pred, taken, visited, in_loop);
3018*38fd1498Szrj     }
3019*38fd1498Szrj   for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
3020*38fd1498Szrj        son;
3021*38fd1498Szrj        son = next_dom_son (CDI_POST_DOMINATORS, son))
3022*38fd1498Szrj     predict_paths_for_bb (son, bb, pred, taken, visited, in_loop);
3023*38fd1498Szrj }
3024*38fd1498Szrj 
3025*38fd1498Szrj /* Sets branch probabilities according to PREDiction and
3026*38fd1498Szrj    FLAGS.  */
3027*38fd1498Szrj 
3028*38fd1498Szrj static void
predict_paths_leading_to(basic_block bb,enum br_predictor pred,enum prediction taken,struct loop * in_loop)3029*38fd1498Szrj predict_paths_leading_to (basic_block bb, enum br_predictor pred,
3030*38fd1498Szrj 			  enum prediction taken, struct loop *in_loop)
3031*38fd1498Szrj {
3032*38fd1498Szrj   predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop);
3033*38fd1498Szrj }
3034*38fd1498Szrj 
3035*38fd1498Szrj /* Like predict_paths_leading_to but take edge instead of basic block.  */
3036*38fd1498Szrj 
3037*38fd1498Szrj static void
predict_paths_leading_to_edge(edge e,enum br_predictor pred,enum prediction taken,struct loop * in_loop)3038*38fd1498Szrj predict_paths_leading_to_edge (edge e, enum br_predictor pred,
3039*38fd1498Szrj 			       enum prediction taken, struct loop *in_loop)
3040*38fd1498Szrj {
3041*38fd1498Szrj   bool has_nonloop_edge = false;
3042*38fd1498Szrj   edge_iterator ei;
3043*38fd1498Szrj   edge e2;
3044*38fd1498Szrj 
3045*38fd1498Szrj   basic_block bb = e->src;
3046*38fd1498Szrj   FOR_EACH_EDGE (e2, ei, bb->succs)
3047*38fd1498Szrj     if (e2->dest != e->src && e2->dest != e->dest
3048*38fd1498Szrj 	&& !unlikely_executed_edge_p (e)
3049*38fd1498Szrj 	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
3050*38fd1498Szrj       {
3051*38fd1498Szrj 	has_nonloop_edge = true;
3052*38fd1498Szrj 	break;
3053*38fd1498Szrj       }
3054*38fd1498Szrj   if (!has_nonloop_edge)
3055*38fd1498Szrj     {
3056*38fd1498Szrj       predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop);
3057*38fd1498Szrj     }
3058*38fd1498Szrj   else
3059*38fd1498Szrj     predict_edge_def (e, pred, taken);
3060*38fd1498Szrj }
3061*38fd1498Szrj 
3062*38fd1498Szrj /* This is used to carry information about basic blocks.  It is
3063*38fd1498Szrj    attached to the AUX field of the standard CFG block.  */
3064*38fd1498Szrj 
3065*38fd1498Szrj struct block_info
3066*38fd1498Szrj {
3067*38fd1498Szrj   /* Estimated frequency of execution of basic_block.  */
3068*38fd1498Szrj   sreal frequency;
3069*38fd1498Szrj 
3070*38fd1498Szrj   /* To keep queue of basic blocks to process.  */
3071*38fd1498Szrj   basic_block next;
3072*38fd1498Szrj 
3073*38fd1498Szrj   /* Number of predecessors we need to visit first.  */
3074*38fd1498Szrj   int npredecessors;
3075*38fd1498Szrj };
3076*38fd1498Szrj 
3077*38fd1498Szrj /* Similar information for edges.  */
3078*38fd1498Szrj struct edge_prob_info
3079*38fd1498Szrj {
3080*38fd1498Szrj   /* In case edge is a loopback edge, the probability edge will be reached
3081*38fd1498Szrj      in case header is.  Estimated number of iterations of the loop can be
3082*38fd1498Szrj      then computed as 1 / (1 - back_edge_prob).  */
3083*38fd1498Szrj   sreal back_edge_prob;
3084*38fd1498Szrj   /* True if the edge is a loopback edge in the natural loop.  */
3085*38fd1498Szrj   unsigned int back_edge:1;
3086*38fd1498Szrj };
3087*38fd1498Szrj 
3088*38fd1498Szrj #define BLOCK_INFO(B)	((block_info *) (B)->aux)
3089*38fd1498Szrj #undef EDGE_INFO
3090*38fd1498Szrj #define EDGE_INFO(E)	((edge_prob_info *) (E)->aux)
3091*38fd1498Szrj 
3092*38fd1498Szrj /* Helper function for estimate_bb_frequencies.
3093*38fd1498Szrj    Propagate the frequencies in blocks marked in
3094*38fd1498Szrj    TOVISIT, starting in HEAD.  */
3095*38fd1498Szrj 
3096*38fd1498Szrj static void
propagate_freq(basic_block head,bitmap tovisit)3097*38fd1498Szrj propagate_freq (basic_block head, bitmap tovisit)
3098*38fd1498Szrj {
3099*38fd1498Szrj   basic_block bb;
3100*38fd1498Szrj   basic_block last;
3101*38fd1498Szrj   unsigned i;
3102*38fd1498Szrj   edge e;
3103*38fd1498Szrj   basic_block nextbb;
3104*38fd1498Szrj   bitmap_iterator bi;
3105*38fd1498Szrj 
3106*38fd1498Szrj   /* For each basic block we need to visit count number of his predecessors
3107*38fd1498Szrj      we need to visit first.  */
3108*38fd1498Szrj   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
3109*38fd1498Szrj     {
3110*38fd1498Szrj       edge_iterator ei;
3111*38fd1498Szrj       int count = 0;
3112*38fd1498Szrj 
3113*38fd1498Szrj       bb = BASIC_BLOCK_FOR_FN (cfun, i);
3114*38fd1498Szrj 
3115*38fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->preds)
3116*38fd1498Szrj 	{
3117*38fd1498Szrj 	  bool visit = bitmap_bit_p (tovisit, e->src->index);
3118*38fd1498Szrj 
3119*38fd1498Szrj 	  if (visit && !(e->flags & EDGE_DFS_BACK))
3120*38fd1498Szrj 	    count++;
3121*38fd1498Szrj 	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
3122*38fd1498Szrj 	    fprintf (dump_file,
3123*38fd1498Szrj 		     "Irreducible region hit, ignoring edge to %i->%i\n",
3124*38fd1498Szrj 		     e->src->index, bb->index);
3125*38fd1498Szrj 	}
3126*38fd1498Szrj       BLOCK_INFO (bb)->npredecessors = count;
3127*38fd1498Szrj       /* When function never returns, we will never process exit block.  */
3128*38fd1498Szrj       if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
3129*38fd1498Szrj 	bb->count = profile_count::zero ();
3130*38fd1498Szrj     }
3131*38fd1498Szrj 
3132*38fd1498Szrj   BLOCK_INFO (head)->frequency = 1;
3133*38fd1498Szrj   last = head;
3134*38fd1498Szrj   for (bb = head; bb; bb = nextbb)
3135*38fd1498Szrj     {
3136*38fd1498Szrj       edge_iterator ei;
3137*38fd1498Szrj       sreal cyclic_probability = 0;
3138*38fd1498Szrj       sreal frequency = 0;
3139*38fd1498Szrj 
3140*38fd1498Szrj       nextbb = BLOCK_INFO (bb)->next;
3141*38fd1498Szrj       BLOCK_INFO (bb)->next = NULL;
3142*38fd1498Szrj 
3143*38fd1498Szrj       /* Compute frequency of basic block.  */
3144*38fd1498Szrj       if (bb != head)
3145*38fd1498Szrj 	{
3146*38fd1498Szrj 	  if (flag_checking)
3147*38fd1498Szrj 	    FOR_EACH_EDGE (e, ei, bb->preds)
3148*38fd1498Szrj 	      gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
3149*38fd1498Szrj 			  || (e->flags & EDGE_DFS_BACK));
3150*38fd1498Szrj 
3151*38fd1498Szrj 	  FOR_EACH_EDGE (e, ei, bb->preds)
3152*38fd1498Szrj 	    if (EDGE_INFO (e)->back_edge)
3153*38fd1498Szrj 	      {
3154*38fd1498Szrj 		cyclic_probability += EDGE_INFO (e)->back_edge_prob;
3155*38fd1498Szrj 	      }
3156*38fd1498Szrj 	    else if (!(e->flags & EDGE_DFS_BACK))
3157*38fd1498Szrj 	      {
3158*38fd1498Szrj 		/*  frequency += (e->probability
3159*38fd1498Szrj 				  * BLOCK_INFO (e->src)->frequency /
3160*38fd1498Szrj 				  REG_BR_PROB_BASE);  */
3161*38fd1498Szrj 
3162*38fd1498Szrj 		/* FIXME: Graphite is producing edges with no profile. Once
3163*38fd1498Szrj 		   this is fixed, drop this.  */
3164*38fd1498Szrj 		sreal tmp = e->probability.initialized_p () ?
3165*38fd1498Szrj 			    e->probability.to_reg_br_prob_base () : 0;
3166*38fd1498Szrj 		tmp *= BLOCK_INFO (e->src)->frequency;
3167*38fd1498Szrj 		tmp *= real_inv_br_prob_base;
3168*38fd1498Szrj 		frequency += tmp;
3169*38fd1498Szrj 	      }
3170*38fd1498Szrj 
3171*38fd1498Szrj 	  if (cyclic_probability == 0)
3172*38fd1498Szrj 	    {
3173*38fd1498Szrj 	      BLOCK_INFO (bb)->frequency = frequency;
3174*38fd1498Szrj 	    }
3175*38fd1498Szrj 	  else
3176*38fd1498Szrj 	    {
3177*38fd1498Szrj 	      if (cyclic_probability > real_almost_one)
3178*38fd1498Szrj 		cyclic_probability = real_almost_one;
3179*38fd1498Szrj 
3180*38fd1498Szrj 	      /* BLOCK_INFO (bb)->frequency = frequency
3181*38fd1498Szrj 					      / (1 - cyclic_probability) */
3182*38fd1498Szrj 
3183*38fd1498Szrj 	      cyclic_probability = sreal (1) - cyclic_probability;
3184*38fd1498Szrj 	      BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
3185*38fd1498Szrj 	    }
3186*38fd1498Szrj 	}
3187*38fd1498Szrj 
3188*38fd1498Szrj       bitmap_clear_bit (tovisit, bb->index);
3189*38fd1498Szrj 
3190*38fd1498Szrj       e = find_edge (bb, head);
3191*38fd1498Szrj       if (e)
3192*38fd1498Szrj 	{
3193*38fd1498Szrj 	  /* EDGE_INFO (e)->back_edge_prob
3194*38fd1498Szrj 	     = ((e->probability * BLOCK_INFO (bb)->frequency)
3195*38fd1498Szrj 	     / REG_BR_PROB_BASE); */
3196*38fd1498Szrj 
3197*38fd1498Szrj 	  /* FIXME: Graphite is producing edges with no profile. Once
3198*38fd1498Szrj 	     this is fixed, drop this.  */
3199*38fd1498Szrj 	  sreal tmp = e->probability.initialized_p () ?
3200*38fd1498Szrj 		      e->probability.to_reg_br_prob_base () : 0;
3201*38fd1498Szrj 	  tmp *= BLOCK_INFO (bb)->frequency;
3202*38fd1498Szrj 	  EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
3203*38fd1498Szrj 	}
3204*38fd1498Szrj 
3205*38fd1498Szrj       /* Propagate to successor blocks.  */
3206*38fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->succs)
3207*38fd1498Szrj 	if (!(e->flags & EDGE_DFS_BACK)
3208*38fd1498Szrj 	    && BLOCK_INFO (e->dest)->npredecessors)
3209*38fd1498Szrj 	  {
3210*38fd1498Szrj 	    BLOCK_INFO (e->dest)->npredecessors--;
3211*38fd1498Szrj 	    if (!BLOCK_INFO (e->dest)->npredecessors)
3212*38fd1498Szrj 	      {
3213*38fd1498Szrj 		if (!nextbb)
3214*38fd1498Szrj 		  nextbb = e->dest;
3215*38fd1498Szrj 		else
3216*38fd1498Szrj 		  BLOCK_INFO (last)->next = e->dest;
3217*38fd1498Szrj 
3218*38fd1498Szrj 		last = e->dest;
3219*38fd1498Szrj 	      }
3220*38fd1498Szrj 	  }
3221*38fd1498Szrj     }
3222*38fd1498Szrj }
3223*38fd1498Szrj 
3224*38fd1498Szrj /* Estimate frequencies in loops at same nest level.  */
3225*38fd1498Szrj 
3226*38fd1498Szrj static void
estimate_loops_at_level(struct loop * first_loop)3227*38fd1498Szrj estimate_loops_at_level (struct loop *first_loop)
3228*38fd1498Szrj {
3229*38fd1498Szrj   struct loop *loop;
3230*38fd1498Szrj 
3231*38fd1498Szrj   for (loop = first_loop; loop; loop = loop->next)
3232*38fd1498Szrj     {
3233*38fd1498Szrj       edge e;
3234*38fd1498Szrj       basic_block *bbs;
3235*38fd1498Szrj       unsigned i;
3236*38fd1498Szrj       auto_bitmap tovisit;
3237*38fd1498Szrj 
3238*38fd1498Szrj       estimate_loops_at_level (loop->inner);
3239*38fd1498Szrj 
3240*38fd1498Szrj       /* Find current loop back edge and mark it.  */
3241*38fd1498Szrj       e = loop_latch_edge (loop);
3242*38fd1498Szrj       EDGE_INFO (e)->back_edge = 1;
3243*38fd1498Szrj 
3244*38fd1498Szrj       bbs = get_loop_body (loop);
3245*38fd1498Szrj       for (i = 0; i < loop->num_nodes; i++)
3246*38fd1498Szrj 	bitmap_set_bit (tovisit, bbs[i]->index);
3247*38fd1498Szrj       free (bbs);
3248*38fd1498Szrj       propagate_freq (loop->header, tovisit);
3249*38fd1498Szrj     }
3250*38fd1498Szrj }
3251*38fd1498Szrj 
3252*38fd1498Szrj /* Propagates frequencies through structure of loops.  */
3253*38fd1498Szrj 
3254*38fd1498Szrj static void
estimate_loops(void)3255*38fd1498Szrj estimate_loops (void)
3256*38fd1498Szrj {
3257*38fd1498Szrj   auto_bitmap tovisit;
3258*38fd1498Szrj   basic_block bb;
3259*38fd1498Szrj 
3260*38fd1498Szrj   /* Start by estimating the frequencies in the loops.  */
3261*38fd1498Szrj   if (number_of_loops (cfun) > 1)
3262*38fd1498Szrj     estimate_loops_at_level (current_loops->tree_root->inner);
3263*38fd1498Szrj 
3264*38fd1498Szrj   /* Now propagate the frequencies through all the blocks.  */
3265*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
3266*38fd1498Szrj     {
3267*38fd1498Szrj       bitmap_set_bit (tovisit, bb->index);
3268*38fd1498Szrj     }
3269*38fd1498Szrj   propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
3270*38fd1498Szrj }
3271*38fd1498Szrj 
3272*38fd1498Szrj /* Drop the profile for NODE to guessed, and update its frequency based on
3273*38fd1498Szrj    whether it is expected to be hot given the CALL_COUNT.  */
3274*38fd1498Szrj 
3275*38fd1498Szrj static void
drop_profile(struct cgraph_node * node,profile_count call_count)3276*38fd1498Szrj drop_profile (struct cgraph_node *node, profile_count call_count)
3277*38fd1498Szrj {
3278*38fd1498Szrj   struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
3279*38fd1498Szrj   /* In the case where this was called by another function with a
3280*38fd1498Szrj      dropped profile, call_count will be 0. Since there are no
3281*38fd1498Szrj      non-zero call counts to this function, we don't know for sure
3282*38fd1498Szrj      whether it is hot, and therefore it will be marked normal below.  */
3283*38fd1498Szrj   bool hot = maybe_hot_count_p (NULL, call_count);
3284*38fd1498Szrj 
3285*38fd1498Szrj   if (dump_file)
3286*38fd1498Szrj     fprintf (dump_file,
3287*38fd1498Szrj 	     "Dropping 0 profile for %s. %s based on calls.\n",
3288*38fd1498Szrj 	     node->dump_name (),
3289*38fd1498Szrj 	     hot ? "Function is hot" : "Function is normal");
3290*38fd1498Szrj   /* We only expect to miss profiles for functions that are reached
3291*38fd1498Szrj      via non-zero call edges in cases where the function may have
3292*38fd1498Szrj      been linked from another module or library (COMDATs and extern
3293*38fd1498Szrj      templates). See the comments below for handle_missing_profiles.
3294*38fd1498Szrj      Also, only warn in cases where the missing counts exceed the
3295*38fd1498Szrj      number of training runs. In certain cases with an execv followed
3296*38fd1498Szrj      by a no-return call the profile for the no-return call is not
3297*38fd1498Szrj      dumped and there can be a mismatch.  */
3298*38fd1498Szrj   if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
3299*38fd1498Szrj       && call_count > profile_info->runs)
3300*38fd1498Szrj     {
3301*38fd1498Szrj       if (flag_profile_correction)
3302*38fd1498Szrj         {
3303*38fd1498Szrj           if (dump_file)
3304*38fd1498Szrj             fprintf (dump_file,
3305*38fd1498Szrj 		     "Missing counts for called function %s\n",
3306*38fd1498Szrj 		     node->dump_name ());
3307*38fd1498Szrj         }
3308*38fd1498Szrj       else
3309*38fd1498Szrj 	warning (0, "Missing counts for called function %s",
3310*38fd1498Szrj 		 node->dump_name ());
3311*38fd1498Szrj     }
3312*38fd1498Szrj 
3313*38fd1498Szrj   basic_block bb;
3314*38fd1498Szrj   if (opt_for_fn (node->decl, flag_guess_branch_prob))
3315*38fd1498Szrj     {
3316*38fd1498Szrj       bool clear_zeros
3317*38fd1498Szrj 	 = !ENTRY_BLOCK_PTR_FOR_FN (fn)->count.nonzero_p ();
3318*38fd1498Szrj       FOR_ALL_BB_FN (bb, fn)
3319*38fd1498Szrj 	if (clear_zeros || !(bb->count == profile_count::zero ()))
3320*38fd1498Szrj 	  bb->count = bb->count.guessed_local ();
3321*38fd1498Szrj       fn->cfg->count_max = fn->cfg->count_max.guessed_local ();
3322*38fd1498Szrj     }
3323*38fd1498Szrj   else
3324*38fd1498Szrj     {
3325*38fd1498Szrj       FOR_ALL_BB_FN (bb, fn)
3326*38fd1498Szrj 	bb->count = profile_count::uninitialized ();
3327*38fd1498Szrj       fn->cfg->count_max = profile_count::uninitialized ();
3328*38fd1498Szrj     }
3329*38fd1498Szrj 
3330*38fd1498Szrj   struct cgraph_edge *e;
3331*38fd1498Szrj   for (e = node->callees; e; e = e->next_callee)
3332*38fd1498Szrj     e->count = gimple_bb (e->call_stmt)->count;
3333*38fd1498Szrj   for (e = node->indirect_calls; e; e = e->next_callee)
3334*38fd1498Szrj     e->count = gimple_bb (e->call_stmt)->count;
3335*38fd1498Szrj   node->count = ENTRY_BLOCK_PTR_FOR_FN (fn)->count;
3336*38fd1498Szrj 
3337*38fd1498Szrj   profile_status_for_fn (fn)
3338*38fd1498Szrj       = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
3339*38fd1498Szrj   node->frequency
3340*38fd1498Szrj       = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
3341*38fd1498Szrj }
3342*38fd1498Szrj 
3343*38fd1498Szrj /* In the case of COMDAT routines, multiple object files will contain the same
3344*38fd1498Szrj    function and the linker will select one for the binary. In that case
3345*38fd1498Szrj    all the other copies from the profile instrument binary will be missing
3346*38fd1498Szrj    profile counts. Look for cases where this happened, due to non-zero
3347*38fd1498Szrj    call counts going to 0-count functions, and drop the profile to guessed
3348*38fd1498Szrj    so that we can use the estimated probabilities and avoid optimizing only
3349*38fd1498Szrj    for size.
3350*38fd1498Szrj 
3351*38fd1498Szrj    The other case where the profile may be missing is when the routine
3352*38fd1498Szrj    is not going to be emitted to the object file, e.g. for "extern template"
3353*38fd1498Szrj    class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
3354*38fd1498Szrj    all other cases of non-zero calls to 0-count functions.  */
3355*38fd1498Szrj 
3356*38fd1498Szrj void
handle_missing_profiles(void)3357*38fd1498Szrj handle_missing_profiles (void)
3358*38fd1498Szrj {
3359*38fd1498Szrj   struct cgraph_node *node;
3360*38fd1498Szrj   int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
3361*38fd1498Szrj   auto_vec<struct cgraph_node *, 64> worklist;
3362*38fd1498Szrj 
3363*38fd1498Szrj   /* See if 0 count function has non-0 count callers.  In this case we
3364*38fd1498Szrj      lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
3365*38fd1498Szrj   FOR_EACH_DEFINED_FUNCTION (node)
3366*38fd1498Szrj     {
3367*38fd1498Szrj       struct cgraph_edge *e;
3368*38fd1498Szrj       profile_count call_count = profile_count::zero ();
3369*38fd1498Szrj       gcov_type max_tp_first_run = 0;
3370*38fd1498Szrj       struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
3371*38fd1498Szrj 
3372*38fd1498Szrj       if (node->count.ipa ().nonzero_p ())
3373*38fd1498Szrj         continue;
3374*38fd1498Szrj       for (e = node->callers; e; e = e->next_caller)
3375*38fd1498Szrj 	if (e->count.ipa ().initialized_p () && e->count.ipa () > 0)
3376*38fd1498Szrj 	  {
3377*38fd1498Szrj             call_count = call_count + e->count.ipa ();
3378*38fd1498Szrj 
3379*38fd1498Szrj 	    if (e->caller->tp_first_run > max_tp_first_run)
3380*38fd1498Szrj 	      max_tp_first_run = e->caller->tp_first_run;
3381*38fd1498Szrj 	  }
3382*38fd1498Szrj 
3383*38fd1498Szrj       /* If time profile is missing, let assign the maximum that comes from
3384*38fd1498Szrj 	 caller functions.  */
3385*38fd1498Szrj       if (!node->tp_first_run && max_tp_first_run)
3386*38fd1498Szrj 	node->tp_first_run = max_tp_first_run + 1;
3387*38fd1498Szrj 
3388*38fd1498Szrj       if (call_count > 0
3389*38fd1498Szrj           && fn && fn->cfg
3390*38fd1498Szrj           && (call_count.apply_scale (unlikely_count_fraction, 1)
3391*38fd1498Szrj 	      >= profile_info->runs))
3392*38fd1498Szrj         {
3393*38fd1498Szrj           drop_profile (node, call_count);
3394*38fd1498Szrj           worklist.safe_push (node);
3395*38fd1498Szrj         }
3396*38fd1498Szrj     }
3397*38fd1498Szrj 
3398*38fd1498Szrj   /* Propagate the profile dropping to other 0-count COMDATs that are
3399*38fd1498Szrj      potentially called by COMDATs we already dropped the profile on.  */
3400*38fd1498Szrj   while (worklist.length () > 0)
3401*38fd1498Szrj     {
3402*38fd1498Szrj       struct cgraph_edge *e;
3403*38fd1498Szrj 
3404*38fd1498Szrj       node = worklist.pop ();
3405*38fd1498Szrj       for (e = node->callees; e; e = e->next_caller)
3406*38fd1498Szrj         {
3407*38fd1498Szrj           struct cgraph_node *callee = e->callee;
3408*38fd1498Szrj           struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
3409*38fd1498Szrj 
3410*38fd1498Szrj           if (!(e->count.ipa () == profile_count::zero ())
3411*38fd1498Szrj 	      && callee->count.ipa ().nonzero_p ())
3412*38fd1498Szrj             continue;
3413*38fd1498Szrj           if ((DECL_COMDAT (callee->decl) || DECL_EXTERNAL (callee->decl))
3414*38fd1498Szrj 	      && fn && fn->cfg
3415*38fd1498Szrj               && profile_status_for_fn (fn) == PROFILE_READ)
3416*38fd1498Szrj             {
3417*38fd1498Szrj               drop_profile (node, profile_count::zero ());
3418*38fd1498Szrj               worklist.safe_push (callee);
3419*38fd1498Szrj             }
3420*38fd1498Szrj         }
3421*38fd1498Szrj     }
3422*38fd1498Szrj }
3423*38fd1498Szrj 
3424*38fd1498Szrj /* Convert counts measured by profile driven feedback to frequencies.
3425*38fd1498Szrj    Return nonzero iff there was any nonzero execution count.  */
3426*38fd1498Szrj 
3427*38fd1498Szrj bool
update_max_bb_count(void)3428*38fd1498Szrj update_max_bb_count (void)
3429*38fd1498Szrj {
3430*38fd1498Szrj   profile_count true_count_max = profile_count::uninitialized ();
3431*38fd1498Szrj   basic_block bb;
3432*38fd1498Szrj 
3433*38fd1498Szrj   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3434*38fd1498Szrj     true_count_max = true_count_max.max (bb->count);
3435*38fd1498Szrj 
3436*38fd1498Szrj   cfun->cfg->count_max = true_count_max;
3437*38fd1498Szrj 
3438*38fd1498Szrj   return true_count_max.ipa ().nonzero_p ();
3439*38fd1498Szrj }
3440*38fd1498Szrj 
3441*38fd1498Szrj /* Return true if function is likely to be expensive, so there is no point to
3442*38fd1498Szrj    optimize performance of prologue, epilogue or do inlining at the expense
3443*38fd1498Szrj    of code size growth.  THRESHOLD is the limit of number of instructions
3444*38fd1498Szrj    function can execute at average to be still considered not expensive.  */
3445*38fd1498Szrj 
3446*38fd1498Szrj bool
expensive_function_p(int threshold)3447*38fd1498Szrj expensive_function_p (int threshold)
3448*38fd1498Szrj {
3449*38fd1498Szrj   basic_block bb;
3450*38fd1498Szrj 
3451*38fd1498Szrj   /* If profile was scaled in a way entry block has count 0, then the function
3452*38fd1498Szrj      is deifnitly taking a lot of time.  */
3453*38fd1498Szrj   if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.nonzero_p ())
3454*38fd1498Szrj     return true;
3455*38fd1498Szrj 
3456*38fd1498Szrj   profile_count limit = ENTRY_BLOCK_PTR_FOR_FN
3457*38fd1498Szrj 			   (cfun)->count.apply_scale (threshold, 1);
3458*38fd1498Szrj   profile_count sum = profile_count::zero ();
3459*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
3460*38fd1498Szrj     {
3461*38fd1498Szrj       rtx_insn *insn;
3462*38fd1498Szrj 
3463*38fd1498Szrj       if (!bb->count.initialized_p ())
3464*38fd1498Szrj 	{
3465*38fd1498Szrj 	  if (dump_file)
3466*38fd1498Szrj 	    fprintf (dump_file, "Function is considered expensive because"
3467*38fd1498Szrj 		     " count of bb %i is not initialized\n", bb->index);
3468*38fd1498Szrj 	  return true;
3469*38fd1498Szrj 	}
3470*38fd1498Szrj 
3471*38fd1498Szrj       FOR_BB_INSNS (bb, insn)
3472*38fd1498Szrj 	if (active_insn_p (insn))
3473*38fd1498Szrj 	  {
3474*38fd1498Szrj 	    sum += bb->count;
3475*38fd1498Szrj 	    if (sum > limit)
3476*38fd1498Szrj 	      return true;
3477*38fd1498Szrj 	}
3478*38fd1498Szrj     }
3479*38fd1498Szrj 
3480*38fd1498Szrj   return false;
3481*38fd1498Szrj }
3482*38fd1498Szrj 
3483*38fd1498Szrj /* All basic blocks that are reachable only from unlikely basic blocks are
3484*38fd1498Szrj    unlikely.  */
3485*38fd1498Szrj 
3486*38fd1498Szrj void
propagate_unlikely_bbs_forward(void)3487*38fd1498Szrj propagate_unlikely_bbs_forward (void)
3488*38fd1498Szrj {
3489*38fd1498Szrj   auto_vec<basic_block, 64> worklist;
3490*38fd1498Szrj   basic_block bb;
3491*38fd1498Szrj   edge_iterator ei;
3492*38fd1498Szrj   edge e;
3493*38fd1498Szrj 
3494*38fd1498Szrj   if (!(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()))
3495*38fd1498Szrj     {
3496*38fd1498Szrj       ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = (void *)(size_t) 1;
3497*38fd1498Szrj       worklist.safe_push (ENTRY_BLOCK_PTR_FOR_FN (cfun));
3498*38fd1498Szrj 
3499*38fd1498Szrj       while (worklist.length () > 0)
3500*38fd1498Szrj 	{
3501*38fd1498Szrj 	  bb = worklist.pop ();
3502*38fd1498Szrj 	  FOR_EACH_EDGE (e, ei, bb->succs)
3503*38fd1498Szrj 	    if (!(e->count () == profile_count::zero ())
3504*38fd1498Szrj 		&& !(e->dest->count == profile_count::zero ())
3505*38fd1498Szrj 		&& !e->dest->aux)
3506*38fd1498Szrj 	      {
3507*38fd1498Szrj 		e->dest->aux = (void *)(size_t) 1;
3508*38fd1498Szrj 		worklist.safe_push (e->dest);
3509*38fd1498Szrj 	      }
3510*38fd1498Szrj 	}
3511*38fd1498Szrj     }
3512*38fd1498Szrj 
3513*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
3514*38fd1498Szrj     {
3515*38fd1498Szrj       if (!bb->aux)
3516*38fd1498Szrj 	{
3517*38fd1498Szrj 	  if (!(bb->count == profile_count::zero ())
3518*38fd1498Szrj 	      && (dump_file && (dump_flags & TDF_DETAILS)))
3519*38fd1498Szrj 	    fprintf (dump_file,
3520*38fd1498Szrj 		     "Basic block %i is marked unlikely by forward prop\n",
3521*38fd1498Szrj 		     bb->index);
3522*38fd1498Szrj 	  bb->count = profile_count::zero ();
3523*38fd1498Szrj 	}
3524*38fd1498Szrj       else
3525*38fd1498Szrj         bb->aux = NULL;
3526*38fd1498Szrj     }
3527*38fd1498Szrj }
3528*38fd1498Szrj 
3529*38fd1498Szrj /* Determine basic blocks/edges that are known to be unlikely executed and set
3530*38fd1498Szrj    their counters to zero.
3531*38fd1498Szrj    This is done with first identifying obviously unlikely BBs/edges and then
3532*38fd1498Szrj    propagating in both directions.  */
3533*38fd1498Szrj 
3534*38fd1498Szrj static void
determine_unlikely_bbs()3535*38fd1498Szrj determine_unlikely_bbs ()
3536*38fd1498Szrj {
3537*38fd1498Szrj   basic_block bb;
3538*38fd1498Szrj   auto_vec<basic_block, 64> worklist;
3539*38fd1498Szrj   edge_iterator ei;
3540*38fd1498Szrj   edge e;
3541*38fd1498Szrj 
3542*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
3543*38fd1498Szrj     {
3544*38fd1498Szrj       if (!(bb->count == profile_count::zero ())
3545*38fd1498Szrj 	  && unlikely_executed_bb_p (bb))
3546*38fd1498Szrj 	{
3547*38fd1498Szrj           if (dump_file && (dump_flags & TDF_DETAILS))
3548*38fd1498Szrj 	    fprintf (dump_file, "Basic block %i is locally unlikely\n",
3549*38fd1498Szrj 		     bb->index);
3550*38fd1498Szrj 	  bb->count = profile_count::zero ();
3551*38fd1498Szrj 	}
3552*38fd1498Szrj 
3553*38fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->succs)
3554*38fd1498Szrj 	if (!(e->probability == profile_probability::never ())
3555*38fd1498Szrj 	    && unlikely_executed_edge_p (e))
3556*38fd1498Szrj 	  {
3557*38fd1498Szrj             if (dump_file && (dump_flags & TDF_DETAILS))
3558*38fd1498Szrj 	      fprintf (dump_file, "Edge %i->%i is locally unlikely\n",
3559*38fd1498Szrj 		       bb->index, e->dest->index);
3560*38fd1498Szrj 	    e->probability = profile_probability::never ();
3561*38fd1498Szrj 	  }
3562*38fd1498Szrj 
3563*38fd1498Szrj       gcc_checking_assert (!bb->aux);
3564*38fd1498Szrj     }
3565*38fd1498Szrj   propagate_unlikely_bbs_forward ();
3566*38fd1498Szrj 
3567*38fd1498Szrj   auto_vec<int, 64> nsuccs;
3568*38fd1498Szrj   nsuccs.safe_grow_cleared (last_basic_block_for_fn (cfun));
3569*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
3570*38fd1498Szrj     if (!(bb->count == profile_count::zero ())
3571*38fd1498Szrj 	&& bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
3572*38fd1498Szrj       {
3573*38fd1498Szrj 	nsuccs[bb->index] = 0;
3574*38fd1498Szrj         FOR_EACH_EDGE (e, ei, bb->succs)
3575*38fd1498Szrj 	  if (!(e->probability == profile_probability::never ())
3576*38fd1498Szrj 	      && !(e->dest->count == profile_count::zero ()))
3577*38fd1498Szrj 	    nsuccs[bb->index]++;
3578*38fd1498Szrj 	if (!nsuccs[bb->index])
3579*38fd1498Szrj 	  worklist.safe_push (bb);
3580*38fd1498Szrj       }
3581*38fd1498Szrj   while (worklist.length () > 0)
3582*38fd1498Szrj     {
3583*38fd1498Szrj       bb = worklist.pop ();
3584*38fd1498Szrj       if (bb->count == profile_count::zero ())
3585*38fd1498Szrj 	continue;
3586*38fd1498Szrj       if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
3587*38fd1498Szrj 	{
3588*38fd1498Szrj 	  bool found = false;
3589*38fd1498Szrj           for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
3590*38fd1498Szrj                !gsi_end_p (gsi); gsi_next (&gsi))
3591*38fd1498Szrj 	    if (stmt_can_terminate_bb_p (gsi_stmt (gsi))
3592*38fd1498Szrj 		/* stmt_can_terminate_bb_p special cases noreturns because it
3593*38fd1498Szrj 		   assumes that fake edges are created.  We want to know that
3594*38fd1498Szrj 		   noreturn alone does not imply BB to be unlikely.  */
3595*38fd1498Szrj 		|| (is_gimple_call (gsi_stmt (gsi))
3596*38fd1498Szrj 		    && (gimple_call_flags (gsi_stmt (gsi)) & ECF_NORETURN)))
3597*38fd1498Szrj 	      {
3598*38fd1498Szrj 		found = true;
3599*38fd1498Szrj 		break;
3600*38fd1498Szrj 	      }
3601*38fd1498Szrj 	  if (found)
3602*38fd1498Szrj 	    continue;
3603*38fd1498Szrj 	}
3604*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
3605*38fd1498Szrj 	fprintf (dump_file,
3606*38fd1498Szrj 		 "Basic block %i is marked unlikely by backward prop\n",
3607*38fd1498Szrj 		 bb->index);
3608*38fd1498Szrj       bb->count = profile_count::zero ();
3609*38fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->preds)
3610*38fd1498Szrj 	if (!(e->probability == profile_probability::never ()))
3611*38fd1498Szrj 	  {
3612*38fd1498Szrj 	    if (!(e->src->count == profile_count::zero ()))
3613*38fd1498Szrj 	      {
3614*38fd1498Szrj 		gcc_checking_assert (nsuccs[e->src->index] > 0);
3615*38fd1498Szrj 	        nsuccs[e->src->index]--;
3616*38fd1498Szrj 	        if (!nsuccs[e->src->index])
3617*38fd1498Szrj 		  worklist.safe_push (e->src);
3618*38fd1498Szrj 	      }
3619*38fd1498Szrj 	  }
3620*38fd1498Szrj     }
3621*38fd1498Szrj   /* Finally all edges from non-0 regions to 0 are unlikely.  */
3622*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
3623*38fd1498Szrj     if (!(bb->count == profile_count::zero ()))
3624*38fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->succs)
3625*38fd1498Szrj 	if (!(e->probability == profile_probability::never ())
3626*38fd1498Szrj 	    && e->dest->count == profile_count::zero ())
3627*38fd1498Szrj 	   {
3628*38fd1498Szrj 	     if (dump_file && (dump_flags & TDF_DETAILS))
3629*38fd1498Szrj 	       fprintf (dump_file, "Edge %i->%i is unlikely because "
3630*38fd1498Szrj 		 	"it enters unlikely block\n",
3631*38fd1498Szrj 			bb->index, e->dest->index);
3632*38fd1498Szrj 	     e->probability = profile_probability::never ();
3633*38fd1498Szrj 	   }
3634*38fd1498Szrj   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ())
3635*38fd1498Szrj     cgraph_node::get (current_function_decl)->count = profile_count::zero ();
3636*38fd1498Szrj }
3637*38fd1498Szrj 
3638*38fd1498Szrj /* Estimate and propagate basic block frequencies using the given branch
3639*38fd1498Szrj    probabilities.  If FORCE is true, the frequencies are used to estimate
3640*38fd1498Szrj    the counts even when there are already non-zero profile counts.  */
3641*38fd1498Szrj 
3642*38fd1498Szrj void
estimate_bb_frequencies(bool force)3643*38fd1498Szrj estimate_bb_frequencies (bool force)
3644*38fd1498Szrj {
3645*38fd1498Szrj   basic_block bb;
3646*38fd1498Szrj   sreal freq_max;
3647*38fd1498Szrj 
3648*38fd1498Szrj   determine_unlikely_bbs ();
3649*38fd1498Szrj 
3650*38fd1498Szrj   if (force || profile_status_for_fn (cfun) != PROFILE_READ
3651*38fd1498Szrj       || !update_max_bb_count ())
3652*38fd1498Szrj     {
3653*38fd1498Szrj       static int real_values_initialized = 0;
3654*38fd1498Szrj 
3655*38fd1498Szrj       if (!real_values_initialized)
3656*38fd1498Szrj         {
3657*38fd1498Szrj 	  real_values_initialized = 1;
3658*38fd1498Szrj 	  real_br_prob_base = REG_BR_PROB_BASE;
3659*38fd1498Szrj 	  /* Scaling frequencies up to maximal profile count may result in
3660*38fd1498Szrj 	     frequent overflows especially when inlining loops.
3661*38fd1498Szrj 	     Small scalling results in unnecesary precision loss.  Stay in
3662*38fd1498Szrj 	     the half of the (exponential) range.  */
3663*38fd1498Szrj 	  real_bb_freq_max = (uint64_t)1 << (profile_count::n_bits / 2);
3664*38fd1498Szrj 	  real_one_half = sreal (1, -1);
3665*38fd1498Szrj 	  real_inv_br_prob_base = sreal (1) / real_br_prob_base;
3666*38fd1498Szrj 	  real_almost_one = sreal (1) - real_inv_br_prob_base;
3667*38fd1498Szrj 	}
3668*38fd1498Szrj 
3669*38fd1498Szrj       mark_dfs_back_edges ();
3670*38fd1498Szrj 
3671*38fd1498Szrj       single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
3672*38fd1498Szrj 	 profile_probability::always ();
3673*38fd1498Szrj 
3674*38fd1498Szrj       /* Set up block info for each basic block.  */
3675*38fd1498Szrj       alloc_aux_for_blocks (sizeof (block_info));
3676*38fd1498Szrj       alloc_aux_for_edges (sizeof (edge_prob_info));
3677*38fd1498Szrj       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3678*38fd1498Szrj 	{
3679*38fd1498Szrj 	  edge e;
3680*38fd1498Szrj 	  edge_iterator ei;
3681*38fd1498Szrj 
3682*38fd1498Szrj 	  FOR_EACH_EDGE (e, ei, bb->succs)
3683*38fd1498Szrj 	    {
3684*38fd1498Szrj 	      /* FIXME: Graphite is producing edges with no profile. Once
3685*38fd1498Szrj 		 this is fixed, drop this.  */
3686*38fd1498Szrj 	      if (e->probability.initialized_p ())
3687*38fd1498Szrj 	        EDGE_INFO (e)->back_edge_prob
3688*38fd1498Szrj 		   = e->probability.to_reg_br_prob_base ();
3689*38fd1498Szrj 	      else
3690*38fd1498Szrj 		EDGE_INFO (e)->back_edge_prob = REG_BR_PROB_BASE / 2;
3691*38fd1498Szrj 	      EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
3692*38fd1498Szrj 	    }
3693*38fd1498Szrj 	}
3694*38fd1498Szrj 
3695*38fd1498Szrj       /* First compute frequencies locally for each loop from innermost
3696*38fd1498Szrj          to outermost to examine frequencies for back edges.  */
3697*38fd1498Szrj       estimate_loops ();
3698*38fd1498Szrj 
3699*38fd1498Szrj       freq_max = 0;
3700*38fd1498Szrj       FOR_EACH_BB_FN (bb, cfun)
3701*38fd1498Szrj 	if (freq_max < BLOCK_INFO (bb)->frequency)
3702*38fd1498Szrj 	  freq_max = BLOCK_INFO (bb)->frequency;
3703*38fd1498Szrj 
3704*38fd1498Szrj       freq_max = real_bb_freq_max / freq_max;
3705*38fd1498Szrj       if (freq_max < 16)
3706*38fd1498Szrj 	freq_max = 16;
3707*38fd1498Szrj       profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ();
3708*38fd1498Szrj       cfun->cfg->count_max = profile_count::uninitialized ();
3709*38fd1498Szrj       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3710*38fd1498Szrj 	{
3711*38fd1498Szrj 	  sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
3712*38fd1498Szrj 	  profile_count count = profile_count::from_gcov_type (tmp.to_int ());
3713*38fd1498Szrj 
3714*38fd1498Szrj 	  /* If we have profile feedback in which this function was never
3715*38fd1498Szrj 	     executed, then preserve this info.  */
3716*38fd1498Szrj 	  if (!(bb->count == profile_count::zero ()))
3717*38fd1498Szrj 	    bb->count = count.guessed_local ().combine_with_ipa_count (ipa_count);
3718*38fd1498Szrj           cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
3719*38fd1498Szrj 	}
3720*38fd1498Szrj 
3721*38fd1498Szrj       free_aux_for_blocks ();
3722*38fd1498Szrj       free_aux_for_edges ();
3723*38fd1498Szrj     }
3724*38fd1498Szrj   compute_function_frequency ();
3725*38fd1498Szrj }
3726*38fd1498Szrj 
3727*38fd1498Szrj /* Decide whether function is hot, cold or unlikely executed.  */
3728*38fd1498Szrj void
compute_function_frequency(void)3729*38fd1498Szrj compute_function_frequency (void)
3730*38fd1498Szrj {
3731*38fd1498Szrj   basic_block bb;
3732*38fd1498Szrj   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3733*38fd1498Szrj 
3734*38fd1498Szrj   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3735*38fd1498Szrj       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
3736*38fd1498Szrj     node->only_called_at_startup = true;
3737*38fd1498Szrj   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
3738*38fd1498Szrj     node->only_called_at_exit = true;
3739*38fd1498Szrj 
3740*38fd1498Szrj   if (profile_status_for_fn (cfun) != PROFILE_READ)
3741*38fd1498Szrj     {
3742*38fd1498Szrj       int flags = flags_from_decl_or_type (current_function_decl);
3743*38fd1498Szrj       if ((ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p ()
3744*38fd1498Szrj 	   && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa() == profile_count::zero ())
3745*38fd1498Szrj 	  || lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
3746*38fd1498Szrj 	     != NULL)
3747*38fd1498Szrj 	{
3748*38fd1498Szrj           node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3749*38fd1498Szrj 	  warn_function_cold (current_function_decl);
3750*38fd1498Szrj 	}
3751*38fd1498Szrj       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3752*38fd1498Szrj 	       != NULL)
3753*38fd1498Szrj         node->frequency = NODE_FREQUENCY_HOT;
3754*38fd1498Szrj       else if (flags & ECF_NORETURN)
3755*38fd1498Szrj         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3756*38fd1498Szrj       else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3757*38fd1498Szrj         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3758*38fd1498Szrj       else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3759*38fd1498Szrj 	       || DECL_STATIC_DESTRUCTOR (current_function_decl))
3760*38fd1498Szrj         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3761*38fd1498Szrj       return;
3762*38fd1498Szrj     }
3763*38fd1498Szrj 
3764*38fd1498Szrj   node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3765*38fd1498Szrj   warn_function_cold (current_function_decl);
3766*38fd1498Szrj   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa() == profile_count::zero ())
3767*38fd1498Szrj     return;
3768*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
3769*38fd1498Szrj     {
3770*38fd1498Szrj       if (maybe_hot_bb_p (cfun, bb))
3771*38fd1498Szrj 	{
3772*38fd1498Szrj 	  node->frequency = NODE_FREQUENCY_HOT;
3773*38fd1498Szrj 	  return;
3774*38fd1498Szrj 	}
3775*38fd1498Szrj       if (!probably_never_executed_bb_p (cfun, bb))
3776*38fd1498Szrj 	node->frequency = NODE_FREQUENCY_NORMAL;
3777*38fd1498Szrj     }
3778*38fd1498Szrj }
3779*38fd1498Szrj 
3780*38fd1498Szrj /* Build PREDICT_EXPR.  */
3781*38fd1498Szrj tree
build_predict_expr(enum br_predictor predictor,enum prediction taken)3782*38fd1498Szrj build_predict_expr (enum br_predictor predictor, enum prediction taken)
3783*38fd1498Szrj {
3784*38fd1498Szrj   tree t = build1 (PREDICT_EXPR, void_type_node,
3785*38fd1498Szrj 		   build_int_cst (integer_type_node, predictor));
3786*38fd1498Szrj   SET_PREDICT_EXPR_OUTCOME (t, taken);
3787*38fd1498Szrj   return t;
3788*38fd1498Szrj }
3789*38fd1498Szrj 
3790*38fd1498Szrj const char *
predictor_name(enum br_predictor predictor)3791*38fd1498Szrj predictor_name (enum br_predictor predictor)
3792*38fd1498Szrj {
3793*38fd1498Szrj   return predictor_info[predictor].name;
3794*38fd1498Szrj }
3795*38fd1498Szrj 
3796*38fd1498Szrj /* Predict branch probabilities and estimate profile of the tree CFG. */
3797*38fd1498Szrj 
3798*38fd1498Szrj namespace {
3799*38fd1498Szrj 
3800*38fd1498Szrj const pass_data pass_data_profile =
3801*38fd1498Szrj {
3802*38fd1498Szrj   GIMPLE_PASS, /* type */
3803*38fd1498Szrj   "profile_estimate", /* name */
3804*38fd1498Szrj   OPTGROUP_NONE, /* optinfo_flags */
3805*38fd1498Szrj   TV_BRANCH_PROB, /* tv_id */
3806*38fd1498Szrj   PROP_cfg, /* properties_required */
3807*38fd1498Szrj   0, /* properties_provided */
3808*38fd1498Szrj   0, /* properties_destroyed */
3809*38fd1498Szrj   0, /* todo_flags_start */
3810*38fd1498Szrj   0, /* todo_flags_finish */
3811*38fd1498Szrj };
3812*38fd1498Szrj 
3813*38fd1498Szrj class pass_profile : public gimple_opt_pass
3814*38fd1498Szrj {
3815*38fd1498Szrj public:
pass_profile(gcc::context * ctxt)3816*38fd1498Szrj   pass_profile (gcc::context *ctxt)
3817*38fd1498Szrj     : gimple_opt_pass (pass_data_profile, ctxt)
3818*38fd1498Szrj   {}
3819*38fd1498Szrj 
3820*38fd1498Szrj   /* opt_pass methods: */
gate(function *)3821*38fd1498Szrj   virtual bool gate (function *) { return flag_guess_branch_prob; }
3822*38fd1498Szrj   virtual unsigned int execute (function *);
3823*38fd1498Szrj 
3824*38fd1498Szrj }; // class pass_profile
3825*38fd1498Szrj 
3826*38fd1498Szrj unsigned int
execute(function * fun)3827*38fd1498Szrj pass_profile::execute (function *fun)
3828*38fd1498Szrj {
3829*38fd1498Szrj   unsigned nb_loops;
3830*38fd1498Szrj 
3831*38fd1498Szrj   if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3832*38fd1498Szrj     return 0;
3833*38fd1498Szrj 
3834*38fd1498Szrj   loop_optimizer_init (LOOPS_NORMAL);
3835*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
3836*38fd1498Szrj     flow_loops_dump (dump_file, NULL, 0);
3837*38fd1498Szrj 
3838*38fd1498Szrj   mark_irreducible_loops ();
3839*38fd1498Szrj 
3840*38fd1498Szrj   nb_loops = number_of_loops (fun);
3841*38fd1498Szrj   if (nb_loops > 1)
3842*38fd1498Szrj     scev_initialize ();
3843*38fd1498Szrj 
3844*38fd1498Szrj   tree_estimate_probability (false);
3845*38fd1498Szrj 
3846*38fd1498Szrj   if (nb_loops > 1)
3847*38fd1498Szrj     scev_finalize ();
3848*38fd1498Szrj 
3849*38fd1498Szrj   loop_optimizer_finalize ();
3850*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
3851*38fd1498Szrj     gimple_dump_cfg (dump_file, dump_flags);
3852*38fd1498Szrj  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3853*38fd1498Szrj     profile_status_for_fn (fun) = PROFILE_GUESSED;
3854*38fd1498Szrj  if (dump_file && (dump_flags & TDF_DETAILS))
3855*38fd1498Szrj    {
3856*38fd1498Szrj      struct loop *loop;
3857*38fd1498Szrj      FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
3858*38fd1498Szrj        if (loop->header->count.initialized_p ())
3859*38fd1498Szrj          fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n",
3860*38fd1498Szrj        	   loop->num,
3861*38fd1498Szrj        	   (int)expected_loop_iterations_unbounded (loop));
3862*38fd1498Szrj    }
3863*38fd1498Szrj   return 0;
3864*38fd1498Szrj }
3865*38fd1498Szrj 
3866*38fd1498Szrj } // anon namespace
3867*38fd1498Szrj 
3868*38fd1498Szrj gimple_opt_pass *
make_pass_profile(gcc::context * ctxt)3869*38fd1498Szrj make_pass_profile (gcc::context *ctxt)
3870*38fd1498Szrj {
3871*38fd1498Szrj   return new pass_profile (ctxt);
3872*38fd1498Szrj }
3873*38fd1498Szrj 
3874*38fd1498Szrj namespace {
3875*38fd1498Szrj 
3876*38fd1498Szrj const pass_data pass_data_strip_predict_hints =
3877*38fd1498Szrj {
3878*38fd1498Szrj   GIMPLE_PASS, /* type */
3879*38fd1498Szrj   "*strip_predict_hints", /* name */
3880*38fd1498Szrj   OPTGROUP_NONE, /* optinfo_flags */
3881*38fd1498Szrj   TV_BRANCH_PROB, /* tv_id */
3882*38fd1498Szrj   PROP_cfg, /* properties_required */
3883*38fd1498Szrj   0, /* properties_provided */
3884*38fd1498Szrj   0, /* properties_destroyed */
3885*38fd1498Szrj   0, /* todo_flags_start */
3886*38fd1498Szrj   0, /* todo_flags_finish */
3887*38fd1498Szrj };
3888*38fd1498Szrj 
3889*38fd1498Szrj class pass_strip_predict_hints : public gimple_opt_pass
3890*38fd1498Szrj {
3891*38fd1498Szrj public:
pass_strip_predict_hints(gcc::context * ctxt)3892*38fd1498Szrj   pass_strip_predict_hints (gcc::context *ctxt)
3893*38fd1498Szrj     : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3894*38fd1498Szrj   {}
3895*38fd1498Szrj 
3896*38fd1498Szrj   /* opt_pass methods: */
clone()3897*38fd1498Szrj   opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3898*38fd1498Szrj   virtual unsigned int execute (function *);
3899*38fd1498Szrj 
3900*38fd1498Szrj }; // class pass_strip_predict_hints
3901*38fd1498Szrj 
3902*38fd1498Szrj /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3903*38fd1498Szrj    we no longer need.  */
3904*38fd1498Szrj unsigned int
execute(function * fun)3905*38fd1498Szrj pass_strip_predict_hints::execute (function *fun)
3906*38fd1498Szrj {
3907*38fd1498Szrj   basic_block bb;
3908*38fd1498Szrj   gimple *ass_stmt;
3909*38fd1498Szrj   tree var;
3910*38fd1498Szrj   bool changed = false;
3911*38fd1498Szrj 
3912*38fd1498Szrj   FOR_EACH_BB_FN (bb, fun)
3913*38fd1498Szrj     {
3914*38fd1498Szrj       gimple_stmt_iterator bi;
3915*38fd1498Szrj       for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3916*38fd1498Szrj 	{
3917*38fd1498Szrj 	  gimple *stmt = gsi_stmt (bi);
3918*38fd1498Szrj 
3919*38fd1498Szrj 	  if (gimple_code (stmt) == GIMPLE_PREDICT)
3920*38fd1498Szrj 	    {
3921*38fd1498Szrj 	      gsi_remove (&bi, true);
3922*38fd1498Szrj 	      changed = true;
3923*38fd1498Szrj 	      continue;
3924*38fd1498Szrj 	    }
3925*38fd1498Szrj 	  else if (is_gimple_call (stmt))
3926*38fd1498Szrj 	    {
3927*38fd1498Szrj 	      tree fndecl = gimple_call_fndecl (stmt);
3928*38fd1498Szrj 
3929*38fd1498Szrj 	      if ((fndecl
3930*38fd1498Szrj 		   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3931*38fd1498Szrj 		   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3932*38fd1498Szrj 		   && gimple_call_num_args (stmt) == 2)
3933*38fd1498Szrj 		  || (gimple_call_internal_p (stmt)
3934*38fd1498Szrj 		      && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3935*38fd1498Szrj 		{
3936*38fd1498Szrj 		  var = gimple_call_lhs (stmt);
3937*38fd1498Szrj 	          changed = true;
3938*38fd1498Szrj 		  if (var)
3939*38fd1498Szrj 		    {
3940*38fd1498Szrj 		      ass_stmt
3941*38fd1498Szrj 			= gimple_build_assign (var, gimple_call_arg (stmt, 0));
3942*38fd1498Szrj 		      gsi_replace (&bi, ass_stmt, true);
3943*38fd1498Szrj 		    }
3944*38fd1498Szrj 		  else
3945*38fd1498Szrj 		    {
3946*38fd1498Szrj 		      gsi_remove (&bi, true);
3947*38fd1498Szrj 		      continue;
3948*38fd1498Szrj 		    }
3949*38fd1498Szrj 		}
3950*38fd1498Szrj 	    }
3951*38fd1498Szrj 	  gsi_next (&bi);
3952*38fd1498Szrj 	}
3953*38fd1498Szrj     }
3954*38fd1498Szrj   return changed ? TODO_cleanup_cfg : 0;
3955*38fd1498Szrj }
3956*38fd1498Szrj 
3957*38fd1498Szrj } // anon namespace
3958*38fd1498Szrj 
3959*38fd1498Szrj gimple_opt_pass *
make_pass_strip_predict_hints(gcc::context * ctxt)3960*38fd1498Szrj make_pass_strip_predict_hints (gcc::context *ctxt)
3961*38fd1498Szrj {
3962*38fd1498Szrj   return new pass_strip_predict_hints (ctxt);
3963*38fd1498Szrj }
3964*38fd1498Szrj 
3965*38fd1498Szrj /* Rebuild function frequencies.  Passes are in general expected to
3966*38fd1498Szrj    maintain profile by hand, however in some cases this is not possible:
3967*38fd1498Szrj    for example when inlining several functions with loops freuqencies might run
3968*38fd1498Szrj    out of scale and thus needs to be recomputed.  */
3969*38fd1498Szrj 
3970*38fd1498Szrj void
rebuild_frequencies(void)3971*38fd1498Szrj rebuild_frequencies (void)
3972*38fd1498Szrj {
3973*38fd1498Szrj   timevar_push (TV_REBUILD_FREQUENCIES);
3974*38fd1498Szrj 
3975*38fd1498Szrj   /* When the max bb count in the function is small, there is a higher
3976*38fd1498Szrj      chance that there were truncation errors in the integer scaling
3977*38fd1498Szrj      of counts by inlining and other optimizations. This could lead
3978*38fd1498Szrj      to incorrect classification of code as being cold when it isn't.
3979*38fd1498Szrj      In that case, force the estimation of bb counts/frequencies from the
3980*38fd1498Szrj      branch probabilities, rather than computing frequencies from counts,
3981*38fd1498Szrj      which may also lead to frequencies incorrectly reduced to 0. There
3982*38fd1498Szrj      is less precision in the probabilities, so we only do this for small
3983*38fd1498Szrj      max counts.  */
3984*38fd1498Szrj   cfun->cfg->count_max = profile_count::uninitialized ();
3985*38fd1498Szrj   basic_block bb;
3986*38fd1498Szrj   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3987*38fd1498Szrj     cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
3988*38fd1498Szrj 
3989*38fd1498Szrj   if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3990*38fd1498Szrj     {
3991*38fd1498Szrj       loop_optimizer_init (0);
3992*38fd1498Szrj       add_noreturn_fake_exit_edges ();
3993*38fd1498Szrj       mark_irreducible_loops ();
3994*38fd1498Szrj       connect_infinite_loops_to_exit ();
3995*38fd1498Szrj       estimate_bb_frequencies (true);
3996*38fd1498Szrj       remove_fake_exit_edges ();
3997*38fd1498Szrj       loop_optimizer_finalize ();
3998*38fd1498Szrj     }
3999*38fd1498Szrj   else if (profile_status_for_fn (cfun) == PROFILE_READ)
4000*38fd1498Szrj     update_max_bb_count ();
4001*38fd1498Szrj   else if (profile_status_for_fn (cfun) == PROFILE_ABSENT
4002*38fd1498Szrj 	   && !flag_guess_branch_prob)
4003*38fd1498Szrj     ;
4004*38fd1498Szrj   else
4005*38fd1498Szrj     gcc_unreachable ();
4006*38fd1498Szrj   timevar_pop (TV_REBUILD_FREQUENCIES);
4007*38fd1498Szrj }
4008*38fd1498Szrj 
4009*38fd1498Szrj /* Perform a dry run of the branch prediction pass and report comparsion of
4010*38fd1498Szrj    the predicted and real profile into the dump file.  */
4011*38fd1498Szrj 
4012*38fd1498Szrj void
report_predictor_hitrates(void)4013*38fd1498Szrj report_predictor_hitrates (void)
4014*38fd1498Szrj {
4015*38fd1498Szrj   unsigned nb_loops;
4016*38fd1498Szrj 
4017*38fd1498Szrj   loop_optimizer_init (LOOPS_NORMAL);
4018*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
4019*38fd1498Szrj     flow_loops_dump (dump_file, NULL, 0);
4020*38fd1498Szrj 
4021*38fd1498Szrj   mark_irreducible_loops ();
4022*38fd1498Szrj 
4023*38fd1498Szrj   nb_loops = number_of_loops (cfun);
4024*38fd1498Szrj   if (nb_loops > 1)
4025*38fd1498Szrj     scev_initialize ();
4026*38fd1498Szrj 
4027*38fd1498Szrj   tree_estimate_probability (true);
4028*38fd1498Szrj 
4029*38fd1498Szrj   if (nb_loops > 1)
4030*38fd1498Szrj     scev_finalize ();
4031*38fd1498Szrj 
4032*38fd1498Szrj   loop_optimizer_finalize ();
4033*38fd1498Szrj }
4034*38fd1498Szrj 
4035*38fd1498Szrj /* Force edge E to be cold.
4036*38fd1498Szrj    If IMPOSSIBLE is true, for edge to have count and probability 0 otherwise
4037*38fd1498Szrj    keep low probability to represent possible error in a guess.  This is used
4038*38fd1498Szrj    i.e. in case we predict loop to likely iterate given number of times but
4039*38fd1498Szrj    we are not 100% sure.
4040*38fd1498Szrj 
4041*38fd1498Szrj    This function locally updates profile without attempt to keep global
4042*38fd1498Szrj    consistency which can not be reached in full generality without full profile
4043*38fd1498Szrj    rebuild from probabilities alone.  Doing so is not necessarily a good idea
4044*38fd1498Szrj    because frequencies and counts may be more realistic then probabilities.
4045*38fd1498Szrj 
4046*38fd1498Szrj    In some cases (such as for elimination of early exits during full loop
4047*38fd1498Szrj    unrolling) the caller can ensure that profile will get consistent
4048*38fd1498Szrj    afterwards.  */
4049*38fd1498Szrj 
4050*38fd1498Szrj void
force_edge_cold(edge e,bool impossible)4051*38fd1498Szrj force_edge_cold (edge e, bool impossible)
4052*38fd1498Szrj {
4053*38fd1498Szrj   profile_count count_sum = profile_count::zero ();
4054*38fd1498Szrj   profile_probability prob_sum = profile_probability::never ();
4055*38fd1498Szrj   edge_iterator ei;
4056*38fd1498Szrj   edge e2;
4057*38fd1498Szrj   bool uninitialized_exit = false;
4058*38fd1498Szrj 
4059*38fd1498Szrj   /* When branch probability guesses are not known, then do nothing.  */
4060*38fd1498Szrj   if (!impossible && !e->count ().initialized_p ())
4061*38fd1498Szrj     return;
4062*38fd1498Szrj 
4063*38fd1498Szrj   profile_probability goal = (impossible ? profile_probability::never ()
4064*38fd1498Szrj 			      : profile_probability::very_unlikely ());
4065*38fd1498Szrj 
4066*38fd1498Szrj   /* If edge is already improbably or cold, just return.  */
4067*38fd1498Szrj   if (e->probability <= goal
4068*38fd1498Szrj       && (!impossible || e->count () == profile_count::zero ()))
4069*38fd1498Szrj     return;
4070*38fd1498Szrj   FOR_EACH_EDGE (e2, ei, e->src->succs)
4071*38fd1498Szrj     if (e2 != e)
4072*38fd1498Szrj       {
4073*38fd1498Szrj 	if (e->flags & EDGE_FAKE)
4074*38fd1498Szrj 	  continue;
4075*38fd1498Szrj 	if (e2->count ().initialized_p ())
4076*38fd1498Szrj 	  count_sum += e2->count ();
4077*38fd1498Szrj 	if (e2->probability.initialized_p ())
4078*38fd1498Szrj 	  prob_sum += e2->probability;
4079*38fd1498Szrj 	else
4080*38fd1498Szrj 	  uninitialized_exit = true;
4081*38fd1498Szrj       }
4082*38fd1498Szrj 
4083*38fd1498Szrj   /* If we are not guessing profiles but have some other edges out,
4084*38fd1498Szrj      just assume the control flow goes elsewhere.  */
4085*38fd1498Szrj   if (uninitialized_exit)
4086*38fd1498Szrj     e->probability = goal;
4087*38fd1498Szrj   /* If there are other edges out of e->src, redistribute probabilitity
4088*38fd1498Szrj      there.  */
4089*38fd1498Szrj   else if (prob_sum > profile_probability::never ())
4090*38fd1498Szrj     {
4091*38fd1498Szrj       if (!(e->probability < goal))
4092*38fd1498Szrj 	e->probability = goal;
4093*38fd1498Szrj 
4094*38fd1498Szrj       profile_probability prob_comp = prob_sum / e->probability.invert ();
4095*38fd1498Szrj 
4096*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
4097*38fd1498Szrj 	fprintf (dump_file, "Making edge %i->%i %s by redistributing "
4098*38fd1498Szrj 		 "probability to other edges.\n",
4099*38fd1498Szrj 		 e->src->index, e->dest->index,
4100*38fd1498Szrj 		 impossible ? "impossible" : "cold");
4101*38fd1498Szrj       FOR_EACH_EDGE (e2, ei, e->src->succs)
4102*38fd1498Szrj 	if (e2 != e)
4103*38fd1498Szrj 	  {
4104*38fd1498Szrj 	    e2->probability /= prob_comp;
4105*38fd1498Szrj 	  }
4106*38fd1498Szrj       if (current_ir_type () != IR_GIMPLE
4107*38fd1498Szrj 	  && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
4108*38fd1498Szrj 	update_br_prob_note (e->src);
4109*38fd1498Szrj     }
4110*38fd1498Szrj   /* If all edges out of e->src are unlikely, the basic block itself
4111*38fd1498Szrj      is unlikely.  */
4112*38fd1498Szrj   else
4113*38fd1498Szrj     {
4114*38fd1498Szrj       if (prob_sum == profile_probability::never ())
4115*38fd1498Szrj         e->probability = profile_probability::always ();
4116*38fd1498Szrj       else
4117*38fd1498Szrj 	{
4118*38fd1498Szrj 	  if (impossible)
4119*38fd1498Szrj 	    e->probability = profile_probability::never ();
4120*38fd1498Szrj 	  /* If BB has some edges out that are not impossible, we can not
4121*38fd1498Szrj 	     assume that BB itself is.  */
4122*38fd1498Szrj 	  impossible = false;
4123*38fd1498Szrj 	}
4124*38fd1498Szrj       if (current_ir_type () != IR_GIMPLE
4125*38fd1498Szrj 	  && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
4126*38fd1498Szrj 	update_br_prob_note (e->src);
4127*38fd1498Szrj       if (e->src->count == profile_count::zero ())
4128*38fd1498Szrj 	return;
4129*38fd1498Szrj       if (count_sum == profile_count::zero () && impossible)
4130*38fd1498Szrj 	{
4131*38fd1498Szrj 	  bool found = false;
4132*38fd1498Szrj 	  if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
4133*38fd1498Szrj 	    ;
4134*38fd1498Szrj 	  else if (current_ir_type () == IR_GIMPLE)
4135*38fd1498Szrj 	    for (gimple_stmt_iterator gsi = gsi_start_bb (e->src);
4136*38fd1498Szrj 	         !gsi_end_p (gsi); gsi_next (&gsi))
4137*38fd1498Szrj 	      {
4138*38fd1498Szrj 	        if (stmt_can_terminate_bb_p (gsi_stmt (gsi)))
4139*38fd1498Szrj 		  {
4140*38fd1498Szrj 		    found = true;
4141*38fd1498Szrj 	            break;
4142*38fd1498Szrj 		  }
4143*38fd1498Szrj 	      }
4144*38fd1498Szrj 	  /* FIXME: Implement RTL path.  */
4145*38fd1498Szrj 	  else
4146*38fd1498Szrj 	    found = true;
4147*38fd1498Szrj 	  if (!found)
4148*38fd1498Szrj 	    {
4149*38fd1498Szrj 	      if (dump_file && (dump_flags & TDF_DETAILS))
4150*38fd1498Szrj 		fprintf (dump_file,
4151*38fd1498Szrj 			 "Making bb %i impossible and dropping count to 0.\n",
4152*38fd1498Szrj 			 e->src->index);
4153*38fd1498Szrj 	      e->src->count = profile_count::zero ();
4154*38fd1498Szrj 	      FOR_EACH_EDGE (e2, ei, e->src->preds)
4155*38fd1498Szrj 		force_edge_cold (e2, impossible);
4156*38fd1498Szrj 	      return;
4157*38fd1498Szrj 	    }
4158*38fd1498Szrj 	}
4159*38fd1498Szrj 
4160*38fd1498Szrj       /* If we did not adjusting, the source basic block has no likely edeges
4161*38fd1498Szrj  	 leaving other direction. In that case force that bb cold, too.
4162*38fd1498Szrj 	 This in general is difficult task to do, but handle special case when
4163*38fd1498Szrj 	 BB has only one predecestor.  This is common case when we are updating
4164*38fd1498Szrj 	 after loop transforms.  */
4165*38fd1498Szrj       if (!(prob_sum > profile_probability::never ())
4166*38fd1498Szrj 	  && count_sum == profile_count::zero ()
4167*38fd1498Szrj 	  && single_pred_p (e->src) && e->src->count.to_frequency (cfun)
4168*38fd1498Szrj 	     > (impossible ? 0 : 1))
4169*38fd1498Szrj 	{
4170*38fd1498Szrj 	  int old_frequency = e->src->count.to_frequency (cfun);
4171*38fd1498Szrj 	  if (dump_file && (dump_flags & TDF_DETAILS))
4172*38fd1498Szrj 	    fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
4173*38fd1498Szrj 		     impossible ? "impossible" : "cold");
4174*38fd1498Szrj 	  int new_frequency = MIN (e->src->count.to_frequency (cfun),
4175*38fd1498Szrj 				   impossible ? 0 : 1);
4176*38fd1498Szrj 	  if (impossible)
4177*38fd1498Szrj 	    e->src->count = profile_count::zero ();
4178*38fd1498Szrj 	  else
4179*38fd1498Szrj 	    e->src->count = e->count ().apply_scale (new_frequency,
4180*38fd1498Szrj 						     old_frequency);
4181*38fd1498Szrj 	  force_edge_cold (single_pred_edge (e->src), impossible);
4182*38fd1498Szrj 	}
4183*38fd1498Szrj       else if (dump_file && (dump_flags & TDF_DETAILS)
4184*38fd1498Szrj 	       && maybe_hot_bb_p (cfun, e->src))
4185*38fd1498Szrj 	fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index,
4186*38fd1498Szrj 		 impossible ? "impossible" : "cold");
4187*38fd1498Szrj     }
4188*38fd1498Szrj }
4189*38fd1498Szrj 
4190*38fd1498Szrj #if CHECKING_P
4191*38fd1498Szrj 
4192*38fd1498Szrj namespace selftest {
4193*38fd1498Szrj 
4194*38fd1498Szrj /* Test that value range of predictor values defined in predict.def is
4195*38fd1498Szrj    within range (50, 100].  */
4196*38fd1498Szrj 
4197*38fd1498Szrj struct branch_predictor
4198*38fd1498Szrj {
4199*38fd1498Szrj   const char *name;
4200*38fd1498Szrj   int probability;
4201*38fd1498Szrj };
4202*38fd1498Szrj 
4203*38fd1498Szrj #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) { NAME, HITRATE },
4204*38fd1498Szrj 
4205*38fd1498Szrj static void
test_prediction_value_range()4206*38fd1498Szrj test_prediction_value_range ()
4207*38fd1498Szrj {
4208*38fd1498Szrj   branch_predictor predictors[] = {
4209*38fd1498Szrj #include "predict.def"
4210*38fd1498Szrj     { NULL, PROB_UNINITIALIZED }
4211*38fd1498Szrj   };
4212*38fd1498Szrj 
4213*38fd1498Szrj   for (unsigned i = 0; predictors[i].name != NULL; i++)
4214*38fd1498Szrj     {
4215*38fd1498Szrj       if (predictors[i].probability == PROB_UNINITIALIZED)
4216*38fd1498Szrj 	continue;
4217*38fd1498Szrj 
4218*38fd1498Szrj       unsigned p = 100 * predictors[i].probability / REG_BR_PROB_BASE;
4219*38fd1498Szrj       ASSERT_TRUE (p >= 50 && p <= 100);
4220*38fd1498Szrj     }
4221*38fd1498Szrj }
4222*38fd1498Szrj 
4223*38fd1498Szrj #undef DEF_PREDICTOR
4224*38fd1498Szrj 
4225*38fd1498Szrj /* Run all of the selfests within this file.  */
4226*38fd1498Szrj 
4227*38fd1498Szrj void
predict_c_tests()4228*38fd1498Szrj predict_c_tests ()
4229*38fd1498Szrj {
4230*38fd1498Szrj   test_prediction_value_range ();
4231*38fd1498Szrj }
4232*38fd1498Szrj 
4233*38fd1498Szrj } // namespace selftest
4234*38fd1498Szrj #endif /* CHECKING_P.  */
4235