xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/predict.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /* Branch prediction routines for the GNU compiler.
2    Copyright (C) 2000-2015 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* References:
21 
22    [1] "Branch Prediction for Free"
23        Ball and Larus; PLDI '93.
24    [2] "Static Branch Frequency and Program Profile Analysis"
25        Wu and Larus; MICRO-27.
26    [3] "Corpus-based Static Branch Prediction"
27        Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
28 
29 
30 #include "config.h"
31 #include "system.h"
32 #include "coretypes.h"
33 #include "tm.h"
34 #include "hash-set.h"
35 #include "machmode.h"
36 #include "vec.h"
37 #include "double-int.h"
38 #include "input.h"
39 #include "alias.h"
40 #include "symtab.h"
41 #include "wide-int.h"
42 #include "inchash.h"
43 #include "tree.h"
44 #include "fold-const.h"
45 #include "calls.h"
46 #include "rtl.h"
47 #include "tm_p.h"
48 #include "hard-reg-set.h"
49 #include "predict.h"
50 #include "function.h"
51 #include "dominance.h"
52 #include "cfg.h"
53 #include "cfganal.h"
54 #include "basic-block.h"
55 #include "insn-config.h"
56 #include "regs.h"
57 #include "flags.h"
58 #include "profile.h"
59 #include "except.h"
60 #include "diagnostic-core.h"
61 #include "recog.h"
62 #include "hashtab.h"
63 #include "statistics.h"
64 #include "real.h"
65 #include "fixed-value.h"
66 #include "expmed.h"
67 #include "dojump.h"
68 #include "explow.h"
69 #include "emit-rtl.h"
70 #include "varasm.h"
71 #include "stmt.h"
72 #include "expr.h"
73 #include "coverage.h"
74 #include "sreal.h"
75 #include "params.h"
76 #include "target.h"
77 #include "cfgloop.h"
78 #include "hash-map.h"
79 #include "tree-ssa-alias.h"
80 #include "internal-fn.h"
81 #include "gimple-expr.h"
82 #include "is-a.h"
83 #include "gimple.h"
84 #include "gimple-iterator.h"
85 #include "gimple-ssa.h"
86 #include "plugin-api.h"
87 #include "ipa-ref.h"
88 #include "cgraph.h"
89 #include "tree-cfg.h"
90 #include "tree-phinodes.h"
91 #include "ssa-iterators.h"
92 #include "tree-ssa-loop-niter.h"
93 #include "tree-ssa-loop.h"
94 #include "tree-pass.h"
95 #include "tree-scalar-evolution.h"
96 
97 /* real constants: 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
98 		   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
99 static sreal real_almost_one, real_br_prob_base,
100 	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;
101 
102 static void combine_predictions_for_insn (rtx_insn *, basic_block);
103 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
104 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
105 static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
106 static bool can_predict_insn_p (const rtx_insn *);
107 
108 /* Information we hold about each branch predictor.
109    Filled using information from predict.def.  */
110 
111 struct predictor_info
112 {
113   const char *const name;	/* Name used in the debugging dumps.  */
114   const int hitrate;		/* Expected hitrate used by
115 				   predict_insn_def call.  */
116   const int flags;
117 };
118 
119 /* Use the given predictor without Dempster-Shafer theory if it matches
120    using first_match heuristics.  */
121 #define PRED_FLAG_FIRST_MATCH 1
122 
123 /* Convert a hit rate given in percent to our internal representation.  */
124 
125 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
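/* Worked example (illustrative, assuming the usual REG_BR_PROB_BASE of
   10000): HITRATE (90) expands to (90 * 10000 + 50) / 100 == 9000, so a
   predictor documented with a 90% hit rate is represented as 9000 on the
   fixed-point probability scale used throughout this file.  */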
126 
127 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
128 static const struct predictor_info predictor_info[]= {
129 #include "predict.def"
130 
131   /* Upper bound on predictors.  */
132   {NULL, 0, 0}
133 };
134 #undef DEF_PREDICTOR
135 
136 /* Return TRUE if frequency FREQ is considered to be hot.  */
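/* Illustrative cut-off: with HOT_BB_FREQUENCY_FRACTION set to 1000, the
   final test below rejects blocks whose frequency is below 1/1000 of the
   entry block's frequency, i.e. blocks that run less than 0.1% as often as
   the function is entered.  */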
137 
138 static inline bool
139 maybe_hot_frequency_p (struct function *fun, int freq)
140 {
141   struct cgraph_node *node = cgraph_node::get (fun->decl);
142   if (!profile_info
143       || !opt_for_fn (fun->decl, flag_branch_probabilities))
144     {
145       if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
146         return false;
147       if (node->frequency == NODE_FREQUENCY_HOT)
148         return true;
149     }
150   if (profile_status_for_fn (fun) == PROFILE_ABSENT)
151     return true;
152   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
153       && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
154     return false;
155   if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
156     return false;
157   if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
158 	      / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
159     return false;
160   return true;
161 }
162 
163 static gcov_type min_count = -1;
164 
165 /* Determine the threshold for hot BB counts.  */
166 
167 gcov_type
168 get_hot_bb_threshold ()
169 {
170   gcov_working_set_t *ws;
171   if (min_count == -1)
172     {
173       ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
174       gcc_assert (ws);
175       min_count = ws->min_counter;
176     }
177   return min_count;
178 }
179 
180 /* Set the threshold for hot BB counts.  */
181 
182 void
183 set_hot_bb_threshold (gcov_type min)
184 {
185   min_count = min;
186 }
187 
188 /* Return TRUE if profile count COUNT is considered to be hot in function FUN.  */
189 
190 bool
191 maybe_hot_count_p (struct function *fun, gcov_type count)
192 {
193   if (fun && profile_status_for_fn (fun) != PROFILE_READ)
194     return true;
195   /* Code executed at most once is not hot.  */
196   if (profile_info->runs >= count)
197     return false;
198   return (count >= get_hot_bb_threshold ());
199 }
200 
201 /* Return true in case BB can be CPU intensive and should be optimized
202    for maximal performance.  */
203 
204 bool
205 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
206 {
207   gcc_checking_assert (fun);
208   if (profile_status_for_fn (fun) == PROFILE_READ)
209     return maybe_hot_count_p (fun, bb->count);
210   return maybe_hot_frequency_p (fun, bb->frequency);
211 }
212 
213 /* Return true in case edge E can be CPU intensive and should be optimized
214    for maximal performance.  */
215 
216 bool
217 maybe_hot_edge_p (edge e)
218 {
219   if (profile_status_for_fn (cfun) == PROFILE_READ)
220     return maybe_hot_count_p (cfun, e->count);
221   return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
222 }
223 
224 /* Return true if profile COUNT and FREQUENCY, or the static node frequency
225    of function FUN, indicate that the code is probably never executed.  */
226 
227 static bool
228 probably_never_executed (struct function *fun,
229                          gcov_type count, int frequency)
230 {
231   gcc_checking_assert (fun);
232   if (profile_status_for_fn (fun) == PROFILE_READ)
233     {
234       int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
235       if (count * unlikely_count_fraction >= profile_info->runs)
236 	return false;
237       if (!frequency)
238 	return true;
239       if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
240 	return false;
241       if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
242 	{
243           gcov_type computed_count;
244           /* Check for possibility of overflow, in which case entry bb count
245              is large enough to do the division first without losing much
246              precision.  */
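          /* Worked example (illustrative figures only): with an entry count
             of 1000000, entry frequency 10000, block FREQUENCY 100 and
             unlikely_count_fraction 20, the scaled count is
             100 * 1000000 * 20 == 2000000000 and computed_count becomes
             2000000000 / 10000 == 200000, which is then compared against
             profile_info->runs.  */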
247 	  if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
248 	      REG_BR_PROB_BASE)
249             {
250               gcov_type scaled_count
251 		  = frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
252 	     unlikely_count_fraction;
253 	      computed_count = RDIV (scaled_count,
254 				     ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
255             }
256           else
257             {
258 	      computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
259 				     ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
260               computed_count *= frequency * unlikely_count_fraction;
261             }
262           if (computed_count >= profile_info->runs)
263             return false;
264 	}
265       return true;
266     }
267   if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
268       && (cgraph_node::get (fun->decl)->frequency
269 	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
270     return true;
271   return false;
272 }
273 
274 
275 /* Return true in case BB is probably never executed.  */
276 
277 bool
278 probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
279 {
280   return probably_never_executed (fun, bb->count, bb->frequency);
281 }
282 
283 
284 /* Return true in case edge E is probably never executed.  */
285 
286 bool
287 probably_never_executed_edge_p (struct function *fun, edge e)
288 {
289   return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
290 }
291 
292 /* Return true when current function should always be optimized for size.  */
293 
294 bool
295 optimize_function_for_size_p (struct function *fun)
296 {
297   if (!fun || !fun->decl)
298     return optimize_size;
299   cgraph_node *n = cgraph_node::get (fun->decl);
300   return n && n->optimize_for_size_p ();
301 }
302 
303 /* Return true when current function should always be optimized for speed.  */
304 
305 bool
306 optimize_function_for_speed_p (struct function *fun)
307 {
308   return !optimize_function_for_size_p (fun);
309 }
310 
311 /* Return TRUE when BB should be optimized for size.  */
312 
313 bool
314 optimize_bb_for_size_p (const_basic_block bb)
315 {
316   return (optimize_function_for_size_p (cfun)
317 	  || (bb && !maybe_hot_bb_p (cfun, bb)));
318 }
319 
320 /* Return TRUE when BB should be optimized for speed.  */
321 
322 bool
323 optimize_bb_for_speed_p (const_basic_block bb)
324 {
325   return !optimize_bb_for_size_p (bb);
326 }
327 
328 /* Return TRUE when edge E should be optimized for size.  */
329 
330 bool
331 optimize_edge_for_size_p (edge e)
332 {
333   return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
334 }
335 
336 /* Return TRUE when edge E should be optimized for speed.  */
337 
338 bool
339 optimize_edge_for_speed_p (edge e)
340 {
341   return !optimize_edge_for_size_p (e);
342 }
343 
344 /* Return TRUE when the instruction currently being expanded should be
345    optimized for size.  */
345 
346 bool
347 optimize_insn_for_size_p (void)
348 {
349   return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
350 }
351 
352 /* Return TRUE when the instruction currently being expanded should be
353    optimized for speed.  */
353 
354 bool
355 optimize_insn_for_speed_p (void)
356 {
357   return !optimize_insn_for_size_p ();
358 }
359 
360 /* Return TRUE when LOOP should be optimized for size.  */
361 
362 bool
363 optimize_loop_for_size_p (struct loop *loop)
364 {
365   return optimize_bb_for_size_p (loop->header);
366 }
367 
368 /* Return TRUE when LOOP should be optimized for speed.  */
369 
370 bool
371 optimize_loop_for_speed_p (struct loop *loop)
372 {
373   return optimize_bb_for_speed_p (loop->header);
374 }
375 
376 /* Return TRUE when LOOP nest should be optimized for speed.  */
377 
378 bool
379 optimize_loop_nest_for_speed_p (struct loop *loop)
380 {
381   struct loop *l = loop;
382   if (optimize_loop_for_speed_p (loop))
383     return true;
384   l = loop->inner;
385   while (l && l != loop)
386     {
387       if (optimize_loop_for_speed_p (l))
388         return true;
389       if (l->inner)
390         l = l->inner;
391       else if (l->next)
392         l = l->next;
393       else
394         {
395 	  while (l != loop && !l->next)
396 	    l = loop_outer (l);
397 	  if (l != loop)
398 	    l = l->next;
399 	}
400     }
401   return false;
402 }
403 
404 /* Return TRUE when LOOP nest should be optimized for size.  */
405 
406 bool
407 optimize_loop_nest_for_size_p (struct loop *loop)
408 {
409   return !optimize_loop_nest_for_speed_p (loop);
410 }
411 
412 /* Return true when edge E is likely to be predicted well by the branch
413    predictor.  */
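/* For example, with PARAM_PREDICTABLE_BRANCH_OUTCOME set to 2 (illustrative
   value), an edge counts as predictable when its probability is at most 2%
   or at least 98% of REG_BR_PROB_BASE.  */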
414 
415 bool
416 predictable_edge_p (edge e)
417 {
418   if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
419     return false;
420   if ((e->probability
421        <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
422       || (REG_BR_PROB_BASE - e->probability
423           <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
424     return true;
425   return false;
426 }
427 
428 
429 /* Set RTL expansion for BB profile.  */
430 
431 void
432 rtl_profile_for_bb (basic_block bb)
433 {
434   crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
435 }
436 
437 /* Set RTL expansion for edge profile.  */
438 
439 void
440 rtl_profile_for_edge (edge e)
441 {
442   crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
443 }
444 
445 /* Set RTL expansion to default mode (i.e. when profile info is not known).  */
446 void
447 default_rtl_profile (void)
448 {
449   crtl->maybe_hot_insn_p = true;
450 }
451 
452 /* Return true if one of the outgoing edges is already predicted by
453    PREDICTOR.  */
454 
455 bool
456 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
457 {
458   rtx note;
459   if (!INSN_P (BB_END (bb)))
460     return false;
461   for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
462     if (REG_NOTE_KIND (note) == REG_BR_PRED
463 	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
464       return true;
465   return false;
466 }
467 
468 /* Structure representing predictions at the tree level.  */
469 
470 struct edge_prediction {
471     struct edge_prediction *ep_next;
472     edge ep_edge;
473     enum br_predictor ep_predictor;
474     int ep_probability;
475 };
476 
477 /* This map contains for a basic block the list of predictions for the
478    outgoing edges.  */
479 
480 static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
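/* For instance, gimple_predict_edge below pushes a freshly allocated
   edge_prediction onto the head of the list kept for E->src, so each basic
   block maps to a singly linked chain of predictions that is later read by
   combine_predictions_for_bb and freed by clear_bb_predictions.  */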
481 
482 /* Return true if one of the outgoing edges is already predicted by
483    PREDICTOR.  */
484 
485 bool
486 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
487 {
488   struct edge_prediction *i;
489   edge_prediction **preds = bb_predictions->get (bb);
490 
491   if (!preds)
492     return false;
493 
494   for (i = *preds; i; i = i->ep_next)
495     if (i->ep_predictor == predictor)
496       return true;
497   return false;
498 }
499 
500 /* Return true when the probability of the edge is reliable.
501 
502    The profile guessing code is good at predicting the branch outcome (i.e.
503    taken/not taken); it is right slightly over 75% of the time.  It is,
504    however, notoriously poor at predicting the probability itself.  In
505    general the guessed profile appears a lot flatter (with probabilities
506    closer to 50%) than reality, so it is a bad idea to use it to drive
507    optimizations such as those disabling dynamic branch prediction for
508    well predictable branches.
509 
510    There are two exceptions: edges leading to noreturn calls and edges
511    predicted by the number-of-iterations heuristics are predicted well.
512    This function should be able to distinguish those, but at the moment it
513    simply checks for the noreturn heuristic, which is the only one giving a
514    probability over 99% or below 1%.  In the future we might propagate
515    reliability information across the CFG if it proves useful in multiple places.  */
516 static bool
517 probability_reliable_p (int prob)
518 {
519   return (profile_status_for_fn (cfun) == PROFILE_READ
520 	  || (profile_status_for_fn (cfun) == PROFILE_GUESSED
521 	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
522 }
523 
524 /* Same predicate as above, working on edges.  */
525 bool
526 edge_probability_reliable_p (const_edge e)
527 {
528   return probability_reliable_p (e->probability);
529 }
530 
531 /* Same predicate as edge_probability_reliable_p, working on notes.  */
532 bool
533 br_prob_note_reliable_p (const_rtx note)
534 {
535   gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
536   return probability_reliable_p (XINT (note, 0));
537 }
538 
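/* Attach a REG_BR_PRED note recording PREDICTOR and PROBABILITY to the
   conditional jump INSN.  This is a no-op when guessing of branch
   probabilities is disabled.  */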
539 static void
540 predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
541 {
542   gcc_assert (any_condjump_p (insn));
543   if (!flag_guess_branch_prob)
544     return;
545 
546   add_reg_note (insn, REG_BR_PRED,
547 		gen_rtx_CONCAT (VOIDmode,
548 				GEN_INT ((int) predictor),
549 				GEN_INT ((int) probability)));
550 }
551 
552 /* Predict insn by given predictor.  */
553 
554 void
555 predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
556 		  enum prediction taken)
557 {
558    int probability = predictor_info[(int) predictor].hitrate;
559 
560    if (taken != TAKEN)
561      probability = REG_BR_PROB_BASE - probability;
562 
563    predict_insn (insn, predictor, probability);
564 }
565 
566 /* Predict edge E with given probability if possible.  */
567 
568 void
569 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
570 {
571   rtx_insn *last_insn;
572   last_insn = BB_END (e->src);
573 
574   /* We can store the branch prediction information only about
575      conditional jumps.  */
576   if (!any_condjump_p (last_insn))
577     return;
578 
579   /* We always store probability of branching.  */
580   if (e->flags & EDGE_FALLTHRU)
581     probability = REG_BR_PROB_BASE - probability;
582 
583   predict_insn (last_insn, predictor, probability);
584 }
585 
586 /* Predict edge E with the given PROBABILITY.  */
587 void
588 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
589 {
590   gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
591   if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
592        1)
593       && flag_guess_branch_prob && optimize)
594     {
595       struct edge_prediction *i = XNEW (struct edge_prediction);
596       edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
597 
598       i->ep_next = preds;
599       preds = i;
600       i->ep_probability = probability;
601       i->ep_predictor = predictor;
602       i->ep_edge = e;
603     }
604 }
605 
606 /* Remove all predictions on given basic block that are attached
607    to edge E.  */
608 void
609 remove_predictions_associated_with_edge (edge e)
610 {
611   if (!bb_predictions)
612     return;
613 
614   edge_prediction **preds = bb_predictions->get (e->src);
615 
616   if (preds)
617     {
618       struct edge_prediction **prediction = preds;
619       struct edge_prediction *next;
620 
621       while (*prediction)
622 	{
623 	  if ((*prediction)->ep_edge == e)
624 	    {
625 	      next = (*prediction)->ep_next;
626 	      free (*prediction);
627 	      *prediction = next;
628 	    }
629 	  else
630 	    prediction = &((*prediction)->ep_next);
631 	}
632     }
633 }
634 
635 /* Clears the list of predictions stored for BB.  */
636 
637 static void
638 clear_bb_predictions (basic_block bb)
639 {
640   edge_prediction **preds = bb_predictions->get (bb);
641   struct edge_prediction *pred, *next;
642 
643   if (!preds)
644     return;
645 
646   for (pred = *preds; pred; pred = next)
647     {
648       next = pred->ep_next;
649       free (pred);
650     }
651   *preds = NULL;
652 }
653 
654 /* Return true when we can store a prediction on insn INSN.
655    At the moment we represent predictions only on conditional
656    jumps, not on computed jumps or other complicated cases.  */
657 static bool
658 can_predict_insn_p (const rtx_insn *insn)
659 {
660   return (JUMP_P (insn)
661 	  && any_condjump_p (insn)
662 	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
663 }
664 
665 /* Predict edge E by given predictor if possible.  */
666 
667 void
668 predict_edge_def (edge e, enum br_predictor predictor,
669 		  enum prediction taken)
670 {
671    int probability = predictor_info[(int) predictor].hitrate;
672 
673    if (taken != TAKEN)
674      probability = REG_BR_PROB_BASE - probability;
675 
676    predict_edge (e, predictor, probability);
677 }
678 
679 /* Invert all branch predictions or probability notes in the INSN.  This needs
680    to be done each time we invert the condition used by the jump.  */
681 
682 void
683 invert_br_probabilities (rtx insn)
684 {
685   rtx note;
686 
687   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
688     if (REG_NOTE_KIND (note) == REG_BR_PROB)
689       XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
690     else if (REG_NOTE_KIND (note) == REG_BR_PRED)
691       XEXP (XEXP (note, 0), 1)
692 	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
693 }
694 
695 /* Dump information about the branch prediction to the output file.  */
696 
697 static void
698 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
699 		 basic_block bb, int used)
700 {
701   edge e;
702   edge_iterator ei;
703 
704   if (!file)
705     return;
706 
707   FOR_EACH_EDGE (e, ei, bb->succs)
708     if (! (e->flags & EDGE_FALLTHRU))
709       break;
710 
711   fprintf (file, "  %s heuristics%s: %.1f%%",
712 	   predictor_info[predictor].name,
713 	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
714 
715   if (bb->count)
716     {
717       fprintf (file, "  exec %"PRId64, bb->count);
718       if (e)
719 	{
720 	  fprintf (file, " hit %"PRId64, e->count);
721 	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
722 	}
723     }
724 
725   fprintf (file, "\n");
726 }
727 
728 /* We cannot predict the probabilities of the outgoing edges of BB.  Set them
729    evenly and hope for the best.  */
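/* For example (illustrative, assuming REG_BR_PROB_BASE of 10000), a block
   with three non-EH, non-fake successors gets (10000 + 1) / 3 == 3333 on
   each of them, while EH and fake edges get probability 0.  */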
730 static void
731 set_even_probabilities (basic_block bb)
732 {
733   int nedges = 0;
734   edge e;
735   edge_iterator ei;
736 
737   FOR_EACH_EDGE (e, ei, bb->succs)
738     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
739       nedges ++;
740   FOR_EACH_EDGE (e, ei, bb->succs)
741     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
742       e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
743     else
744       e->probability = 0;
745 }
746 
747 /* Combine all REG_BR_PRED notes into a single probability and attach a
748    REG_BR_PROB note if not already present.  Remove now useless REG_BR_PRED notes.  */
749 
750 static void
751 combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
752 {
753   rtx prob_note;
754   rtx *pnote;
755   rtx note;
756   int best_probability = PROB_EVEN;
757   enum br_predictor best_predictor = END_PREDICTORS;
758   int combined_probability = REG_BR_PROB_BASE / 2;
759   int d;
760   bool first_match = false;
761   bool found = false;
762 
763   if (!can_predict_insn_p (insn))
764     {
765       set_even_probabilities (bb);
766       return;
767     }
768 
769   prob_note = find_reg_note (insn, REG_BR_PROB, 0);
770   pnote = &REG_NOTES (insn);
771   if (dump_file)
772     fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
773 	     bb->index);
774 
775   /* We implement the "first match" heuristic and use the probability guessed
776      by the predictor with the smallest index.  */
777   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
778     if (REG_NOTE_KIND (note) == REG_BR_PRED)
779       {
780 	enum br_predictor predictor = ((enum br_predictor)
781 				       INTVAL (XEXP (XEXP (note, 0), 0)));
782 	int probability = INTVAL (XEXP (XEXP (note, 0), 1));
783 
784 	found = true;
785 	if (best_predictor > predictor)
786 	  best_probability = probability, best_predictor = predictor;
787 
788 	d = (combined_probability * probability
789 	     + (REG_BR_PROB_BASE - combined_probability)
790 	     * (REG_BR_PROB_BASE - probability));
791 
792 	/* Use FP math to avoid overflows of 32-bit integers.  */
793 	if (d == 0)
794 	  /* If one probability is 0% and one 100%, avoid division by zero.  */
795 	  combined_probability = REG_BR_PROB_BASE / 2;
796 	else
797 	  combined_probability = (((double) combined_probability) * probability
798 				  * REG_BR_PROB_BASE / d + 0.5);
799       }
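  /* Worked Dempster-Shafer example (illustrative, assuming REG_BR_PROB_BASE
     of 10000): after two 75% (7500) hints the combined probability is 9000;
     the second combination computes d = 7500 * 7500 + 2500 * 2500 == 62500000
     and 7500 * 7500 * 10000 / 62500000 == 9000, i.e. two 75% predictions
     reinforce each other to roughly 90%.  */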
800 
801   /* Decide which heuristic to use.  If we didn't match anything, use the
802      no_prediction heuristic; if we did, use either first match or
803      Dempster-Shafer theory depending on the flags.  */
804 
805   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
806     first_match = true;
807 
808   if (!found)
809     dump_prediction (dump_file, PRED_NO_PREDICTION,
810 		     combined_probability, bb, true);
811   else
812     {
813       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
814 		       bb, !first_match);
815       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
816 		       bb, first_match);
817     }
818 
819   if (first_match)
820     combined_probability = best_probability;
821   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
822 
823   while (*pnote)
824     {
825       if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
826 	{
827 	  enum br_predictor predictor = ((enum br_predictor)
828 					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
829 	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
830 
831 	  dump_prediction (dump_file, predictor, probability, bb,
832 			   !first_match || best_predictor == predictor);
833 	  *pnote = XEXP (*pnote, 1);
834 	}
835       else
836 	pnote = &XEXP (*pnote, 1);
837     }
838 
839   if (!prob_note)
840     {
841       add_int_reg_note (insn, REG_BR_PROB, combined_probability);
842 
843       /* Save the prediction into the CFG in case we are seeing a
844 	 non-degenerate conditional jump.  */
845       if (!single_succ_p (bb))
846 	{
847 	  BRANCH_EDGE (bb)->probability = combined_probability;
848 	  FALLTHRU_EDGE (bb)->probability
849 	    = REG_BR_PROB_BASE - combined_probability;
850 	}
851     }
852   else if (!single_succ_p (bb))
853     {
854       int prob = XINT (prob_note, 0);
855 
856       BRANCH_EDGE (bb)->probability = prob;
857       FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
858     }
859   else
860     single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
861 }
862 
863 /* Combine predictions into a single probability and store it into the CFG.
864    Remove the now useless prediction entries.  */
865 
866 static void
867 combine_predictions_for_bb (basic_block bb)
868 {
869   int best_probability = PROB_EVEN;
870   enum br_predictor best_predictor = END_PREDICTORS;
871   int combined_probability = REG_BR_PROB_BASE / 2;
872   int d;
873   bool first_match = false;
874   bool found = false;
875   struct edge_prediction *pred;
876   int nedges = 0;
877   edge e, first = NULL, second = NULL;
878   edge_iterator ei;
879 
880   FOR_EACH_EDGE (e, ei, bb->succs)
881     if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
882       {
883 	nedges ++;
884 	if (first && !second)
885 	  second = e;
886 	if (!first)
887 	  first = e;
888       }
889 
890   /* When there is no successor or only one choice, prediction is easy.
891 
892      We are lazy for now and predict only basic blocks with two outgoing
893      edges.  It is possible to predict generic case too, but we have to
894      ignore first match heuristics and do more involved combining.  Implement
895      this later.  */
896   if (nedges != 2)
897     {
898       if (!bb->count)
899 	set_even_probabilities (bb);
900       clear_bb_predictions (bb);
901       if (dump_file)
902 	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
903 		 nedges, bb->index);
904       return;
905     }
906 
907   if (dump_file)
908     fprintf (dump_file, "Predictions for bb %i\n", bb->index);
909 
910   edge_prediction **preds = bb_predictions->get (bb);
911   if (preds)
912     {
913       /* We implement the "first match" heuristic and use the probability guessed
914 	 by the predictor with the smallest index.  */
915       for (pred = *preds; pred; pred = pred->ep_next)
916 	{
917 	  enum br_predictor predictor = pred->ep_predictor;
918 	  int probability = pred->ep_probability;
919 
920 	  if (pred->ep_edge != first)
921 	    probability = REG_BR_PROB_BASE - probability;
922 
923 	  found = true;
924 	  /* First match heuristics would be wildly confused if we predicted
925 	     both directions.  */
926 	  if (best_predictor > predictor)
927 	    {
928               struct edge_prediction *pred2;
929 	      int prob = probability;
930 
931 	      for (pred2 = (struct edge_prediction *) *preds;
932 		   pred2; pred2 = pred2->ep_next)
933 	       if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
934 	         {
935 	           int probability2 = pred2->ep_probability;
936 
937 		   if (pred2->ep_edge != first)
938 		     probability2 = REG_BR_PROB_BASE - probability2;
939 
940 		   if ((probability < REG_BR_PROB_BASE / 2) !=
941 		       (probability2 < REG_BR_PROB_BASE / 2))
942 		     break;
943 
944 		   /* If the same predictor later gave a better result, go for it!  */
945 		   if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
946 		       || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
947 		     prob = probability2;
948 		 }
949 	      if (!pred2)
950 	        best_probability = prob, best_predictor = predictor;
951 	    }
952 
953 	  d = (combined_probability * probability
954 	       + (REG_BR_PROB_BASE - combined_probability)
955 	       * (REG_BR_PROB_BASE - probability));
956 
957 	  /* Use FP math to avoid overflows of 32-bit integers.  */
958 	  if (d == 0)
959 	    /* If one probability is 0% and one 100%, avoid division by zero.  */
960 	    combined_probability = REG_BR_PROB_BASE / 2;
961 	  else
962 	    combined_probability = (((double) combined_probability)
963 				    * probability
964 		    		    * REG_BR_PROB_BASE / d + 0.5);
965 	}
966     }
967 
968   /* Decide which heuristic to use.  If we didn't match anything, use the
969      no_prediction heuristic; if we did, use either first match or
970      Dempster-Shafer theory depending on the flags.  */
971 
972   if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
973     first_match = true;
974 
975   if (!found)
976     dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
977   else
978     {
979       dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
980 		       !first_match);
981       dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
982 		       first_match);
983     }
984 
985   if (first_match)
986     combined_probability = best_probability;
987   dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
988 
989   if (preds)
990     {
991       for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
992 	{
993 	  enum br_predictor predictor = pred->ep_predictor;
994 	  int probability = pred->ep_probability;
995 
996 	  if (pred->ep_edge != EDGE_SUCC (bb, 0))
997 	    probability = REG_BR_PROB_BASE - probability;
998 	  dump_prediction (dump_file, predictor, probability, bb,
999 			   !first_match || best_predictor == predictor);
1000 	}
1001     }
1002   clear_bb_predictions (bb);
1003 
1004   if (!bb->count)
1005     {
1006       first->probability = combined_probability;
1007       second->probability = REG_BR_PROB_BASE - combined_probability;
1008     }
1009 }
1010 
1011 /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1012    Return the SSA_NAME if the condition is satisfied, NULL otherwise.
1013 
1014    T1 and T2 should be one of the following cases:
1015      1. T1 is SSA_NAME, T2 is NULL
1016      2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1017      3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1018 
1019 static tree
1020 strips_small_constant (tree t1, tree t2)
1021 {
1022   tree ret = NULL;
1023   int value = 0;
1024 
1025   if (!t1)
1026     return NULL;
1027   else if (TREE_CODE (t1) == SSA_NAME)
1028     ret = t1;
1029   else if (tree_fits_shwi_p (t1))
1030     value = tree_to_shwi (t1);
1031   else
1032     return NULL;
1033 
1034   if (!t2)
1035     return ret;
1036   else if (tree_fits_shwi_p (t2))
1037     value = tree_to_shwi (t2);
1038   else if (TREE_CODE (t2) == SSA_NAME)
1039     {
1040       if (ret)
1041         return NULL;
1042       else
1043         ret = t2;
1044     }
1045 
1046   if (value <= 4 && value >= -4)
1047     return ret;
1048   else
1049     return NULL;
1050 }
1051 
1052 /* Return the SSA_NAME in T or T's operands.
1053    Return NULL if SSA_NAME cannot be found.  */
1054 
1055 static tree
1056 get_base_value (tree t)
1057 {
1058   if (TREE_CODE (t) == SSA_NAME)
1059     return t;
1060 
1061   if (!BINARY_CLASS_P (t))
1062     return NULL;
1063 
1064   switch (TREE_OPERAND_LENGTH (t))
1065     {
1066     case 1:
1067       return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1068     case 2:
1069       return strips_small_constant (TREE_OPERAND (t, 0),
1070 				    TREE_OPERAND (t, 1));
1071     default:
1072       return NULL;
1073     }
1074 }
1075 
1076 /* Check the compare STMT in LOOP. If it compares an induction
1077    variable to a loop invariant, return true, and save
1078    LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1079    Otherwise return false and set LOOP_INVARIANT to NULL.  */
1080 
1081 static bool
1082 is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
1083 				     tree *loop_invariant,
1084 				     enum tree_code *compare_code,
1085 				     tree *loop_step,
1086 				     tree *loop_iv_base)
1087 {
1088   tree op0, op1, bound, base;
1089   affine_iv iv0, iv1;
1090   enum tree_code code;
1091   tree step;
1092 
1093   code = gimple_cond_code (stmt);
1094   *loop_invariant = NULL;
1095 
1096   switch (code)
1097     {
1098     case GT_EXPR:
1099     case GE_EXPR:
1100     case NE_EXPR:
1101     case LT_EXPR:
1102     case LE_EXPR:
1103     case EQ_EXPR:
1104       break;
1105 
1106     default:
1107       return false;
1108     }
1109 
1110   op0 = gimple_cond_lhs (stmt);
1111   op1 = gimple_cond_rhs (stmt);
1112 
1113   if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1114        || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1115     return false;
1116   if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1117     return false;
1118   if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1119     return false;
1120   if (TREE_CODE (iv0.step) != INTEGER_CST
1121       || TREE_CODE (iv1.step) != INTEGER_CST)
1122     return false;
1123   if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1124       || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1125     return false;
1126 
1127   if (integer_zerop (iv0.step))
1128     {
1129       if (code != NE_EXPR && code != EQ_EXPR)
1130 	code = invert_tree_comparison (code, false);
1131       bound = iv0.base;
1132       base = iv1.base;
1133       if (tree_fits_shwi_p (iv1.step))
1134 	step = iv1.step;
1135       else
1136 	return false;
1137     }
1138   else
1139     {
1140       bound = iv1.base;
1141       base = iv0.base;
1142       if (tree_fits_shwi_p (iv0.step))
1143 	step = iv0.step;
1144       else
1145 	return false;
1146     }
1147 
1148   if (TREE_CODE (bound) != INTEGER_CST)
1149     bound = get_base_value (bound);
1150   if (!bound)
1151     return false;
1152   if (TREE_CODE (base) != INTEGER_CST)
1153     base = get_base_value (base);
1154   if (!base)
1155     return false;
1156 
1157   *loop_invariant = bound;
1158   *compare_code = code;
1159   *loop_step = step;
1160   *loop_iv_base = base;
1161   return true;
1162 }
1163 
1164 /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1165 
1166 static bool
1167 expr_coherent_p (tree t1, tree t2)
1168 {
1169   gimple stmt;
1170   tree ssa_name_1 = NULL;
1171   tree ssa_name_2 = NULL;
1172 
1173   gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1174   gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1175 
1176   if (t1 == t2)
1177     return true;
1178 
1179   if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1180     return true;
1181   if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1182     return false;
1183 
1184   /* Check to see if t1 is expressed/defined with t2.  */
1185   stmt = SSA_NAME_DEF_STMT (t1);
1186   gcc_assert (stmt != NULL);
1187   if (is_gimple_assign (stmt))
1188     {
1189       ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1190       if (ssa_name_1 && ssa_name_1 == t2)
1191 	return true;
1192     }
1193 
1194   /* Check to see if t2 is expressed/defined with t1.  */
1195   stmt = SSA_NAME_DEF_STMT (t2);
1196   gcc_assert (stmt != NULL);
1197   if (is_gimple_assign (stmt))
1198     {
1199       ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1200       if (ssa_name_2 && ssa_name_2 == t1)
1201 	return true;
1202     }
1203 
1204   /* Compare if t1 and t2's def_stmts are identical.  */
1205   if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1206     return true;
1207   else
1208     return false;
1209 }
1210 
1211 /* Predict branch probability of BB when BB contains a branch that compares
1212    an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1213    loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1214 
1215    E.g.
1216      for (int i = 0; i < bound; i++) {
1217        if (i < bound - 2)
1218 	 computation_1();
1219        else
1220 	 computation_2();
1221      }
1222 
1223   In this loop, we will predict the branch inside the loop to be taken.  */
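/* Illustrative probability for the example above: when bound, base and the
   compared constant are all known constants, the code computes
   compare_count / loop_count scaled by REG_BR_PROB_BASE, which here is
   roughly (bound - 2) / bound; e.g. with bound == 100 the then-edge gets
   about a 98% probability.  */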
1224 
1225 static void
1226 predict_iv_comparison (struct loop *loop, basic_block bb,
1227 		       tree loop_bound_var,
1228 		       tree loop_iv_base_var,
1229 		       enum tree_code loop_bound_code,
1230 		       int loop_bound_step)
1231 {
1232   gimple stmt;
1233   tree compare_var, compare_base;
1234   enum tree_code compare_code;
1235   tree compare_step_var;
1236   edge then_edge;
1237   edge_iterator ei;
1238 
1239   if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1240       || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1241       || predicted_by_p (bb, PRED_LOOP_EXIT))
1242     return;
1243 
1244   stmt = last_stmt (bb);
1245   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1246     return;
1247   if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
1248 					    loop, &compare_var,
1249 					    &compare_code,
1250 					    &compare_step_var,
1251 					    &compare_base))
1252     return;
1253 
1254   /* Find the taken edge.  */
1255   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1256     if (then_edge->flags & EDGE_TRUE_VALUE)
1257       break;
1258 
1259   /* When comparing an IV to a loop invariant, NE is more likely to be
1260      taken while EQ is more likely to be not-taken.  */
1261   if (compare_code == NE_EXPR)
1262     {
1263       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1264       return;
1265     }
1266   else if (compare_code == EQ_EXPR)
1267     {
1268       predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1269       return;
1270     }
1271 
1272   if (!expr_coherent_p (loop_iv_base_var, compare_base))
1273     return;
1274 
1275   /* If loop bound, base and compare bound are all constants, we can
1276      calculate the probability directly.  */
1277   if (tree_fits_shwi_p (loop_bound_var)
1278       && tree_fits_shwi_p (compare_var)
1279       && tree_fits_shwi_p (compare_base))
1280     {
1281       int probability;
1282       bool overflow, overall_overflow = false;
1283       widest_int compare_count, tem;
1284 
1285       /* (loop_bound - base) / compare_step */
1286       tem = wi::sub (wi::to_widest (loop_bound_var),
1287 		     wi::to_widest (compare_base), SIGNED, &overflow);
1288       overall_overflow |= overflow;
1289       widest_int loop_count = wi::div_trunc (tem,
1290 					     wi::to_widest (compare_step_var),
1291 					     SIGNED, &overflow);
1292       overall_overflow |= overflow;
1293 
1294       if (!wi::neg_p (wi::to_widest (compare_step_var))
1295           ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1296 	{
1297 	  /* (loop_bound - compare_bound) / compare_step */
1298 	  tem = wi::sub (wi::to_widest (loop_bound_var),
1299 			 wi::to_widest (compare_var), SIGNED, &overflow);
1300 	  overall_overflow |= overflow;
1301 	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1302 					 SIGNED, &overflow);
1303 	  overall_overflow |= overflow;
1304 	}
1305       else
1306         {
1307 	  /* (compare_bound - base) / compare_step */
1308 	  tem = wi::sub (wi::to_widest (compare_var),
1309 			 wi::to_widest (compare_base), SIGNED, &overflow);
1310 	  overall_overflow |= overflow;
1311           compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1312 					 SIGNED, &overflow);
1313 	  overall_overflow |= overflow;
1314 	}
1315       if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1316 	++compare_count;
1317       if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1318 	++loop_count;
1319       if (wi::neg_p (compare_count))
1320         compare_count = 0;
1321       if (wi::neg_p (loop_count))
1322         loop_count = 0;
1323       if (loop_count == 0)
1324 	probability = 0;
1325       else if (wi::cmps (compare_count, loop_count) == 1)
1326 	probability = REG_BR_PROB_BASE;
1327       else
1328         {
1329 	  tem = compare_count * REG_BR_PROB_BASE;
1330 	  tem = wi::udiv_trunc (tem, loop_count);
1331 	  probability = tem.to_uhwi ();
1332 	}
1333 
1334       if (!overall_overflow)
1335         predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1336 
1337       return;
1338     }
1339 
1340   if (expr_coherent_p (loop_bound_var, compare_var))
1341     {
1342       if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1343 	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1344 	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1345       else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1346 	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1347 	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1348       else if (loop_bound_code == NE_EXPR)
1349 	{
1350 	  /* If the loop backedge condition is "(i != bound)", we do
1351 	     the comparison based on the step of IV:
1352 	     * step < 0 : backedge condition is like (i > bound)
1353 	     * step > 0 : backedge condition is like (i < bound)  */
1354 	  gcc_assert (loop_bound_step != 0);
1355 	  if (loop_bound_step > 0
1356 	      && (compare_code == LT_EXPR
1357 		  || compare_code == LE_EXPR))
1358 	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1359 	  else if (loop_bound_step < 0
1360 		   && (compare_code == GT_EXPR
1361 		       || compare_code == GE_EXPR))
1362 	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1363 	  else
1364 	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1365 	}
1366       else
1367 	/* The branch is predicted not-taken if loop_bound_code is
1368 	   opposite with compare_code.  */
1369 	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1370     }
1371   else if (expr_coherent_p (loop_iv_base_var, compare_var))
1372     {
1373       /* For cases like:
1374 	   for (i = s; i < h; i++)
1375 	     if (i > s + 2) ....
1376 	 The branch should be predicted taken.  */
1377       if (loop_bound_step > 0
1378 	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1379 	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1380       else if (loop_bound_step < 0
1381 	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1382 	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1383       else
1384 	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1385     }
1386 }
1387 
1388 /* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
1389    exits result from short-circuit conditions that will generate an
1390    if_tmp.  E.g.:
1391 
1392    if (foo() || global > 10)
1393      break;
1394 
1395    This will be translated into:
1396 
1397    BB3:
1398      loop header...
1399    BB4:
1400      if foo() goto BB6 else goto BB5
1401    BB5:
1402      if global > 10 goto BB6 else goto BB7
1403    BB6:
1404      goto BB7
1405    BB7:
1406      iftmp = (PHI 0(BB5), 1(BB6))
1407      if iftmp == 1 goto BB8 else goto BB3
1408    BB8:
1409      outside of the loop...
1410 
1411    The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
1412    From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1413    exits.  This function takes BB7->BB8 as input, finds the extra loop
1414    exits and predicts them using PRED_LOOP_EXIT.  */
1415 
1416 static void
1417 predict_extra_loop_exits (edge exit_edge)
1418 {
1419   unsigned i;
1420   bool check_value_one;
1421   gimple lhs_def_stmt;
1422   gphi *phi_stmt;
1423   tree cmp_rhs, cmp_lhs;
1424   gimple last;
1425   gcond *cmp_stmt;
1426 
1427   last = last_stmt (exit_edge->src);
1428   if (!last)
1429     return;
1430   cmp_stmt = dyn_cast <gcond *> (last);
1431   if (!cmp_stmt)
1432     return;
1433 
1434   cmp_rhs = gimple_cond_rhs (cmp_stmt);
1435   cmp_lhs = gimple_cond_lhs (cmp_stmt);
1436   if (!TREE_CONSTANT (cmp_rhs)
1437       || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1438     return;
1439   if (TREE_CODE (cmp_lhs) != SSA_NAME)
1440     return;
1441 
1442   /* If check_value_one is true, only the phi_args with value '1' will lead
1443      to loop exit. Otherwise, only the phi_args with value '0' will lead to
1444      loop exit.  */
1445   check_value_one = (((integer_onep (cmp_rhs))
1446 		    ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1447 		    ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1448 
1449   lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1450   if (!lhs_def_stmt)
1451     return;
1452 
1453   phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1454   if (!phi_stmt)
1455     return;
1456 
1457   for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1458     {
1459       edge e1;
1460       edge_iterator ei;
1461       tree val = gimple_phi_arg_def (phi_stmt, i);
1462       edge e = gimple_phi_arg_edge (phi_stmt, i);
1463 
1464       if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1465 	continue;
1466       if ((check_value_one ^ integer_onep (val)) == 1)
1467 	continue;
1468       if (EDGE_COUNT (e->src->succs) != 1)
1469 	{
1470 	  predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1471 	  continue;
1472 	}
1473 
1474       FOR_EACH_EDGE (e1, ei, e->src->preds)
1475 	predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1476     }
1477 }
1478 
1479 /* Predict edge probabilities by exploiting loop structure.  */
1480 
1481 static void
1482 predict_loops (void)
1483 {
1484   struct loop *loop;
1485 
1486   /* Try to predict out blocks in a loop that are not part of a
1487      natural loop.  */
1488   FOR_EACH_LOOP (loop, 0)
1489     {
1490       basic_block bb, *bbs;
1491       unsigned j, n_exits;
1492       vec<edge> exits;
1493       struct tree_niter_desc niter_desc;
1494       edge ex;
1495       struct nb_iter_bound *nb_iter;
1496       enum tree_code loop_bound_code = ERROR_MARK;
1497       tree loop_bound_step = NULL;
1498       tree loop_bound_var = NULL;
1499       tree loop_iv_base = NULL;
1500       gcond *stmt = NULL;
1501 
1502       exits = get_loop_exit_edges (loop);
1503       n_exits = exits.length ();
1504       if (!n_exits)
1505 	{
1506           exits.release ();
1507 	  continue;
1508 	}
1509 
1510       FOR_EACH_VEC_ELT (exits, j, ex)
1511 	{
1512 	  tree niter = NULL;
1513 	  HOST_WIDE_INT nitercst;
1514 	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1515 	  int probability;
1516 	  enum br_predictor predictor;
1517 
1518 	  predict_extra_loop_exits (ex);
1519 
1520 	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1521 	    niter = niter_desc.niter;
1522 	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1523 	    niter = loop_niter_by_eval (loop, ex);
1524 
1525 	  if (TREE_CODE (niter) == INTEGER_CST)
1526 	    {
1527 	      if (tree_fits_uhwi_p (niter)
1528 		  && max
1529 		  && compare_tree_int (niter, max - 1) == -1)
1530 		nitercst = tree_to_uhwi (niter) + 1;
1531 	      else
1532 		nitercst = max;
1533 	      predictor = PRED_LOOP_ITERATIONS;
1534 	    }
1535 	  /* If we have just one exit and we can derive some information about
1536 	     the number of iterations of the loop from the statements inside
1537 	     the loop, use it to predict this exit.  */
1538 	  else if (n_exits == 1)
1539 	    {
1540 	      nitercst = estimated_stmt_executions_int (loop);
1541 	      if (nitercst < 0)
1542 		continue;
1543 	      if (nitercst > max)
1544 		nitercst = max;
1545 
1546 	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
1547 	    }
1548 	  else
1549 	    continue;
1550 
1551 	  /* If the prediction for number of iterations is zero, do not
1552 	     predict the exit edges.  */
1553 	  if (nitercst == 0)
1554 	    continue;
1555 
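	  /* E.g. (illustrative, assuming REG_BR_PROB_BASE of 10000): a loop
	     expected to iterate about 10 times gets an exit probability of
	     (10000 + 5) / 10 == 1000, i.e. roughly 10% per iteration.  */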
1556 	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1557 	  predict_edge (ex, predictor, probability);
1558 	}
1559       exits.release ();
1560 
1561       /* Find information about loop bound variables.  */
1562       for (nb_iter = loop->bounds; nb_iter;
1563 	   nb_iter = nb_iter->next)
1564 	if (nb_iter->stmt
1565 	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1566 	  {
1567 	    stmt = as_a <gcond *> (nb_iter->stmt);
1568 	    break;
1569 	  }
1570       if (!stmt && last_stmt (loop->header)
1571 	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1572 	stmt = as_a <gcond *> (last_stmt (loop->header));
1573       if (stmt)
1574 	is_comparison_with_loop_invariant_p (stmt, loop,
1575 					     &loop_bound_var,
1576 					     &loop_bound_code,
1577 					     &loop_bound_step,
1578 					     &loop_iv_base);
1579 
1580       bbs = get_loop_body (loop);
1581 
1582       for (j = 0; j < loop->num_nodes; j++)
1583 	{
1584 	  int header_found = 0;
1585 	  edge e;
1586 	  edge_iterator ei;
1587 
1588 	  bb = bbs[j];
1589 
1590 	  /* Bypass loop heuristics on continue statements.  These
1591 	     statements construct loops via "non-loop" constructs
1592 	     in the source language and are better handled
1593 	     separately.  */
1594 	  if (predicted_by_p (bb, PRED_CONTINUE))
1595 	    continue;
1596 
1597 	  /* Loop branch heuristics - predict an edge back to a
1598 	     loop's head as taken.  */
1599 	  if (bb == loop->latch)
1600 	    {
1601 	      e = find_edge (loop->latch, loop->header);
1602 	      if (e)
1603 		{
1604 		  header_found = 1;
1605 		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1606 		}
1607 	    }
1608 
1609 	  /* Loop exit heuristics - predict as not taken an edge exiting the
1610 	     loop if the conditional has no loop header successor.  */
1611 	  if (!header_found
1612 	      /* If we already used more reliable loop exit predictors, do not
1613 		 bother with PRED_LOOP_EXIT.  */
1614 	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1615 	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1616 	    {
1617 	      /* For a loop with many exits we don't want to predict every exit
1618 	         with a fairly large probability, because if all exits are
1619 		 considered in a row, the loop would be predicted to iterate
1620 		 almost never.  The code dividing the probability by the number
1621 		 of exits is very rough.  It should compute the number of exits
1622 		 taken on each path through the function (not the overall number
1623 		 of exits, which might be a lot higher for loops with wide switch
1624 		 statements in them) and compute the n-th root.
1625 
1626 		 We limit the minimal probability to 2% to keep
1627 		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
1628 		 as this was causing a regression in a perl benchmark containing
1629 		 such a wide loop.  */
1630 
1631 	      int probability = ((REG_BR_PROB_BASE
1632 		                  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1633 				 / n_exits);
1634 	      if (probability < HITRATE (2))
1635 		probability = HITRATE (2);
1636 	      FOR_EACH_EDGE (e, ei, bb->succs)
1637 		if (e->dest->index < NUM_FIXED_BLOCKS
1638 		    || !flow_bb_inside_loop_p (loop, e->dest))
1639 		  predict_edge (e, PRED_LOOP_EXIT, probability);
1640 	    }
1641 	  if (loop_bound_var)
1642 	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1643 				   loop_bound_code,
1644 				   tree_to_shwi (loop_bound_step));
1645 	}
1646 
1647       /* Free basic blocks from get_loop_body.  */
1648       free (bbs);
1649     }
1650 }
1651 
1652 /* Attempt to predict probabilities of BB outgoing edges using local
1653    properties.  */
1654 static void
1655 bb_estimate_probability_locally (basic_block bb)
1656 {
1657   rtx_insn *last_insn = BB_END (bb);
1658   rtx cond;
1659 
1660   if (! can_predict_insn_p (last_insn))
1661     return;
1662   cond = get_condition (last_insn, NULL, false, false);
1663   if (! cond)
1664     return;
1665 
1666   /* Try "pointer heuristic."
1667      A comparison ptr == 0 is predicted as false.
1668      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1669   if (COMPARISON_P (cond)
1670       && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1671 	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1672     {
1673       if (GET_CODE (cond) == EQ)
1674 	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1675       else if (GET_CODE (cond) == NE)
1676 	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1677     }
1678   else
1679 
1680   /* Try "opcode heuristic."
1681      EQ tests are usually false and NE tests are usually true. Also,
1682      most quantities are positive, so we can make the appropriate guesses
1683      about signed comparisons against zero.  */
1684     switch (GET_CODE (cond))
1685       {
1686       case CONST_INT:
1687 	/* Unconditional branch.  */
1688 	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1689 			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
1690 	break;
1691 
1692       case EQ:
1693 	/* Floating point comparisons appear to behave in a very
1694 	   unpredictable way because of the special role of equality tests in
1695 	   FP code.  */
1696 	   FP code.  */
1697 	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1698 	  ;
1699 	/* Comparisons with 0 are often used for booleans and there is
1700 	   nothing useful to predict about them.  */
1701 	else if (XEXP (cond, 1) == const0_rtx
1702 		 || XEXP (cond, 0) == const0_rtx)
1703 	  ;
1704 	else
1705 	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1706 	break;
1707 
1708       case NE:
1709       case LTGT:
1710 	/* Floating point comparisons appear to behave in a very
1711 	   unpredictable way because of the special role of equality tests in
1712 	   FP code.  */
1713 	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1714 	  ;
1715 	/* Comparisons with 0 are often used for booleans and there is
1716 	   nothing useful to predict about them.  */
1717 	else if (XEXP (cond, 1) == const0_rtx
1718 		 || XEXP (cond, 0) == const0_rtx)
1719 	  ;
1720 	else
1721 	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1722 	break;
1723 
1724       case ORDERED:
1725 	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1726 	break;
1727 
1728       case UNORDERED:
1729 	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1730 	break;
1731 
1732       case LE:
1733       case LT:
1734 	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1735 	    || XEXP (cond, 1) == constm1_rtx)
1736 	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1737 	break;
1738 
1739       case GE:
1740       case GT:
1741 	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1742 	    || XEXP (cond, 1) == constm1_rtx)
1743 	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1744 	break;
1745 
1746       default:
1747 	break;
1748       }
1749 }
1750 
1751 /* Set edge->probability for each successor edge of BB.  */
1752 void
1753 guess_outgoing_edge_probabilities (basic_block bb)
1754 {
1755   bb_estimate_probability_locally (bb);
1756   combine_predictions_for_insn (BB_END (bb), bb);
1757 }
1758 
1759 static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1760 
1761 /* Helper function for expr_expected_value.  */
1762 
1763 static tree
1764 expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1765 		       tree op1, bitmap visited, enum br_predictor *predictor)
1766 {
1767   gimple def;
1768 
1769   if (predictor)
1770     *predictor = PRED_UNCONDITIONAL;
1771 
1772   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1773     {
1774       if (TREE_CONSTANT (op0))
1775 	return op0;
1776 
1777       if (code != SSA_NAME)
1778 	return NULL_TREE;
1779 
1780       def = SSA_NAME_DEF_STMT (op0);
1781 
1782       /* If we were already here, break the infinite cycle.  */
1783       if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1784 	return NULL;
1785 
1786       if (gimple_code (def) == GIMPLE_PHI)
1787 	{
1788 	  /* All the arguments of the PHI node must have the same expected
1789 	     constant value.  */
1790 	  int i, n = gimple_phi_num_args (def);
1791 	  tree val = NULL, new_val;
1792 
1793 	  for (i = 0; i < n; i++)
1794 	    {
1795 	      tree arg = PHI_ARG_DEF (def, i);
1796 	      enum br_predictor predictor2;
1797 
1798 	      /* If this PHI has itself as an argument, we cannot
1799 		 determine the expected value of this argument.  However,
1800 		 if we can find an expected constant value for the other
1801 		 PHI args then we can still be sure that this is
1802 		 likely a constant.  So be optimistic and just
1803 		 continue with the next argument.  */
1804 	      if (arg == PHI_RESULT (def))
1805 		continue;
1806 
1807 	      new_val = expr_expected_value (arg, visited, &predictor2);
1808 
1809 	      /* It is difficult to combine value predictors.  Simply assume
1810 		 that the later predictor is weaker and take its prediction.  */
1811 	      if (predictor && *predictor < predictor2)
1812 		*predictor = predictor2;
1813 	      if (!new_val)
1814 		return NULL;
1815 	      if (!val)
1816 		val = new_val;
1817 	      else if (!operand_equal_p (val, new_val, false))
1818 		return NULL;
1819 	    }
1820 	  return val;
1821 	}
1822       if (is_gimple_assign (def))
1823 	{
1824 	  if (gimple_assign_lhs (def) != op0)
1825 	    return NULL;
1826 
1827 	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1828 					gimple_assign_rhs1 (def),
1829 					gimple_assign_rhs_code (def),
1830 					gimple_assign_rhs2 (def),
1831 					visited, predictor);
1832 	}
1833 
1834       if (is_gimple_call (def))
1835 	{
1836 	  tree decl = gimple_call_fndecl (def);
1837 	  if (!decl)
1838 	    {
1839 	      if (gimple_call_internal_p (def)
1840 		  && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1841 		{
1842 		  gcc_assert (gimple_call_num_args (def) == 3);
1843 		  tree val = gimple_call_arg (def, 0);
1844 		  if (TREE_CONSTANT (val))
1845 		    return val;
1846 		  if (predictor)
1847 		    {
1848 		      tree val2 = gimple_call_arg (def, 2);
1849 		      gcc_assert (TREE_CODE (val2) == INTEGER_CST
1850 				  && tree_fits_uhwi_p (val2)
1851 				  && tree_to_uhwi (val2) < END_PREDICTORS);
1852 		      *predictor = (enum br_predictor) tree_to_uhwi (val2);
1853 		    }
1854 		  return gimple_call_arg (def, 1);
1855 		}
1856 	      return NULL;
1857 	    }
1858 	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1859 	    switch (DECL_FUNCTION_CODE (decl))
1860 	      {
1861 	      case BUILT_IN_EXPECT:
1862 		{
1863 		  tree val;
1864 		  if (gimple_call_num_args (def) != 2)
1865 		    return NULL;
1866 		  val = gimple_call_arg (def, 0);
1867 		  if (TREE_CONSTANT (val))
1868 		    return val;
1869 		  if (predictor)
1870 		    *predictor = PRED_BUILTIN_EXPECT;
1871 		  return gimple_call_arg (def, 1);
1872 		}
1873 
1874 	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1875 	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1876 	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1877 	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1878 	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1879 	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1880 	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1881 	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1882 	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1883 	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1884 	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1885 	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1886 	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1887 		/* Assume that any given atomic operation has low contention,
1888 		   and thus the compare-and-swap operation succeeds.  */
1889 		if (predictor)
1890 		  *predictor = PRED_COMPARE_AND_SWAP;
1891 		return boolean_true_node;
1892 	      default:
1893 		break;
1894 	    }
1895 	}
1896 
1897       return NULL;
1898     }
1899 
1900   if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1901     {
1902       tree res;
1903       enum br_predictor predictor2;
1904       op0 = expr_expected_value (op0, visited, predictor);
1905       if (!op0)
1906 	return NULL;
1907       op1 = expr_expected_value (op1, visited, &predictor2);
1908       if (predictor && *predictor < predictor2)
1909 	*predictor = predictor2;
1910       if (!op1)
1911 	return NULL;
1912       res = fold_build2 (code, type, op0, op1);
1913       if (TREE_CONSTANT (res))
1914 	return res;
1915       return NULL;
1916     }
1917   if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1918     {
1919       tree res;
1920       op0 = expr_expected_value (op0, visited, predictor);
1921       if (!op0)
1922 	return NULL;
1923       res = fold_build1 (code, type, op0);
1924       if (TREE_CONSTANT (res))
1925 	return res;
1926       return NULL;
1927     }
1928   return NULL;
1929 }
1930 
1931 /* Return the constant EXPR will likely have at execution time, or NULL
1932    if unknown.  The function is used by the builtin_expect branch
1933    predictor, so the evidence must come from that construct and any
1934    additional constant folding.
1935 
1936    We may want to implement a more involved value guess (such as value range
1937    propagation based prediction), but such tricks should go into a new implementation.  */
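/* For example, given a hypothetical source-level test

	if (__builtin_expect (x > 0, 1))
	  ...

   the value returned by __builtin_expect resolves to the constant 1 and
   the predictor is set to PRED_BUILTIN_EXPECT.  */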
1938 
1939 static tree
1940 expr_expected_value (tree expr, bitmap visited,
1941 		     enum br_predictor *predictor)
1942 {
1943   enum tree_code code;
1944   tree op0, op1;
1945 
1946   if (TREE_CONSTANT (expr))
1947     {
1948       if (predictor)
1949 	*predictor = PRED_UNCONDITIONAL;
1950       return expr;
1951     }
1952 
1953   extract_ops_from_tree (expr, &code, &op0, &op1);
1954   return expr_expected_value_1 (TREE_TYPE (expr),
1955 				op0, code, op1, visited, predictor);
1956 }
1957 
1958 /* Predict using the opcode of the last statement in the basic block.  */
1959 static void
1960 tree_predict_by_opcode (basic_block bb)
1961 {
1962   gimple stmt = last_stmt (bb);
1963   edge then_edge;
1964   tree op0, op1;
1965   tree type;
1966   tree val;
1967   enum tree_code cmp;
1968   bitmap visited;
1969   edge_iterator ei;
1970   enum br_predictor predictor;
1971 
1972   if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1973     return;
1974   FOR_EACH_EDGE (then_edge, ei, bb->succs)
1975     if (then_edge->flags & EDGE_TRUE_VALUE)
1976       break;
1977   op0 = gimple_cond_lhs (stmt);
1978   op1 = gimple_cond_rhs (stmt);
1979   cmp = gimple_cond_code (stmt);
1980   type = TREE_TYPE (op0);
1981   visited = BITMAP_ALLOC (NULL);
1982   val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1983 			       &predictor);
1984   BITMAP_FREE (visited);
1985   if (val && TREE_CODE (val) == INTEGER_CST)
1986     {
1987       if (predictor == PRED_BUILTIN_EXPECT)
1988 	{
1989 	  int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
1990 
1991 	  gcc_assert (percent >= 0 && percent <= 100);
1992 	  if (integer_zerop (val))
1993 	    percent = 100 - percent;
1994 	  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
1995 	}
1996       else
1997 	predict_edge (then_edge, predictor,
1998 		      integer_zerop (val) ? NOT_TAKEN : TAKEN);
1999     }
2000   /* Try "pointer heuristic."
2001      A comparison ptr == 0 is predicted as false.
2002      Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
2003   if (POINTER_TYPE_P (type))
2004     {
2005       if (cmp == EQ_EXPR)
2006 	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2007       else if (cmp == NE_EXPR)
2008 	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2009     }
2010   else
2011 
2012   /* Try "opcode heuristic."
2013      EQ tests are usually false and NE tests are usually true. Also,
2014      most quantities are positive, so we can make the appropriate guesses
2015      about signed comparisons against zero.  */
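  /* For instance, a hypothetical loop guard such as "i < n" with a
     non-constant bound gets no opcode-based hint here, while "count > 0"
     falls into the GT_EXPR case below and is predicted as true.  */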
2016     switch (cmp)
2017       {
2018       case EQ_EXPR:
2019       case UNEQ_EXPR:
2020 	/* Floating point comparisons appear to behave in a very
2021 	   unpredictable way because of the special role of equality
2022 	   tests in FP code.  */
2023 	if (FLOAT_TYPE_P (type))
2024 	  ;
2025 	/* Comparisons with 0 are often used for booleans and there is
2026 	   nothing useful to predict about them.  */
2027 	else if (integer_zerop (op0) || integer_zerop (op1))
2028 	  ;
2029 	else
2030 	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2031 	break;
2032 
2033       case NE_EXPR:
2034       case LTGT_EXPR:
2035 	/* Floating point comparisons appear to behave in a very
2036 	   unpredictable way because of the special role of equality
2037 	   tests in FP code.  */
2038 	if (FLOAT_TYPE_P (type))
2039 	  ;
2040 	/* Comparisons with 0 are often used for booleans and there is
2041 	   nothing useful to predict about them.  */
2042 	else if (integer_zerop (op0)
2043 		 || integer_zerop (op1))
2044 	  ;
2045 	else
2046 	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2047 	break;
2048 
2049       case ORDERED_EXPR:
2050 	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2051 	break;
2052 
2053       case UNORDERED_EXPR:
2054 	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2055 	break;
2056 
2057       case LE_EXPR:
2058       case LT_EXPR:
2059 	if (integer_zerop (op1)
2060 	    || integer_onep (op1)
2061 	    || integer_all_onesp (op1)
2062 	    || real_zerop (op1)
2063 	    || real_onep (op1)
2064 	    || real_minus_onep (op1))
2065 	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2066 	break;
2067 
2068       case GE_EXPR:
2069       case GT_EXPR:
2070 	if (integer_zerop (op1)
2071 	    || integer_onep (op1)
2072 	    || integer_all_onesp (op1)
2073 	    || real_zerop (op1)
2074 	    || real_onep (op1)
2075 	    || real_minus_onep (op1))
2076 	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2077 	break;
2078 
2079       default:
2080 	break;
2081       }
2082 }
2083 
2084 /* Try to guess whether the return value represents an error code.  */
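/* As an illustration, a path ending in "return NULL;" or "return -1;" is
   predicted unlikely (PRED_NULL_RETURN resp. PRED_NEGATIVE_RETURN), while
   e.g. "return 42;" is predicted likely (PRED_CONST_RETURN).  */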
2085 
2086 static enum br_predictor
2087 return_prediction (tree val, enum prediction *prediction)
2088 {
2089   /* VOID.  */
2090   if (!val)
2091     return PRED_NO_PREDICTION;
2092   /* Different heuristics for pointers and scalars.  */
2093   if (POINTER_TYPE_P (TREE_TYPE (val)))
2094     {
2095       /* NULL is usually not returned.  */
2096       if (integer_zerop (val))
2097 	{
2098 	  *prediction = NOT_TAKEN;
2099 	  return PRED_NULL_RETURN;
2100 	}
2101     }
2102   else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2103     {
2104       /* Negative return values are often used to indicate
2105          errors.  */
2106       if (TREE_CODE (val) == INTEGER_CST
2107 	  && tree_int_cst_sgn (val) < 0)
2108 	{
2109 	  *prediction = NOT_TAKEN;
2110 	  return PRED_NEGATIVE_RETURN;
2111 	}
2112       /* Constant return values seem to be commonly taken.
2113          Zero/one often represent booleans, so exclude them from the
2114 	 heuristics.  */
2115       if (TREE_CONSTANT (val)
2116 	  && (!integer_zerop (val) && !integer_onep (val)))
2117 	{
2118 	  *prediction = TAKEN;
2119 	  return PRED_CONST_RETURN;
2120 	}
2121     }
2122   return PRED_NO_PREDICTION;
2123 }
2124 
2125 /* Find the basic block with the return expression and look up its possible
2126    return value, trying to apply the RETURN_PREDICTION heuristics.  */
2127 static void
2128 apply_return_prediction (void)
2129 {
2130   greturn *return_stmt = NULL;
2131   tree return_val;
2132   edge e;
2133   gphi *phi;
2134   int phi_num_args, i;
2135   enum br_predictor pred;
2136   enum prediction direction;
2137   edge_iterator ei;
2138 
2139   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2140     {
2141       gimple last = last_stmt (e->src);
2142       if (last
2143 	  && gimple_code (last) == GIMPLE_RETURN)
2144 	{
2145 	  return_stmt = as_a <greturn *> (last);
2146 	  break;
2147 	}
2148     }
2149   if (!e)
2150     return;
2151   return_val = gimple_return_retval (return_stmt);
2152   if (!return_val)
2153     return;
2154   if (TREE_CODE (return_val) != SSA_NAME
2155       || !SSA_NAME_DEF_STMT (return_val)
2156       || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2157     return;
2158   phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
2159   phi_num_args = gimple_phi_num_args (phi);
2160   pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2161 
2162   /* Avoid the degenerate case where all return values from the function
2163      belong to the same category (i.e. they are all positive constants),
2164      so we can hardly say anything useful about them.  */
2165   for (i = 1; i < phi_num_args; i++)
2166     if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2167       break;
2168   if (i != phi_num_args)
2169     for (i = 0; i < phi_num_args; i++)
2170       {
2171 	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2172 	if (pred != PRED_NO_PREDICTION)
2173 	  predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2174 				         direction);
2175       }
2176 }
2177 
2178 /* Look for basic blocks that contain unlikely-to-happen events
2179    (such as noreturn calls) and mark all paths leading to the execution
2180    of such basic blocks as unlikely.  */
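/* For example, in a hypothetical

	if (bad)
	  abort ();

   all paths leading to the abort () call are predicted as not taken,
   since abort () is a noreturn function.  */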
2181 
2182 static void
2183 tree_bb_level_predictions (void)
2184 {
2185   basic_block bb;
2186   bool has_return_edges = false;
2187   edge e;
2188   edge_iterator ei;
2189 
2190   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2191     if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2192       {
2193         has_return_edges = true;
2194 	break;
2195       }
2196 
2197   apply_return_prediction ();
2198 
2199   FOR_EACH_BB_FN (bb, cfun)
2200     {
2201       gimple_stmt_iterator gsi;
2202 
2203       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2204 	{
2205 	  gimple stmt = gsi_stmt (gsi);
2206 	  tree decl;
2207 
2208 	  if (is_gimple_call (stmt))
2209 	    {
2210 	      if ((gimple_call_flags (stmt) & ECF_NORETURN)
2211 	          && has_return_edges)
2212 		predict_paths_leading_to (bb, PRED_NORETURN,
2213 					  NOT_TAKEN);
2214 	      decl = gimple_call_fndecl (stmt);
2215 	      if (decl
2216 		  && lookup_attribute ("cold",
2217 				       DECL_ATTRIBUTES (decl)))
2218 		predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2219 					  NOT_TAKEN);
2220 	    }
2221 	  else if (gimple_code (stmt) == GIMPLE_PREDICT)
2222 	    {
2223 	      predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2224 					gimple_predict_outcome (stmt));
2225 	      /* Keep GIMPLE_PREDICT around so early inlining will propagate
2226 	         hints to callers.  */
2227 	    }
2228 	}
2229     }
2230 }
2231 
2232 #ifdef ENABLE_CHECKING
2233 
2234 /* Callback for hash_map::traverse, asserts that the pointer map is
2235    empty.  */
2236 
2237 bool
2238 assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2239 		 void *)
2240 {
2241   gcc_assert (!value);
2242   return false;
2243 }
2244 #endif
2245 
2246 /* Predict branch probabilities and estimate profile for basic block BB.  */
2247 
2248 static void
2249 tree_estimate_probability_bb (basic_block bb)
2250 {
2251   edge e;
2252   edge_iterator ei;
2253   gimple last;
2254 
2255   FOR_EACH_EDGE (e, ei, bb->succs)
2256     {
2257       /* Predict edges to user labels with attributes.  */
2258       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2259 	{
2260 	  gimple_stmt_iterator gi;
2261 	  for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2262 	    {
2263 	      glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
2264 	      tree decl;
2265 
2266 	      if (!label_stmt)
2267 		break;
2268 	      decl = gimple_label_label (label_stmt);
2269 	      if (DECL_ARTIFICIAL (decl))
2270 		continue;
2271 
2272 	      /* Finally, we have a user-defined label.  */
2273 	      if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2274 		predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2275 	      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2276 		predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2277 	    }
2278 	}
2279 
2280       /* Predict early returns to be probable, as we've already taken
2281 	 care of error returns, and the other cases are often used for
2282 	 fast paths through the function.
2283 
2284 	 Since we've already removed the return statements, we are
2285 	 looking for a CFG like:
2286 
2287 	 if (conditional)
2288 	 {
2289 	 ..
2290 	 goto return_block
2291 	 }
2292 	 some other blocks
2293 	 return_block:
2294 	 return_stmt.  */
2295       if (e->dest != bb->next_bb
2296 	  && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2297 	  && single_succ_p (e->dest)
2298 	  && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2299 	  && (last = last_stmt (e->dest)) != NULL
2300 	  && gimple_code (last) == GIMPLE_RETURN)
2301 	{
2302 	  edge e1;
2303 	  edge_iterator ei1;
2304 
2305 	  if (single_succ_p (bb))
2306 	    {
2307 	      FOR_EACH_EDGE (e1, ei1, bb->preds)
2308 		if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2309 		    && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2310 		    && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2311 		  predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2312 	    }
2313 	  else
2314 	    if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2315 		&& !predicted_by_p (e->src, PRED_CONST_RETURN)
2316 		&& !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2317 	      predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2318 	}
2319 
2320       /* Look for a block we are guarding (i.e. we dominate it,
2321 	 but it doesn't postdominate us).  */
2322       if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2323 	  && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2324 	  && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2325 	{
2326 	  gimple_stmt_iterator bi;
2327 
2328 	  /* The call heuristic claims that a guarded function call
2329 	     is improbable.  This is because such calls are often used
2330 	     to signal exceptional situations such as printing error
2331 	     messages.  */
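	  /* For instance, for a hypothetical

		if (err)
		  report_error (err);

	     the edge into the block containing the call is predicted as not
	     taken, unless the call is const or pure.  */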
2332 	  for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2333 	       gsi_next (&bi))
2334 	    {
2335 	      gimple stmt = gsi_stmt (bi);
2336 	      if (is_gimple_call (stmt)
2337 		  /* Constant and pure calls are hardly ever used to signal
2338 		     something exceptional.  */
2339 		  && gimple_has_side_effects (stmt))
2340 		{
2341 		  predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2342 		  break;
2343 		}
2344 	    }
2345 	}
2346     }
2347   tree_predict_by_opcode (bb);
2348 }
2349 
2350 /* Predict branch probabilities and estimate profile of the tree CFG.
2351    This function can be called from the loop optimizers to recompute
2352    the profile information.  */
2353 
2354 void
2355 tree_estimate_probability (void)
2356 {
2357   basic_block bb;
2358 
2359   add_noreturn_fake_exit_edges ();
2360   connect_infinite_loops_to_exit ();
2361   /* We use loop_niter_by_eval, which requires that the loops have
2362      preheaders.  */
2363   create_preheaders (CP_SIMPLE_PREHEADERS);
2364   calculate_dominance_info (CDI_POST_DOMINATORS);
2365 
2366   bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2367   tree_bb_level_predictions ();
2368   record_loop_exits ();
2369 
2370   if (number_of_loops (cfun) > 1)
2371     predict_loops ();
2372 
2373   FOR_EACH_BB_FN (bb, cfun)
2374     tree_estimate_probability_bb (bb);
2375 
2376   FOR_EACH_BB_FN (bb, cfun)
2377     combine_predictions_for_bb (bb);
2378 
2379 #ifdef ENABLE_CHECKING
2380   bb_predictions->traverse<void *, assert_is_empty> (NULL);
2381 #endif
2382   delete bb_predictions;
2383   bb_predictions = NULL;
2384 
2385   estimate_bb_frequencies (false);
2386   free_dominance_info (CDI_POST_DOMINATORS);
2387   remove_fake_exit_edges ();
2388 }
2389 
2390 /* Predict edges entering CUR whose sources are not postdominated by BB
2391    with predictor PRED, and recurse into the postdominator tree below CUR.  */
2392 
2393 static void
2394 predict_paths_for_bb (basic_block cur, basic_block bb,
2395 		      enum br_predictor pred,
2396 		      enum prediction taken,
2397 		      bitmap visited)
2398 {
2399   edge e;
2400   edge_iterator ei;
2401   basic_block son;
2402 
2403   /* We are looking for all edges forming the edge cut induced by
2404      the set of all blocks postdominated by BB.  */
2405   FOR_EACH_EDGE (e, ei, cur->preds)
2406     if (e->src->index >= NUM_FIXED_BLOCKS
2407 	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2408     {
2409       edge e2;
2410       edge_iterator ei2;
2411       bool found = false;
2412 
2413       /* Ignore fake and EH edges; we predict them as not taken anyway.  */
2414       if (e->flags & (EDGE_EH | EDGE_FAKE))
2415 	continue;
2416       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2417 
2418       /* See if there is an edge from e->src that is not abnormal
2419 	 and does not lead to BB.  */
2420       FOR_EACH_EDGE (e2, ei2, e->src->succs)
2421 	if (e2 != e
2422 	    && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2423 	    && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2424 	  {
2425 	    found = true;
2426 	    break;
2427 	  }
2428 
2429       /* If there is a non-abnormal path leaving e->src, predict the edge
2430 	 using the predictor.  Otherwise we need to look for paths
2431 	 leading to e->src.
2432 
2433 	 The second case may lead to an infinite loop when we are predicting
2434 	 regions that are only reachable by abnormal edges.  We simply
2435 	 prevent visiting a given BB twice.  */
2436       if (found)
2437         predict_edge_def (e, pred, taken);
2438       else if (bitmap_set_bit (visited, e->src->index))
2439 	predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2440     }
2441   for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2442        son;
2443        son = next_dom_son (CDI_POST_DOMINATORS, son))
2444     predict_paths_for_bb (son, bb, pred, taken, visited);
2445 }
2446 
2447 /* Set branch probabilities on all paths leading to BB according to
2448    the predictor PRED and the prediction TAKEN.  */
2449 
2450 static void
2451 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2452 			  enum prediction taken)
2453 {
2454   bitmap visited = BITMAP_ALLOC (NULL);
2455   predict_paths_for_bb (bb, bb, pred, taken, visited);
2456   BITMAP_FREE (visited);
2457 }
2458 
2459 /* Like predict_paths_leading_to but takes an edge instead of a basic block.  */
2460 
2461 static void
2462 predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2463 			       enum prediction taken)
2464 {
2465   bool has_nonloop_edge = false;
2466   edge_iterator ei;
2467   edge e2;
2468 
2469   basic_block bb = e->src;
2470   FOR_EACH_EDGE (e2, ei, bb->succs)
2471     if (e2->dest != e->src && e2->dest != e->dest
2472 	&& !(e2->flags & (EDGE_EH | EDGE_FAKE))
2473 	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2474       {
2475 	has_nonloop_edge = true;
2476 	break;
2477       }
2478   if (!has_nonloop_edge)
2479     {
2480       bitmap visited = BITMAP_ALLOC (NULL);
2481       predict_paths_for_bb (bb, bb, pred, taken, visited);
2482       BITMAP_FREE (visited);
2483     }
2484   else
2485     predict_edge_def (e, pred, taken);
2486 }
2487 
2488 /* This is used to carry information about basic blocks.  It is
2489    attached to the AUX field of the standard CFG block.  */
2490 
2491 struct block_info
2492 {
2493   /* Estimated frequency of execution of basic_block.  */
2494   sreal frequency;
2495 
2496   /* To keep queue of basic blocks to process.  */
2497   basic_block next;
2498 
2499   /* Number of predecessors we need to visit first.  */
2500   int npredecessors;
2501 };
2502 
2503 /* Similar information for edges.  */
2504 struct edge_prob_info
2505 {
2506   /* If the edge is a loopback edge, the probability that the edge will be
2507      reached provided the header is.  The estimated number of iterations of
2508      the loop can then be computed as 1 / (1 - back_edge_prob).  */
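  /* For example, a back edge probability of 0.9 corresponds to an
     estimated 1 / (1 - 0.9) = 10 iterations of the loop.  */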
2509   sreal back_edge_prob;
2510   /* True if the edge is a loopback edge in the natural loop.  */
2511   unsigned int back_edge:1;
2512 };
2513 
2514 #define BLOCK_INFO(B)	((block_info *) (B)->aux)
2515 #undef EDGE_INFO
2516 #define EDGE_INFO(E)	((edge_prob_info *) (E)->aux)
2517 
2518 /* Helper function for estimate_bb_frequencies.
2519    Propagate the frequencies in blocks marked in
2520    TOVISIT, starting in HEAD.  */
2521 
2522 static void
2523 propagate_freq (basic_block head, bitmap tovisit)
2524 {
2525   basic_block bb;
2526   basic_block last;
2527   unsigned i;
2528   edge e;
2529   basic_block nextbb;
2530   bitmap_iterator bi;
2531 
2532   /* For each basic block we need to visit, count the number of its
2533      predecessors that we need to visit first.  */
2534   EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2535     {
2536       edge_iterator ei;
2537       int count = 0;
2538 
2539       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2540 
2541       FOR_EACH_EDGE (e, ei, bb->preds)
2542 	{
2543 	  bool visit = bitmap_bit_p (tovisit, e->src->index);
2544 
2545 	  if (visit && !(e->flags & EDGE_DFS_BACK))
2546 	    count++;
2547 	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2548 	    fprintf (dump_file,
2549 		     "Irreducible region hit, ignoring edge to %i->%i\n",
2550 		     e->src->index, bb->index);
2551 	}
2552       BLOCK_INFO (bb)->npredecessors = count;
2553       /* When the function never returns, we will never process the exit block.  */
2554       if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2555 	bb->count = bb->frequency = 0;
2556     }
2557 
2558   BLOCK_INFO (head)->frequency = 1;
2559   last = head;
2560   for (bb = head; bb; bb = nextbb)
2561     {
2562       edge_iterator ei;
2563       sreal cyclic_probability = 0;
2564       sreal frequency = 0;
2565 
2566       nextbb = BLOCK_INFO (bb)->next;
2567       BLOCK_INFO (bb)->next = NULL;
2568 
2569       /* Compute frequency of basic block.  */
2570       if (bb != head)
2571 	{
2572 #ifdef ENABLE_CHECKING
2573 	  FOR_EACH_EDGE (e, ei, bb->preds)
2574 	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2575 			|| (e->flags & EDGE_DFS_BACK));
2576 #endif
2577 
2578 	  FOR_EACH_EDGE (e, ei, bb->preds)
2579 	    if (EDGE_INFO (e)->back_edge)
2580 	      {
2581 		cyclic_probability += EDGE_INFO (e)->back_edge_prob;
2582 	      }
2583 	    else if (!(e->flags & EDGE_DFS_BACK))
2584 	      {
2585 		/*  frequency += (e->probability
2586 				  * BLOCK_INFO (e->src)->frequency /
2587 				  REG_BR_PROB_BASE);  */
2588 
2589 		sreal tmp = e->probability;
2590 		tmp *= BLOCK_INFO (e->src)->frequency;
2591 		tmp *= real_inv_br_prob_base;
2592 		frequency += tmp;
2593 	      }
2594 
2595 	  if (cyclic_probability == 0)
2596 	    {
2597 	      BLOCK_INFO (bb)->frequency = frequency;
2598 	    }
2599 	  else
2600 	    {
2601 	      if (cyclic_probability > real_almost_one)
2602 		cyclic_probability = real_almost_one;
2603 
2604 	      /* BLOCK_INFO (bb)->frequency = frequency
2605 					      / (1 - cyclic_probability) */
2606 
2607 	      cyclic_probability = sreal (1) - cyclic_probability;
2608 	      BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
2609 	    }
2610 	}
2611 
2612       bitmap_clear_bit (tovisit, bb->index);
2613 
2614       e = find_edge (bb, head);
2615       if (e)
2616 	{
2617 	  /* EDGE_INFO (e)->back_edge_prob
2618 	     = ((e->probability * BLOCK_INFO (bb)->frequency)
2619 	     / REG_BR_PROB_BASE); */
2620 
2621 	  sreal tmp = e->probability;
2622 	  tmp *= BLOCK_INFO (bb)->frequency;
2623 	  EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
2624 	}
2625 
2626       /* Propagate to successor blocks.  */
2627       FOR_EACH_EDGE (e, ei, bb->succs)
2628 	if (!(e->flags & EDGE_DFS_BACK)
2629 	    && BLOCK_INFO (e->dest)->npredecessors)
2630 	  {
2631 	    BLOCK_INFO (e->dest)->npredecessors--;
2632 	    if (!BLOCK_INFO (e->dest)->npredecessors)
2633 	      {
2634 		if (!nextbb)
2635 		  nextbb = e->dest;
2636 		else
2637 		  BLOCK_INFO (last)->next = e->dest;
2638 
2639 		last = e->dest;
2640 	      }
2641 	  }
2642     }
2643 }
2644 
2645 /* Estimate frequencies in loops at the same nesting level.  */
2646 
2647 static void
2648 estimate_loops_at_level (struct loop *first_loop)
2649 {
2650   struct loop *loop;
2651 
2652   for (loop = first_loop; loop; loop = loop->next)
2653     {
2654       edge e;
2655       basic_block *bbs;
2656       unsigned i;
2657       bitmap tovisit = BITMAP_ALLOC (NULL);
2658 
2659       estimate_loops_at_level (loop->inner);
2660 
2661       /* Find the current loop's back edge and mark it.  */
2662       e = loop_latch_edge (loop);
2663       EDGE_INFO (e)->back_edge = 1;
2664 
2665       bbs = get_loop_body (loop);
2666       for (i = 0; i < loop->num_nodes; i++)
2667 	bitmap_set_bit (tovisit, bbs[i]->index);
2668       free (bbs);
2669       propagate_freq (loop->header, tovisit);
2670       BITMAP_FREE (tovisit);
2671     }
2672 }
2673 
2674 /* Propagate frequencies through the structure of loops.  */
2675 
2676 static void
2677 estimate_loops (void)
2678 {
2679   bitmap tovisit = BITMAP_ALLOC (NULL);
2680   basic_block bb;
2681 
2682   /* Start by estimating the frequencies in the loops.  */
2683   if (number_of_loops (cfun) > 1)
2684     estimate_loops_at_level (current_loops->tree_root->inner);
2685 
2686   /* Now propagate the frequencies through all the blocks.  */
2687   FOR_ALL_BB_FN (bb, cfun)
2688     {
2689       bitmap_set_bit (tovisit, bb->index);
2690     }
2691   propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2692   BITMAP_FREE (tovisit);
2693 }
2694 
2695 /* Drop the profile for NODE to guessed, and update its frequency based on
2696    whether it is expected to be hot given the CALL_COUNT.  */
2697 
2698 static void
2699 drop_profile (struct cgraph_node *node, gcov_type call_count)
2700 {
2701   struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2702   /* In the case where this was called by another function with a
2703      dropped profile, call_count will be 0. Since there are no
2704      non-zero call counts to this function, we don't know for sure
2705      whether it is hot, and therefore it will be marked normal below.  */
2706   bool hot = maybe_hot_count_p (NULL, call_count);
2707 
2708   if (dump_file)
2709     fprintf (dump_file,
2710              "Dropping 0 profile for %s/%i. %s based on calls.\n",
2711              node->name (), node->order,
2712              hot ? "Function is hot" : "Function is normal");
2713   /* We only expect to miss profiles for functions that are reached
2714      via non-zero call edges in cases where the function may have
2715      been linked from another module or library (COMDATs and extern
2716      templates). See the comments below for handle_missing_profiles.
2717      Also, only warn in cases where the missing counts exceed the
2718      number of training runs. In certain cases with an execv followed
2719      by a no-return call the profile for the no-return call is not
2720      dumped and there can be a mismatch.  */
2721   if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2722       && call_count > profile_info->runs)
2723     {
2724       if (flag_profile_correction)
2725         {
2726           if (dump_file)
2727             fprintf (dump_file,
2728                      "Missing counts for called function %s/%i\n",
2729                      node->name (), node->order);
2730         }
2731       else
2732         warning (0, "Missing counts for called function %s/%i",
2733                  node->name (), node->order);
2734     }
2735 
2736   profile_status_for_fn (fn)
2737       = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2738   node->frequency
2739       = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2740 }
2741 
2742 /* In the case of COMDAT routines, multiple object files will contain the same
2743    function and the linker will select one for the binary. In that case
2744    all the other copies from the profile-instrumented binary will be missing
2745    profile counts. Look for cases where this happened, due to non-zero
2746    call counts going to 0-count functions, and drop the profile to guessed
2747    so that we can use the estimated probabilities and avoid optimizing only
2748    for size.
2749 
2750    The other case where the profile may be missing is when the routine
2751    is not going to be emitted to the object file, e.g. for "extern template"
2752    class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2753    all other cases of non-zero calls to 0-count functions.  */
2754 
2755 void
2756 handle_missing_profiles (void)
2757 {
2758   struct cgraph_node *node;
2759   int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2760   vec<struct cgraph_node *> worklist;
2761   worklist.create (64);
2762 
2763   /* See if a 0-count function has non-0-count callers.  In this case we
2764      lost some profile.  Drop the function's profile to PROFILE_GUESSED.  */
2765   FOR_EACH_DEFINED_FUNCTION (node)
2766     {
2767       struct cgraph_edge *e;
2768       gcov_type call_count = 0;
2769       gcov_type max_tp_first_run = 0;
2770       struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2771 
2772       if (node->count)
2773         continue;
2774       for (e = node->callers; e; e = e->next_caller)
2775       {
2776         call_count += e->count;
2777 
2778 	if (e->caller->tp_first_run > max_tp_first_run)
2779 	  max_tp_first_run = e->caller->tp_first_run;
2780       }
2781 
2782       /* If the time profile is missing, assign the maximum that comes from
2783 	 the caller functions.  */
2784       if (!node->tp_first_run && max_tp_first_run)
2785 	node->tp_first_run = max_tp_first_run + 1;
2786 
2787       if (call_count
2788           && fn && fn->cfg
2789           && (call_count * unlikely_count_fraction >= profile_info->runs))
2790         {
2791           drop_profile (node, call_count);
2792           worklist.safe_push (node);
2793         }
2794     }
2795 
2796   /* Propagate the profile dropping to other 0-count COMDATs that are
2797      potentially called by COMDATs we already dropped the profile on.  */
2798   while (worklist.length () > 0)
2799     {
2800       struct cgraph_edge *e;
2801 
2802       node = worklist.pop ();
2803       for (e = node->callees; e; e = e->next_callee)
2804         {
2805           struct cgraph_node *callee = e->callee;
2806           struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2807 
2808           if (callee->count > 0)
2809             continue;
2810           if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2811               && profile_status_for_fn (fn) == PROFILE_READ)
2812             {
2813               drop_profile (node, 0);
2814               worklist.safe_push (callee);
2815             }
2816         }
2817     }
2818   worklist.release ();
2819 }
2820 
2821 /* Convert counts measured by profile-driven feedback to frequencies.
2822    Return nonzero iff there was any nonzero execution count.  */
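/* For example, if the hottest block has count 1000, a block with count 250
   ends up with a frequency of roughly BB_FREQ_MAX / 4 (the formula below
   rounds to the nearest integer).  */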
2823 
2824 int
2825 counts_to_freqs (void)
2826 {
2827   gcov_type count_max, true_count_max = 0;
2828   basic_block bb;
2829 
2830   /* Don't overwrite the estimated frequencies when the profile for
2831      the function is missing.  We may drop this function to PROFILE_GUESSED
2832      later in drop_profile ().  */
2833   if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2834     return 0;
2835 
2836   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2837     true_count_max = MAX (bb->count, true_count_max);
2838 
2839   count_max = MAX (true_count_max, 1);
2840   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2841     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2842 
2843   return true_count_max;
2844 }
2845 
2846 /* Return true if the function is likely to be expensive, so there is no point
2847    in optimizing the performance of the prologue or epilogue, or in inlining, at
2848    the expense of code size growth.  THRESHOLD is the limit on the number of
2849    instructions the function can execute on average and still be considered inexpensive.  */
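/* As an illustration, with THRESHOLD set to 100 the function is considered
   expensive as soon as the sum of the frequencies of its active insns
   exceeds 100 times the entry block frequency, i.e. an average invocation
   is estimated to execute more than 100 instructions.  */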
2850 
2851 bool
2852 expensive_function_p (int threshold)
2853 {
2854   unsigned int sum = 0;
2855   basic_block bb;
2856   unsigned int limit;
2857 
2858   /* We cannot compute this accurately for large thresholds due to the
2859      scaled frequencies.  */
2860   gcc_assert (threshold <= BB_FREQ_MAX);
2861 
2862   /* Frequencies are out of range.  This either means that the function
2863      contains an internal loop executing more than BB_FREQ_MAX times, or
2864      profile feedback is available and the function has not been executed at all.  */
2865   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2866     return true;
2867 
2868   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2869   limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2870   FOR_EACH_BB_FN (bb, cfun)
2871     {
2872       rtx_insn *insn;
2873 
2874       FOR_BB_INSNS (bb, insn)
2875 	if (active_insn_p (insn))
2876 	  {
2877 	    sum += bb->frequency;
2878 	    if (sum > limit)
2879 	      return true;
2880 	  }
2881     }
2882 
2883   return false;
2884 }
2885 
2886 /* Estimate and propagate basic block frequencies using the given branch
2887    probabilities.  If FORCE is true, the frequencies are used to estimate
2888    the counts even when there are already non-zero profile counts.  */
2889 
2890 void
2891 estimate_bb_frequencies (bool force)
2892 {
2893   basic_block bb;
2894   sreal freq_max;
2895 
2896   if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2897     {
2898       static int real_values_initialized = 0;
2899 
2900       if (!real_values_initialized)
2901         {
2902 	  real_values_initialized = 1;
2903 	  real_br_prob_base = REG_BR_PROB_BASE;
2904 	  real_bb_freq_max = BB_FREQ_MAX;
2905 	  real_one_half = sreal (1, -1);
2906 	  real_inv_br_prob_base = sreal (1) / real_br_prob_base;
2907 	  real_almost_one = sreal (1) - real_inv_br_prob_base;
2908 	}
2909 
2910       mark_dfs_back_edges ();
2911 
2912       single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2913 	 REG_BR_PROB_BASE;
2914 
2915       /* Set up block info for each basic block.  */
2916       alloc_aux_for_blocks (sizeof (block_info));
2917       alloc_aux_for_edges (sizeof (edge_prob_info));
2918       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2919 	{
2920 	  edge e;
2921 	  edge_iterator ei;
2922 
2923 	  FOR_EACH_EDGE (e, ei, bb->succs)
2924 	    {
2925 	      EDGE_INFO (e)->back_edge_prob = e->probability;
2926 	      EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
2927 	    }
2928 	}
2929 
2930       /* First compute frequencies locally for each loop from innermost
2931          to outermost to examine frequencies for back edges.  */
2932       estimate_loops ();
2933 
2934       freq_max = 0;
2935       FOR_EACH_BB_FN (bb, cfun)
2936 	if (freq_max < BLOCK_INFO (bb)->frequency)
2937 	  freq_max = BLOCK_INFO (bb)->frequency;
2938 
2939       freq_max = real_bb_freq_max / freq_max;
2940       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2941 	{
2942 	  sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
2943 	  bb->frequency = tmp.to_int ();
2944 	}
2945 
2946       free_aux_for_blocks ();
2947       free_aux_for_edges ();
2948     }
2949   compute_function_frequency ();
2950 }
2951 
2952 /* Decide whether function is hot, cold or unlikely executed.  */
2953 void
2954 compute_function_frequency (void)
2955 {
2956   basic_block bb;
2957   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2958 
2959   if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2960       || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2961     node->only_called_at_startup = true;
2962   if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2963     node->only_called_at_exit = true;
2964 
2965   if (profile_status_for_fn (cfun) != PROFILE_READ)
2966     {
2967       int flags = flags_from_decl_or_type (current_function_decl);
2968       if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2969 	  != NULL)
2970         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2971       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2972 	       != NULL)
2973         node->frequency = NODE_FREQUENCY_HOT;
2974       else if (flags & ECF_NORETURN)
2975         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2976       else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
2977         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2978       else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2979 	       || DECL_STATIC_DESTRUCTOR (current_function_decl))
2980         node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
2981       return;
2982     }
2983 
2984   /* Only the first time around, try to drop the function into unlikely
2985      executed.  After inlining, the roundoff errors may confuse us.
2986      The ipa-profile pass will drop functions only called from unlikely
2987      functions to unlikely, and that is most of what we care about.  */
2988   if (!cfun->after_inlining)
2989     node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2990   FOR_EACH_BB_FN (bb, cfun)
2991     {
2992       if (maybe_hot_bb_p (cfun, bb))
2993 	{
2994 	  node->frequency = NODE_FREQUENCY_HOT;
2995 	  return;
2996 	}
2997       if (!probably_never_executed_bb_p (cfun, bb))
2998 	node->frequency = NODE_FREQUENCY_NORMAL;
2999     }
3000 }
3001 
3002 /* Build PREDICT_EXPR.  */
3003 tree
3004 build_predict_expr (enum br_predictor predictor, enum prediction taken)
3005 {
3006   tree t = build1 (PREDICT_EXPR, void_type_node,
3007 		   build_int_cst (integer_type_node, predictor));
3008   SET_PREDICT_EXPR_OUTCOME (t, taken);
3009   return t;
3010 }
3011 
3012 const char *
3013 predictor_name (enum br_predictor predictor)
3014 {
3015   return predictor_info[predictor].name;
3016 }
3017 
3018 /* Predict branch probabilities and estimate profile of the tree CFG. */
3019 
3020 namespace {
3021 
3022 const pass_data pass_data_profile =
3023 {
3024   GIMPLE_PASS, /* type */
3025   "profile_estimate", /* name */
3026   OPTGROUP_NONE, /* optinfo_flags */
3027   TV_BRANCH_PROB, /* tv_id */
3028   PROP_cfg, /* properties_required */
3029   0, /* properties_provided */
3030   0, /* properties_destroyed */
3031   0, /* todo_flags_start */
3032   0, /* todo_flags_finish */
3033 };
3034 
3035 class pass_profile : public gimple_opt_pass
3036 {
3037 public:
3038   pass_profile (gcc::context *ctxt)
3039     : gimple_opt_pass (pass_data_profile, ctxt)
3040   {}
3041 
3042   /* opt_pass methods: */
3043   virtual bool gate (function *) { return flag_guess_branch_prob; }
3044   virtual unsigned int execute (function *);
3045 
3046 }; // class pass_profile
3047 
3048 unsigned int
3049 pass_profile::execute (function *fun)
3050 {
3051   unsigned nb_loops;
3052 
3053   if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
3054     return 0;
3055 
3056   loop_optimizer_init (LOOPS_NORMAL);
3057   if (dump_file && (dump_flags & TDF_DETAILS))
3058     flow_loops_dump (dump_file, NULL, 0);
3059 
3060   mark_irreducible_loops ();
3061 
3062   nb_loops = number_of_loops (fun);
3063   if (nb_loops > 1)
3064     scev_initialize ();
3065 
3066   tree_estimate_probability ();
3067 
3068   if (nb_loops > 1)
3069     scev_finalize ();
3070 
3071   loop_optimizer_finalize ();
3072   if (dump_file && (dump_flags & TDF_DETAILS))
3073     gimple_dump_cfg (dump_file, dump_flags);
3074   if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3075     profile_status_for_fn (fun) = PROFILE_GUESSED;
3076   return 0;
3077 }
3078 
3079 } // anon namespace
3080 
3081 gimple_opt_pass *
3082 make_pass_profile (gcc::context *ctxt)
3083 {
3084   return new pass_profile (ctxt);
3085 }
3086 
3087 namespace {
3088 
3089 const pass_data pass_data_strip_predict_hints =
3090 {
3091   GIMPLE_PASS, /* type */
3092   "*strip_predict_hints", /* name */
3093   OPTGROUP_NONE, /* optinfo_flags */
3094   TV_BRANCH_PROB, /* tv_id */
3095   PROP_cfg, /* properties_required */
3096   0, /* properties_provided */
3097   0, /* properties_destroyed */
3098   0, /* todo_flags_start */
3099   0, /* todo_flags_finish */
3100 };
3101 
3102 class pass_strip_predict_hints : public gimple_opt_pass
3103 {
3104 public:
3105   pass_strip_predict_hints (gcc::context *ctxt)
3106     : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3107   {}
3108 
3109   /* opt_pass methods: */
3110   opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3111   virtual unsigned int execute (function *);
3112 
3113 }; // class pass_strip_predict_hints
3114 
3115 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3116    we no longer need.  */
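/* For example, a hypothetical statement "tmp = __builtin_expect (cond, 1);"
   is replaced by the plain assignment "tmp = cond;", and calls whose result
   is unused are simply removed.  */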
3117 unsigned int
3118 pass_strip_predict_hints::execute (function *fun)
3119 {
3120   basic_block bb;
3121   gimple ass_stmt;
3122   tree var;
3123 
3124   FOR_EACH_BB_FN (bb, fun)
3125     {
3126       gimple_stmt_iterator bi;
3127       for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3128 	{
3129 	  gimple stmt = gsi_stmt (bi);
3130 
3131 	  if (gimple_code (stmt) == GIMPLE_PREDICT)
3132 	    {
3133 	      gsi_remove (&bi, true);
3134 	      continue;
3135 	    }
3136 	  else if (is_gimple_call (stmt))
3137 	    {
3138 	      tree fndecl = gimple_call_fndecl (stmt);
3139 
3140 	      if ((fndecl
3141 		   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3142 		   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3143 		   && gimple_call_num_args (stmt) == 2)
3144 		  || (gimple_call_internal_p (stmt)
3145 		      && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3146 		{
3147 		  var = gimple_call_lhs (stmt);
3148 		  if (var)
3149 		    {
3150 		      ass_stmt
3151 			= gimple_build_assign (var, gimple_call_arg (stmt, 0));
3152 		      gsi_replace (&bi, ass_stmt, true);
3153 		    }
3154 		  else
3155 		    {
3156 		      gsi_remove (&bi, true);
3157 		      continue;
3158 		    }
3159 		}
3160 	    }
3161 	  gsi_next (&bi);
3162 	}
3163     }
3164   return 0;
3165 }
3166 
3167 } // anon namespace
3168 
3169 gimple_opt_pass *
3170 make_pass_strip_predict_hints (gcc::context *ctxt)
3171 {
3172   return new pass_strip_predict_hints (ctxt);
3173 }
3174 
3175 /* Rebuild function frequencies.  Passes are in general expected to
3176    maintain the profile by hand; however, in some cases this is not possible:
3177    for example, when inlining several functions with loops, frequencies might
3178    run out of scale and thus need to be recomputed.  */
3179 
3180 void
3181 rebuild_frequencies (void)
3182 {
3183   timevar_push (TV_REBUILD_FREQUENCIES);
3184 
3185   /* When the max bb count in the function is small, there is a higher
3186      chance that there were truncation errors in the integer scaling
3187      of counts by inlining and other optimizations. This could lead
3188      to incorrect classification of code as being cold when it isn't.
3189      In that case, force the estimation of bb counts/frequencies from the
3190      branch probabilities, rather than computing frequencies from counts,
3191      which may also lead to frequencies incorrectly reduced to 0. There
3192      is less precision in the probabilities, so we only do this for small
3193      max counts.  */
3194   gcov_type count_max = 0;
3195   basic_block bb;
3196   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3197     count_max = MAX (bb->count, count_max);
3198 
3199   if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3200       || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
3201 	  && count_max < REG_BR_PROB_BASE/10))
3202     {
3203       loop_optimizer_init (0);
3204       add_noreturn_fake_exit_edges ();
3205       mark_irreducible_loops ();
3206       connect_infinite_loops_to_exit ();
3207       estimate_bb_frequencies (true);
3208       remove_fake_exit_edges ();
3209       loop_optimizer_finalize ();
3210     }
3211   else if (profile_status_for_fn (cfun) == PROFILE_READ)
3212     counts_to_freqs ();
3213   else
3214     gcc_unreachable ();
3215   timevar_pop (TV_REBUILD_FREQUENCIES);
3216 }
3217