/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "regs.h"
#include "flags.h"
#include "function.h"
#include "except.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "expr.h"
#include "predict.h"
#include "coverage.h"
#include "sreal.h"
#include "params.h"
#include "target.h"
#include "cfgloop.h"
#include "tree-flow.h"
#include "ggc.h"
#include "tree-pass.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "pointer-set.h"

/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;

/* Random guesstimation given names.
   PROB_VERY_UNLIKELY should be small enough so a basic block predicted
   by it gets below HOT_BB_FREQUENCY_FRACTION.  */
#define PROB_VERY_UNLIKELY	(REG_BR_PROB_BASE / 2000 - 1)
#define PROB_EVEN		(REG_BR_PROB_BASE / 2)
#define PROB_VERY_LIKELY	(REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
#define PROB_ALWAYS		(REG_BR_PROB_BASE)

static void combine_predictions_for_insn (rtx, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
static bool can_predict_insn_p (const_rtx);

/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Recompute hitrate in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
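/* For illustration, assuming REG_BR_PROB_BASE is 10000 (its usual value):
   HITRATE (50) evaluates to (50 * 10000 + 50) / 100 == 5000 and
   HITRATE (99) to 9900, i.e. the percentages listed in predict.def are
   simply rescaled, with rounding, onto the 0..REG_BR_PROB_BASE
   fixed-point probability scale used throughout the compiler.  */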

#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR

/* Return TRUE if frequency FREQ is considered to be hot.  */

static inline bool
maybe_hot_frequency_p (struct function *fun, int freq)
{
  struct cgraph_node *node = cgraph_get_node (fun->decl);
  if (!profile_info || !flag_branch_probabilities)
    {
      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
        return false;
      if (node->frequency == NODE_FREQUENCY_HOT)
        return true;
    }
  if (profile_status_for_function (fun) == PROFILE_ABSENT)
    return true;
  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency * 2 / 3))
    return false;
  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
    return false;
  if (freq < (ENTRY_BLOCK_PTR_FOR_FUNCTION (fun)->frequency
              / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}

/* Return TRUE if count COUNT is considered to be hot.  */

static inline bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
  gcov_working_set_t *ws;
  static gcov_type min_count = -1;
  if (fun && profile_status_for_function (fun) != PROFILE_READ)
    return true;
  /* Code executed at most once is not hot.  */
  if (profile_info->runs >= count)
    return false;
  if (min_count == -1)
    {
      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
      gcc_assert (ws);
      min_count = ws->min_counter;
    }
  return (count >= min_count);
}

/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (struct function *fun, const_basic_block bb)
{
  gcc_checking_assert (fun);
  if (profile_status_for_function (fun) == PROFILE_READ)
    return maybe_hot_count_p (fun, bb->count);
  return maybe_hot_frequency_p (fun, bb->frequency);
}

/* Return true if the call can be hot.  */

bool
cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
{
  if (profile_info && flag_branch_probabilities
      && !maybe_hot_count_p (NULL, edge->count))
    return false;
  if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
      || (edge->callee
          && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return false;
  if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
      && (edge->callee
          && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
    return false;
  if (optimize_size)
    return false;
  if (edge->caller->frequency == NODE_FREQUENCY_HOT)
    return true;
  if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
    return false;
  if (flag_guess_branch_prob)
    {
      if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
          || edge->frequency <= (CGRAPH_FREQ_BASE
                                 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
        return false;
    }
  return true;
}
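/* A small worked example of the guessed-profile test in
   maybe_hot_frequency_p above: if the function's entry block has
   frequency 10000 and HOT_BB_FREQUENCY_FRACTION has, say, its customary
   value of 1000, only blocks with an estimated frequency of at least
   10000 / 1000 = 10 are treated as maybe-hot; everything below that
   threshold is considered cold and becomes a candidate for size
   optimization.  */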

/* Return true in case edge E can be CPU intensive and should be
   optimized for maximal performance.  */

bool
maybe_hot_edge_p (edge e)
{
  if (profile_status == PROFILE_READ)
    return maybe_hot_count_p (cfun, e->count);
  return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
}


/* Return true in case BB is probably never executed.  */

bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
  gcc_checking_assert (fun);
  if (profile_info && flag_branch_probabilities)
    return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
  if ((!profile_info || !flag_branch_probabilities)
      && (cgraph_get_node (fun->decl)->frequency
          == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}

/* Return true if NODE should be optimized for size.  */

bool
cgraph_optimize_for_size_p (struct cgraph_node *node)
{
  if (optimize_size)
    return true;
  if (node && (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  else
    return false;
}

/* Return true when current function should always be optimized for size.  */

bool
optimize_function_for_size_p (struct function *fun)
{
  if (optimize_size)
    return true;
  if (!fun || !fun->decl)
    return false;
  return cgraph_optimize_for_size_p (cgraph_get_node (fun->decl));
}

/* Return true when current function should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}

/* Return TRUE when BB should be optimized for size.  */

bool
optimize_bb_for_size_p (const_basic_block bb)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
}

/* Return TRUE when BB should be optimized for speed.  */

bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  return !optimize_bb_for_size_p (bb);
}

/* Return TRUE when edge E should be optimized for size.  */

bool
optimize_edge_for_size_p (edge e)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
}

/* Return TRUE when edge E should be optimized for speed.  */

bool
optimize_edge_for_speed_p (edge e)
{
  return !optimize_edge_for_size_p (e);
}

/* Return TRUE when the current instruction should be optimized for size.  */

bool
optimize_insn_for_size_p (void)
{
  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
}

/* Return TRUE when the current instruction should be optimized for speed.  */

bool
optimize_insn_for_speed_p (void)
{
  return !optimize_insn_for_size_p ();
}

/* Return TRUE when LOOP should be optimized for size.  */

bool
optimize_loop_for_size_p (struct loop *loop)
{
  return optimize_bb_for_size_p (loop->header);
}

/* Return TRUE when LOOP should be optimized for speed.  */

bool
optimize_loop_for_speed_p (struct loop *loop)
{
  return optimize_bb_for_speed_p (loop->header);
}
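/* As an illustration of the feedback-based test in
   probably_never_executed_bb_p above: with profile_info->runs == 1000,
   a block counted 400 times gives (400 + 500) / 1000 == 0 and is
   considered never executed, while a count of 600 gives
   (600 + 500) / 1000 == 1 and is not; i.e. a block has to have run in
   at least roughly half of the training runs to escape the predicate.  */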

/* Return TRUE when LOOP nest should be optimized for speed.  */

bool
optimize_loop_nest_for_speed_p (struct loop *loop)
{
  struct loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
        return true;
      if (l->inner)
        l = l->inner;
      else if (l->next)
        l = l->next;
      else
        {
          while (l != loop && !l->next)
            l = loop_outer (l);
          if (l != loop)
            l = l->next;
        }
    }
  return false;
}

/* Return TRUE when LOOP nest should be optimized for size.  */

bool
optimize_loop_nest_for_size_p (struct loop *loop)
{
  return !optimize_loop_nest_for_speed_p (loop);
}

/* Return true when edge E is likely to be well predictable by branch
   predictor.  */

bool
predictable_edge_p (edge e)
{
  if (profile_status == PROFILE_ABSENT)
    return false;
  if ((e->probability
       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
      || (REG_BR_PROB_BASE - e->probability
          <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
    return true;
  return false;
}


/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}

/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}

/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}

/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
        && INTVAL (XEXP (XEXP (note, 0), 0)) == (int) predictor)
      return true;
  return false;
}

/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static struct pointer_map_t *bb_predictions;

/* Structure representing predictions at tree level.  */

struct edge_prediction {
  struct edge_prediction *ep_next;
  edge ep_edge;
  enum br_predictor ep_predictor;
  int ep_probability;
};

/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  void **preds = pointer_map_contains (bb_predictions, bb);

  if (!preds)
    return false;

  for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}
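/* Example of the predictable_edge_p threshold above, assuming
   REG_BR_PROB_BASE == 10000 and PARAM_PREDICTABLE_BRANCH_OUTCOME at its
   usual default of 2: an edge counts as well predictable only when its
   probability is at most 2 * 10000 / 100 = 200 (2%) or at least
   10000 - 200 = 9800 (98%); everything in between is left to the CPU's
   dynamic branch predictor.  */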

/* Return true when the probability of edge is reliable.

   The profile guessing code is good at predicting branch outcome (i.e.
   taken/not taken), which is predicted right slightly over 75% of the time.
   It is however notoriously poor at predicting the probability itself.
   In general the guessed profile appears a lot flatter (with probabilities
   closer to 50%) than reality, so it is a bad idea to use it to drive
   optimizations such as those disabling dynamic branch prediction for well
   predictable branches.

   There are two exceptions - edges leading to noreturn edges and edges
   predicted by number of iterations heuristics are predicted well.  This macro
   should be able to distinguish those, but at the moment it simply checks for
   the noreturn heuristic, which is the only one giving probabilities over 99%
   or below 1%.  In the future we might want to propagate reliability
   information across the CFG if we find this information useful in multiple
   places.  */
static bool
probability_reliable_p (int prob)
{
  return (profile_status == PROFILE_READ
          || (profile_status == PROFILE_GUESSED
              && (prob <= HITRATE (1) || prob >= HITRATE (99))));
}

/* Same predicate as above, working on edges.  */
bool
edge_probability_reliable_p (const_edge e)
{
  return probability_reliable_p (e->probability);
}

/* Same predicate as edge_probability_reliable_p, working on notes.  */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (INTVAL (XEXP (note, 0)));
}

static void
predict_insn (rtx insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
                gen_rtx_CONCAT (VOIDmode,
                                GEN_INT ((int) predictor),
                                GEN_INT ((int) probability)));
}

/* Predict insn by given predictor.  */

void
predict_insn_def (rtx insn, enum br_predictor predictor,
                  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}

/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}

/* Predict edge E with the given PROBABILITY.  */
void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  gcc_assert (profile_status != PROFILE_GUESSED);
  if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
      && flag_guess_branch_prob && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      void **preds = pointer_map_insert (bb_predictions, e->src);

      i->ep_next = (struct edge_prediction *) *preds;
      *preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}
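/* For illustration of how the TAKEN / NOT_TAKEN direction is encoded
   (values again assume REG_BR_PROB_BASE == 10000): a predictor listed in
   predict.def with a 90% hitrate records probability 9000 when the
   prediction is TAKEN and 10000 - 9000 = 1000 when it is NOT_TAKEN;
   predict_insn_def above and predict_edge_def below both perform exactly
   this flip before storing the prediction.  */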

/* Remove all predictions on given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  void **preds;

  if (!bb_predictions)
    return;

  preds = pointer_map_contains (bb_predictions, e->src);

  if (preds)
    {
      struct edge_prediction **prediction = (struct edge_prediction **) preds;
      struct edge_prediction *next;

      while (*prediction)
        {
          if ((*prediction)->ep_edge == e)
            {
              next = (*prediction)->ep_next;
              free (*prediction);
              *prediction = next;
            }
          else
            prediction = &((*prediction)->ep_next);
        }
    }
}

/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  void **preds = pointer_map_contains (bb_predictions, bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = (struct edge_prediction *) *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}

/* Return true when we can store a prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not at computed jumps or other complicated cases.  */
static bool
can_predict_insn_p (const_rtx insn)
{
  return (JUMP_P (insn)
          && any_condjump_p (insn)
          && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}

/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
                  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}

/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
        = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}

/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
                 basic_block bb, int used)
{
  edge e;
  edge_iterator ei;

  if (!file)
    return;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (! (e->flags & EDGE_FALLTHRU))
      break;

  fprintf (file, "  %s heuristics%s: %.1f%%",
           predictor_info[predictor].name,
           used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (file, "  exec ");
      fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
      if (e)
        {
          fprintf (file, " hit ");
          fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
          fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
        }
    }

  fprintf (file, "\n");
}

/* We cannot predict the probabilities of outgoing edges of bb.  Set them
   evenly and hope for the best.  */
static void
set_even_probabilities (basic_block bb)
{
  int nedges = 0;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      nedges ++;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
    else
      e->probability = 0;
}

/* Combine all REG_BR_PRED notes into a single probability and attach the
   REG_BR_PROB note if not already present.  Remove now useless REG_BR_PRED
   notes.  */

static void
combine_predictions_for_insn (rtx insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
             bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
        enum br_predictor predictor = ((enum br_predictor)
                                       INTVAL (XEXP (XEXP (note, 0), 0)));
        int probability = INTVAL (XEXP (XEXP (note, 0), 1));

        found = true;
        if (best_predictor > predictor)
          best_probability = probability, best_predictor = predictor;

        d = (combined_probability * probability
             + (REG_BR_PROB_BASE - combined_probability)
             * (REG_BR_PROB_BASE - probability));

        /* Use FP math to avoid overflows of 32bit integers.  */
        if (d == 0)
          /* If one probability is 0% and one 100%, avoid division by zero.  */
          combined_probability = REG_BR_PROB_BASE / 2;
        else
          combined_probability = (((double) combined_probability) * probability
                                  * REG_BR_PROB_BASE / d + 0.5);
      }
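  /* A worked instance of the combination rule above, assuming
     REG_BR_PROB_BASE == 10000: combining two independent predictors that
     both claim 70% (7000) gives
	d = 7000 * 7000 + 3000 * 3000 = 58000000
	combined = 7000 * 7000 * 10000 / d + 0.5 ~= 8448,
     so two agreeing 70% hints reinforce each other to roughly 84%, while
     a 70% and a 30% hint cancel back to exactly 50%.  */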

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
                     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
                       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
                       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
        {
          enum br_predictor predictor = ((enum br_predictor)
                                         INTVAL (XEXP (XEXP (*pnote, 0), 0)));
          int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

          dump_prediction (dump_file, predictor, probability, bb,
                           !first_match || best_predictor == predictor);
          *pnote = XEXP (*pnote, 1);
        }
      else
        pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      add_reg_note (insn, REG_BR_PROB, GEN_INT (combined_probability));

      /* Save the prediction into CFG in case we are seeing a non-degenerate
         conditional jump.  */
      if (!single_succ_p (bb))
        {
          BRANCH_EDGE (bb)->probability = combined_probability;
          FALLTHRU_EDGE (bb)->probability
            = REG_BR_PROB_BASE - combined_probability;
        }
    }
  else if (!single_succ_p (bb))
    {
      int prob = INTVAL (XEXP (prob_note, 0));

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}

/* Combine predictions into a single probability and store them into the CFG.
   Remove now useless prediction entries.  */

static void
combine_predictions_for_bb (basic_block bb)
{
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;
  void **preds;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
        nedges ++;
        if (first && !second)
          second = e;
        if (!first)
          first = e;
      }

  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict the generic case too, but we have to
     ignore first match heuristics and do more involved combining.  Implement
     this later.  */
  if (nedges != 2)
    {
      if (!bb->count)
        set_even_probabilities (bb);
      clear_bb_predictions (bb);
      if (dump_file)
        fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
                 nedges, bb->index);
      return;
    }

  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  preds = pointer_map_contains (bb_predictions, bb);
  if (preds)
    {
      /* We implement "first match" heuristics and use probability guessed
         by predictor with smallest index.  */
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
        {
          enum br_predictor predictor = pred->ep_predictor;
          int probability = pred->ep_probability;

          if (pred->ep_edge != first)
            probability = REG_BR_PROB_BASE - probability;

          found = true;
          /* First match heuristics would be wildly confused if we predicted
             both directions.  */
          if (best_predictor > predictor)
            {
              struct edge_prediction *pred2;
              int prob = probability;

              for (pred2 = (struct edge_prediction *) *preds; pred2;
                   pred2 = pred2->ep_next)
                if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
                  {
                    int probability2 = pred2->ep_probability;

                    if (pred2->ep_edge != first)
                      probability2 = REG_BR_PROB_BASE - probability2;

                    if ((probability < REG_BR_PROB_BASE / 2) !=
                        (probability2 < REG_BR_PROB_BASE / 2))
                      break;

                    /* If the same predictor later gave better result, go for it!  */
                    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
                        || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
                      prob = probability2;
                  }
              if (!pred2)
                best_probability = prob, best_predictor = predictor;
            }

          d = (combined_probability * probability
               + (REG_BR_PROB_BASE - combined_probability)
               * (REG_BR_PROB_BASE - probability));

          /* Use FP math to avoid overflows of 32bit integers.  */
          if (d == 0)
            /* If one probability is 0% and one 100%, avoid division by zero.  */
            combined_probability = REG_BR_PROB_BASE / 2;
          else
            combined_probability = (((double) combined_probability)
                                    * probability
                                    * REG_BR_PROB_BASE / d + 0.5);
        }
    }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
                       !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
                       first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
        {
          enum br_predictor predictor = pred->ep_predictor;
          int probability = pred->ep_probability;

          if (pred->ep_edge != EDGE_SUCC (bb, 0))
            probability = REG_BR_PROB_BASE - probability;
          dump_prediction (dump_file, predictor, probability, bb,
                           !first_match || best_predictor == predictor);
        }
    }
  clear_bb_predictions (bb);

  if (!bb->count)
    {
      first->probability = combined_probability;
      second->probability = REG_BR_PROB_BASE - combined_probability;
    }
}

/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition is satisfied, NULL otherwise.

   T1 and T2 should be one of the following cases:
     1. T1 is SSA_NAME, T2 is NULL
     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */

static tree
strips_small_constant (tree t1, tree t2)
{
  tree ret = NULL;
  int value = 0;

  if (!t1)
    return NULL;
  else if (TREE_CODE (t1) == SSA_NAME)
    ret = t1;
  else if (host_integerp (t1, 0))
    value = tree_low_cst (t1, 0);
  else
    return NULL;

  if (!t2)
    return ret;
  else if (host_integerp (t2, 0))
    value = tree_low_cst (t2, 0);
  else if (TREE_CODE (t2) == SSA_NAME)
    {
      if (ret)
        return NULL;
      else
        ret = t2;
    }

  if (value <= 4 && value >= -4)
    return ret;
  else
    return NULL;
}

/* Return the SSA_NAME in T or T's operands.
   Return NULL if SSA_NAME cannot be found.  */

static tree
get_base_value (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return t;

  if (!BINARY_CLASS_P (t))
    return NULL;

  switch (TREE_OPERAND_LENGTH (t))
    {
    case 1:
      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
    case 2:
      return strips_small_constant (TREE_OPERAND (t, 0),
                                    TREE_OPERAND (t, 1));
    default:
      return NULL;
    }
}

/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */

static bool
is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
                                     tree *loop_invariant,
                                     enum tree_code *compare_code,
                                     tree *loop_step,
                                     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
      || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      if (code != NE_EXPR && code != EQ_EXPR)
        code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (host_integerp (iv1.step, 0))
        step = iv1.step;
      else
        return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (host_integerp (iv0.step, 0))
        step = iv0.step;
      else
        return false;
    }

  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}
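/* As a concrete illustration of the analysis above: for a loop controlled
   by "for (i = 0; i < n; i++)" whose exit test is "i < n", the induction
   variable side has base 0 and step 1 while "n" is the loop invariant, so
   the function returns true with LOOP_INVARIANT = n, COMPARE_CODE = LT_EXPR,
   LOOP_STEP = 1 and LOOP_IV_BASE = 0.  */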

/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */

static bool
expr_coherent_p (tree t1, tree t2)
{
  gimple stmt;
  tree ssa_name_1 = NULL;
  tree ssa_name_2 = NULL;

  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);

  if (t1 == t2)
    return true;

  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
    return true;
  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
    return false;

  /* Check to see if t1 is expressed/defined with t2.  */
  stmt = SSA_NAME_DEF_STMT (t1);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_1 && ssa_name_1 == t2)
        return true;
    }

  /* Check to see if t2 is expressed/defined with t1.  */
  stmt = SSA_NAME_DEF_STMT (t2);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_2 && ssa_name_2 == t1)
        return true;
    }

  /* Compare if t1 and t2's def_stmts are identical.  */
  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
    return true;
  else
    return false;
}

/* Predict branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.

   E.g.
     for (int i = 0; i < bound; i++) {
       if (i < bound - 2)
         computation_1();
       else
         computation_2();
     }

   In this loop, we will predict the branch inside the loop to be taken.  */

static void
predict_iv_comparison (struct loop *loop, basic_block bb,
                       tree loop_bound_var,
                       tree loop_iv_base_var,
                       enum tree_code loop_bound_code,
                       int loop_bound_step)
{
  gimple stmt;
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
      || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
      || predicted_by_p (bb, PRED_LOOP_EXIT))
    return;

  stmt = last_stmt (bb);
  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
                                            &compare_code,
                                            &compare_step_var,
                                            &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }

  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
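  /* For instance, in

	 for (i = 0; i < 100; i++)
	   if (i < 90)
	     ...

     everything is constant: loop_count is (100 - 0) / 1 and compare_count
     is (90 - 0) / 1, so the then edge ends up with a probability of about
     90 * REG_BR_PROB_BASE / 100, i.e. roughly 90%.  */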
  if (host_integerp (loop_bound_var, 0)
      && host_integerp (compare_var, 0)
      && host_integerp (compare_base, 0))
    {
      int probability;
      bool of, overflow = false;
      double_int mod, compare_count, tem, loop_count;

      double_int loop_bound = tree_to_double_int (loop_bound_var);
      double_int compare_bound = tree_to_double_int (compare_var);
      double_int base = tree_to_double_int (compare_base);
      double_int compare_step = tree_to_double_int (compare_step_var);

      /* (loop_bound - base) / compare_step */
      tem = loop_bound.sub_with_overflow (base, &of);
      overflow |= of;
      loop_count = tem.divmod_with_overflow (compare_step,
                                             0, TRUNC_DIV_EXPR,
                                             &mod, &of);
      overflow |= of;

      if ((!compare_step.is_negative ())
          ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
        {
          /* (loop_bound - compare_bound) / compare_step */
          tem = loop_bound.sub_with_overflow (compare_bound, &of);
          overflow |= of;
          compare_count = tem.divmod_with_overflow (compare_step,
                                                    0, TRUNC_DIV_EXPR,
                                                    &mod, &of);
          overflow |= of;
        }
      else
        {
          /* (compare_bound - base) / compare_step */
          tem = compare_bound.sub_with_overflow (base, &of);
          overflow |= of;
          compare_count = tem.divmod_with_overflow (compare_step,
                                                    0, TRUNC_DIV_EXPR,
                                                    &mod, &of);
          overflow |= of;
        }
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
        ++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
        ++loop_count;
      if (compare_count.is_negative ())
        compare_count = double_int_zero;
      if (loop_count.is_negative ())
        loop_count = double_int_zero;
      if (loop_count.is_zero ())
        probability = 0;
      else if (compare_count.scmp (loop_count) == 1)
        probability = REG_BR_PROB_BASE;
      else
        {
          /* If loop_count is too big, such that REG_BR_PROB_BASE * loop_count
             could overflow, shift both loop_count and compare_count right
             a bit so that it doesn't overflow.  Note both counts are known not
             to be negative at this point.  */
          int clz_bits = clz_hwi (loop_count.high);
          gcc_assert (REG_BR_PROB_BASE < 32768);
          if (clz_bits < 16)
            {
              loop_count = loop_count.arshift (16 - clz_bits,
                                               HOST_BITS_PER_DOUBLE_INT);
              compare_count = compare_count.arshift (16 - clz_bits,
                                                     HOST_BITS_PER_DOUBLE_INT);
            }
          tem = compare_count.mul_with_sign (double_int::from_shwi
                                             (REG_BR_PROB_BASE), true, &of);
          gcc_assert (!of);
          tem = tem.divmod (loop_count, true, TRUNC_DIV_EXPR, &mod);
          probability = tem.to_uhwi ();
        }

      if (!overflow)
        predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }

  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
          && (compare_code == LT_EXPR || compare_code == LE_EXPR))
        predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
               && (compare_code == GT_EXPR || compare_code == GE_EXPR))
        predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
        {
          /* If the loop backedge condition is "(i != bound)", we do
             the comparison based on the step of IV:
               * step < 0 : backedge condition is like (i > bound)
               * step > 0 : backedge condition is like (i < bound)  */
          gcc_assert (loop_bound_step != 0);
          if (loop_bound_step > 0
              && (compare_code == LT_EXPR
                  || compare_code == LE_EXPR))
            predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
          else if (loop_bound_step < 0
                   && (compare_code == GT_EXPR
                       || compare_code == GE_EXPR))
            predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
          else
            predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
        }
      else
        /* The branch is predicted not-taken if loop_bound_code is
           opposite with compare_code.  */
        predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
           for (i = s; i < h; i++)
             if (i > s + 2) ....
         The branch should be predicted taken.  */
      if (loop_bound_step > 0
          && (compare_code == GT_EXPR || compare_code == GE_EXPR))
        predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
               && (compare_code == LT_EXPR || compare_code == LE_EXPR))
        predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
        predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}

/* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that generate an if_tmp.  E.g.:

     if (foo() || global > 10)
       break;

   This will be translated into:

   BB3:
     loop header...
   BB4:
     if foo() goto BB6 else goto BB5
   BB5:
     if global > 10 goto BB6 else goto BB7
   BB6:
     goto BB7
   BB7:
     iftmp = (PHI 0(BB5), 1(BB6))
     if iftmp == 1 goto BB8 else goto BB3
   BB8:
     outside of the loop...

   The edge BB7->BB8 is the loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, and finds out the extra loop
   exits to predict them using PRED_LOOP_EXIT.  */

static void
predict_extra_loop_exits (edge exit_edge)
{
  unsigned i;
  bool check_value_one;
  gimple phi_stmt;
  tree cmp_rhs, cmp_lhs;
  gimple cmp_stmt = last_stmt (exit_edge->src);

  if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
    return;
  cmp_rhs = gimple_cond_rhs (cmp_stmt);
  cmp_lhs = gimple_cond_lhs (cmp_stmt);
  if (!TREE_CONSTANT (cmp_rhs)
      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
    return;
  if (TREE_CODE (cmp_lhs) != SSA_NAME)
    return;

  /* If check_value_one is true, only the phi_args with value '1' will lead
     to loop exit.  Otherwise, only the phi_args with value '0' will lead to
     loop exit.  */
  check_value_one = (((integer_onep (cmp_rhs))
                      ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
                     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));

  phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
  if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
    return;

  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
    {
      edge e1;
      edge_iterator ei;
      tree val = gimple_phi_arg_def (phi_stmt, i);
      edge e = gimple_phi_arg_edge (phi_stmt, i);

      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
        continue;
      if ((check_value_one ^ integer_onep (val)) == 1)
        continue;
      if (EDGE_COUNT (e->src->succs) != 1)
        {
          predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
          continue;
        }

      FOR_EACH_EDGE (e1, ei, e->src->preds)
        predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
    }
}

/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  loop_iterator li;
  struct loop *loop;

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      vec<edge> exits;
      struct tree_niter_desc niter_desc;
      edge ex;
      struct nb_iter_bound *nb_iter;
      enum tree_code loop_bound_code = ERROR_MARK;
      tree loop_bound_step = NULL;
      tree loop_bound_var = NULL;
      tree loop_iv_base = NULL;
      gimple stmt = NULL;

      exits = get_loop_exit_edges (loop);
      n_exits = exits.length ();
      if (!n_exits)
        {
          exits.release ();
          continue;
        }

      FOR_EACH_VEC_ELT (exits, j, ex)
        {
          tree niter = NULL;
          HOST_WIDE_INT nitercst;
          int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
          int probability;
          enum br_predictor predictor;

          predict_extra_loop_exits (ex);

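          /* As a worked example of the exit probability computed below
             (assuming REG_BR_PROB_BASE == 10000): if the analysis concludes
             the loop iterates about 100 times, nitercst ends up being
             roughly 100, so this exit edge receives a probability of
             (10000 + 50) / 100 == 100, i.e. about 1% per iteration, under
             the PRED_LOOP_ITERATIONS (or ..._GUESSED) predictor.  */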
          if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
            niter = niter_desc.niter;
          if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
            niter = loop_niter_by_eval (loop, ex);

          if (TREE_CODE (niter) == INTEGER_CST)
            {
              if (host_integerp (niter, 1)
                  && max
                  && compare_tree_int (niter, max - 1) == -1)
                nitercst = tree_low_cst (niter, 1) + 1;
              else
                nitercst = max;
              predictor = PRED_LOOP_ITERATIONS;
            }
          /* If we have just one exit and we can derive some information about
             the number of iterations of the loop from the statements inside
             the loop, use it to predict this exit.  */
          else if (n_exits == 1)
            {
              nitercst = estimated_stmt_executions_int (loop);
              if (nitercst < 0)
                continue;
              if (nitercst > max)
                nitercst = max;

              predictor = PRED_LOOP_ITERATIONS_GUESSED;
            }
          else
            continue;

          /* If the prediction for number of iterations is zero, do not
             predict the exit edges.  */
          if (nitercst == 0)
            continue;

          probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
          predict_edge (ex, predictor, probability);
        }
      exits.release ();

      /* Find information about loop bound variables.  */
      for (nb_iter = loop->bounds; nb_iter;
           nb_iter = nb_iter->next)
        if (nb_iter->stmt
            && gimple_code (nb_iter->stmt) == GIMPLE_COND)
          {
            stmt = nb_iter->stmt;
            break;
          }
      if (!stmt && last_stmt (loop->header)
          && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
        stmt = last_stmt (loop->header);
      if (stmt)
        is_comparison_with_loop_invariant_p (stmt, loop,
                                             &loop_bound_var,
                                             &loop_bound_code,
                                             &loop_bound_step,
                                             &loop_iv_base);

      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
        {
          int header_found = 0;
          edge e;
          edge_iterator ei;

          bb = bbs[j];

          /* Bypass loop heuristics on continue statement.  These
             statements construct loops via "non-loop" constructs
             in the source language and are better to be handled
             separately.  */
          if (predicted_by_p (bb, PRED_CONTINUE))
            continue;

          /* Loop branch heuristics - predict an edge back to a
             loop's head as taken.  */
          if (bb == loop->latch)
            {
              e = find_edge (loop->latch, loop->header);
              if (e)
                {
                  header_found = 1;
                  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
                }
            }

          /* Loop exit heuristics - predict an edge exiting the loop if the
             conditional has no loop header successors as not taken.  */
          if (!header_found
              /* If we already used more reliable loop exit predictors, do not
                 bother with PRED_LOOP_EXIT.  */
              && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
              && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
            {
              /* For a loop with many exits we don't want to predict all exits
                 with a pretty large probability, because if all exits are
                 considered in a row, the loop would be predicted to iterate
                 almost never.  The code to divide the probability by the number
                 of exits is very rough.  It should compute the number of exits
                 taken in each path through the function (not the overall number
                 of exits that might be a lot higher for loops with wide switch
                 statements in them) and compute the n-th square root.

                 We limit the minimal probability by 2% to avoid
                 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
                 as this was causing regression in the perl benchmark containing
                 such a wide loop.  */

              int probability = ((REG_BR_PROB_BASE
                                  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
                                 / n_exits);
              if (probability < HITRATE (2))
                probability = HITRATE (2);
              FOR_EACH_EDGE (e, ei, bb->succs)
                if (e->dest->index < NUM_FIXED_BLOCKS
                    || !flow_bb_inside_loop_p (loop, e->dest))
                  predict_edge (e, PRED_LOOP_EXIT, probability);
            }
          if (loop_bound_var)
            predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
                                   loop_bound_code,
                                   tree_low_cst (loop_bound_step, 0));
        }

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}

/* Attempt to predict probabilities of BB outgoing edges using local
   properties.  */
static void
bb_estimate_probability_locally (basic_block bb)
{
  rtx last_insn = BB_END (bb);
  rtx cond;

  if (! can_predict_insn_p (last_insn))
    return;
  cond = get_condition (last_insn, NULL, false, false);
  if (! cond)
    return;

  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (COMPARISON_P (cond)
      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
          || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
    {
      if (GET_CODE (cond) == EQ)
        predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
      else if (GET_CODE (cond) == NE)
        predict_insn_def (last_insn, PRED_POINTER, TAKEN);
    }
  else

    /* Try "opcode heuristic."
       EQ tests are usually false and NE tests are usually true.  Also,
       most quantities are positive, so we can make the appropriate guesses
       about signed comparisons against zero.  */
    switch (GET_CODE (cond))
      {
      case CONST_INT:
        /* Unconditional branch.  */
        predict_insn_def (last_insn, PRED_UNCONDITIONAL,
                          cond == const0_rtx ? NOT_TAKEN : TAKEN);
        break;

      case EQ:
      case UNEQ:
        /* Floating point comparisons appear to behave in a very
           unpredictable way because of the special role of = tests in
           FP code.  */
        if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
          ;
        /* Comparisons with 0 are often used for booleans and there is
           nothing useful to predict about them.  */
        else if (XEXP (cond, 1) == const0_rtx
                 || XEXP (cond, 0) == const0_rtx)
          ;
        else
          predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
        break;

      case NE:
      case LTGT:
        /* Floating point comparisons appear to behave in a very
           unpredictable way because of the special role of = tests in
           FP code.  */
        if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
          ;
        /* Comparisons with 0 are often used for booleans and there is
           nothing useful to predict about them.  */
        else if (XEXP (cond, 1) == const0_rtx
                 || XEXP (cond, 0) == const0_rtx)
          ;
        else
          predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
        break;

      case ORDERED:
        predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
        break;

      case UNORDERED:
        predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
        break;

      case LE:
      case LT:
        if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
            || XEXP (cond, 1) == constm1_rtx)
          predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
        break;

      case GE:
      case GT:
        if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
            || XEXP (cond, 1) == constm1_rtx)
          predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
        break;

      default:
        break;
      }
}

/* Set edge->probability for each successor edge of BB.  */
void
guess_outgoing_edge_probabilities (basic_block bb)
{
  bb_estimate_probability_locally (bb);
  combine_predictions_for_insn (BB_END (bb), bb);
}

static tree expr_expected_value (tree, bitmap);

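/* The predictor driven by the two functions below is what gives
   __builtin_expect its effect.  A typical use in user code looks like

     if (__builtin_expect (ptr == NULL, 0))
       handle_rare_error ();

   where the second argument is the value the first one is expected to
   have; expr_expected_value_1 propagates that expectation back through
   the SSA definitions, so the conditional above is predicted not taken.
   (handle_rare_error is of course just a placeholder name.)  */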
/* Helper function for expr_expected_value.  */

static tree
expr_expected_value_1 (tree type, tree op0, enum tree_code code,
                       tree op1, bitmap visited)
{
  gimple def;

  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (TREE_CONSTANT (op0))
        return op0;

      if (code != SSA_NAME)
        return NULL_TREE;

      def = SSA_NAME_DEF_STMT (op0);

      /* If we were already here, break the infinite cycle.  */
      if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
        return NULL;

      if (gimple_code (def) == GIMPLE_PHI)
        {
          /* All the arguments of the PHI node must have the same constant
             expected value.  */
          int i, n = gimple_phi_num_args (def);
          tree val = NULL, new_val;

          for (i = 0; i < n; i++)
            {
              tree arg = PHI_ARG_DEF (def, i);

              /* If this PHI has itself as an argument, we cannot
                 determine the expected value of this argument.  However,
                 if we can find an expected constant value for the other
                 PHI args then we can still be sure that this is
                 likely a constant.  So be optimistic and just
                 continue with the next argument.  */
              if (arg == PHI_RESULT (def))
                continue;

              new_val = expr_expected_value (arg, visited);
              if (!new_val)
                return NULL;
              if (!val)
                val = new_val;
              else if (!operand_equal_p (val, new_val, false))
                return NULL;
            }
          return val;
        }
      if (is_gimple_assign (def))
        {
          if (gimple_assign_lhs (def) != op0)
            return NULL;

          return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
                                        gimple_assign_rhs1 (def),
                                        gimple_assign_rhs_code (def),
                                        gimple_assign_rhs2 (def),
                                        visited);
        }

      if (is_gimple_call (def))
        {
          tree decl = gimple_call_fndecl (def);
          if (!decl)
            return NULL;
          if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
            switch (DECL_FUNCTION_CODE (decl))
              {
              case BUILT_IN_EXPECT:
                {
                  tree val;
                  if (gimple_call_num_args (def) != 2)
                    return NULL;
                  val = gimple_call_arg (def, 0);
                  if (TREE_CONSTANT (val))
                    return val;
                  return gimple_call_arg (def, 1);
                }

              case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
              case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
              case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
              case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
              case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
              case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
              case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
              case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
              case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
              case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
              case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
              case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
              case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
                /* Assume that any given atomic operation has low contention,
                   and thus the compare-and-swap operation succeeds.  */
                return boolean_true_node;
              }
        }

      return NULL;
    }

  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited);
      if (!op0)
        return NULL;
      op1 = expr_expected_value (op1, visited);
      if (!op1)
        return NULL;
      res = fold_build2 (code, type, op0, op1);
      if (TREE_CONSTANT (res))
        return res;
      return NULL;
    }
  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited);
      if (!op0)
        return NULL;
      res = fold_build1 (code, type, op0);
      if (TREE_CONSTANT (res))
        return res;
      return NULL;
    }
  return NULL;
}

/* Return the constant EXPR is likely to have at execution time, NULL if
   unknown.  The function is used by the builtin_expect branch predictor, so
   the evidence must come from this construct plus additional possible
   constant folding.

   We may want to implement a more involved value guess (such as value range
   propagation based prediction), but such tricks should go into a new
   implementation.  */

static tree
expr_expected_value (tree expr, bitmap visited)
{
  enum tree_code code;
  tree op0, op1;

  if (TREE_CONSTANT (expr))
    return expr;

  extract_ops_from_tree (expr, &code, &op0, &op1);
  return expr_expected_value_1 (TREE_TYPE (expr),
                                op0, code, op1, visited);
}


/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
   we no longer need.  */
static unsigned int
strip_predict_hints (void)
{
  basic_block bb;
  gimple ass_stmt;
  tree var;

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator bi;
      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
        {
          gimple stmt = gsi_stmt (bi);

          if (gimple_code (stmt) == GIMPLE_PREDICT)
            {
              gsi_remove (&bi, true);
              continue;
            }
          else if (gimple_code (stmt) == GIMPLE_CALL)
            {
              tree fndecl = gimple_call_fndecl (stmt);

              if (fndecl
                  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
                  && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
                  && gimple_call_num_args (stmt) == 2)
                {
                  var = gimple_call_lhs (stmt);
                  if (var)
                    {
                      ass_stmt
                        = gimple_build_assign (var, gimple_call_arg (stmt, 0));
                      gsi_replace (&bi, ass_stmt, true);
                    }
                  else
                    {
                      gsi_remove (&bi, true);
                      continue;
                    }
                }
            }
          gsi_next (&bi);
        }
    }
  return 0;
}

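/* After this pass a sequence such as

     tmp = __builtin_expect (cond, 1);
     if (tmp) ...

   is left simply as

     tmp = cond;
     if (tmp) ...

   since the expectation has already been turned into edge probabilities by
   the predictors above and the call would only get in the way of later
   optimizations.  */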
static void
tree_predict_by_opcode (basic_block bb)
{
  gimple stmt = last_stmt (bb);
  edge then_edge;
  tree op0, op1;
  tree type;
  tree val;
  enum tree_code cmp;
  bitmap visited;
  edge_iterator ei;

  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);
  cmp = gimple_cond_code (stmt);
  type = TREE_TYPE (op0);
  visited = BITMAP_ALLOC (NULL);
  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
  BITMAP_FREE (visited);
  if (val)
    {
      if (integer_zerop (val))
        predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
      else
        predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
      return;
    }
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (cmp == EQ_EXPR)
        predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (cmp == NE_EXPR)
        predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else
    /* Try "opcode heuristic."
       EQ tests are usually false and NE tests are usually true.  Also,
       most quantities are positive, so we can make the appropriate guesses
       about signed comparisons against zero.  */
    switch (cmp)
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
        /* Floating point comparisons appear to behave in a very
           unpredictable way because of the special role of = tests in
           FP code.  */
        if (FLOAT_TYPE_P (type))
          ;
        /* Comparisons with 0 are often used for booleans and there is
           nothing useful to predict about them.  */
        else if (integer_zerop (op0) || integer_zerop (op1))
          ;
        else
          predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
        break;

      case NE_EXPR:
      case LTGT_EXPR:
        /* Floating point comparisons appear to behave in a very
           unpredictable way because of the special role of = tests in
           FP code.  */
        if (FLOAT_TYPE_P (type))
          ;
        /* Comparisons with 0 are often used for booleans and there is
           nothing useful to predict about them.  */
        else if (integer_zerop (op0)
                 || integer_zerop (op1))
          ;
        else
          predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
        break;

      case ORDERED_EXPR:
        predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
        break;

      case UNORDERED_EXPR:
        predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
        break;

      case LE_EXPR:
      case LT_EXPR:
        if (integer_zerop (op1)
            || integer_onep (op1)
            || integer_all_onesp (op1)
            || real_zerop (op1)
            || real_onep (op1)
            || real_minus_onep (op1))
          predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
        break;

      case GE_EXPR:
      case GT_EXPR:
        if (integer_zerop (op1)
            || integer_onep (op1)
            || integer_all_onesp (op1)
            || real_zerop (op1)
            || real_onep (op1)
            || real_minus_onep (op1))
          predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
        break;

      default:
        break;
      }
}

/* Try to guess whether the return value means an error code.  */
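/* For example (illustration only):

     return NULL;   ->  PRED_NULL_RETURN, NOT_TAKEN
     return -1;     ->  PRED_NEGATIVE_RETURN, NOT_TAKEN
     return 42;     ->  PRED_CONST_RETURN, TAKEN
     return 0;      ->  no prediction (0 and 1 usually mean booleans).  */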
static enum br_predictor
return_prediction (tree val, enum prediction *prediction)
{
  /* VOID.  */
  if (!val)
    return PRED_NO_PREDICTION;
  /* Different heuristics for pointers and scalars.  */
  if (POINTER_TYPE_P (TREE_TYPE (val)))
    {
      /* NULL is usually not returned.  */
      if (integer_zerop (val))
        {
          *prediction = NOT_TAKEN;
          return PRED_NULL_RETURN;
        }
    }
  else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
    {
      /* Negative return values are often used to indicate
         errors.  */
      if (TREE_CODE (val) == INTEGER_CST
          && tree_int_cst_sgn (val) < 0)
        {
          *prediction = NOT_TAKEN;
          return PRED_NEGATIVE_RETURN;
        }
      /* Constant return values seem to be commonly taken.
         Zero/one often represent booleans so exclude them from the
         heuristics.  */
      if (TREE_CONSTANT (val)
          && (!integer_zerop (val) && !integer_onep (val)))
        {
          *prediction = TAKEN;
          return PRED_CONST_RETURN;
        }
    }
  return PRED_NO_PREDICTION;
}

/* Find the basic block with the return expression and look for a possible
   return value, trying to apply the RETURN_PREDICTION heuristics.  */
static void
apply_return_prediction (void)
{
  gimple return_stmt = NULL;
  tree return_val;
  edge e;
  gimple phi;
  int phi_num_args, i;
  enum br_predictor pred;
  enum prediction direction;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      return_stmt = last_stmt (e->src);
      if (return_stmt
          && gimple_code (return_stmt) == GIMPLE_RETURN)
        break;
    }
  if (!e)
    return;
  return_val = gimple_return_retval (return_stmt);
  if (!return_val)
    return;
  if (TREE_CODE (return_val) != SSA_NAME
      || !SSA_NAME_DEF_STMT (return_val)
      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
    return;
  phi = SSA_NAME_DEF_STMT (return_val);
  phi_num_args = gimple_phi_num_args (phi);
  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);

  /* Avoid the degenerate case where all return values from the function
     belong to the same category (i.e. they are all positive constants),
     in which case we can hardly say anything about them.  */
  for (i = 1; i < phi_num_args; i++)
    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
      break;
  if (i != phi_num_args)
    for (i = 0; i < phi_num_args; i++)
      {
        pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
        if (pred != PRED_NO_PREDICTION)
          predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
                                         direction);
      }
}

/* Look for basic blocks that contain unlikely-to-happen events
   (such as noreturn calls) and mark all paths leading to execution
   of these basic blocks as unlikely.  */
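/* For instance (illustration only), in

     if (!ptr)
       abort ();

   every path reaching the call to abort is predicted not taken via
   PRED_NORETURN; calls to functions declared with the "cold" attribute
   are handled the same way via PRED_COLD_FUNCTION.  */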
static void
tree_bb_level_predictions (void)
{
  basic_block bb;
  bool has_return_edges = false;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
      {
        has_return_edges = true;
        break;
      }

  apply_return_prediction ();

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree decl;

          if (is_gimple_call (stmt))
            {
              if ((gimple_call_flags (stmt) & ECF_NORETURN)
                  && has_return_edges)
                predict_paths_leading_to (bb, PRED_NORETURN,
                                          NOT_TAKEN);
              decl = gimple_call_fndecl (stmt);
              if (decl
                  && lookup_attribute ("cold",
                                       DECL_ATTRIBUTES (decl)))
                predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
                                          NOT_TAKEN);
            }
          else if (gimple_code (stmt) == GIMPLE_PREDICT)
            {
              predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
                                        gimple_predict_outcome (stmt));
              /* Keep GIMPLE_PREDICT around so early inlining will propagate
                 hints to callers.  */
            }
        }
    }
}

#ifdef ENABLE_CHECKING

/* Callback for pointer_map_traverse, asserts that the pointer map is
   empty.  */

static bool
assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
                 void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (!*value);
  return false;
}
#endif

/* Predict branch probabilities and estimate profile for basic block BB.  */

static void
tree_estimate_probability_bb (basic_block bb)
{
  edge e;
  edge_iterator ei;
  gimple last;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      /* Predict edges to user labels with attributes.  */
      if (e->dest != EXIT_BLOCK_PTR)
        {
          gimple_stmt_iterator gi;
          for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
            {
              gimple stmt = gsi_stmt (gi);
              tree decl;

              if (gimple_code (stmt) != GIMPLE_LABEL)
                break;
              decl = gimple_label_label (stmt);
              if (DECL_ARTIFICIAL (decl))
                continue;

              /* Finally, we have a user-defined label.  */
              if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
                predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
              else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
                predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
            }
        }

      /* Predict early returns to be probable, as we've already taken
         care of error returns and other cases are often used for
         fast paths through the function.

         Since we've already removed the return statements, we are
         looking for a CFG like:

           if (conditional)
             {
               ..
               goto return_block
             }
           some other blocks
         return_block:
           return_stmt.  */
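      /* The tests below recognize E->DEST as such a return_block: a block
         other than the fallthrough successor whose only successor is the
         exit block and whose last statement is a GIMPLE_RETURN.  */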
      if (e->dest != bb->next_bb
          && e->dest != EXIT_BLOCK_PTR
          && single_succ_p (e->dest)
          && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
          && (last = last_stmt (e->dest)) != NULL
          && gimple_code (last) == GIMPLE_RETURN)
        {
          edge e1;
          edge_iterator ei1;

          if (single_succ_p (bb))
            {
              FOR_EACH_EDGE (e1, ei1, bb->preds)
                if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
                    && !predicted_by_p (e1->src, PRED_CONST_RETURN)
                    && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
                  predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
            }
          else
            if (!predicted_by_p (e->src, PRED_NULL_RETURN)
                && !predicted_by_p (e->src, PRED_CONST_RETURN)
                && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
              predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
        }

      /* Look for the block we are guarding (i.e. we dominate it,
         but it doesn't postdominate us).  */
      if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
          && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
          && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
        {
          gimple_stmt_iterator bi;

          /* The call heuristic claims that a guarded function call
             is improbable.  This is because such calls are often used
             to signal exceptional situations such as printing error
             messages.  */
          for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
               gsi_next (&bi))
            {
              gimple stmt = gsi_stmt (bi);
              if (is_gimple_call (stmt)
                  /* Constant and pure calls are hardly used to signal
                     something exceptional.  */
                  && gimple_has_side_effects (stmt))
                {
                  predict_edge_def (e, PRED_CALL, NOT_TAKEN);
                  break;
                }
            }
        }
    }
  tree_predict_by_opcode (bb);
}

/* Predict branch probabilities and estimate profile of the tree CFG.
   This function can be called from the loop optimizers to recompute
   the profile information.  */

void
tree_estimate_probability (void)
{
  basic_block bb;

  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();
  /* We use loop_niter_by_eval, which requires that the loops have
     preheaders.  */
  create_preheaders (CP_SIMPLE_PREHEADERS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  bb_predictions = pointer_map_create ();
  tree_bb_level_predictions ();
  record_loop_exits ();

  if (number_of_loops () > 1)
    predict_loops ();

  FOR_EACH_BB (bb)
    tree_estimate_probability_bb (bb);

  FOR_EACH_BB (bb)
    combine_predictions_for_bb (bb);

#ifdef ENABLE_CHECKING
  pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
#endif
  pointer_map_destroy (bb_predictions);
  bb_predictions = NULL;

  estimate_bb_frequencies ();
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
}

/* Predict branch probabilities and estimate profile of the tree CFG.
   This is the driver function for PASS_PROFILE.  */
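/* The driver below only wraps tree_estimate_probability in the loop and
   SCEV bookkeeping it needs; the actual heuristics live in the functions
   above.  */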
static unsigned int
tree_estimate_probability_driver (void)
{
  unsigned nb_loops;

  loop_optimizer_init (LOOPS_NORMAL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  mark_irreducible_loops ();

  nb_loops = number_of_loops ();
  if (nb_loops > 1)
    scev_initialize ();

  tree_estimate_probability ();

  if (nb_loops > 1)
    scev_finalize ();

  loop_optimizer_finalize ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    gimple_dump_cfg (dump_file, dump_flags);
  if (profile_status == PROFILE_ABSENT)
    profile_status = PROFILE_GUESSED;
  return 0;
}

/* Predict edges to successors of CUR whose sources are not postdominated
   by BB, using PRED, and recurse to all postdominators.  */

static void
predict_paths_for_bb (basic_block cur, basic_block bb,
                      enum br_predictor pred,
                      enum prediction taken,
                      bitmap visited)
{
  edge e;
  edge_iterator ei;
  basic_block son;

  /* We are looking for all edges forming the edge cut induced by
     the set of all blocks postdominated by BB.  */
  FOR_EACH_EDGE (e, ei, cur->preds)
    if (e->src->index >= NUM_FIXED_BLOCKS
        && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
      {
        edge e2;
        edge_iterator ei2;
        bool found = false;

        /* Ignore fake edges and eh, we predict them as not taken anyway.  */
        if (e->flags & (EDGE_EH | EDGE_FAKE))
          continue;
        gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));

        /* See if there is an edge from e->src that is not abnormal
           and does not lead to BB.  */
        FOR_EACH_EDGE (e2, ei2, e->src->succs)
          if (e2 != e
              && !(e2->flags & (EDGE_EH | EDGE_FAKE))
              && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
            {
              found = true;
              break;
            }

        /* If there is a non-abnormal path leaving e->src, predict the edge
           using the predictor.  Otherwise we need to look for paths
           leading to e->src.

           The second case may lead to an infinite loop when we are
           predicting regions that are only reachable by abnormal edges.
           We simply prevent visiting a given BB twice.  */
        if (found)
          predict_edge_def (e, pred, taken);
        else if (bitmap_set_bit (visited, e->src->index))
          predict_paths_for_bb (e->src, e->src, pred, taken, visited);
      }
  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    predict_paths_for_bb (son, bb, pred, taken, visited);
}

/* Set the branch probabilities of all paths leading to BB according to
   PRED and TAKEN.  */

static void
predict_paths_leading_to (basic_block bb, enum br_predictor pred,
                          enum prediction taken)
{
  bitmap visited = BITMAP_ALLOC (NULL);
  predict_paths_for_bb (bb, bb, pred, taken, visited);
  BITMAP_FREE (visited);
}

/* Like predict_paths_leading_to but take an edge instead of a basic
   block.  */
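/* When E->src has no other "escaping" successor (e.g. E is a loop latch
   edge), predicting E itself would be meaningless, so the prediction is
   applied to all paths leading to E->src instead.  */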
static void
predict_paths_leading_to_edge (edge e, enum br_predictor pred,
                               enum prediction taken)
{
  bool has_nonloop_edge = false;
  edge_iterator ei;
  edge e2;

  basic_block bb = e->src;
  FOR_EACH_EDGE (e2, ei, bb->succs)
    if (e2->dest != e->src && e2->dest != e->dest
        && !(e2->flags & (EDGE_EH | EDGE_FAKE))
        && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
      {
        has_nonloop_edge = true;
        break;
      }
  if (!has_nonloop_edge)
    {
      bitmap visited = BITMAP_ALLOC (NULL);
      predict_paths_for_bb (bb, bb, pred, taken, visited);
      BITMAP_FREE (visited);
    }
  else
    predict_edge_def (e, pred, taken);
}

/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  */
  basic_block next;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
} *block_info;

/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* If the edge is a loopback edge, the probability that the edge will
     be reached provided the header is.  The estimated number of
     iterations of the loop can then be computed as
     1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
} *edge_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
#define EDGE_INFO(E) ((edge_info) (E)->aux)

/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies in blocks marked in
   TOVISIT, starting in HEAD.  */

static void
propagate_freq (basic_block head, bitmap tovisit)
{
  basic_block bb;
  basic_block last;
  unsigned i;
  edge e;
  basic_block nextbb;
  bitmap_iterator bi;

  /* For each basic block we need to visit, count the number of its
     predecessors that we need to visit first.  */
  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
    {
      edge_iterator ei;
      int count = 0;

      bb = BASIC_BLOCK (i);

      FOR_EACH_EDGE (e, ei, bb->preds)
        {
          bool visit = bitmap_bit_p (tovisit, e->src->index);

          if (visit && !(e->flags & EDGE_DFS_BACK))
            count++;
          else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
            fprintf (dump_file,
                     "Irreducible region hit, ignoring edge to %i->%i\n",
                     e->src->index, bb->index);
        }
      BLOCK_INFO (bb)->npredecessors = count;
      /* When the function never returns, we will never process the exit
         block.  */
      if (!count && bb == EXIT_BLOCK_PTR)
        bb->count = bb->frequency = 0;
    }

  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  last = head;
  for (bb = head; bb; bb = nextbb)
    {
      edge_iterator ei;
      sreal cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute the frequency of the basic block.  */
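      /* The frequency of BB is the sum of the frequencies flowing in over
         non-back edges; if back edges contribute a cyclic probability p,
         the result is additionally scaled by 1 / (1 - p).  For example
         (illustration only), a loop whose latch is taken with probability
         0.9 multiplies the header's incoming frequency by 10.  */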
      if (bb != head)
        {
#ifdef ENABLE_CHECKING
          FOR_EACH_EDGE (e, ei, bb->preds)
            gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
                        || (e->flags & EDGE_DFS_BACK));
#endif

          FOR_EACH_EDGE (e, ei, bb->preds)
            if (EDGE_INFO (e)->back_edge)
              {
                sreal_add (&cyclic_probability, &cyclic_probability,
                           &EDGE_INFO (e)->back_edge_prob);
              }
            else if (!(e->flags & EDGE_DFS_BACK))
              {
                sreal tmp;

                /* frequency += (e->probability
                                 * BLOCK_INFO (e->src)->frequency
                                 / REG_BR_PROB_BASE);  */

                sreal_init (&tmp, e->probability, 0);
                sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
                sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
                sreal_add (&frequency, &frequency, &tmp);
              }

          if (sreal_compare (&cyclic_probability, &real_zero) == 0)
            {
              memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
                      sizeof (frequency));
            }
          else
            {
              if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
                {
                  memcpy (&cyclic_probability, &real_almost_one,
                          sizeof (real_almost_one));
                }

              /* BLOCK_INFO (bb)->frequency = frequency
                                              / (1 - cyclic_probability)  */

              sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
              sreal_div (&BLOCK_INFO (bb)->frequency,
                         &frequency, &cyclic_probability);
            }
        }

      bitmap_clear_bit (tovisit, bb->index);

      e = find_edge (bb, head);
      if (e)
        {
          sreal tmp;

          /* EDGE_INFO (e)->back_edge_prob
             = ((e->probability * BLOCK_INFO (bb)->frequency)
                / REG_BR_PROB_BASE);  */

          sreal_init (&tmp, e->probability, 0);
          sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
          sreal_mul (&EDGE_INFO (e)->back_edge_prob,
                     &tmp, &real_inv_br_prob_base);
        }

      /* Propagate to successor blocks.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
        if (!(e->flags & EDGE_DFS_BACK)
            && BLOCK_INFO (e->dest)->npredecessors)
          {
            BLOCK_INFO (e->dest)->npredecessors--;
            if (!BLOCK_INFO (e->dest)->npredecessors)
              {
                if (!nextbb)
                  nextbb = e->dest;
                else
                  BLOCK_INFO (last)->next = e->dest;

                last = e->dest;
              }
          }
    }
}

/* Estimate the probabilities of loopback edges in loops at the same
   nesting level.  */

static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;
      bitmap tovisit = BITMAP_ALLOC (NULL);

      estimate_loops_at_level (loop->inner);

      /* Find the current loop's back edge and mark it.  */
      e = loop_latch_edge (loop);
      EDGE_INFO (e)->back_edge = 1;

      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
        bitmap_set_bit (tovisit, bbs[i]->index);
      free (bbs);
      propagate_freq (loop->header, tovisit);
      BITMAP_FREE (tovisit);
    }
}

/* Propagate frequencies through the structure of loops.  */

static void
estimate_loops (void)
{
  bitmap tovisit = BITMAP_ALLOC (NULL);
  basic_block bb;

  /* Start by estimating the frequencies in the loops.  */
  if (number_of_loops () > 1)
    estimate_loops_at_level (current_loops->tree_root->inner);

  /* Now propagate the frequencies through all the blocks.  */
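  /* At this point every loop body has been processed bottom-up, so the
     back_edge_prob of each latch edge is known; the final pass below
     treats the whole function as one region rooted at the entry block.  */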
  FOR_ALL_BB (bb)
    {
      bitmap_set_bit (tovisit, bb->index);
    }
  propagate_freq (ENTRY_BLOCK_PTR, tovisit);
  BITMAP_FREE (tovisit);
}

/* Convert counts measured by profile-driven feedback to frequencies.
   Return nonzero iff there was any nonzero execution count.  */

int
counts_to_freqs (void)
{
  gcov_type count_max, true_count_max = 0;
  basic_block bb;

  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
    true_count_max = MAX (bb->count, true_count_max);

  count_max = MAX (true_count_max, 1);
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;

  return true_count_max;
}

/* Return true if the function is likely to be expensive, so there is no
   point in optimizing the performance of the prologue or epilogue, or in
   doing inlining at the expense of code size growth.  THRESHOLD is the
   maximum number of instructions the function can execute on average and
   still be considered not expensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We cannot compute accurately for large thresholds due to scaled
     frequencies.  */
  gcc_assert (threshold <= BB_FREQ_MAX);

  /* Frequencies are out of range.  This either means that the function
     contains an internal loop executing more than BB_FREQ_MAX times or
     profile feedback is available and the function has not been executed
     at all.  */
  if (ENTRY_BLOCK_PTR->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR->frequency * threshold;
  FOR_EACH_BB (bb)
    {
      rtx insn;

      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
           insn = NEXT_INSN (insn))
        if (active_insn_p (insn))
          {
            sum += bb->frequency;
            if (sum > limit)
              return true;
          }
    }

  return false;
}

/* Estimate the frequencies of basic blocks from the given branch
   probabilities.  */

void
estimate_bb_frequencies (void)
{
  basic_block bb;
  sreal freq_max;

  if (profile_status != PROFILE_READ || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      if (!real_values_initialized)
        {
          real_values_initialized = 1;
          sreal_init (&real_zero, 0, 0);
          sreal_init (&real_one, 1, 0);
          sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
          sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
          sreal_init (&real_one_half, 1, -1);
          sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
          sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
        }

      mark_dfs_back_edges ();

      single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
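      /* The aux fields carry the sreal data used by propagate_freq; each
         edge starts with back_edge_prob = probability / REG_BR_PROB_BASE,
         and the back_edge flag is set later by estimate_loops_at_level.  */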
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
        {
          edge e;
          edge_iterator ei;

          FOR_EACH_EDGE (e, ei, bb->succs)
            {
              sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
              sreal_mul (&EDGE_INFO (e)->back_edge_prob,
                         &EDGE_INFO (e)->back_edge_prob,
                         &real_inv_br_prob_base);
            }
        }

      /* First compute probabilities locally for each loop from innermost
         to outermost to examine probabilities for back edges.  */
      estimate_loops ();

      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB (bb)
        if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
          memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
        {
          sreal tmp;

          sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
          sreal_add (&tmp, &tmp, &real_one_half);
          bb->frequency = sreal_to_int (&tmp);
        }

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
}

/* Decide whether function is hot, cold or unlikely executed.  */
void
compute_function_frequency (void)
{
  basic_block bb;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);
  if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
      || MAIN_NAME_P (DECL_NAME (current_function_decl)))
    node->only_called_at_startup = true;
  if (DECL_STATIC_DESTRUCTOR (current_function_decl))
    node->only_called_at_exit = true;

  if (!profile_info || !flag_branch_probabilities)
    {
      int flags = flags_from_decl_or_type (current_function_decl);
      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
          != NULL)
        node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
               != NULL)
        node->frequency = NODE_FREQUENCY_HOT;
      else if (flags & ECF_NORETURN)
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
               || DECL_STATIC_DESTRUCTOR (current_function_decl))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      return;
    }
  node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
  FOR_EACH_BB (bb)
    {
      if (maybe_hot_bb_p (cfun, bb))
        {
          node->frequency = NODE_FREQUENCY_HOT;
          return;
        }
      if (!probably_never_executed_bb_p (cfun, bb))
        node->frequency = NODE_FREQUENCY_NORMAL;
    }
}

static bool
gate_estimate_probability (void)
{
  return flag_guess_branch_prob;
}

/* Build PREDICT_EXPR.  */
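/* For example (illustration only), build_predict_expr (PRED_NORETURN,
   NOT_TAKEN) yields a PREDICT_EXPR that the gimplifier later lowers to a
   GIMPLE_PREDICT statement, which is consumed by tree_bb_level_predictions
   above.  */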
tree
build_predict_expr (enum br_predictor predictor, enum prediction taken)
{
  tree t = build1 (PREDICT_EXPR, void_type_node,
                   build_int_cst (integer_type_node, predictor));
  SET_PREDICT_EXPR_OUTCOME (t, taken);
  return t;
}

const char *
predictor_name (enum br_predictor predictor)
{
  return predictor_info[predictor].name;
}

struct gimple_opt_pass pass_profile =
{
 {
  GIMPLE_PASS,
  "profile_estimate",                   /* name */
  OPTGROUP_NONE,                        /* optinfo_flags */
  gate_estimate_probability,            /* gate */
  tree_estimate_probability_driver,     /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_BRANCH_PROB,                       /* tv_id */
  PROP_cfg,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_ggc_collect | TODO_verify_ssa    /* todo_flags_finish */
 }
};

struct gimple_opt_pass pass_strip_predict_hints =
{
 {
  GIMPLE_PASS,
  "*strip_predict_hints",               /* name */
  OPTGROUP_NONE,                        /* optinfo_flags */
  NULL,                                 /* gate */
  strip_predict_hints,                  /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_BRANCH_PROB,                       /* tv_id */
  PROP_cfg,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_ggc_collect | TODO_verify_ssa    /* todo_flags_finish */
 }
};

/* Rebuild function frequencies.  Passes are in general expected to
   maintain the profile by hand; however, in some cases this is not
   possible: for example, when inlining several functions with loops,
   frequencies might run out of scale and thus need to be recomputed.  */

void
rebuild_frequencies (void)
{
  timevar_push (TV_REBUILD_FREQUENCIES);
  if (profile_status == PROFILE_GUESSED)
    {
      loop_optimizer_init (0);
      add_noreturn_fake_exit_edges ();
      mark_irreducible_loops ();
      connect_infinite_loops_to_exit ();
      estimate_bb_frequencies ();
      remove_fake_exit_edges ();
      loop_optimizer_finalize ();
    }
  else if (profile_status == PROFILE_READ)
    counts_to_freqs ();
  else
    gcc_unreachable ();
  timevar_pop (TV_REBUILD_FREQUENCIES);
}