1 /* Read and annotate call graph profile from the auto profile data file. 2 Copyright (C) 2014-2015 Free Software Foundation, Inc. 3 Contributed by Dehao Chen (dehao@google.com) 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it under 8 the terms of the GNU General Public License as published by the Free 9 Software Foundation; either version 3, or (at your option) any later 10 version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #include "config.h" 22 #define INCLUDE_MAP 23 #define INCLUDE_SET 24 #include "system.h" 25 26 #include "coretypes.h" 27 #include "hash-set.h" 28 #include "machmode.h" 29 #include "vec.h" 30 #include "double-int.h" 31 #include "input.h" 32 #include "alias.h" 33 #include "symtab.h" 34 #include "options.h" 35 #include "wide-int.h" 36 #include "inchash.h" 37 #include "tree.h" 38 #include "fold-const.h" 39 #include "tree-pass.h" 40 #include "flags.h" 41 #include "predict.h" 42 #include "vec.h" 43 #include "hashtab.h" 44 #include "hash-set.h" 45 #include "machmode.h" 46 #include "tm.h" 47 #include "hard-reg-set.h" 48 #include "input.h" 49 #include "function.h" 50 #include "dominance.h" 51 #include "cfg.h" 52 #include "basic-block.h" 53 #include "diagnostic-core.h" 54 #include "gcov-io.h" 55 #include "profile.h" 56 #include "langhooks.h" 57 #include "opts.h" 58 #include "tree-pass.h" 59 #include "cfgloop.h" 60 #include "tree-ssa-alias.h" 61 #include "tree-cfg.h" 62 #include "tree-cfgcleanup.h" 63 #include "tree-ssa-operands.h" 64 #include "tree-into-ssa.h" 65 #include "internal-fn.h" 66 #include "is-a.h" 67 #include "gimple-expr.h" 68 #include "gimple.h" 69 #include "gimple-iterator.h" 70 #include "gimple-ssa.h" 71 #include "hash-map.h" 72 #include "plugin-api.h" 73 #include "ipa-ref.h" 74 #include "cgraph.h" 75 #include "value-prof.h" 76 #include "coverage.h" 77 #include "params.h" 78 #include "alloc-pool.h" 79 #include "symbol-summary.h" 80 #include "ipa-prop.h" 81 #include "ipa-inline.h" 82 #include "tree-inline.h" 83 #include "stringpool.h" 84 #include "auto-profile.h" 85 86 /* The following routines implements AutoFDO optimization. 87 88 This optimization uses sampling profiles to annotate basic block counts 89 and uses heuristics to estimate branch probabilities. 90 91 There are three phases in AutoFDO: 92 93 Phase 1: Read profile from the profile data file. 94 The following info is read from the profile datafile: 95 * string_table: a map between function name and its index. 96 * autofdo_source_profile: a map from function_instance name to 97 function_instance. This is represented as a forest of 98 function_instances. 99 * WorkingSet: a histogram of how many instructions are covered for a 100 given percentage of total cycles. This is describing the binary 101 level information (not source level). This info is used to help 102 decide if we want aggressive optimizations that could increase 103 code footprint (e.g. loop unroll etc.) 104 A function instance is an instance of function that could either be a 105 standalone symbol, or a clone of a function that is inlined into another 106 function. 107 108 Phase 2: Early inline + value profile transformation. 109 Early inline uses autofdo_source_profile to find if a callsite is: 110 * inlined in the profiled binary. 111 * callee body is hot in the profiling run. 112 If both condition satisfies, early inline will inline the callsite 113 regardless of the code growth. 114 Phase 2 is an iterative process. During each iteration, we also check 115 if an indirect callsite is promoted and inlined in the profiling run. 116 If yes, vpt will happen to force promote it and in the next iteration, 117 einline will inline the promoted callsite in the next iteration. 118 119 Phase 3: Annotate control flow graph. 120 AutoFDO uses a separate pass to: 121 * Annotate basic block count 122 * Estimate branch probability 123 124 After the above 3 phases, all profile is readily annotated on the GCC IR. 125 AutoFDO tries to reuse all FDO infrastructure as much as possible to make 126 use of the profile. E.g. it uses existing mechanism to calculate the basic 127 block/edge frequency, as well as the cgraph node/edge count. 128 */ 129 130 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo" 131 #define AUTO_PROFILE_VERSION 1 132 133 namespace autofdo 134 { 135 136 /* Represent a source location: (function_decl, lineno). */ 137 typedef std::pair<tree, unsigned> decl_lineno; 138 139 /* Represent an inline stack. vector[0] is the leaf node. */ 140 typedef auto_vec<decl_lineno> inline_stack; 141 142 /* String array that stores function names. */ 143 typedef auto_vec<char *> string_vector; 144 145 /* Map from function name's index in string_table to target's 146 execution count. */ 147 typedef std::map<unsigned, gcov_type> icall_target_map; 148 149 /* Set of gimple stmts. Used to track if the stmt has already been promoted 150 to direct call. */ 151 typedef std::set<gimple> stmt_set; 152 153 /* Represent count info of an inline stack. */ 154 struct count_info 155 { 156 /* Sampled count of the inline stack. */ 157 gcov_type count; 158 159 /* Map from indirect call target to its sample count. */ 160 icall_target_map targets; 161 162 /* Whether this inline stack is already used in annotation. 163 164 Each inline stack should only be used to annotate IR once. 165 This will be enforced when instruction-level discriminator 166 is supported. */ 167 bool annotated; 168 }; 169 170 /* operator< for "const char *". */ 171 struct string_compare 172 { 173 bool operator()(const char *a, const char *b) const 174 { 175 return strcmp (a, b) < 0; 176 } 177 }; 178 179 /* Store a string array, indexed by string position in the array. */ 180 class string_table 181 { 182 public: 183 string_table () 184 {} 185 186 ~string_table (); 187 188 /* For a given string, returns its index. */ 189 int get_index (const char *name) const; 190 191 /* For a given decl, returns the index of the decl name. */ 192 int get_index_by_decl (tree decl) const; 193 194 /* For a given index, returns the string. */ 195 const char *get_name (int index) const; 196 197 /* Read profile, return TRUE on success. */ 198 bool read (); 199 200 private: 201 typedef std::map<const char *, unsigned, string_compare> string_index_map; 202 string_vector vector_; 203 string_index_map map_; 204 }; 205 206 /* Profile of a function instance: 207 1. total_count of the function. 208 2. head_count (entry basic block count) of the function (only valid when 209 function is a top-level function_instance, i.e. it is the original copy 210 instead of the inlined copy). 211 3. map from source location (decl_lineno) to profile (count_info). 212 4. map from callsite to callee function_instance. */ 213 class function_instance 214 { 215 public: 216 typedef auto_vec<function_instance *> function_instance_stack; 217 218 /* Read the profile and return a function_instance with head count as 219 HEAD_COUNT. Recursively read callsites to create nested function_instances 220 too. STACK is used to track the recursive creation process. */ 221 static function_instance * 222 read_function_instance (function_instance_stack *stack, 223 gcov_type head_count); 224 225 /* Recursively deallocate all callsites (nested function_instances). */ 226 ~function_instance (); 227 228 /* Accessors. */ 229 int 230 name () const 231 { 232 return name_; 233 } 234 gcov_type 235 total_count () const 236 { 237 return total_count_; 238 } 239 gcov_type 240 head_count () const 241 { 242 return head_count_; 243 } 244 245 /* Traverse callsites of the current function_instance to find one at the 246 location of LINENO and callee name represented in DECL. */ 247 function_instance *get_function_instance_by_decl (unsigned lineno, 248 tree decl) const; 249 250 /* Store the profile info for LOC in INFO. Return TRUE if profile info 251 is found. */ 252 bool get_count_info (location_t loc, count_info *info) const; 253 254 /* Read the inlined indirect call target profile for STMT and store it in 255 MAP, return the total count for all inlined indirect calls. */ 256 gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const; 257 258 /* Sum of counts that is used during annotation. */ 259 gcov_type total_annotated_count () const; 260 261 /* Mark LOC as annotated. */ 262 void mark_annotated (location_t loc); 263 264 private: 265 /* Callsite, represented as (decl_lineno, callee_function_name_index). */ 266 typedef std::pair<unsigned, unsigned> callsite; 267 268 /* Map from callsite to callee function_instance. */ 269 typedef std::map<callsite, function_instance *> callsite_map; 270 271 function_instance (unsigned name, gcov_type head_count) 272 : name_ (name), total_count_ (0), head_count_ (head_count) 273 { 274 } 275 276 /* Map from source location (decl_lineno) to profile (count_info). */ 277 typedef std::map<unsigned, count_info> position_count_map; 278 279 /* function_instance name index in the string_table. */ 280 unsigned name_; 281 282 /* Total sample count. */ 283 gcov_type total_count_; 284 285 /* Entry BB's sample count. */ 286 gcov_type head_count_; 287 288 /* Map from callsite location to callee function_instance. */ 289 callsite_map callsites; 290 291 /* Map from source location to count_info. */ 292 position_count_map pos_counts; 293 }; 294 295 /* Profile for all functions. */ 296 class autofdo_source_profile 297 { 298 public: 299 static autofdo_source_profile * 300 create () 301 { 302 autofdo_source_profile *map = new autofdo_source_profile (); 303 304 if (map->read ()) 305 return map; 306 delete map; 307 return NULL; 308 } 309 310 ~autofdo_source_profile (); 311 312 /* For a given DECL, returns the top-level function_instance. */ 313 function_instance *get_function_instance_by_decl (tree decl) const; 314 315 /* Find count_info for a given gimple STMT. If found, store the count_info 316 in INFO and return true; otherwise return false. */ 317 bool get_count_info (gimple stmt, count_info *info) const; 318 319 /* Find total count of the callee of EDGE. */ 320 gcov_type get_callsite_total_count (struct cgraph_edge *edge) const; 321 322 /* Update value profile INFO for STMT from the inlined indirect callsite. 323 Return true if INFO is updated. */ 324 bool update_inlined_ind_target (gcall *stmt, count_info *info); 325 326 /* Mark LOC as annotated. */ 327 void mark_annotated (location_t loc); 328 329 private: 330 /* Map from function_instance name index (in string_table) to 331 function_instance. */ 332 typedef std::map<unsigned, function_instance *> name_function_instance_map; 333 334 autofdo_source_profile () {} 335 336 /* Read AutoFDO profile and returns TRUE on success. */ 337 bool read (); 338 339 /* Return the function_instance in the profile that correspond to the 340 inline STACK. */ 341 function_instance * 342 get_function_instance_by_inline_stack (const inline_stack &stack) const; 343 344 name_function_instance_map map_; 345 }; 346 347 /* Store the strings read from the profile data file. */ 348 static string_table *afdo_string_table; 349 350 /* Store the AutoFDO source profile. */ 351 static autofdo_source_profile *afdo_source_profile; 352 353 /* gcov_ctr_summary structure to store the profile_info. */ 354 static struct gcov_ctr_summary *afdo_profile_info; 355 356 /* Helper functions. */ 357 358 /* Return the original name of NAME: strip the suffix that starts 359 with '.' Caller is responsible for freeing RET. */ 360 361 static char * 362 get_original_name (const char *name) 363 { 364 char *ret = xstrdup (name); 365 char *find = strchr (ret, '.'); 366 if (find != NULL) 367 *find = 0; 368 return ret; 369 } 370 371 /* Return the combined location, which is a 32bit integer in which 372 higher 16 bits stores the line offset of LOC to the start lineno 373 of DECL, The lower 16 bits stores the discriminator. */ 374 375 static unsigned 376 get_combined_location (location_t loc, tree decl) 377 { 378 /* TODO: allow more bits for line and less bits for discriminator. */ 379 if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16)) 380 warning_at (loc, OPT_Woverflow, "Offset exceeds 16 bytes."); 381 return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16); 382 } 383 384 /* Return the function decl of a given lexical BLOCK. */ 385 386 static tree 387 get_function_decl_from_block (tree block) 388 { 389 tree decl; 390 391 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) == UNKNOWN_LOCATION) 392 return NULL_TREE; 393 394 for (decl = BLOCK_ABSTRACT_ORIGIN (block); 395 decl && (TREE_CODE (decl) == BLOCK); 396 decl = BLOCK_ABSTRACT_ORIGIN (decl)) 397 if (TREE_CODE (decl) == FUNCTION_DECL) 398 break; 399 return decl; 400 } 401 402 /* Store inline stack for STMT in STACK. */ 403 404 static void 405 get_inline_stack (location_t locus, inline_stack *stack) 406 { 407 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION) 408 return; 409 410 tree block = LOCATION_BLOCK (locus); 411 if (block && TREE_CODE (block) == BLOCK) 412 { 413 int level = 0; 414 for (block = BLOCK_SUPERCONTEXT (block); 415 block && (TREE_CODE (block) == BLOCK); 416 block = BLOCK_SUPERCONTEXT (block)) 417 { 418 location_t tmp_locus = BLOCK_SOURCE_LOCATION (block); 419 if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION) 420 continue; 421 422 tree decl = get_function_decl_from_block (block); 423 stack->safe_push ( 424 std::make_pair (decl, get_combined_location (locus, decl))); 425 locus = tmp_locus; 426 level++; 427 } 428 } 429 stack->safe_push ( 430 std::make_pair (current_function_decl, 431 get_combined_location (locus, current_function_decl))); 432 } 433 434 /* Return STMT's combined location, which is a 32bit integer in which 435 higher 16 bits stores the line offset of LOC to the start lineno 436 of DECL, The lower 16 bits stores the discriminator. */ 437 438 static unsigned 439 get_relative_location_for_stmt (gimple stmt) 440 { 441 location_t locus = gimple_location (stmt); 442 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION) 443 return UNKNOWN_LOCATION; 444 445 for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK); 446 block = BLOCK_SUPERCONTEXT (block)) 447 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION) 448 return get_combined_location (locus, 449 get_function_decl_from_block (block)); 450 return get_combined_location (locus, current_function_decl); 451 } 452 453 /* Return true if BB contains indirect call. */ 454 455 static bool 456 has_indirect_call (basic_block bb) 457 { 458 gimple_stmt_iterator gsi; 459 460 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 461 { 462 gimple stmt = gsi_stmt (gsi); 463 if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt) 464 && (gimple_call_fn (stmt) == NULL 465 || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL)) 466 return true; 467 } 468 return false; 469 } 470 471 /* Member functions for string_table. */ 472 473 /* Deconstructor. */ 474 475 string_table::~string_table () 476 { 477 for (unsigned i = 0; i < vector_.length (); i++) 478 free (vector_[i]); 479 } 480 481 482 /* Return the index of a given function NAME. Return -1 if NAME is not 483 found in string table. */ 484 485 int 486 string_table::get_index (const char *name) const 487 { 488 if (name == NULL) 489 return -1; 490 string_index_map::const_iterator iter = map_.find (name); 491 if (iter == map_.end ()) 492 return -1; 493 494 return iter->second; 495 } 496 497 /* Return the index of a given function DECL. Return -1 if DECL is not 498 found in string table. */ 499 500 int 501 string_table::get_index_by_decl (tree decl) const 502 { 503 char *name 504 = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))); 505 int ret = get_index (name); 506 free (name); 507 if (ret != -1) 508 return ret; 509 ret = get_index (lang_hooks.dwarf_name (decl, 0)); 510 if (ret != -1) 511 return ret; 512 if (DECL_ABSTRACT_ORIGIN (decl)) 513 return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl)); 514 515 return -1; 516 } 517 518 /* Return the function name of a given INDEX. */ 519 520 const char * 521 string_table::get_name (int index) const 522 { 523 gcc_assert (index > 0 && index < (int)vector_.length ()); 524 return vector_[index]; 525 } 526 527 /* Read the string table. Return TRUE if reading is successful. */ 528 529 bool 530 string_table::read () 531 { 532 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES) 533 return false; 534 /* Skip the length of the section. */ 535 gcov_read_unsigned (); 536 /* Read in the file name table. */ 537 unsigned string_num = gcov_read_unsigned (); 538 for (unsigned i = 0; i < string_num; i++) 539 { 540 vector_.safe_push (get_original_name (gcov_read_string ())); 541 map_[vector_.last ()] = i; 542 } 543 return true; 544 } 545 546 /* Member functions for function_instance. */ 547 548 function_instance::~function_instance () 549 { 550 for (callsite_map::iterator iter = callsites.begin (); 551 iter != callsites.end (); ++iter) 552 delete iter->second; 553 } 554 555 /* Traverse callsites of the current function_instance to find one at the 556 location of LINENO and callee name represented in DECL. */ 557 558 function_instance * 559 function_instance::get_function_instance_by_decl (unsigned lineno, 560 tree decl) const 561 { 562 int func_name_idx = afdo_string_table->get_index_by_decl (decl); 563 if (func_name_idx != -1) 564 { 565 callsite_map::const_iterator ret 566 = callsites.find (std::make_pair (lineno, func_name_idx)); 567 if (ret != callsites.end ()) 568 return ret->second; 569 } 570 func_name_idx 571 = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0)); 572 if (func_name_idx != -1) 573 { 574 callsite_map::const_iterator ret 575 = callsites.find (std::make_pair (lineno, func_name_idx)); 576 if (ret != callsites.end ()) 577 return ret->second; 578 } 579 if (DECL_ABSTRACT_ORIGIN (decl)) 580 return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl)); 581 582 return NULL; 583 } 584 585 /* Store the profile info for LOC in INFO. Return TRUE if profile info 586 is found. */ 587 588 bool 589 function_instance::get_count_info (location_t loc, count_info *info) const 590 { 591 position_count_map::const_iterator iter = pos_counts.find (loc); 592 if (iter == pos_counts.end ()) 593 return false; 594 *info = iter->second; 595 return true; 596 } 597 598 /* Mark LOC as annotated. */ 599 600 void 601 function_instance::mark_annotated (location_t loc) 602 { 603 position_count_map::iterator iter = pos_counts.find (loc); 604 if (iter == pos_counts.end ()) 605 return; 606 iter->second.annotated = true; 607 } 608 609 /* Read the inlined indirect call target profile for STMT and store it in 610 MAP, return the total count for all inlined indirect calls. */ 611 612 gcov_type 613 function_instance::find_icall_target_map (gcall *stmt, 614 icall_target_map *map) const 615 { 616 gcov_type ret = 0; 617 unsigned stmt_offset = get_relative_location_for_stmt (stmt); 618 619 for (callsite_map::const_iterator iter = callsites.begin (); 620 iter != callsites.end (); ++iter) 621 { 622 unsigned callee = iter->second->name (); 623 /* Check if callsite location match the stmt. */ 624 if (iter->first.first != stmt_offset) 625 continue; 626 struct cgraph_node *node = cgraph_node::get_for_asmname ( 627 get_identifier (afdo_string_table->get_name (callee))); 628 if (node == NULL) 629 continue; 630 if (!check_ic_target (stmt, node)) 631 continue; 632 (*map)[callee] = iter->second->total_count (); 633 ret += iter->second->total_count (); 634 } 635 return ret; 636 } 637 638 /* Read the profile and create a function_instance with head count as 639 HEAD_COUNT. Recursively read callsites to create nested function_instances 640 too. STACK is used to track the recursive creation process. */ 641 642 /* function instance profile format: 643 644 ENTRY_COUNT: 8 bytes 645 NAME_INDEX: 4 bytes 646 NUM_POS_COUNTS: 4 bytes 647 NUM_CALLSITES: 4 byte 648 POS_COUNT_1: 649 POS_1_OFFSET: 4 bytes 650 NUM_TARGETS: 4 bytes 651 COUNT: 8 bytes 652 TARGET_1: 653 VALUE_PROFILE_TYPE: 4 bytes 654 TARGET_IDX: 8 bytes 655 COUNT: 8 bytes 656 TARGET_2 657 ... 658 TARGET_n 659 POS_COUNT_2 660 ... 661 POS_COUNT_N 662 CALLSITE_1: 663 CALLSITE_1_OFFSET: 4 bytes 664 FUNCTION_INSTANCE_PROFILE (nested) 665 CALLSITE_2 666 ... 667 CALLSITE_n. */ 668 669 function_instance * 670 function_instance::read_function_instance (function_instance_stack *stack, 671 gcov_type head_count) 672 { 673 unsigned name = gcov_read_unsigned (); 674 unsigned num_pos_counts = gcov_read_unsigned (); 675 unsigned num_callsites = gcov_read_unsigned (); 676 function_instance *s = new function_instance (name, head_count); 677 stack->safe_push (s); 678 679 for (unsigned i = 0; i < num_pos_counts; i++) 680 { 681 unsigned offset = gcov_read_unsigned () & 0xffff0000; 682 unsigned num_targets = gcov_read_unsigned (); 683 gcov_type count = gcov_read_counter (); 684 s->pos_counts[offset].count = count; 685 for (unsigned j = 0; j < stack->length (); j++) 686 (*stack)[j]->total_count_ += count; 687 for (unsigned j = 0; j < num_targets; j++) 688 { 689 /* Only indirect call target histogram is supported now. */ 690 gcov_read_unsigned (); 691 gcov_type target_idx = gcov_read_counter (); 692 s->pos_counts[offset].targets[target_idx] = gcov_read_counter (); 693 } 694 } 695 for (unsigned i = 0; i < num_callsites; i++) 696 { 697 unsigned offset = gcov_read_unsigned (); 698 function_instance *callee_function_instance 699 = read_function_instance (stack, 0); 700 s->callsites[std::make_pair (offset, callee_function_instance->name ())] 701 = callee_function_instance; 702 } 703 stack->pop (); 704 return s; 705 } 706 707 /* Sum of counts that is used during annotation. */ 708 709 gcov_type 710 function_instance::total_annotated_count () const 711 { 712 gcov_type ret = 0; 713 for (callsite_map::const_iterator iter = callsites.begin (); 714 iter != callsites.end (); ++iter) 715 ret += iter->second->total_annotated_count (); 716 for (position_count_map::const_iterator iter = pos_counts.begin (); 717 iter != pos_counts.end (); ++iter) 718 if (iter->second.annotated) 719 ret += iter->second.count; 720 return ret; 721 } 722 723 /* Member functions for autofdo_source_profile. */ 724 725 autofdo_source_profile::~autofdo_source_profile () 726 { 727 for (name_function_instance_map::const_iterator iter = map_.begin (); 728 iter != map_.end (); ++iter) 729 delete iter->second; 730 } 731 732 /* For a given DECL, returns the top-level function_instance. */ 733 734 function_instance * 735 autofdo_source_profile::get_function_instance_by_decl (tree decl) const 736 { 737 int index = afdo_string_table->get_index_by_decl (decl); 738 if (index == -1) 739 return NULL; 740 name_function_instance_map::const_iterator ret = map_.find (index); 741 return ret == map_.end () ? NULL : ret->second; 742 } 743 744 /* Find count_info for a given gimple STMT. If found, store the count_info 745 in INFO and return true; otherwise return false. */ 746 747 bool 748 autofdo_source_profile::get_count_info (gimple stmt, count_info *info) const 749 { 750 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus) 751 return false; 752 753 inline_stack stack; 754 get_inline_stack (gimple_location (stmt), &stack); 755 if (stack.length () == 0) 756 return false; 757 function_instance *s = get_function_instance_by_inline_stack (stack); 758 if (s == NULL) 759 return false; 760 return s->get_count_info (stack[0].second, info); 761 } 762 763 /* Mark LOC as annotated. */ 764 765 void 766 autofdo_source_profile::mark_annotated (location_t loc) 767 { 768 inline_stack stack; 769 get_inline_stack (loc, &stack); 770 if (stack.length () == 0) 771 return; 772 function_instance *s = get_function_instance_by_inline_stack (stack); 773 if (s == NULL) 774 return; 775 s->mark_annotated (stack[0].second); 776 } 777 778 /* Update value profile INFO for STMT from the inlined indirect callsite. 779 Return true if INFO is updated. */ 780 781 bool 782 autofdo_source_profile::update_inlined_ind_target (gcall *stmt, 783 count_info *info) 784 { 785 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus) 786 return false; 787 788 count_info old_info; 789 get_count_info (stmt, &old_info); 790 gcov_type total = 0; 791 for (icall_target_map::const_iterator iter = old_info.targets.begin (); 792 iter != old_info.targets.end (); ++iter) 793 total += iter->second; 794 795 /* Program behavior changed, original promoted (and inlined) target is not 796 hot any more. Will avoid promote the original target. 797 798 To check if original promoted target is still hot, we check the total 799 count of the unpromoted targets (stored in old_info). If it is no less 800 than half of the callsite count (stored in INFO), the original promoted 801 target is considered not hot any more. */ 802 if (total >= info->count / 2) 803 return false; 804 805 inline_stack stack; 806 get_inline_stack (gimple_location (stmt), &stack); 807 if (stack.length () == 0) 808 return false; 809 function_instance *s = get_function_instance_by_inline_stack (stack); 810 if (s == NULL) 811 return false; 812 icall_target_map map; 813 if (s->find_icall_target_map (stmt, &map) == 0) 814 return false; 815 for (icall_target_map::const_iterator iter = map.begin (); 816 iter != map.end (); ++iter) 817 info->targets[iter->first] = iter->second; 818 return true; 819 } 820 821 /* Find total count of the callee of EDGE. */ 822 823 gcov_type 824 autofdo_source_profile::get_callsite_total_count ( 825 struct cgraph_edge *edge) const 826 { 827 inline_stack stack; 828 stack.safe_push (std::make_pair (edge->callee->decl, 0)); 829 get_inline_stack (gimple_location (edge->call_stmt), &stack); 830 831 function_instance *s = get_function_instance_by_inline_stack (stack); 832 if (s == NULL 833 || afdo_string_table->get_index (IDENTIFIER_POINTER ( 834 DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ()) 835 return 0; 836 837 return s->total_count (); 838 } 839 840 /* Read AutoFDO profile and returns TRUE on success. */ 841 842 /* source profile format: 843 844 GCOV_TAG_AFDO_FUNCTION: 4 bytes 845 LENGTH: 4 bytes 846 NUM_FUNCTIONS: 4 bytes 847 FUNCTION_INSTANCE_1 848 FUNCTION_INSTANCE_2 849 ... 850 FUNCTION_INSTANCE_N. */ 851 852 bool 853 autofdo_source_profile::read () 854 { 855 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION) 856 { 857 inform (0, "Not expected TAG."); 858 return false; 859 } 860 861 /* Skip the length of the section. */ 862 gcov_read_unsigned (); 863 864 /* Read in the function/callsite profile, and store it in local 865 data structure. */ 866 unsigned function_num = gcov_read_unsigned (); 867 for (unsigned i = 0; i < function_num; i++) 868 { 869 function_instance::function_instance_stack stack; 870 function_instance *s = function_instance::read_function_instance ( 871 &stack, gcov_read_counter ()); 872 afdo_profile_info->sum_all += s->total_count (); 873 map_[s->name ()] = s; 874 } 875 return true; 876 } 877 878 /* Return the function_instance in the profile that correspond to the 879 inline STACK. */ 880 881 function_instance * 882 autofdo_source_profile::get_function_instance_by_inline_stack ( 883 const inline_stack &stack) const 884 { 885 name_function_instance_map::const_iterator iter = map_.find ( 886 afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first)); 887 if (iter == map_.end()) 888 return NULL; 889 function_instance *s = iter->second; 890 for (unsigned i = stack.length() - 1; i > 0; i--) 891 { 892 s = s->get_function_instance_by_decl ( 893 stack[i].second, stack[i - 1].first); 894 if (s == NULL) 895 return NULL; 896 } 897 return s; 898 } 899 900 /* Module profile is only used by LIPO. Here we simply ignore it. */ 901 902 static void 903 fake_read_autofdo_module_profile () 904 { 905 /* Read in the module info. */ 906 gcov_read_unsigned (); 907 908 /* Skip the length of the section. */ 909 gcov_read_unsigned (); 910 911 /* Read in the file name table. */ 912 unsigned total_module_num = gcov_read_unsigned (); 913 gcc_assert (total_module_num == 0); 914 } 915 916 /* Read data from profile data file. */ 917 918 static void 919 read_profile (void) 920 { 921 if (gcov_open (auto_profile_file, 1) == 0) 922 error ("Cannot open profile file %s.", auto_profile_file); 923 924 if (gcov_read_unsigned () != GCOV_DATA_MAGIC) 925 error ("AutoFDO profile magic number does not mathch."); 926 927 /* Skip the version number. */ 928 unsigned version = gcov_read_unsigned (); 929 if (version != AUTO_PROFILE_VERSION) 930 error ("AutoFDO profile version %u does match %u.", 931 version, AUTO_PROFILE_VERSION); 932 933 /* Skip the empty integer. */ 934 gcov_read_unsigned (); 935 936 /* string_table. */ 937 afdo_string_table = new string_table (); 938 if (!afdo_string_table->read()) 939 error ("Cannot read string table from %s.", auto_profile_file); 940 941 /* autofdo_source_profile. */ 942 afdo_source_profile = autofdo_source_profile::create (); 943 if (afdo_source_profile == NULL) 944 error ("Cannot read function profile from %s.", auto_profile_file); 945 946 /* autofdo_module_profile. */ 947 fake_read_autofdo_module_profile (); 948 949 /* Read in the working set. */ 950 if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET) 951 error ("Cannot read working set from %s.", auto_profile_file); 952 953 /* Skip the length of the section. */ 954 gcov_read_unsigned (); 955 gcov_working_set_t set[128]; 956 for (unsigned i = 0; i < 128; i++) 957 { 958 set[i].num_counters = gcov_read_unsigned (); 959 set[i].min_counter = gcov_read_counter (); 960 } 961 add_working_set (set); 962 } 963 964 /* From AutoFDO profiles, find values inside STMT for that we want to measure 965 histograms for indirect-call optimization. 966 967 This function is actually served for 2 purposes: 968 * before annotation, we need to mark histogram, promote and inline 969 * after annotation, we just need to mark, and let follow-up logic to 970 decide if it needs to promote and inline. */ 971 972 static void 973 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map, 974 bool transform) 975 { 976 gimple gs = gsi_stmt (*gsi); 977 tree callee; 978 979 if (map.size () == 0) 980 return; 981 gcall *stmt = dyn_cast <gcall *> (gs); 982 if ((!stmt) || gimple_call_fndecl (stmt) != NULL_TREE) 983 return; 984 985 callee = gimple_call_fn (stmt); 986 987 histogram_value hist = gimple_alloc_histogram_value ( 988 cfun, HIST_TYPE_INDIR_CALL, stmt, callee); 989 hist->n_counters = 3; 990 hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters); 991 gimple_add_histogram_value (cfun, stmt, hist); 992 993 gcov_type total = 0; 994 icall_target_map::const_iterator max_iter = map.end (); 995 996 for (icall_target_map::const_iterator iter = map.begin (); 997 iter != map.end (); ++iter) 998 { 999 total += iter->second; 1000 if (max_iter == map.end () || max_iter->second < iter->second) 1001 max_iter = iter; 1002 } 1003 1004 hist->hvalue.counters[0] 1005 = (unsigned long long)afdo_string_table->get_name (max_iter->first); 1006 hist->hvalue.counters[1] = max_iter->second; 1007 hist->hvalue.counters[2] = total; 1008 1009 if (!transform) 1010 return; 1011 1012 struct cgraph_edge *indirect_edge 1013 = cgraph_node::get (current_function_decl)->get_edge (stmt); 1014 struct cgraph_node *direct_call = cgraph_node::get_for_asmname ( 1015 get_identifier ((const char *) hist->hvalue.counters[0])); 1016 1017 if (direct_call == NULL || !check_ic_target (stmt, direct_call)) 1018 return; 1019 if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL) 1020 return; 1021 struct cgraph_edge *new_edge 1022 = indirect_edge->make_speculative (direct_call, 0, 0); 1023 new_edge->redirect_call_stmt_to_callee (); 1024 gimple_remove_histogram_value (cfun, stmt, hist); 1025 inline_call (new_edge, true, NULL, NULL, false); 1026 } 1027 1028 /* From AutoFDO profiles, find values inside STMT for that we want to measure 1029 histograms and adds them to list VALUES. */ 1030 1031 static void 1032 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map, 1033 bool transform) 1034 { 1035 afdo_indirect_call (gsi, map, transform); 1036 } 1037 1038 typedef std::set<basic_block> bb_set; 1039 typedef std::set<edge> edge_set; 1040 1041 static bool 1042 is_bb_annotated (const basic_block bb, const bb_set &annotated) 1043 { 1044 return annotated.find (bb) != annotated.end (); 1045 } 1046 1047 static void 1048 set_bb_annotated (basic_block bb, bb_set *annotated) 1049 { 1050 annotated->insert (bb); 1051 } 1052 1053 static bool 1054 is_edge_annotated (const edge e, const edge_set &annotated) 1055 { 1056 return annotated.find (e) != annotated.end (); 1057 } 1058 1059 static void 1060 set_edge_annotated (edge e, edge_set *annotated) 1061 { 1062 annotated->insert (e); 1063 } 1064 1065 /* For a given BB, set its execution count. Attach value profile if a stmt 1066 is not in PROMOTED, because we only want to promote an indirect call once. 1067 Return TRUE if BB is annotated. */ 1068 1069 static bool 1070 afdo_set_bb_count (basic_block bb, const stmt_set &promoted) 1071 { 1072 gimple_stmt_iterator gsi; 1073 edge e; 1074 edge_iterator ei; 1075 gcov_type max_count = 0; 1076 bool has_annotated = false; 1077 1078 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1079 { 1080 count_info info; 1081 gimple stmt = gsi_stmt (gsi); 1082 if (gimple_clobber_p (stmt) || is_gimple_debug (stmt)) 1083 continue; 1084 if (afdo_source_profile->get_count_info (stmt, &info)) 1085 { 1086 if (info.count > max_count) 1087 max_count = info.count; 1088 has_annotated = true; 1089 if (info.targets.size () > 0 1090 && promoted.find (stmt) == promoted.end ()) 1091 afdo_vpt (&gsi, info.targets, false); 1092 } 1093 } 1094 1095 if (!has_annotated) 1096 return false; 1097 1098 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1099 afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi))); 1100 for (gphi_iterator gpi = gsi_start_phis (bb); 1101 !gsi_end_p (gpi); 1102 gsi_next (&gpi)) 1103 { 1104 gphi *phi = gpi.phi (); 1105 size_t i; 1106 for (i = 0; i < gimple_phi_num_args (phi); i++) 1107 afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i)); 1108 } 1109 FOR_EACH_EDGE (e, ei, bb->succs) 1110 afdo_source_profile->mark_annotated (e->goto_locus); 1111 1112 bb->count = max_count; 1113 return true; 1114 } 1115 1116 /* BB1 and BB2 are in an equivalent class iff: 1117 1. BB1 dominates BB2. 1118 2. BB2 post-dominates BB1. 1119 3. BB1 and BB2 are in the same loop nest. 1120 This function finds the equivalent class for each basic block, and 1121 stores a pointer to the first BB in its equivalent class. Meanwhile, 1122 set bb counts for the same equivalent class to be idenical. Update 1123 ANNOTATED_BB for the first BB in its equivalent class. */ 1124 1125 static void 1126 afdo_find_equiv_class (bb_set *annotated_bb) 1127 { 1128 basic_block bb; 1129 1130 FOR_ALL_BB_FN (bb, cfun) 1131 bb->aux = NULL; 1132 1133 FOR_ALL_BB_FN (bb, cfun) 1134 { 1135 vec<basic_block> dom_bbs; 1136 basic_block bb1; 1137 int i; 1138 1139 if (bb->aux != NULL) 1140 continue; 1141 bb->aux = bb; 1142 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb); 1143 FOR_EACH_VEC_ELT (dom_bbs, i, bb1) 1144 if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1) 1145 && bb1->loop_father == bb->loop_father) 1146 { 1147 bb1->aux = bb; 1148 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb)) 1149 { 1150 bb->count = bb1->count; 1151 set_bb_annotated (bb, annotated_bb); 1152 } 1153 } 1154 dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb); 1155 FOR_EACH_VEC_ELT (dom_bbs, i, bb1) 1156 if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1) 1157 && bb1->loop_father == bb->loop_father) 1158 { 1159 bb1->aux = bb; 1160 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb)) 1161 { 1162 bb->count = bb1->count; 1163 set_bb_annotated (bb, annotated_bb); 1164 } 1165 } 1166 } 1167 } 1168 1169 /* If a basic block's count is known, and only one of its in/out edges' count 1170 is unknown, its count can be calculated. Meanwhile, if all of the in/out 1171 edges' counts are known, then the basic block's unknown count can also be 1172 calculated. 1173 IS_SUCC is true if out edges of a basic blocks are examined. 1174 Update ANNOTATED_BB and ANNOTATED_EDGE accordingly. 1175 Return TRUE if any basic block/edge count is changed. */ 1176 1177 static bool 1178 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb, 1179 edge_set *annotated_edge) 1180 { 1181 basic_block bb; 1182 bool changed = false; 1183 1184 FOR_EACH_BB_FN (bb, cfun) 1185 { 1186 edge e, unknown_edge = NULL; 1187 edge_iterator ei; 1188 int num_unknown_edge = 0; 1189 gcov_type total_known_count = 0; 1190 1191 FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds) 1192 if (!is_edge_annotated (e, *annotated_edge)) 1193 num_unknown_edge++, unknown_edge = e; 1194 else 1195 total_known_count += e->count; 1196 1197 if (num_unknown_edge == 0) 1198 { 1199 if (total_known_count > bb->count) 1200 { 1201 bb->count = total_known_count; 1202 changed = true; 1203 } 1204 if (!is_bb_annotated (bb, *annotated_bb)) 1205 { 1206 set_bb_annotated (bb, annotated_bb); 1207 changed = true; 1208 } 1209 } 1210 else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb)) 1211 { 1212 if (bb->count >= total_known_count) 1213 unknown_edge->count = bb->count - total_known_count; 1214 else 1215 unknown_edge->count = 0; 1216 set_edge_annotated (unknown_edge, annotated_edge); 1217 changed = true; 1218 } 1219 } 1220 return changed; 1221 } 1222 1223 /* Special propagation for circuit expressions. Because GCC translates 1224 control flow into data flow for circuit expressions. E.g. 1225 BB1: 1226 if (a && b) 1227 BB2 1228 else 1229 BB3 1230 1231 will be translated into: 1232 1233 BB1: 1234 if (a) 1235 goto BB.t1 1236 else 1237 goto BB.t3 1238 BB.t1: 1239 if (b) 1240 goto BB.t2 1241 else 1242 goto BB.t3 1243 BB.t2: 1244 goto BB.t3 1245 BB.t3: 1246 tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2) 1247 if (tmp) 1248 goto BB2 1249 else 1250 goto BB3 1251 1252 In this case, we need to propagate through PHI to determine the edge 1253 count of BB1->BB.t1, BB.t1->BB.t2. 1254 Update ANNOTATED_EDGE accordingly. */ 1255 1256 static void 1257 afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge) 1258 { 1259 basic_block bb; 1260 FOR_ALL_BB_FN (bb, cfun) 1261 { 1262 gimple def_stmt; 1263 tree cmp_rhs, cmp_lhs; 1264 gimple cmp_stmt = last_stmt (bb); 1265 edge e; 1266 edge_iterator ei; 1267 1268 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND) 1269 continue; 1270 cmp_rhs = gimple_cond_rhs (cmp_stmt); 1271 cmp_lhs = gimple_cond_lhs (cmp_stmt); 1272 if (!TREE_CONSTANT (cmp_rhs) 1273 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs))) 1274 continue; 1275 if (TREE_CODE (cmp_lhs) != SSA_NAME) 1276 continue; 1277 if (!is_bb_annotated (bb, annotated_bb)) 1278 continue; 1279 def_stmt = SSA_NAME_DEF_STMT (cmp_lhs); 1280 while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN 1281 && gimple_assign_single_p (def_stmt) 1282 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME) 1283 def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)); 1284 if (!def_stmt) 1285 continue; 1286 gphi *phi_stmt = dyn_cast <gphi *> (def_stmt); 1287 if (!phi_stmt) 1288 continue; 1289 FOR_EACH_EDGE (e, ei, bb->succs) 1290 { 1291 unsigned i, total = 0; 1292 edge only_one; 1293 bool check_value_one = (((integer_onep (cmp_rhs)) 1294 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR)) 1295 ^ ((e->flags & EDGE_TRUE_VALUE) != 0)); 1296 if (!is_edge_annotated (e, *annotated_edge)) 1297 continue; 1298 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++) 1299 { 1300 tree val = gimple_phi_arg_def (phi_stmt, i); 1301 edge ep = gimple_phi_arg_edge (phi_stmt, i); 1302 1303 if (!TREE_CONSTANT (val) 1304 || !(integer_zerop (val) || integer_onep (val))) 1305 continue; 1306 if (check_value_one ^ integer_onep (val)) 1307 continue; 1308 total++; 1309 only_one = ep; 1310 if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge)) 1311 { 1312 ep->probability = 0; 1313 ep->count = 0; 1314 set_edge_annotated (ep, annotated_edge); 1315 } 1316 } 1317 if (total == 1 && !is_edge_annotated (only_one, *annotated_edge)) 1318 { 1319 only_one->probability = e->probability; 1320 only_one->count = e->count; 1321 set_edge_annotated (only_one, annotated_edge); 1322 } 1323 } 1324 } 1325 } 1326 1327 /* Propagate the basic block count and edge count on the control flow 1328 graph. We do the propagation iteratively until stablize. */ 1329 1330 static void 1331 afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge) 1332 { 1333 basic_block bb; 1334 bool changed = true; 1335 int i = 0; 1336 1337 FOR_ALL_BB_FN (bb, cfun) 1338 { 1339 bb->count = ((basic_block)bb->aux)->count; 1340 if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb)) 1341 set_bb_annotated (bb, annotated_bb); 1342 } 1343 1344 while (changed && i++ < 10) 1345 { 1346 changed = false; 1347 1348 if (afdo_propagate_edge (true, annotated_bb, annotated_edge)) 1349 changed = true; 1350 if (afdo_propagate_edge (false, annotated_bb, annotated_edge)) 1351 changed = true; 1352 afdo_propagate_circuit (*annotated_bb, annotated_edge); 1353 } 1354 } 1355 1356 /* Propagate counts on control flow graph and calculate branch 1357 probabilities. */ 1358 1359 static void 1360 afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge) 1361 { 1362 basic_block bb; 1363 bool has_sample = false; 1364 1365 FOR_EACH_BB_FN (bb, cfun) 1366 if (bb->count > 0) 1367 has_sample = true; 1368 1369 if (!has_sample) 1370 return; 1371 1372 calculate_dominance_info (CDI_POST_DOMINATORS); 1373 calculate_dominance_info (CDI_DOMINATORS); 1374 loop_optimizer_init (0); 1375 1376 afdo_find_equiv_class (annotated_bb); 1377 afdo_propagate (annotated_bb, annotated_edge); 1378 1379 FOR_EACH_BB_FN (bb, cfun) 1380 { 1381 edge e; 1382 edge_iterator ei; 1383 int num_unknown_succ = 0; 1384 gcov_type total_count = 0; 1385 1386 FOR_EACH_EDGE (e, ei, bb->succs) 1387 { 1388 if (!is_edge_annotated (e, *annotated_edge)) 1389 num_unknown_succ++; 1390 else 1391 total_count += e->count; 1392 } 1393 if (num_unknown_succ == 0 && total_count > 0) 1394 { 1395 FOR_EACH_EDGE (e, ei, bb->succs) 1396 e->probability = (double)e->count * REG_BR_PROB_BASE / total_count; 1397 } 1398 } 1399 FOR_ALL_BB_FN (bb, cfun) 1400 { 1401 edge e; 1402 edge_iterator ei; 1403 1404 FOR_EACH_EDGE (e, ei, bb->succs) 1405 e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE; 1406 bb->aux = NULL; 1407 } 1408 1409 loop_optimizer_finalize (); 1410 free_dominance_info (CDI_DOMINATORS); 1411 free_dominance_info (CDI_POST_DOMINATORS); 1412 } 1413 1414 /* Perform value profile transformation using AutoFDO profile. Add the 1415 promoted stmts to PROMOTED_STMTS. Return TRUE if there is any 1416 indirect call promoted. */ 1417 1418 static bool 1419 afdo_vpt_for_early_inline (stmt_set *promoted_stmts) 1420 { 1421 basic_block bb; 1422 if (afdo_source_profile->get_function_instance_by_decl ( 1423 current_function_decl) == NULL) 1424 return false; 1425 1426 compute_inline_parameters (cgraph_node::get (current_function_decl), true); 1427 1428 bool has_vpt = false; 1429 FOR_EACH_BB_FN (bb, cfun) 1430 { 1431 if (!has_indirect_call (bb)) 1432 continue; 1433 gimple_stmt_iterator gsi; 1434 1435 gcov_type bb_count = 0; 1436 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1437 { 1438 count_info info; 1439 gimple stmt = gsi_stmt (gsi); 1440 if (afdo_source_profile->get_count_info (stmt, &info)) 1441 bb_count = MAX (bb_count, info.count); 1442 } 1443 1444 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1445 { 1446 gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi)); 1447 /* IC_promotion and early_inline_2 is done in multiple iterations. 1448 No need to promoted the stmt if its in promoted_stmts (means 1449 it is already been promoted in the previous iterations). */ 1450 if ((!stmt) || gimple_call_fn (stmt) == NULL 1451 || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL 1452 || promoted_stmts->find (stmt) != promoted_stmts->end ()) 1453 continue; 1454 1455 count_info info; 1456 afdo_source_profile->get_count_info (stmt, &info); 1457 info.count = bb_count; 1458 if (afdo_source_profile->update_inlined_ind_target (stmt, &info)) 1459 { 1460 /* Promote the indirect call and update the promoted_stmts. */ 1461 promoted_stmts->insert (stmt); 1462 afdo_vpt (&gsi, info.targets, true); 1463 has_vpt = true; 1464 } 1465 } 1466 } 1467 1468 if (has_vpt) 1469 { 1470 unsigned todo = optimize_inline_calls (current_function_decl); 1471 if (todo & TODO_update_ssa_any) 1472 update_ssa (TODO_update_ssa); 1473 return true; 1474 } 1475 1476 return false; 1477 } 1478 1479 /* Annotate auto profile to the control flow graph. Do not annotate value 1480 profile for stmts in PROMOTED_STMTS. */ 1481 1482 static void 1483 afdo_annotate_cfg (const stmt_set &promoted_stmts) 1484 { 1485 basic_block bb; 1486 bb_set annotated_bb; 1487 edge_set annotated_edge; 1488 const function_instance *s 1489 = afdo_source_profile->get_function_instance_by_decl ( 1490 current_function_decl); 1491 1492 if (s == NULL) 1493 return; 1494 cgraph_node::get (current_function_decl)->count = s->head_count (); 1495 ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count (); 1496 gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; 1497 1498 FOR_EACH_BB_FN (bb, cfun) 1499 { 1500 edge e; 1501 edge_iterator ei; 1502 1503 bb->count = 0; 1504 FOR_EACH_EDGE (e, ei, bb->succs) 1505 e->count = 0; 1506 1507 if (afdo_set_bb_count (bb, promoted_stmts)) 1508 set_bb_annotated (bb, &annotated_bb); 1509 if (bb->count > max_count) 1510 max_count = bb->count; 1511 } 1512 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count 1513 > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count) 1514 { 1515 ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count 1516 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; 1517 set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb); 1518 } 1519 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count 1520 > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count) 1521 { 1522 EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count 1523 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; 1524 set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb); 1525 } 1526 afdo_source_profile->mark_annotated ( 1527 DECL_SOURCE_LOCATION (current_function_decl)); 1528 afdo_source_profile->mark_annotated (cfun->function_start_locus); 1529 afdo_source_profile->mark_annotated (cfun->function_end_locus); 1530 if (max_count > 0) 1531 { 1532 afdo_calculate_branch_prob (&annotated_bb, &annotated_edge); 1533 counts_to_freqs (); 1534 profile_status_for_fn (cfun) = PROFILE_READ; 1535 } 1536 if (flag_value_profile_transformations) 1537 { 1538 gimple_value_profile_transformations (); 1539 free_dominance_info (CDI_DOMINATORS); 1540 free_dominance_info (CDI_POST_DOMINATORS); 1541 update_ssa (TODO_update_ssa); 1542 } 1543 } 1544 1545 /* Wrapper function to invoke early inliner. */ 1546 1547 static void 1548 early_inline () 1549 { 1550 compute_inline_parameters (cgraph_node::get (current_function_decl), true); 1551 unsigned todo = early_inliner (cfun); 1552 if (todo & TODO_update_ssa_any) 1553 update_ssa (TODO_update_ssa); 1554 } 1555 1556 /* Use AutoFDO profile to annoate the control flow graph. 1557 Return the todo flag. */ 1558 1559 static unsigned int 1560 auto_profile (void) 1561 { 1562 struct cgraph_node *node; 1563 1564 if (symtab->state == FINISHED) 1565 return 0; 1566 1567 init_node_map (true); 1568 profile_info = autofdo::afdo_profile_info; 1569 1570 FOR_EACH_FUNCTION (node) 1571 { 1572 if (!gimple_has_body_p (node->decl)) 1573 continue; 1574 1575 /* Don't profile functions produced for builtin stuff. */ 1576 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION) 1577 continue; 1578 1579 push_cfun (DECL_STRUCT_FUNCTION (node->decl)); 1580 1581 /* First do indirect call promotion and early inline to make the 1582 IR match the profiled binary before actual annotation. 1583 1584 This is needed because an indirect call might have been promoted 1585 and inlined in the profiled binary. If we do not promote and 1586 inline these indirect calls before annotation, the profile for 1587 these promoted functions will be lost. 1588 1589 e.g. foo() --indirect_call--> bar() 1590 In profiled binary, the callsite is promoted and inlined, making 1591 the profile look like: 1592 1593 foo: { 1594 loc_foo_1: count_1 1595 bar@loc_foo_2: { 1596 loc_bar_1: count_2 1597 loc_bar_2: count_3 1598 } 1599 } 1600 1601 Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined. 1602 If we perform annotation on it, the profile inside bar@loc_foo2 1603 will be wasted. 1604 1605 To avoid this, we promote loc_foo_2 and inline the promoted bar 1606 function before annotation, so the profile inside bar@loc_foo2 1607 will be useful. */ 1608 autofdo::stmt_set promoted_stmts; 1609 for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++) 1610 { 1611 if (!flag_value_profile_transformations 1612 || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts)) 1613 break; 1614 early_inline (); 1615 } 1616 1617 early_inline (); 1618 autofdo::afdo_annotate_cfg (promoted_stmts); 1619 compute_function_frequency (); 1620 1621 /* Local pure-const may imply need to fixup the cfg. */ 1622 if (execute_fixup_cfg () & TODO_cleanup_cfg) 1623 cleanup_tree_cfg (); 1624 1625 free_dominance_info (CDI_DOMINATORS); 1626 free_dominance_info (CDI_POST_DOMINATORS); 1627 cgraph_edge::rebuild_edges (); 1628 compute_inline_parameters (cgraph_node::get (current_function_decl), true); 1629 pop_cfun (); 1630 } 1631 1632 return TODO_rebuild_cgraph_edges; 1633 } 1634 } /* namespace autofdo. */ 1635 1636 /* Read the profile from the profile data file. */ 1637 1638 void 1639 read_autofdo_file (void) 1640 { 1641 if (auto_profile_file == NULL) 1642 auto_profile_file = DEFAULT_AUTO_PROFILE_FILE; 1643 1644 autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc ( 1645 1, sizeof (struct gcov_ctr_summary)); 1646 autofdo::afdo_profile_info->runs = 1; 1647 autofdo::afdo_profile_info->sum_max = 0; 1648 autofdo::afdo_profile_info->sum_all = 0; 1649 1650 /* Read the profile from the profile file. */ 1651 autofdo::read_profile (); 1652 } 1653 1654 /* Free the resources. */ 1655 1656 void 1657 end_auto_profile (void) 1658 { 1659 delete autofdo::afdo_source_profile; 1660 delete autofdo::afdo_string_table; 1661 profile_info = NULL; 1662 } 1663 1664 /* Returns TRUE if EDGE is hot enough to be inlined early. */ 1665 1666 bool 1667 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge) 1668 { 1669 gcov_type count 1670 = autofdo::afdo_source_profile->get_callsite_total_count (edge); 1671 1672 if (count > 0) 1673 { 1674 bool is_hot; 1675 const struct gcov_ctr_summary *saved_profile_info = profile_info; 1676 /* At early inline stage, profile_info is not set yet. We need to 1677 temporarily set it to afdo_profile_info to calculate hotness. */ 1678 profile_info = autofdo::afdo_profile_info; 1679 is_hot = maybe_hot_count_p (NULL, count); 1680 profile_info = saved_profile_info; 1681 return is_hot; 1682 } 1683 1684 return false; 1685 } 1686 1687 namespace 1688 { 1689 1690 const pass_data pass_data_ipa_auto_profile = { 1691 SIMPLE_IPA_PASS, "afdo", /* name */ 1692 OPTGROUP_NONE, /* optinfo_flags */ 1693 TV_IPA_AUTOFDO, /* tv_id */ 1694 0, /* properties_required */ 1695 0, /* properties_provided */ 1696 0, /* properties_destroyed */ 1697 0, /* todo_flags_start */ 1698 0, /* todo_flags_finish */ 1699 }; 1700 1701 class pass_ipa_auto_profile : public simple_ipa_opt_pass 1702 { 1703 public: 1704 pass_ipa_auto_profile (gcc::context *ctxt) 1705 : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt) 1706 { 1707 } 1708 1709 /* opt_pass methods: */ 1710 virtual bool 1711 gate (function *) 1712 { 1713 return flag_auto_profile; 1714 } 1715 virtual unsigned int 1716 execute (function *) 1717 { 1718 return autofdo::auto_profile (); 1719 } 1720 }; // class pass_ipa_auto_profile 1721 1722 } // anon namespace 1723 1724 simple_ipa_opt_pass * 1725 make_pass_ipa_auto_profile (gcc::context *ctxt) 1726 { 1727 return new pass_ipa_auto_profile (ctxt); 1728 } 1729