1 /* Read and annotate call graph profile from the auto profile data file. 2 Copyright (C) 2014-2017 Free Software Foundation, Inc. 3 Contributed by Dehao Chen (dehao@google.com) 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it under 8 the terms of the GNU General Public License as published by the Free 9 Software Foundation; either version 3, or (at your option) any later 10 version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #include "config.h" 22 #define INCLUDE_MAP 23 #define INCLUDE_SET 24 #include "system.h" 25 #include "coretypes.h" 26 #include "backend.h" 27 #include "tree.h" 28 #include "gimple.h" 29 #include "predict.h" 30 #include "alloc-pool.h" 31 #include "tree-pass.h" 32 #include "ssa.h" 33 #include "cgraph.h" 34 #include "gcov-io.h" 35 #include "diagnostic-core.h" 36 #include "profile.h" 37 #include "langhooks.h" 38 #include "cfgloop.h" 39 #include "tree-cfg.h" 40 #include "tree-cfgcleanup.h" 41 #include "tree-into-ssa.h" 42 #include "gimple-iterator.h" 43 #include "value-prof.h" 44 #include "params.h" 45 #include "symbol-summary.h" 46 #include "ipa-prop.h" 47 #include "ipa-inline.h" 48 #include "tree-inline.h" 49 #include "auto-profile.h" 50 #include "tree-pretty-print.h" 51 #include "gimple-pretty-print.h" 52 53 /* The following routines implements AutoFDO optimization. 54 55 This optimization uses sampling profiles to annotate basic block counts 56 and uses heuristics to estimate branch probabilities. 57 58 There are three phases in AutoFDO: 59 60 Phase 1: Read profile from the profile data file. 61 The following info is read from the profile datafile: 62 * string_table: a map between function name and its index. 63 * autofdo_source_profile: a map from function_instance name to 64 function_instance. This is represented as a forest of 65 function_instances. 66 * WorkingSet: a histogram of how many instructions are covered for a 67 given percentage of total cycles. This is describing the binary 68 level information (not source level). This info is used to help 69 decide if we want aggressive optimizations that could increase 70 code footprint (e.g. loop unroll etc.) 71 A function instance is an instance of function that could either be a 72 standalone symbol, or a clone of a function that is inlined into another 73 function. 74 75 Phase 2: Early inline + value profile transformation. 76 Early inline uses autofdo_source_profile to find if a callsite is: 77 * inlined in the profiled binary. 78 * callee body is hot in the profiling run. 79 If both condition satisfies, early inline will inline the callsite 80 regardless of the code growth. 81 Phase 2 is an iterative process. During each iteration, we also check 82 if an indirect callsite is promoted and inlined in the profiling run. 83 If yes, vpt will happen to force promote it and in the next iteration, 84 einline will inline the promoted callsite in the next iteration. 85 86 Phase 3: Annotate control flow graph. 87 AutoFDO uses a separate pass to: 88 * Annotate basic block count 89 * Estimate branch probability 90 91 After the above 3 phases, all profile is readily annotated on the GCC IR. 92 AutoFDO tries to reuse all FDO infrastructure as much as possible to make 93 use of the profile. E.g. it uses existing mechanism to calculate the basic 94 block/edge frequency, as well as the cgraph node/edge count. 95 */ 96 97 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo" 98 #define AUTO_PROFILE_VERSION 1 99 100 namespace autofdo 101 { 102 103 /* Represent a source location: (function_decl, lineno). */ 104 typedef std::pair<tree, unsigned> decl_lineno; 105 106 /* Represent an inline stack. vector[0] is the leaf node. */ 107 typedef auto_vec<decl_lineno> inline_stack; 108 109 /* String array that stores function names. */ 110 typedef auto_vec<char *> string_vector; 111 112 /* Map from function name's index in string_table to target's 113 execution count. */ 114 typedef std::map<unsigned, gcov_type> icall_target_map; 115 116 /* Set of gimple stmts. Used to track if the stmt has already been promoted 117 to direct call. */ 118 typedef std::set<gimple *> stmt_set; 119 120 /* Represent count info of an inline stack. */ 121 struct count_info 122 { 123 /* Sampled count of the inline stack. */ 124 gcov_type count; 125 126 /* Map from indirect call target to its sample count. */ 127 icall_target_map targets; 128 129 /* Whether this inline stack is already used in annotation. 130 131 Each inline stack should only be used to annotate IR once. 132 This will be enforced when instruction-level discriminator 133 is supported. */ 134 bool annotated; 135 }; 136 137 /* operator< for "const char *". */ 138 struct string_compare 139 { 140 bool operator()(const char *a, const char *b) const 141 { 142 return strcmp (a, b) < 0; 143 } 144 }; 145 146 /* Store a string array, indexed by string position in the array. */ 147 class string_table 148 { 149 public: 150 string_table () 151 {} 152 153 ~string_table (); 154 155 /* For a given string, returns its index. */ 156 int get_index (const char *name) const; 157 158 /* For a given decl, returns the index of the decl name. */ 159 int get_index_by_decl (tree decl) const; 160 161 /* For a given index, returns the string. */ 162 const char *get_name (int index) const; 163 164 /* Read profile, return TRUE on success. */ 165 bool read (); 166 167 private: 168 typedef std::map<const char *, unsigned, string_compare> string_index_map; 169 string_vector vector_; 170 string_index_map map_; 171 }; 172 173 /* Profile of a function instance: 174 1. total_count of the function. 175 2. head_count (entry basic block count) of the function (only valid when 176 function is a top-level function_instance, i.e. it is the original copy 177 instead of the inlined copy). 178 3. map from source location (decl_lineno) to profile (count_info). 179 4. map from callsite to callee function_instance. */ 180 class function_instance 181 { 182 public: 183 typedef auto_vec<function_instance *> function_instance_stack; 184 185 /* Read the profile and return a function_instance with head count as 186 HEAD_COUNT. Recursively read callsites to create nested function_instances 187 too. STACK is used to track the recursive creation process. */ 188 static function_instance * 189 read_function_instance (function_instance_stack *stack, 190 gcov_type head_count); 191 192 /* Recursively deallocate all callsites (nested function_instances). */ 193 ~function_instance (); 194 195 /* Accessors. */ 196 int 197 name () const 198 { 199 return name_; 200 } 201 gcov_type 202 total_count () const 203 { 204 return total_count_; 205 } 206 gcov_type 207 head_count () const 208 { 209 return head_count_; 210 } 211 212 /* Traverse callsites of the current function_instance to find one at the 213 location of LINENO and callee name represented in DECL. */ 214 function_instance *get_function_instance_by_decl (unsigned lineno, 215 tree decl) const; 216 217 /* Store the profile info for LOC in INFO. Return TRUE if profile info 218 is found. */ 219 bool get_count_info (location_t loc, count_info *info) const; 220 221 /* Read the inlined indirect call target profile for STMT and store it in 222 MAP, return the total count for all inlined indirect calls. */ 223 gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const; 224 225 /* Sum of counts that is used during annotation. */ 226 gcov_type total_annotated_count () const; 227 228 /* Mark LOC as annotated. */ 229 void mark_annotated (location_t loc); 230 231 private: 232 /* Callsite, represented as (decl_lineno, callee_function_name_index). */ 233 typedef std::pair<unsigned, unsigned> callsite; 234 235 /* Map from callsite to callee function_instance. */ 236 typedef std::map<callsite, function_instance *> callsite_map; 237 238 function_instance (unsigned name, gcov_type head_count) 239 : name_ (name), total_count_ (0), head_count_ (head_count) 240 { 241 } 242 243 /* Map from source location (decl_lineno) to profile (count_info). */ 244 typedef std::map<unsigned, count_info> position_count_map; 245 246 /* function_instance name index in the string_table. */ 247 unsigned name_; 248 249 /* Total sample count. */ 250 gcov_type total_count_; 251 252 /* Entry BB's sample count. */ 253 gcov_type head_count_; 254 255 /* Map from callsite location to callee function_instance. */ 256 callsite_map callsites; 257 258 /* Map from source location to count_info. */ 259 position_count_map pos_counts; 260 }; 261 262 /* Profile for all functions. */ 263 class autofdo_source_profile 264 { 265 public: 266 static autofdo_source_profile * 267 create () 268 { 269 autofdo_source_profile *map = new autofdo_source_profile (); 270 271 if (map->read ()) 272 return map; 273 delete map; 274 return NULL; 275 } 276 277 ~autofdo_source_profile (); 278 279 /* For a given DECL, returns the top-level function_instance. */ 280 function_instance *get_function_instance_by_decl (tree decl) const; 281 282 /* Find count_info for a given gimple STMT. If found, store the count_info 283 in INFO and return true; otherwise return false. */ 284 bool get_count_info (gimple *stmt, count_info *info) const; 285 286 /* Find total count of the callee of EDGE. */ 287 gcov_type get_callsite_total_count (struct cgraph_edge *edge) const; 288 289 /* Update value profile INFO for STMT from the inlined indirect callsite. 290 Return true if INFO is updated. */ 291 bool update_inlined_ind_target (gcall *stmt, count_info *info); 292 293 /* Mark LOC as annotated. */ 294 void mark_annotated (location_t loc); 295 296 private: 297 /* Map from function_instance name index (in string_table) to 298 function_instance. */ 299 typedef std::map<unsigned, function_instance *> name_function_instance_map; 300 301 autofdo_source_profile () {} 302 303 /* Read AutoFDO profile and returns TRUE on success. */ 304 bool read (); 305 306 /* Return the function_instance in the profile that correspond to the 307 inline STACK. */ 308 function_instance * 309 get_function_instance_by_inline_stack (const inline_stack &stack) const; 310 311 name_function_instance_map map_; 312 }; 313 314 /* Store the strings read from the profile data file. */ 315 static string_table *afdo_string_table; 316 317 /* Store the AutoFDO source profile. */ 318 static autofdo_source_profile *afdo_source_profile; 319 320 /* gcov_ctr_summary structure to store the profile_info. */ 321 static struct gcov_ctr_summary *afdo_profile_info; 322 323 /* Helper functions. */ 324 325 /* Return the original name of NAME: strip the suffix that starts 326 with '.' Caller is responsible for freeing RET. */ 327 328 static char * 329 get_original_name (const char *name) 330 { 331 char *ret = xstrdup (name); 332 char *find = strchr (ret, '.'); 333 if (find != NULL) 334 *find = 0; 335 return ret; 336 } 337 338 /* Return the combined location, which is a 32bit integer in which 339 higher 16 bits stores the line offset of LOC to the start lineno 340 of DECL, The lower 16 bits stores the discriminator. */ 341 342 static unsigned 343 get_combined_location (location_t loc, tree decl) 344 { 345 /* TODO: allow more bits for line and less bits for discriminator. */ 346 if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16)) 347 warning_at (loc, OPT_Woverflow, "offset exceeds 16 bytes"); 348 return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16); 349 } 350 351 /* Return the function decl of a given lexical BLOCK. */ 352 353 static tree 354 get_function_decl_from_block (tree block) 355 { 356 tree decl; 357 358 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) == UNKNOWN_LOCATION) 359 return NULL_TREE; 360 361 for (decl = BLOCK_ABSTRACT_ORIGIN (block); 362 decl && (TREE_CODE (decl) == BLOCK); 363 decl = BLOCK_ABSTRACT_ORIGIN (decl)) 364 if (TREE_CODE (decl) == FUNCTION_DECL) 365 break; 366 return decl; 367 } 368 369 /* Store inline stack for STMT in STACK. */ 370 371 static void 372 get_inline_stack (location_t locus, inline_stack *stack) 373 { 374 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION) 375 return; 376 377 tree block = LOCATION_BLOCK (locus); 378 if (block && TREE_CODE (block) == BLOCK) 379 { 380 int level = 0; 381 for (block = BLOCK_SUPERCONTEXT (block); 382 block && (TREE_CODE (block) == BLOCK); 383 block = BLOCK_SUPERCONTEXT (block)) 384 { 385 location_t tmp_locus = BLOCK_SOURCE_LOCATION (block); 386 if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION) 387 continue; 388 389 tree decl = get_function_decl_from_block (block); 390 stack->safe_push ( 391 std::make_pair (decl, get_combined_location (locus, decl))); 392 locus = tmp_locus; 393 level++; 394 } 395 } 396 stack->safe_push ( 397 std::make_pair (current_function_decl, 398 get_combined_location (locus, current_function_decl))); 399 } 400 401 /* Return STMT's combined location, which is a 32bit integer in which 402 higher 16 bits stores the line offset of LOC to the start lineno 403 of DECL, The lower 16 bits stores the discriminator. */ 404 405 static unsigned 406 get_relative_location_for_stmt (gimple *stmt) 407 { 408 location_t locus = gimple_location (stmt); 409 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION) 410 return UNKNOWN_LOCATION; 411 412 for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK); 413 block = BLOCK_SUPERCONTEXT (block)) 414 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION) 415 return get_combined_location (locus, 416 get_function_decl_from_block (block)); 417 return get_combined_location (locus, current_function_decl); 418 } 419 420 /* Return true if BB contains indirect call. */ 421 422 static bool 423 has_indirect_call (basic_block bb) 424 { 425 gimple_stmt_iterator gsi; 426 427 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 428 { 429 gimple *stmt = gsi_stmt (gsi); 430 if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt) 431 && (gimple_call_fn (stmt) == NULL 432 || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL)) 433 return true; 434 } 435 return false; 436 } 437 438 /* Member functions for string_table. */ 439 440 /* Deconstructor. */ 441 442 string_table::~string_table () 443 { 444 for (unsigned i = 0; i < vector_.length (); i++) 445 free (vector_[i]); 446 } 447 448 449 /* Return the index of a given function NAME. Return -1 if NAME is not 450 found in string table. */ 451 452 int 453 string_table::get_index (const char *name) const 454 { 455 if (name == NULL) 456 return -1; 457 string_index_map::const_iterator iter = map_.find (name); 458 if (iter == map_.end ()) 459 return -1; 460 461 return iter->second; 462 } 463 464 /* Return the index of a given function DECL. Return -1 if DECL is not 465 found in string table. */ 466 467 int 468 string_table::get_index_by_decl (tree decl) const 469 { 470 char *name 471 = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))); 472 int ret = get_index (name); 473 free (name); 474 if (ret != -1) 475 return ret; 476 ret = get_index (lang_hooks.dwarf_name (decl, 0)); 477 if (ret != -1) 478 return ret; 479 if (DECL_ABSTRACT_ORIGIN (decl)) 480 return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl)); 481 482 return -1; 483 } 484 485 /* Return the function name of a given INDEX. */ 486 487 const char * 488 string_table::get_name (int index) const 489 { 490 gcc_assert (index > 0 && index < (int)vector_.length ()); 491 return vector_[index]; 492 } 493 494 /* Read the string table. Return TRUE if reading is successful. */ 495 496 bool 497 string_table::read () 498 { 499 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES) 500 return false; 501 /* Skip the length of the section. */ 502 gcov_read_unsigned (); 503 /* Read in the file name table. */ 504 unsigned string_num = gcov_read_unsigned (); 505 for (unsigned i = 0; i < string_num; i++) 506 { 507 vector_.safe_push (get_original_name (gcov_read_string ())); 508 map_[vector_.last ()] = i; 509 } 510 return true; 511 } 512 513 /* Member functions for function_instance. */ 514 515 function_instance::~function_instance () 516 { 517 for (callsite_map::iterator iter = callsites.begin (); 518 iter != callsites.end (); ++iter) 519 delete iter->second; 520 } 521 522 /* Traverse callsites of the current function_instance to find one at the 523 location of LINENO and callee name represented in DECL. */ 524 525 function_instance * 526 function_instance::get_function_instance_by_decl (unsigned lineno, 527 tree decl) const 528 { 529 int func_name_idx = afdo_string_table->get_index_by_decl (decl); 530 if (func_name_idx != -1) 531 { 532 callsite_map::const_iterator ret 533 = callsites.find (std::make_pair (lineno, func_name_idx)); 534 if (ret != callsites.end ()) 535 return ret->second; 536 } 537 func_name_idx 538 = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0)); 539 if (func_name_idx != -1) 540 { 541 callsite_map::const_iterator ret 542 = callsites.find (std::make_pair (lineno, func_name_idx)); 543 if (ret != callsites.end ()) 544 return ret->second; 545 } 546 if (DECL_ABSTRACT_ORIGIN (decl)) 547 return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl)); 548 549 return NULL; 550 } 551 552 /* Store the profile info for LOC in INFO. Return TRUE if profile info 553 is found. */ 554 555 bool 556 function_instance::get_count_info (location_t loc, count_info *info) const 557 { 558 position_count_map::const_iterator iter = pos_counts.find (loc); 559 if (iter == pos_counts.end ()) 560 return false; 561 *info = iter->second; 562 return true; 563 } 564 565 /* Mark LOC as annotated. */ 566 567 void 568 function_instance::mark_annotated (location_t loc) 569 { 570 position_count_map::iterator iter = pos_counts.find (loc); 571 if (iter == pos_counts.end ()) 572 return; 573 iter->second.annotated = true; 574 } 575 576 /* Read the inlined indirect call target profile for STMT and store it in 577 MAP, return the total count for all inlined indirect calls. */ 578 579 gcov_type 580 function_instance::find_icall_target_map (gcall *stmt, 581 icall_target_map *map) const 582 { 583 gcov_type ret = 0; 584 unsigned stmt_offset = get_relative_location_for_stmt (stmt); 585 586 for (callsite_map::const_iterator iter = callsites.begin (); 587 iter != callsites.end (); ++iter) 588 { 589 unsigned callee = iter->second->name (); 590 /* Check if callsite location match the stmt. */ 591 if (iter->first.first != stmt_offset) 592 continue; 593 struct cgraph_node *node = cgraph_node::get_for_asmname ( 594 get_identifier (afdo_string_table->get_name (callee))); 595 if (node == NULL) 596 continue; 597 if (!check_ic_target (stmt, node)) 598 continue; 599 (*map)[callee] = iter->second->total_count (); 600 ret += iter->second->total_count (); 601 } 602 return ret; 603 } 604 605 /* Read the profile and create a function_instance with head count as 606 HEAD_COUNT. Recursively read callsites to create nested function_instances 607 too. STACK is used to track the recursive creation process. */ 608 609 /* function instance profile format: 610 611 ENTRY_COUNT: 8 bytes 612 NAME_INDEX: 4 bytes 613 NUM_POS_COUNTS: 4 bytes 614 NUM_CALLSITES: 4 byte 615 POS_COUNT_1: 616 POS_1_OFFSET: 4 bytes 617 NUM_TARGETS: 4 bytes 618 COUNT: 8 bytes 619 TARGET_1: 620 VALUE_PROFILE_TYPE: 4 bytes 621 TARGET_IDX: 8 bytes 622 COUNT: 8 bytes 623 TARGET_2 624 ... 625 TARGET_n 626 POS_COUNT_2 627 ... 628 POS_COUNT_N 629 CALLSITE_1: 630 CALLSITE_1_OFFSET: 4 bytes 631 FUNCTION_INSTANCE_PROFILE (nested) 632 CALLSITE_2 633 ... 634 CALLSITE_n. */ 635 636 function_instance * 637 function_instance::read_function_instance (function_instance_stack *stack, 638 gcov_type head_count) 639 { 640 unsigned name = gcov_read_unsigned (); 641 unsigned num_pos_counts = gcov_read_unsigned (); 642 unsigned num_callsites = gcov_read_unsigned (); 643 function_instance *s = new function_instance (name, head_count); 644 stack->safe_push (s); 645 646 for (unsigned i = 0; i < num_pos_counts; i++) 647 { 648 unsigned offset = gcov_read_unsigned () & 0xffff0000; 649 unsigned num_targets = gcov_read_unsigned (); 650 gcov_type count = gcov_read_counter (); 651 s->pos_counts[offset].count = count; 652 for (unsigned j = 0; j < stack->length (); j++) 653 (*stack)[j]->total_count_ += count; 654 for (unsigned j = 0; j < num_targets; j++) 655 { 656 /* Only indirect call target histogram is supported now. */ 657 gcov_read_unsigned (); 658 gcov_type target_idx = gcov_read_counter (); 659 s->pos_counts[offset].targets[target_idx] = gcov_read_counter (); 660 } 661 } 662 for (unsigned i = 0; i < num_callsites; i++) 663 { 664 unsigned offset = gcov_read_unsigned (); 665 function_instance *callee_function_instance 666 = read_function_instance (stack, 0); 667 s->callsites[std::make_pair (offset, callee_function_instance->name ())] 668 = callee_function_instance; 669 } 670 stack->pop (); 671 return s; 672 } 673 674 /* Sum of counts that is used during annotation. */ 675 676 gcov_type 677 function_instance::total_annotated_count () const 678 { 679 gcov_type ret = 0; 680 for (callsite_map::const_iterator iter = callsites.begin (); 681 iter != callsites.end (); ++iter) 682 ret += iter->second->total_annotated_count (); 683 for (position_count_map::const_iterator iter = pos_counts.begin (); 684 iter != pos_counts.end (); ++iter) 685 if (iter->second.annotated) 686 ret += iter->second.count; 687 return ret; 688 } 689 690 /* Member functions for autofdo_source_profile. */ 691 692 autofdo_source_profile::~autofdo_source_profile () 693 { 694 for (name_function_instance_map::const_iterator iter = map_.begin (); 695 iter != map_.end (); ++iter) 696 delete iter->second; 697 } 698 699 /* For a given DECL, returns the top-level function_instance. */ 700 701 function_instance * 702 autofdo_source_profile::get_function_instance_by_decl (tree decl) const 703 { 704 int index = afdo_string_table->get_index_by_decl (decl); 705 if (index == -1) 706 return NULL; 707 name_function_instance_map::const_iterator ret = map_.find (index); 708 return ret == map_.end () ? NULL : ret->second; 709 } 710 711 /* Find count_info for a given gimple STMT. If found, store the count_info 712 in INFO and return true; otherwise return false. */ 713 714 bool 715 autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const 716 { 717 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus) 718 return false; 719 720 inline_stack stack; 721 get_inline_stack (gimple_location (stmt), &stack); 722 if (stack.length () == 0) 723 return false; 724 function_instance *s = get_function_instance_by_inline_stack (stack); 725 if (s == NULL) 726 return false; 727 return s->get_count_info (stack[0].second, info); 728 } 729 730 /* Mark LOC as annotated. */ 731 732 void 733 autofdo_source_profile::mark_annotated (location_t loc) 734 { 735 inline_stack stack; 736 get_inline_stack (loc, &stack); 737 if (stack.length () == 0) 738 return; 739 function_instance *s = get_function_instance_by_inline_stack (stack); 740 if (s == NULL) 741 return; 742 s->mark_annotated (stack[0].second); 743 } 744 745 /* Update value profile INFO for STMT from the inlined indirect callsite. 746 Return true if INFO is updated. */ 747 748 bool 749 autofdo_source_profile::update_inlined_ind_target (gcall *stmt, 750 count_info *info) 751 { 752 if (dump_file) 753 { 754 fprintf (dump_file, "Checking indirect call -> direct call "); 755 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); 756 } 757 758 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus) 759 { 760 if (dump_file) 761 fprintf (dump_file, " good locus\n"); 762 return false; 763 } 764 765 count_info old_info; 766 get_count_info (stmt, &old_info); 767 gcov_type total = 0; 768 for (icall_target_map::const_iterator iter = old_info.targets.begin (); 769 iter != old_info.targets.end (); ++iter) 770 total += iter->second; 771 772 /* Program behavior changed, original promoted (and inlined) target is not 773 hot any more. Will avoid promote the original target. 774 775 To check if original promoted target is still hot, we check the total 776 count of the unpromoted targets (stored in old_info). If it is no less 777 than half of the callsite count (stored in INFO), the original promoted 778 target is considered not hot any more. */ 779 if (total >= info->count / 2) 780 { 781 if (dump_file) 782 fprintf (dump_file, " not hot anymore %ld >= %ld", 783 (long)total, 784 (long)info->count /2); 785 return false; 786 } 787 788 inline_stack stack; 789 get_inline_stack (gimple_location (stmt), &stack); 790 if (stack.length () == 0) 791 { 792 if (dump_file) 793 fprintf (dump_file, " no inline stack\n"); 794 return false; 795 } 796 function_instance *s = get_function_instance_by_inline_stack (stack); 797 if (s == NULL) 798 { 799 if (dump_file) 800 fprintf (dump_file, " function not found in inline stack\n"); 801 return false; 802 } 803 icall_target_map map; 804 if (s->find_icall_target_map (stmt, &map) == 0) 805 { 806 if (dump_file) 807 fprintf (dump_file, " no target map\n"); 808 return false; 809 } 810 for (icall_target_map::const_iterator iter = map.begin (); 811 iter != map.end (); ++iter) 812 info->targets[iter->first] = iter->second; 813 if (dump_file) 814 fprintf (dump_file, " looks good\n"); 815 return true; 816 } 817 818 /* Find total count of the callee of EDGE. */ 819 820 gcov_type 821 autofdo_source_profile::get_callsite_total_count ( 822 struct cgraph_edge *edge) const 823 { 824 inline_stack stack; 825 stack.safe_push (std::make_pair (edge->callee->decl, 0)); 826 get_inline_stack (gimple_location (edge->call_stmt), &stack); 827 828 function_instance *s = get_function_instance_by_inline_stack (stack); 829 if (s == NULL 830 || afdo_string_table->get_index (IDENTIFIER_POINTER ( 831 DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ()) 832 return 0; 833 834 return s->total_count (); 835 } 836 837 /* Read AutoFDO profile and returns TRUE on success. */ 838 839 /* source profile format: 840 841 GCOV_TAG_AFDO_FUNCTION: 4 bytes 842 LENGTH: 4 bytes 843 NUM_FUNCTIONS: 4 bytes 844 FUNCTION_INSTANCE_1 845 FUNCTION_INSTANCE_2 846 ... 847 FUNCTION_INSTANCE_N. */ 848 849 bool 850 autofdo_source_profile::read () 851 { 852 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION) 853 { 854 inform (0, "Not expected TAG."); 855 return false; 856 } 857 858 /* Skip the length of the section. */ 859 gcov_read_unsigned (); 860 861 /* Read in the function/callsite profile, and store it in local 862 data structure. */ 863 unsigned function_num = gcov_read_unsigned (); 864 for (unsigned i = 0; i < function_num; i++) 865 { 866 function_instance::function_instance_stack stack; 867 function_instance *s = function_instance::read_function_instance ( 868 &stack, gcov_read_counter ()); 869 afdo_profile_info->sum_all += s->total_count (); 870 map_[s->name ()] = s; 871 } 872 return true; 873 } 874 875 /* Return the function_instance in the profile that correspond to the 876 inline STACK. */ 877 878 function_instance * 879 autofdo_source_profile::get_function_instance_by_inline_stack ( 880 const inline_stack &stack) const 881 { 882 name_function_instance_map::const_iterator iter = map_.find ( 883 afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first)); 884 if (iter == map_.end()) 885 return NULL; 886 function_instance *s = iter->second; 887 for (unsigned i = stack.length() - 1; i > 0; i--) 888 { 889 s = s->get_function_instance_by_decl ( 890 stack[i].second, stack[i - 1].first); 891 if (s == NULL) 892 return NULL; 893 } 894 return s; 895 } 896 897 /* Module profile is only used by LIPO. Here we simply ignore it. */ 898 899 static void 900 fake_read_autofdo_module_profile () 901 { 902 /* Read in the module info. */ 903 gcov_read_unsigned (); 904 905 /* Skip the length of the section. */ 906 gcov_read_unsigned (); 907 908 /* Read in the file name table. */ 909 unsigned total_module_num = gcov_read_unsigned (); 910 gcc_assert (total_module_num == 0); 911 } 912 913 /* Read data from profile data file. */ 914 915 static void 916 read_profile (void) 917 { 918 if (gcov_open (auto_profile_file, 1) == 0) 919 { 920 error ("cannot open profile file %s", auto_profile_file); 921 return; 922 } 923 924 if (gcov_read_unsigned () != GCOV_DATA_MAGIC) 925 { 926 error ("AutoFDO profile magic number does not match"); 927 return; 928 } 929 930 /* Skip the version number. */ 931 unsigned version = gcov_read_unsigned (); 932 if (version != AUTO_PROFILE_VERSION) 933 { 934 error ("AutoFDO profile version %u does match %u", 935 version, AUTO_PROFILE_VERSION); 936 return; 937 } 938 939 /* Skip the empty integer. */ 940 gcov_read_unsigned (); 941 942 /* string_table. */ 943 afdo_string_table = new string_table (); 944 if (!afdo_string_table->read()) 945 { 946 error ("cannot read string table from %s", auto_profile_file); 947 return; 948 } 949 950 /* autofdo_source_profile. */ 951 afdo_source_profile = autofdo_source_profile::create (); 952 if (afdo_source_profile == NULL) 953 { 954 error ("cannot read function profile from %s", auto_profile_file); 955 return; 956 } 957 958 /* autofdo_module_profile. */ 959 fake_read_autofdo_module_profile (); 960 961 /* Read in the working set. */ 962 if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET) 963 { 964 error ("cannot read working set from %s", auto_profile_file); 965 return; 966 } 967 968 /* Skip the length of the section. */ 969 gcov_read_unsigned (); 970 gcov_working_set_t set[128]; 971 for (unsigned i = 0; i < 128; i++) 972 { 973 set[i].num_counters = gcov_read_unsigned (); 974 set[i].min_counter = gcov_read_counter (); 975 } 976 add_working_set (set); 977 } 978 979 /* From AutoFDO profiles, find values inside STMT for that we want to measure 980 histograms for indirect-call optimization. 981 982 This function is actually served for 2 purposes: 983 * before annotation, we need to mark histogram, promote and inline 984 * after annotation, we just need to mark, and let follow-up logic to 985 decide if it needs to promote and inline. */ 986 987 static void 988 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map, 989 bool transform) 990 { 991 gimple *gs = gsi_stmt (*gsi); 992 tree callee; 993 994 if (map.size () == 0) 995 return; 996 gcall *stmt = dyn_cast <gcall *> (gs); 997 if ((!stmt) || gimple_call_fndecl (stmt) != NULL_TREE) 998 return; 999 1000 callee = gimple_call_fn (stmt); 1001 1002 histogram_value hist = gimple_alloc_histogram_value ( 1003 cfun, HIST_TYPE_INDIR_CALL, stmt, callee); 1004 hist->n_counters = 3; 1005 hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters); 1006 gimple_add_histogram_value (cfun, stmt, hist); 1007 1008 gcov_type total = 0; 1009 icall_target_map::const_iterator max_iter = map.end (); 1010 1011 for (icall_target_map::const_iterator iter = map.begin (); 1012 iter != map.end (); ++iter) 1013 { 1014 total += iter->second; 1015 if (max_iter == map.end () || max_iter->second < iter->second) 1016 max_iter = iter; 1017 } 1018 1019 hist->hvalue.counters[0] 1020 = (unsigned long long)afdo_string_table->get_name (max_iter->first); 1021 hist->hvalue.counters[1] = max_iter->second; 1022 hist->hvalue.counters[2] = total; 1023 1024 if (!transform) 1025 return; 1026 1027 struct cgraph_edge *indirect_edge 1028 = cgraph_node::get (current_function_decl)->get_edge (stmt); 1029 struct cgraph_node *direct_call = cgraph_node::get_for_asmname ( 1030 get_identifier ((const char *) hist->hvalue.counters[0])); 1031 1032 if (dump_file) 1033 { 1034 fprintf (dump_file, "Indirect call -> direct call "); 1035 print_generic_expr (dump_file, callee, TDF_SLIM); 1036 fprintf (dump_file, " => "); 1037 print_generic_expr (dump_file, direct_call->decl, TDF_SLIM); 1038 } 1039 1040 if (direct_call == NULL || !check_ic_target (stmt, direct_call)) 1041 { 1042 if (dump_file) 1043 fprintf (dump_file, " not transforming\n"); 1044 return; 1045 } 1046 if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL) 1047 { 1048 if (dump_file) 1049 fprintf (dump_file, " no declaration\n"); 1050 return; 1051 } 1052 1053 if (dump_file) 1054 { 1055 fprintf (dump_file, " transformation on insn "); 1056 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); 1057 fprintf (dump_file, "\n"); 1058 } 1059 1060 struct cgraph_edge *new_edge 1061 = indirect_edge->make_speculative (direct_call, 0, 0); 1062 new_edge->redirect_call_stmt_to_callee (); 1063 gimple_remove_histogram_value (cfun, stmt, hist); 1064 inline_call (new_edge, true, NULL, NULL, false); 1065 } 1066 1067 /* From AutoFDO profiles, find values inside STMT for that we want to measure 1068 histograms and adds them to list VALUES. */ 1069 1070 static void 1071 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map, 1072 bool transform) 1073 { 1074 afdo_indirect_call (gsi, map, transform); 1075 } 1076 1077 typedef std::set<basic_block> bb_set; 1078 typedef std::set<edge> edge_set; 1079 1080 static bool 1081 is_bb_annotated (const basic_block bb, const bb_set &annotated) 1082 { 1083 return annotated.find (bb) != annotated.end (); 1084 } 1085 1086 static void 1087 set_bb_annotated (basic_block bb, bb_set *annotated) 1088 { 1089 annotated->insert (bb); 1090 } 1091 1092 static bool 1093 is_edge_annotated (const edge e, const edge_set &annotated) 1094 { 1095 return annotated.find (e) != annotated.end (); 1096 } 1097 1098 static void 1099 set_edge_annotated (edge e, edge_set *annotated) 1100 { 1101 annotated->insert (e); 1102 } 1103 1104 /* For a given BB, set its execution count. Attach value profile if a stmt 1105 is not in PROMOTED, because we only want to promote an indirect call once. 1106 Return TRUE if BB is annotated. */ 1107 1108 static bool 1109 afdo_set_bb_count (basic_block bb, const stmt_set &promoted) 1110 { 1111 gimple_stmt_iterator gsi; 1112 edge e; 1113 edge_iterator ei; 1114 gcov_type max_count = 0; 1115 bool has_annotated = false; 1116 1117 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1118 { 1119 count_info info; 1120 gimple *stmt = gsi_stmt (gsi); 1121 if (gimple_clobber_p (stmt) || is_gimple_debug (stmt)) 1122 continue; 1123 if (afdo_source_profile->get_count_info (stmt, &info)) 1124 { 1125 if (info.count > max_count) 1126 max_count = info.count; 1127 has_annotated = true; 1128 if (info.targets.size () > 0 1129 && promoted.find (stmt) == promoted.end ()) 1130 afdo_vpt (&gsi, info.targets, false); 1131 } 1132 } 1133 1134 if (!has_annotated) 1135 return false; 1136 1137 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1138 afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi))); 1139 for (gphi_iterator gpi = gsi_start_phis (bb); 1140 !gsi_end_p (gpi); 1141 gsi_next (&gpi)) 1142 { 1143 gphi *phi = gpi.phi (); 1144 size_t i; 1145 for (i = 0; i < gimple_phi_num_args (phi); i++) 1146 afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i)); 1147 } 1148 FOR_EACH_EDGE (e, ei, bb->succs) 1149 afdo_source_profile->mark_annotated (e->goto_locus); 1150 1151 bb->count = max_count; 1152 return true; 1153 } 1154 1155 /* BB1 and BB2 are in an equivalent class iff: 1156 1. BB1 dominates BB2. 1157 2. BB2 post-dominates BB1. 1158 3. BB1 and BB2 are in the same loop nest. 1159 This function finds the equivalent class for each basic block, and 1160 stores a pointer to the first BB in its equivalent class. Meanwhile, 1161 set bb counts for the same equivalent class to be idenical. Update 1162 ANNOTATED_BB for the first BB in its equivalent class. */ 1163 1164 static void 1165 afdo_find_equiv_class (bb_set *annotated_bb) 1166 { 1167 basic_block bb; 1168 1169 FOR_ALL_BB_FN (bb, cfun) 1170 bb->aux = NULL; 1171 1172 FOR_ALL_BB_FN (bb, cfun) 1173 { 1174 vec<basic_block> dom_bbs; 1175 basic_block bb1; 1176 int i; 1177 1178 if (bb->aux != NULL) 1179 continue; 1180 bb->aux = bb; 1181 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb); 1182 FOR_EACH_VEC_ELT (dom_bbs, i, bb1) 1183 if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1) 1184 && bb1->loop_father == bb->loop_father) 1185 { 1186 bb1->aux = bb; 1187 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb)) 1188 { 1189 bb->count = bb1->count; 1190 set_bb_annotated (bb, annotated_bb); 1191 } 1192 } 1193 dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb); 1194 FOR_EACH_VEC_ELT (dom_bbs, i, bb1) 1195 if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1) 1196 && bb1->loop_father == bb->loop_father) 1197 { 1198 bb1->aux = bb; 1199 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb)) 1200 { 1201 bb->count = bb1->count; 1202 set_bb_annotated (bb, annotated_bb); 1203 } 1204 } 1205 } 1206 } 1207 1208 /* If a basic block's count is known, and only one of its in/out edges' count 1209 is unknown, its count can be calculated. Meanwhile, if all of the in/out 1210 edges' counts are known, then the basic block's unknown count can also be 1211 calculated. 1212 IS_SUCC is true if out edges of a basic blocks are examined. 1213 Update ANNOTATED_BB and ANNOTATED_EDGE accordingly. 1214 Return TRUE if any basic block/edge count is changed. */ 1215 1216 static bool 1217 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb, 1218 edge_set *annotated_edge) 1219 { 1220 basic_block bb; 1221 bool changed = false; 1222 1223 FOR_EACH_BB_FN (bb, cfun) 1224 { 1225 edge e, unknown_edge = NULL; 1226 edge_iterator ei; 1227 int num_unknown_edge = 0; 1228 gcov_type total_known_count = 0; 1229 1230 FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds) 1231 if (!is_edge_annotated (e, *annotated_edge)) 1232 num_unknown_edge++, unknown_edge = e; 1233 else 1234 total_known_count += e->count; 1235 1236 if (num_unknown_edge == 0) 1237 { 1238 if (total_known_count > bb->count) 1239 { 1240 bb->count = total_known_count; 1241 changed = true; 1242 } 1243 if (!is_bb_annotated (bb, *annotated_bb)) 1244 { 1245 set_bb_annotated (bb, annotated_bb); 1246 changed = true; 1247 } 1248 } 1249 else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb)) 1250 { 1251 if (bb->count >= total_known_count) 1252 unknown_edge->count = bb->count - total_known_count; 1253 else 1254 unknown_edge->count = 0; 1255 set_edge_annotated (unknown_edge, annotated_edge); 1256 changed = true; 1257 } 1258 } 1259 return changed; 1260 } 1261 1262 /* Special propagation for circuit expressions. Because GCC translates 1263 control flow into data flow for circuit expressions. E.g. 1264 BB1: 1265 if (a && b) 1266 BB2 1267 else 1268 BB3 1269 1270 will be translated into: 1271 1272 BB1: 1273 if (a) 1274 goto BB.t1 1275 else 1276 goto BB.t3 1277 BB.t1: 1278 if (b) 1279 goto BB.t2 1280 else 1281 goto BB.t3 1282 BB.t2: 1283 goto BB.t3 1284 BB.t3: 1285 tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2) 1286 if (tmp) 1287 goto BB2 1288 else 1289 goto BB3 1290 1291 In this case, we need to propagate through PHI to determine the edge 1292 count of BB1->BB.t1, BB.t1->BB.t2. 1293 Update ANNOTATED_EDGE accordingly. */ 1294 1295 static void 1296 afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge) 1297 { 1298 basic_block bb; 1299 FOR_ALL_BB_FN (bb, cfun) 1300 { 1301 gimple *def_stmt; 1302 tree cmp_rhs, cmp_lhs; 1303 gimple *cmp_stmt = last_stmt (bb); 1304 edge e; 1305 edge_iterator ei; 1306 1307 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND) 1308 continue; 1309 cmp_rhs = gimple_cond_rhs (cmp_stmt); 1310 cmp_lhs = gimple_cond_lhs (cmp_stmt); 1311 if (!TREE_CONSTANT (cmp_rhs) 1312 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs))) 1313 continue; 1314 if (TREE_CODE (cmp_lhs) != SSA_NAME) 1315 continue; 1316 if (!is_bb_annotated (bb, annotated_bb)) 1317 continue; 1318 def_stmt = SSA_NAME_DEF_STMT (cmp_lhs); 1319 while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN 1320 && gimple_assign_single_p (def_stmt) 1321 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME) 1322 def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)); 1323 if (!def_stmt) 1324 continue; 1325 gphi *phi_stmt = dyn_cast <gphi *> (def_stmt); 1326 if (!phi_stmt) 1327 continue; 1328 FOR_EACH_EDGE (e, ei, bb->succs) 1329 { 1330 unsigned i, total = 0; 1331 edge only_one; 1332 bool check_value_one = (((integer_onep (cmp_rhs)) 1333 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR)) 1334 ^ ((e->flags & EDGE_TRUE_VALUE) != 0)); 1335 if (!is_edge_annotated (e, *annotated_edge)) 1336 continue; 1337 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++) 1338 { 1339 tree val = gimple_phi_arg_def (phi_stmt, i); 1340 edge ep = gimple_phi_arg_edge (phi_stmt, i); 1341 1342 if (!TREE_CONSTANT (val) 1343 || !(integer_zerop (val) || integer_onep (val))) 1344 continue; 1345 if (check_value_one ^ integer_onep (val)) 1346 continue; 1347 total++; 1348 only_one = ep; 1349 if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge)) 1350 { 1351 ep->probability = 0; 1352 ep->count = 0; 1353 set_edge_annotated (ep, annotated_edge); 1354 } 1355 } 1356 if (total == 1 && !is_edge_annotated (only_one, *annotated_edge)) 1357 { 1358 only_one->probability = e->probability; 1359 only_one->count = e->count; 1360 set_edge_annotated (only_one, annotated_edge); 1361 } 1362 } 1363 } 1364 } 1365 1366 /* Propagate the basic block count and edge count on the control flow 1367 graph. We do the propagation iteratively until stablize. */ 1368 1369 static void 1370 afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge) 1371 { 1372 basic_block bb; 1373 bool changed = true; 1374 int i = 0; 1375 1376 FOR_ALL_BB_FN (bb, cfun) 1377 { 1378 bb->count = ((basic_block)bb->aux)->count; 1379 if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb)) 1380 set_bb_annotated (bb, annotated_bb); 1381 } 1382 1383 while (changed && i++ < 10) 1384 { 1385 changed = false; 1386 1387 if (afdo_propagate_edge (true, annotated_bb, annotated_edge)) 1388 changed = true; 1389 if (afdo_propagate_edge (false, annotated_bb, annotated_edge)) 1390 changed = true; 1391 afdo_propagate_circuit (*annotated_bb, annotated_edge); 1392 } 1393 } 1394 1395 /* Propagate counts on control flow graph and calculate branch 1396 probabilities. */ 1397 1398 static void 1399 afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge) 1400 { 1401 basic_block bb; 1402 bool has_sample = false; 1403 1404 FOR_EACH_BB_FN (bb, cfun) 1405 { 1406 if (bb->count > 0) 1407 { 1408 has_sample = true; 1409 break; 1410 } 1411 } 1412 1413 if (!has_sample) 1414 return; 1415 1416 calculate_dominance_info (CDI_POST_DOMINATORS); 1417 calculate_dominance_info (CDI_DOMINATORS); 1418 loop_optimizer_init (0); 1419 1420 afdo_find_equiv_class (annotated_bb); 1421 afdo_propagate (annotated_bb, annotated_edge); 1422 1423 FOR_EACH_BB_FN (bb, cfun) 1424 { 1425 edge e; 1426 edge_iterator ei; 1427 int num_unknown_succ = 0; 1428 gcov_type total_count = 0; 1429 1430 FOR_EACH_EDGE (e, ei, bb->succs) 1431 { 1432 if (!is_edge_annotated (e, *annotated_edge)) 1433 num_unknown_succ++; 1434 else 1435 total_count += e->count; 1436 } 1437 if (num_unknown_succ == 0 && total_count > 0) 1438 { 1439 FOR_EACH_EDGE (e, ei, bb->succs) 1440 e->probability = (double)e->count * REG_BR_PROB_BASE / total_count; 1441 } 1442 } 1443 FOR_ALL_BB_FN (bb, cfun) 1444 { 1445 edge e; 1446 edge_iterator ei; 1447 1448 FOR_EACH_EDGE (e, ei, bb->succs) 1449 e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE; 1450 bb->aux = NULL; 1451 } 1452 1453 loop_optimizer_finalize (); 1454 free_dominance_info (CDI_DOMINATORS); 1455 free_dominance_info (CDI_POST_DOMINATORS); 1456 } 1457 1458 /* Perform value profile transformation using AutoFDO profile. Add the 1459 promoted stmts to PROMOTED_STMTS. Return TRUE if there is any 1460 indirect call promoted. */ 1461 1462 static bool 1463 afdo_vpt_for_early_inline (stmt_set *promoted_stmts) 1464 { 1465 basic_block bb; 1466 if (afdo_source_profile->get_function_instance_by_decl ( 1467 current_function_decl) == NULL) 1468 return false; 1469 1470 compute_inline_parameters (cgraph_node::get (current_function_decl), true); 1471 1472 bool has_vpt = false; 1473 FOR_EACH_BB_FN (bb, cfun) 1474 { 1475 if (!has_indirect_call (bb)) 1476 continue; 1477 gimple_stmt_iterator gsi; 1478 1479 gcov_type bb_count = 0; 1480 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1481 { 1482 count_info info; 1483 gimple *stmt = gsi_stmt (gsi); 1484 if (afdo_source_profile->get_count_info (stmt, &info)) 1485 bb_count = MAX (bb_count, info.count); 1486 } 1487 1488 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1489 { 1490 gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi)); 1491 /* IC_promotion and early_inline_2 is done in multiple iterations. 1492 No need to promoted the stmt if its in promoted_stmts (means 1493 it is already been promoted in the previous iterations). */ 1494 if ((!stmt) || gimple_call_fn (stmt) == NULL 1495 || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL 1496 || promoted_stmts->find (stmt) != promoted_stmts->end ()) 1497 continue; 1498 1499 count_info info; 1500 afdo_source_profile->get_count_info (stmt, &info); 1501 info.count = bb_count; 1502 if (afdo_source_profile->update_inlined_ind_target (stmt, &info)) 1503 { 1504 /* Promote the indirect call and update the promoted_stmts. */ 1505 promoted_stmts->insert (stmt); 1506 afdo_vpt (&gsi, info.targets, true); 1507 has_vpt = true; 1508 } 1509 } 1510 } 1511 1512 if (has_vpt) 1513 { 1514 unsigned todo = optimize_inline_calls (current_function_decl); 1515 if (todo & TODO_update_ssa_any) 1516 update_ssa (TODO_update_ssa); 1517 return true; 1518 } 1519 1520 return false; 1521 } 1522 1523 /* Annotate auto profile to the control flow graph. Do not annotate value 1524 profile for stmts in PROMOTED_STMTS. */ 1525 1526 static void 1527 afdo_annotate_cfg (const stmt_set &promoted_stmts) 1528 { 1529 basic_block bb; 1530 bb_set annotated_bb; 1531 edge_set annotated_edge; 1532 const function_instance *s 1533 = afdo_source_profile->get_function_instance_by_decl ( 1534 current_function_decl); 1535 1536 if (s == NULL) 1537 return; 1538 cgraph_node::get (current_function_decl)->count = s->head_count (); 1539 ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count (); 1540 gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; 1541 1542 FOR_EACH_BB_FN (bb, cfun) 1543 { 1544 edge e; 1545 edge_iterator ei; 1546 1547 bb->count = 0; 1548 FOR_EACH_EDGE (e, ei, bb->succs) 1549 e->count = 0; 1550 1551 if (afdo_set_bb_count (bb, promoted_stmts)) 1552 set_bb_annotated (bb, &annotated_bb); 1553 if (bb->count > max_count) 1554 max_count = bb->count; 1555 } 1556 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count 1557 > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count) 1558 { 1559 ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count 1560 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; 1561 set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb); 1562 } 1563 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count 1564 > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count) 1565 { 1566 EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count 1567 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; 1568 set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb); 1569 } 1570 afdo_source_profile->mark_annotated ( 1571 DECL_SOURCE_LOCATION (current_function_decl)); 1572 afdo_source_profile->mark_annotated (cfun->function_start_locus); 1573 afdo_source_profile->mark_annotated (cfun->function_end_locus); 1574 if (max_count > 0) 1575 { 1576 afdo_calculate_branch_prob (&annotated_bb, &annotated_edge); 1577 counts_to_freqs (); 1578 profile_status_for_fn (cfun) = PROFILE_READ; 1579 } 1580 if (flag_value_profile_transformations) 1581 { 1582 gimple_value_profile_transformations (); 1583 free_dominance_info (CDI_DOMINATORS); 1584 free_dominance_info (CDI_POST_DOMINATORS); 1585 update_ssa (TODO_update_ssa); 1586 } 1587 } 1588 1589 /* Wrapper function to invoke early inliner. */ 1590 1591 static void 1592 early_inline () 1593 { 1594 compute_inline_parameters (cgraph_node::get (current_function_decl), true); 1595 unsigned todo = early_inliner (cfun); 1596 if (todo & TODO_update_ssa_any) 1597 update_ssa (TODO_update_ssa); 1598 } 1599 1600 /* Use AutoFDO profile to annoate the control flow graph. 1601 Return the todo flag. */ 1602 1603 static unsigned int 1604 auto_profile (void) 1605 { 1606 struct cgraph_node *node; 1607 1608 if (symtab->state == FINISHED) 1609 return 0; 1610 1611 init_node_map (true); 1612 profile_info = autofdo::afdo_profile_info; 1613 1614 FOR_EACH_FUNCTION (node) 1615 { 1616 if (!gimple_has_body_p (node->decl)) 1617 continue; 1618 1619 /* Don't profile functions produced for builtin stuff. */ 1620 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION) 1621 continue; 1622 1623 push_cfun (DECL_STRUCT_FUNCTION (node->decl)); 1624 1625 /* First do indirect call promotion and early inline to make the 1626 IR match the profiled binary before actual annotation. 1627 1628 This is needed because an indirect call might have been promoted 1629 and inlined in the profiled binary. If we do not promote and 1630 inline these indirect calls before annotation, the profile for 1631 these promoted functions will be lost. 1632 1633 e.g. foo() --indirect_call--> bar() 1634 In profiled binary, the callsite is promoted and inlined, making 1635 the profile look like: 1636 1637 foo: { 1638 loc_foo_1: count_1 1639 bar@loc_foo_2: { 1640 loc_bar_1: count_2 1641 loc_bar_2: count_3 1642 } 1643 } 1644 1645 Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined. 1646 If we perform annotation on it, the profile inside bar@loc_foo2 1647 will be wasted. 1648 1649 To avoid this, we promote loc_foo_2 and inline the promoted bar 1650 function before annotation, so the profile inside bar@loc_foo2 1651 will be useful. */ 1652 autofdo::stmt_set promoted_stmts; 1653 for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++) 1654 { 1655 if (!flag_value_profile_transformations 1656 || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts)) 1657 break; 1658 early_inline (); 1659 } 1660 1661 early_inline (); 1662 autofdo::afdo_annotate_cfg (promoted_stmts); 1663 compute_function_frequency (); 1664 1665 /* Local pure-const may imply need to fixup the cfg. */ 1666 if (execute_fixup_cfg () & TODO_cleanup_cfg) 1667 cleanup_tree_cfg (); 1668 1669 free_dominance_info (CDI_DOMINATORS); 1670 free_dominance_info (CDI_POST_DOMINATORS); 1671 cgraph_edge::rebuild_edges (); 1672 compute_inline_parameters (cgraph_node::get (current_function_decl), true); 1673 pop_cfun (); 1674 } 1675 1676 return TODO_rebuild_cgraph_edges; 1677 } 1678 } /* namespace autofdo. */ 1679 1680 /* Read the profile from the profile data file. */ 1681 1682 void 1683 read_autofdo_file (void) 1684 { 1685 if (auto_profile_file == NULL) 1686 auto_profile_file = DEFAULT_AUTO_PROFILE_FILE; 1687 1688 autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc ( 1689 1, sizeof (struct gcov_ctr_summary)); 1690 autofdo::afdo_profile_info->runs = 1; 1691 autofdo::afdo_profile_info->sum_max = 0; 1692 autofdo::afdo_profile_info->sum_all = 0; 1693 1694 /* Read the profile from the profile file. */ 1695 autofdo::read_profile (); 1696 } 1697 1698 /* Free the resources. */ 1699 1700 void 1701 end_auto_profile (void) 1702 { 1703 delete autofdo::afdo_source_profile; 1704 delete autofdo::afdo_string_table; 1705 profile_info = NULL; 1706 } 1707 1708 /* Returns TRUE if EDGE is hot enough to be inlined early. */ 1709 1710 bool 1711 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge) 1712 { 1713 gcov_type count 1714 = autofdo::afdo_source_profile->get_callsite_total_count (edge); 1715 1716 if (count > 0) 1717 { 1718 bool is_hot; 1719 const struct gcov_ctr_summary *saved_profile_info = profile_info; 1720 /* At early inline stage, profile_info is not set yet. We need to 1721 temporarily set it to afdo_profile_info to calculate hotness. */ 1722 profile_info = autofdo::afdo_profile_info; 1723 is_hot = maybe_hot_count_p (NULL, count); 1724 profile_info = saved_profile_info; 1725 return is_hot; 1726 } 1727 1728 return false; 1729 } 1730 1731 namespace 1732 { 1733 1734 const pass_data pass_data_ipa_auto_profile = { 1735 SIMPLE_IPA_PASS, "afdo", /* name */ 1736 OPTGROUP_NONE, /* optinfo_flags */ 1737 TV_IPA_AUTOFDO, /* tv_id */ 1738 0, /* properties_required */ 1739 0, /* properties_provided */ 1740 0, /* properties_destroyed */ 1741 0, /* todo_flags_start */ 1742 0, /* todo_flags_finish */ 1743 }; 1744 1745 class pass_ipa_auto_profile : public simple_ipa_opt_pass 1746 { 1747 public: 1748 pass_ipa_auto_profile (gcc::context *ctxt) 1749 : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt) 1750 { 1751 } 1752 1753 /* opt_pass methods: */ 1754 virtual bool 1755 gate (function *) 1756 { 1757 return flag_auto_profile; 1758 } 1759 virtual unsigned int 1760 execute (function *) 1761 { 1762 return autofdo::auto_profile (); 1763 } 1764 }; // class pass_ipa_auto_profile 1765 1766 } // anon namespace 1767 1768 simple_ipa_opt_pass * 1769 make_pass_ipa_auto_profile (gcc::context *ctxt) 1770 { 1771 return new pass_ipa_auto_profile (ctxt); 1772 } 1773