1 /* Interprocedural constant propagation 2 Copyright (C) 2005-2019 Free Software Foundation, Inc. 3 4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor 5 <mjambor@suse.cz> 6 7 This file is part of GCC. 8 9 GCC is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free 11 Software Foundation; either version 3, or (at your option) any later 12 version. 13 14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 15 WARRANTY; without even the implied warranty of MERCHANTABILITY or 16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 17 for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with GCC; see the file COPYING3. If not see 21 <http://www.gnu.org/licenses/>. */ 22 23 /* Interprocedural constant propagation (IPA-CP). 24 25 The goal of this transformation is to 26 27 1) discover functions which are always invoked with some arguments with the 28 same known constant values and modify the functions so that the 29 subsequent optimizations can take advantage of the knowledge, and 30 31 2) partial specialization - create specialized versions of functions 32 transformed in this way if some parameters are known constants only in 33 certain contexts but the estimated tradeoff between speedup and cost size 34 is deemed good. 35 36 The algorithm also propagates types and attempts to perform type based 37 devirtualization. Types are propagated much like constants. 38 39 The algorithm basically consists of three stages. In the first, functions 40 are analyzed one at a time and jump functions are constructed for all known 41 call-sites. 
In the second phase, the pass propagates information from the 42 jump functions across the call to reveal what values are available at what 43 call sites, performs estimations of effects of known values on functions and 44 their callees, and finally decides what specialized extra versions should be 45 created. In the third, the special versions materialize and appropriate 46 calls are redirected. 47 48 The algorithm used is to a certain extent based on "Interprocedural Constant 49 Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon, 50 Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D 51 Cooper, Mary W. Hall, and Ken Kennedy. 52 53 54 First stage - intraprocedural analysis 55 ======================================= 56 57 This phase computes jump_function and modification flags. 58 59 A jump function for a call-site represents the values passed as an actual 60 arguments of a given call-site. In principle, there are three types of 61 values: 62 63 Pass through - the caller's formal parameter is passed as an actual 64 argument, plus an operation on it can be performed. 65 Constant - a constant is passed as an actual argument. 66 Unknown - neither of the above. 67 68 All jump function types are described in detail in ipa-prop.h, together with 69 the data structures that represent them and methods of accessing them. 70 71 ipcp_generate_summary() is the main function of the first stage. 72 73 Second stage - interprocedural analysis 74 ======================================== 75 76 This stage is itself divided into two phases. In the first, we propagate 77 known values over the call graph, in the second, we make cloning decisions. 78 It uses a different algorithm than the original Callahan's paper. 79 80 First, we traverse the functions topologically from callers to callees and, 81 for each strongly connected component (SCC), we propagate constants 82 according to previously computed jump functions. 
We also record what known
   values depend on other known values and estimate local effects.  Finally, we
   propagate cumulative information about these effects from dependent values
   to those on which they depend.

   Second, we again traverse the call graph in the same topological order and
   make clones for functions which we know are called with the same values in
   all contexts and decide about extra specialized clones of functions just for
   some contexts - these decisions are based on both local estimates and
   cumulative estimates propagated from callees.

   ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
   second stage.

   Third phase - materialization of clones, call statement updates.
   ============================================

   This stage is currently performed by call graph code (mainly in cgraphunit.c
   and tree-inline.c) according to instructions inserted to the call graph by
   the second stage.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple-expr.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "gimple-fold.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "tree-pretty-print.h"
#include "tree-inline.h"
#include "params.h"
#include "ipa-fnsummary.h"
#include "ipa-utils.h"
#include "tree-ssa-ccp.h"
#include "stringpool.h"
#include "attribs.h"

template <typename valtype> class ipcp_value;

/* Describes a particular source for an IPA-CP value.  */

template <typename valtype>
class ipcp_value_source
{
public:
  /* Aggregate offset of the source, negative if the source is scalar value of
     the argument itself.  */
  HOST_WIDE_INT offset;
  /* The incoming edge that brought the value.  */
  cgraph_edge *cs;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the ipcp_value of the caller from which the described
     value has been derived.  Otherwise it is NULL.  */
  ipcp_value<valtype> *val;
  /* Next pointer in a linked list of sources of a value.  */
  ipcp_value_source *next;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the index of the parameter of the caller the jump
     function references.  */
  int index;
};

/* Common ancestor for all ipcp_value instantiations.  */

class ipcp_value_base
{
public:
  /* Time benefit and size cost that specializing the function for this value
     would bring about in this function alone.  */
  int local_time_benefit, local_size_cost;
  /* Time benefit and size cost that specializing the function for this value
     can bring about in its callees (transitively).  */
  int prop_time_benefit, prop_size_cost;

  ipcp_value_base ()
    : local_time_benefit (0), local_size_cost (0),
      prop_time_benefit (0), prop_size_cost (0) {}
};

/* Describes one particular value stored in struct ipcp_lattice.  */

template <typename valtype>
class ipcp_value : public ipcp_value_base
{
public:
  /* The actual value for the given parameter.  */
  valtype value;
  /* The list of sources from which this value originates.  */
  ipcp_value_source <valtype> *sources;
  /* Next pointers in a linked list of all values in a lattice.  */
  ipcp_value *next;
  /* Next pointers in a linked list of values in a strongly connected component
     of values.  */
  ipcp_value *scc_next;
  /* Next pointers in a linked list of SCCs of values sorted topologically
     according to their sources.  */
  ipcp_value *topo_next;
  /* A specialized node created for this value, NULL if none has been (so far)
     created.  */
  cgraph_node *spec_node;
  /* Depth first search number and low link for topological sorting of
     values.  */
  int dfs, low_link;
  /* True if this value is currently on the topo-sort stack.  */
  bool on_stack;

  ipcp_value()
    : sources (0), next (0), scc_next (0), topo_next (0),
      spec_node (0), dfs (0), low_link (0), on_stack (false) {}

  void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
		   HOST_WIDE_INT offset);
};

/* Lattice describing potential values of a formal parameter of a function, or
   a part of an aggregate.  TOP is represented by a lattice with zero values
   and with contains_variable and bottom flags cleared.  BOTTOM is represented
   by a lattice with the bottom flag set.  In that case, values and
   contains_variable flag should be disregarded.  */

template <typename valtype>
class ipcp_lattice
{
public:
  /* The list of known values and types in this lattice.  Note that values are
     not deallocated if a lattice is set to bottom because there may be value
     sources referencing them.  */
  ipcp_value<valtype> *values;
  /* Number of known values and types in this lattice.  */
  int values_count;
  /* The lattice contains a variable component (in addition to values).  */
  bool contains_variable;
  /* The value of the lattice is bottom (i.e. variable and unusable for any
     propagation).  */
  bool bottom;

  inline bool is_single_const ();
  inline bool set_to_bottom ();
  inline bool set_contains_variable ();
  bool add_value (valtype newval, cgraph_edge *cs,
		  ipcp_value<valtype> *src_val = NULL,
		  int src_idx = 0, HOST_WIDE_INT offset = -1);
  void print (FILE * f, bool dump_sources, bool dump_benefits);
};

/* Lattice of tree values with an offset to describe a part of an
   aggregate.  */

class ipcp_agg_lattice : public ipcp_lattice<tree>
{
public:
  /* Offset that is being described by this lattice.  */
  HOST_WIDE_INT offset;
  /* Size so that we don't have to re-compute it every time we traverse the
     list.  Must correspond to TYPE_SIZE of all lat values.  */
  HOST_WIDE_INT size;
  /* Next element of the linked list.  */
  struct ipcp_agg_lattice *next;
};

/* Lattice of known bits, only capable of holding one value.
   Bitwise constant propagation propagates which bits of a
   value are constant.
   For eg:
   int f(int x)
   {
     return some_op (x);
   }

   int f1(int y)
   {
     if (cond)
       return f (y & 0xff);
     else
       return f (y & 0xf);
   }

   In the above case, the param 'x' will always have all
   the bits (except the bits in lsb) set to 0.
   Hence the mask of 'x' would be 0xff.  The mask
   reflects that the bits in lsb are unknown.
   The actual propagated value is given by m_value & ~m_mask.  */

class ipcp_bits_lattice
{
public:
  bool bottom_p () { return m_lattice_val == IPA_BITS_VARYING; }
  bool top_p () { return m_lattice_val == IPA_BITS_UNDEFINED; }
  bool constant_p () { return m_lattice_val == IPA_BITS_CONSTANT; }
  bool set_to_bottom ();
  bool set_to_constant (widest_int, widest_int);

  widest_int get_value () { return m_value; }
  widest_int get_mask () { return m_mask; }

  bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
		  enum tree_code, tree);

  bool meet_with (widest_int, widest_int, unsigned);

  void print (FILE *);

private:
  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;

  /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
     If a bit in mask is set to 0, then the corresponding bit in
     value is known to be constant.  */
  widest_int m_value, m_mask;

  bool meet_with_1 (widest_int, widest_int, unsigned);
  void get_value_and_mask (tree, widest_int *, widest_int *);
};

/* Lattice of value ranges.  */

class ipcp_vr_lattice
{
public:
  value_range_base m_vr;

  inline bool bottom_p () const;
  inline bool top_p () const;
  inline bool set_to_bottom ();
  bool meet_with (const value_range_base *p_vr);
  bool meet_with (const ipcp_vr_lattice &other);
  void init () { gcc_assert (m_vr.undefined_p ()); }
  void print (FILE * f);

private:
  bool meet_with_1 (const value_range_base *other_vr);
};

/* Structure containing lattices for a parameter itself and for pieces of
   aggregates that are passed in the parameter or by a reference in a parameter
   plus some other useful flags.  */

class ipcp_param_lattices
{
public:
  /* Lattice describing the value of the parameter itself.  */
  ipcp_lattice<tree> itself;
  /* Lattice describing the polymorphic contexts of a parameter.  */
  ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
  /* Lattices describing aggregate parts.  */
  ipcp_agg_lattice *aggs;
  /* Lattice describing known bits.  */
  ipcp_bits_lattice bits_lattice;
  /* Lattice describing value range.  */
  ipcp_vr_lattice m_value_range;
  /* Number of aggregate lattices */
  int aggs_count;
  /* True if aggregate data were passed by reference (as opposed to by
     value).  */
  bool aggs_by_ref;
  /* All aggregate lattices contain a variable component (in addition to
     values).  */
  bool aggs_contain_variable;
  /* The value of all aggregate lattices is bottom (i.e. variable and unusable
     for any propagation).  */
  bool aggs_bottom;

  /* There is a virtual call based on this parameter.  */
  bool virt_call;
};

/* Allocation pools for values and their sources in ipa-cp.
*/

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA_CP aggregate lattices");

/* Maximal count found in program.  */

static profile_count max_count;

/* Original overall size of the program.  */

static long overall_size, max_new_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;

/* Return the param lattices structure corresponding to the Ith formal
   parameter of the function described by INFO.  */
static inline struct ipcp_param_lattices *
ipa_get_parm_lattices (struct ipa_node_params *info, int i)
{
  gcc_assert (i >= 0 && i < ipa_get_param_count (info));
  /* Lattices only exist for the original node, never for IPA-CP clones.  */
  gcc_checking_assert (!info->ipcp_orig_node);
  gcc_checking_assert (info->lattices);
  return &(info->lattices[i]);
}

/* Return the lattice corresponding to the scalar value of the Ith formal
   parameter of the function described by INFO.  */
static inline ipcp_lattice<tree> *
ipa_get_scalar_lat (struct ipa_node_params *info, int i)
{
  struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->itself;
}

/* Return the lattice corresponding to the polymorphic contexts of the Ith
   formal parameter of the function described by INFO.  */
static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (struct ipa_node_params *info, int i)
{
  struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}

/* Return whether LAT is a lattice with a single constant and without an
   undefined value.
*/

template <typename valtype>
inline bool
ipcp_lattice<valtype>::is_single_const ()
{
  if (bottom || contains_variable || values_count != 1)
    return false;
  else
    return true;
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, tree v)
{
  /* Addresses of CONST_DECLs are printed as the initializer they point to,
     prefixed with '&', which is more informative than the DECL itself.  */
  if (TREE_CODE (v) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
    {
      fprintf (f, "& ");
      print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
    }
  else
    print_generic_expr (f, v);
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump(f, false);
}

/* Print a lattice LAT to F.  If DUMP_SOURCES, also print the edges each value
   came from; if DUMP_BENEFITS, also print estimated time/size effects.  */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
	fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      if (dump_benefits && prev)
	fprintf (f, " ");
      else if (!dump_benefits && prev)
	fprintf (f, ", ");
      else
	prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
	{
	  ipcp_value_source<valtype> *s;

	  fprintf (f, " [from:");
	  for (s = val->sources; s; s = s->next)
	    fprintf (f, " %i(%f)", s->cs->caller->order,
		     s->cs->sreal_frequency ().to_double ());
	  fprintf (f, "]");
	}

      if (dump_benefits)
	fprintf (f, " [loc_time: %i, loc_size: %i, "
		 "prop_time: %i, prop_size: %i]\n",
		 val->local_time_benefit, val->local_size_cost,
		 val->prop_time_benefit, val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}

/* Print the known-bits lattice to F.  */

void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, " Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, " Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, " Bits: value = "); print_hex (get_value (), f);
      fprintf (f, ", mask = "); print_hex (get_mask (), f);
      fprintf (f, "\n");
    }
}

/* Print value range lattice to F.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  dump_value_range (f, &m_vr);
}

/* Print all ipcp_lattices of all functions to F.  */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      struct ipa_node_params *info;

      info = IPA_NODE_REF (node);
      /* Skip constprop clones since we don't make lattices for them.
	 */
      if (info->ipcp_orig_node)
	continue;
      fprintf (f, " Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
	{
	  struct ipcp_agg_lattice *aglat;
	  struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  fprintf (f, " param [%d]: ", i);
	  plats->itself.print (f, dump_sources, dump_benefits);
	  fprintf (f, " ctxs: ");
	  plats->ctxlat.print (f, dump_sources, dump_benefits);
	  plats->bits_lattice.print (f);
	  fprintf (f, " ");
	  plats->m_value_range.print (f);
	  fprintf (f, "\n");
	  if (plats->virt_call)
	    fprintf (f, " virt_call flag set\n");

	  if (plats->aggs_bottom)
	    {
	      fprintf (f, " AGGS BOTTOM\n");
	      continue;
	    }
	  if (plats->aggs_contain_variable)
	    fprintf (f, " AGGS VARIABLE\n");
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    {
	      fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
		       plats->aggs_by_ref ? "ref " : "", aglat->offset);
	      aglat->print (f, dump_sources, dump_benefits);
	    }
	}
    }
}

/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with NODE.  */

static void
determine_versionability (struct cgraph_node *node,
			  struct ipa_node_params *info)
{
  /* Non-NULL REASON means the node is not versionable; the string is only
     used for dumping.  */
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk.thunk_p)
    reason = "alias or thunk";
  else if (!node->local.versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
	   || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
	 vector copies of them, so IPA-cp and SIMD clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
	 copies of them, so IPA-cp and target clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; it breaks and for C++
     decloned constructors, inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
	 callers are inside of a comdat group.  */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
	 edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
	{
	  if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
	    reason = "external function which calls va_arg_pack";
	  if (DECL_FUNCTION_CODE (edge->callee->decl)
	      == BUILT_IN_VA_ARG_PACK_LEN)
	    reason = "external function which calls va_arg_pack_len";
	}

  /* Aliases and thunks are expected to be unversionable; do not clutter the
     dump file with them.  */
  if (reason && dump_file && !node->alias && !node->thunk.thunk_p)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
	     node->dump_name (), reason);

  info->versionable = (reason == NULL);
}

/* Return true if it is at all technically possible to create clones of a
   NODE.  */

static bool
ipcp_versionable_function_p (struct cgraph_node *node)
{
  return IPA_NODE_REF (node)->versionable;
}

/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
{
  profile_count count_sum;
  int n_calls, n_hot_calls, freq_sum;
};

/* Initialize fields of STAT to zeroes.  */

static inline void
init_caller_stats (struct caller_statistics *stats)
{
  stats->count_sum = profile_count::zero ();
  stats->n_calls = 0;
  stats->n_hot_calls = 0;
  stats->freq_sum = 0;
}

/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.
*/

static bool
gather_caller_stats (struct cgraph_node *node, void *data)
{
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk.thunk_p)
      {
	if (cs->count.ipa ().initialized_p ())
	  stats->count_sum += cs->count.ipa ();
	stats->freq_sum += cs->frequency ();
	stats->n_calls++;
	if (cs->maybe_hot_p ())
	  stats->n_hot_calls ++;
      }
  /* Returning false continues iteration over all aliases.  */
  return false;

}

/* Return true if this NODE is a viable candidate for cloning.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "-fipa-cp-clone disabled.\n",
		 node->name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "optimizing it for size.\n",
		 node->name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);

  /* If the function is small compared with the number of call sites, cloning
     can only shrink the program.  */
  if (ipa_fn_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
		 node->name ());
      return true;
    }

  /* When profile is available and function is hot, propagate into it even if
     calls seem cold; constant propagation can improve function's speed
     significantly.  */
  if (max_count > profile_count::zero ())
    {
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
	{
	  if (dump_file)
	    fprintf (dump_file, "Considering %s for cloning; "
		     "usually called directly.\n",
		     node->name ());
	  return true;
	}
    }
  if (!stats.n_hot_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
		 node->name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
	     node->name ());
  return true;
}

/* Bookkeeping for topological sorting of lattice values (Tarjan's SCC
   algorithm on the value dependency graph).  */

template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort.  */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
};

/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order.  */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
		     constants ()
  {}
};

/* Allocate the arrays in TOPO and topologically sort the nodes into order.
*/

static void
build_toporder_info (struct ipa_topo_info *topo)
{
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true, NULL);
}

/* Free information about strongly connected components and the arrays in
   TOPO.  */

static void
free_toporder_info (struct ipa_topo_info *topo)
{
  ipa_free_postorder_info ();
  free (topo->order);
  free (topo->stack);
}

/* Add NODE to the stack in TOPO, unless it is already there.  */

static inline void
push_node_to_stack (struct ipa_topo_info *topo, struct cgraph_node *node)
{
  struct ipa_node_params *info = IPA_NODE_REF (node);
  if (info->node_enqueued)
    return;
  info->node_enqueued = 1;
  topo->stack[topo->stack_top++] = node;
}

/* Pop a node from the stack in TOPO and return it or return NULL if the stack
   is empty.  */

static struct cgraph_node *
pop_node_from_stack (struct ipa_topo_info *topo)
{
  if (topo->stack_top)
    {
      struct cgraph_node *node;
      topo->stack_top--;
      node = topo->stack[topo->stack_top];
      IPA_NODE_REF (node)->node_enqueued = 0;
      return node;
    }
  else
    return NULL;
}

/* Set lattice LAT to bottom and return true if it previously was not set as
   such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_to_bottom ()
{
  bool ret = !bottom;
  bottom = true;
  return ret;
}

/* Mark lattice as containing an unknown value and return true if it previously
   was not marked as such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_contains_variable ()
{
  bool ret = !contains_variable;
  contains_variable = true;
  return ret;
}

/* Set all aggregate lattices in PLATS to bottom and return true if they were
   not previously set as such.  */

static inline bool
set_agg_lats_to_bottom (struct ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_bottom;
  plats->aggs_bottom = true;
  return ret;
}

/* Mark all aggregate lattices in PLATS as containing an unknown value and
   return true if they were not previously marked as such.  */

static inline bool
set_agg_lats_contain_variable (struct ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_contain_variable;
  plats->aggs_contain_variable = true;
  return ret;
}

/* Meet the current value of the lattice with the value range of lattice
   OTHER.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
{
  return meet_with_1 (&other.m_vr);
}

/* Meet the current value of the lattice with value range described by VR
   lattice.  */

bool
ipcp_vr_lattice::meet_with (const value_range_base *p_vr)
{
  return meet_with_1 (p_vr);
}

/* Meet the current value of the lattice with value range described by
   OTHER_VR lattice.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with_1 (const value_range_base *other_vr)
{
  if (bottom_p ())
    return false;

  if (other_vr->varying_p ())
    return set_to_bottom ();

  value_range_base save (m_vr);
  m_vr.union_ (other_vr);
  return !m_vr.equal_p (save);
}

/* Return true if value range information in the lattice is yet unknown.  */

bool
ipcp_vr_lattice::top_p () const
{
  return m_vr.undefined_p ();
}

/* Return true if value range information in the lattice is known to be
   unusable.
*/ 950 951 bool 952 ipcp_vr_lattice::bottom_p () const 953 { 954 return m_vr.varying_p (); 955 } 956 957 /* Set value range information in the lattice to bottom. Return true if it 958 previously was in a different state. */ 959 960 bool 961 ipcp_vr_lattice::set_to_bottom () 962 { 963 if (m_vr.varying_p ()) 964 return false; 965 m_vr.set_varying (); 966 return true; 967 } 968 969 /* Set lattice value to bottom, if it already isn't the case. */ 970 971 bool 972 ipcp_bits_lattice::set_to_bottom () 973 { 974 if (bottom_p ()) 975 return false; 976 m_lattice_val = IPA_BITS_VARYING; 977 m_value = 0; 978 m_mask = -1; 979 return true; 980 } 981 982 /* Set to constant if it isn't already. Only meant to be called 983 when switching state from TOP. */ 984 985 bool 986 ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask) 987 { 988 gcc_assert (top_p ()); 989 m_lattice_val = IPA_BITS_CONSTANT; 990 m_value = value; 991 m_mask = mask; 992 return true; 993 } 994 995 /* Convert operand to value, mask form. */ 996 997 void 998 ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp) 999 { 1000 wide_int get_nonzero_bits (const_tree); 1001 1002 if (TREE_CODE (operand) == INTEGER_CST) 1003 { 1004 *valuep = wi::to_widest (operand); 1005 *maskp = 0; 1006 } 1007 else 1008 { 1009 *valuep = 0; 1010 *maskp = -1; 1011 } 1012 } 1013 1014 /* Meet operation, similar to ccp_lattice_meet, we xor values 1015 if this->value, value have different values at same bit positions, we want 1016 to drop that bit to varying. Return true if mask is changed. 
   This function assumes that the lattice value is in CONSTANT state.  */

bool
ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
				unsigned precision)
{
  gcc_assert (constant_p ());

  widest_int old_mask = m_mask;
  /* A bit becomes unknown if it is unknown in either input or if the two
     known values disagree on it.  */
  m_mask = (m_mask | mask) | (m_value ^ value);

  /* If no bit within PRECISION remains known, drop to bottom.  */
  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  return m_mask != old_mask;
}

/* Meet the bits lattice with operand
   described by <value, mask, sgn, precision>.  */

bool
ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
			      unsigned precision)
{
  if (bottom_p ())
    return false;

  if (top_p ())
    {
      /* A fully unknown operand sends TOP straight to bottom.  */
      if (wi::sext (mask, precision) == -1)
	return set_to_bottom ();
      return set_to_constant (value, mask);
    }

  return meet_with_1 (value, mask, precision);
}

/* Meet bits lattice with the result of bit_value_binop (other, operand)
   if code is binary operation or bit_value_unop (other) if code is unary op.
   In the case when code is nop_expr, no adjustment is required.
*/

bool
ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
			      signop sgn, enum tree_code code, tree operand)
{
  /* Meeting with bottom drops straight to bottom.  */
  if (other.bottom_p ())
    return set_to_bottom ();

  /* If we are already at bottom nothing can change; if OTHER is still top
     there is nothing to propagate yet.  */
  if (bottom_p () || other.top_p ())
    return false;

  widest_int adjusted_value, adjusted_mask;

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      tree type = TREE_TYPE (operand);
      widest_int o_value, o_mask;
      get_value_and_mask (operand, &o_value, &o_mask);

      bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
		       sgn, precision, other.get_value (), other.get_mask (),
		       TYPE_SIGN (type), TYPE_PRECISION (type), o_value,
		       o_mask);

      /* If no bit is known in the result, go to bottom.  */
      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
    }

  else if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      bit_value_unop (code, sgn, precision, &adjusted_value,
		      &adjusted_mask, sgn, precision, other.get_value (),
		      other.get_mask ());

      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
    }

  else
    /* Any other operation cannot be modelled on bits — give up.  */
    return set_to_bottom ();

  if (top_p ())
    {
      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
      return set_to_constant (adjusted_value, adjusted_mask);
    }
  else
    return meet_with_1 (adjusted_value, adjusted_mask, precision);
}

/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far.
*/

static inline bool
set_all_contains_variable (struct ipcp_param_lattices *plats)
{
  bool ret;
  ret = plats->itself.set_contains_variable ();
  ret |= plats->ctxlat.set_contains_variable ();
  ret |= set_agg_lats_contain_variable (plats);
  /* The bits and value-range lattices have no "contains variable" state;
     bottom is their equivalent.  */
  ret |= plats->bits_lattice.set_to_bottom ();
  ret |= plats->m_value_range.set_to_bottom ();
  return ret;
}

/* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
   points to by the number of callers to NODE.  */

static bool
count_callers (cgraph_node *node, void *data)
{
  int *caller_count = (int *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    /* Local thunks can be handled transparently, but if the thunk cannot
       be optimized out, count it as a real use.  */
    if (!cs->caller->thunk.thunk_p || !cs->caller->local.local)
      ++*caller_count;
  /* Returning false keeps the symbol-table walk going.  */
  return false;
}

/* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
   the one caller of some other node.  Set the caller's corresponding flag.  */

static bool
set_single_call_flag (cgraph_node *node, void *)
{
  cgraph_edge *cs = node->callers;
  /* Local thunks can be handled transparently, skip them.  */
  while (cs && cs->caller->thunk.thunk_p && cs->caller->local.local)
    cs = cs->next_caller;
  if (cs)
    {
      IPA_NODE_REF (cs->caller)->node_calling_single_call = true;
      return true;
    }
  return false;
}

/* Initialize ipcp_lattices.
*/

/* Initialize all lattices associated with the parameters of NODE.  Local
   nodes additionally have their caller counts computed; externally visible
   nodes are either marked entirely variable (if they can be cloned) or
   entirely bottom (if they cannot).  Parameters used in polymorphic indirect
   calls get their virt_call flag set.  */

static void
initialize_node_lattices (struct cgraph_node *node)
{
  struct ipa_node_params *info = IPA_NODE_REF (node);
  struct cgraph_edge *ie;
  bool disable = false, variable = false;
  int i;

  gcc_checking_assert (node->has_gimple_body_p ());
  if (node->local.local)
    {
      int caller_count = 0;
      node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
						true);
      gcc_checking_assert (caller_count > 0);
      if (caller_count == 1)
	node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
						  NULL, true);
    }
  else
    {
      /* When cloning is allowed, we can assume that externally visible
	 functions are not called.  We will compensate this by cloning
	 later.  */
      if (ipcp_versionable_function_p (node)
	  && ipcp_cloning_candidate_p (node))
	variable = true;
      else
	disable = true;
    }

  for (i = 0; i < ipa_get_param_count (info); i++)
    {
      struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      plats->m_value_range.init ();
    }

  if (disable || variable)
    {
      for (i = 0; i < ipa_get_param_count (info); i++)
	{
	  struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  if (disable)
	    {
	      plats->itself.set_to_bottom ();
	      plats->ctxlat.set_to_bottom ();
	      set_agg_lats_to_bottom (plats);
	      plats->bits_lattice.set_to_bottom ();
	      plats->m_value_range.set_to_bottom ();
	    }
	  else
	    set_all_contains_variable (plats);
	}
      if (dump_file && (dump_flags & TDF_DETAILS)
	  && !node->alias && !node->thunk.thunk_p)
	fprintf (dump_file, "Marking all lattices of %s as %s\n",
		 node->dump_name (), disable ? "BOTTOM" : "VARIABLE");
    }

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    if (ie->indirect_info->polymorphic
	&& ie->indirect_info->param_index >= 0)
      {
	/* NOTE(review): this assert merely repeats the guarding condition
	   directly above and is therefore always true here.  */
	gcc_checking_assert (ie->indirect_info->param_index >= 0);
	ipa_get_parm_lattices (info,
			       ie->indirect_info->param_index)->virt_call = 1;
      }
}

/* Return the result of a (possibly arithmetic) pass through jump function
   JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
   to which the result is passed.  Return NULL_TREE if that cannot be
   determined or be considered an interprocedural invariant.  */

static tree
ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
				tree res_type)
{
  tree res;

  /* A plain pass-through simply forwards the caller's value.  */
  if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    return input;
  if (!is_gimple_ip_invariant (input))
    return NULL_TREE;

  tree_code opcode = ipa_get_jf_pass_through_operation (jfunc);
  if (!res_type)
    {
      /* Without a known result type, we can only recover it for comparisons
	 (boolean) and operations whose result type equals that of their
	 first operand.  */
      if (TREE_CODE_CLASS (opcode) == tcc_comparison)
	res_type = boolean_type_node;
      else if (expr_type_first_operand_type_p (opcode))
	res_type = TREE_TYPE (input);
      else
	return NULL_TREE;
    }

  if (TREE_CODE_CLASS (opcode) == tcc_unary)
    res = fold_unary (opcode, res_type, input);
  else
    res = fold_binary (opcode, res_type, input,
		       ipa_get_jf_pass_through_operand (jfunc));

  /* Folding may fail or produce something that is not an IP invariant.  */
  if (res && !is_gimple_ip_invariant (res))
    return NULL_TREE;

  return res;
}

/* Return the result of an ancestor jump function JFUNC on the constant value
   INPUT.  Return NULL_TREE if that cannot be determined.
*/

static tree
ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
  gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
  /* Only addresses can be adjusted by the ancestor offset.  */
  if (TREE_CODE (input) == ADDR_EXPR)
    {
      tree t = TREE_OPERAND (input, 0);
      t = build_ref_for_offset (EXPR_LOCATION (t), t,
				ipa_get_jf_ancestor_offset (jfunc), false,
				ptr_type_node, NULL, false);
      return build_fold_addr_expr (t);
    }
  else
    return NULL_TREE;
}

/* Determine whether JFUNC evaluates to a single known constant value and if
   so, return it.  Otherwise return NULL.  INFO describes the caller node or
   the one it is inlined to, so that pass-through jump functions can be
   evaluated.  PARM_TYPE is the type of the parameter to which the result is
   passed.  */

tree
ipa_value_from_jfunc (struct ipa_node_params *info, struct ipa_jump_func *jfunc,
		      tree parm_type)
{
  if (jfunc->type == IPA_JF_CONST)
    return ipa_get_jf_constant (jfunc);
  else if (jfunc->type == IPA_JF_PASS_THROUGH
	   || jfunc->type == IPA_JF_ANCESTOR)
    {
      tree input;
      int idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
	idx = ipa_get_jf_ancestor_formal_id (jfunc);

      if (info->ipcp_orig_node)
	/* For a clone, the known constants were recorded at clone-creation
	   time.  */
	input = info->known_csts[idx];
      else
	{
	  ipcp_lattice<tree> *lat;

	  if (!info->lattices
	      || idx >= ipa_get_param_count (info))
	    return NULL_TREE;
	  lat = ipa_get_scalar_lat (info, idx);
	  if (!lat->is_single_const ())
	    return NULL_TREE;
	  input = lat->values->value;
	}

      if (!input)
	return NULL_TREE;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
      else
	return ipa_get_jf_ancestor_result (jfunc, input);
    }
  else
    return NULL_TREE;
}

/* Determine whether JFUNC evaluates to single known
   polymorphic context, given that INFO describes the caller node or the one
   it is inlined to, CS is the call graph edge corresponding to JFUNC and
   CSIDX index of the described parameter.  */

ipa_polymorphic_call_context
ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
			ipa_jump_func *jfunc)
{
  ipa_edge_args *args = IPA_EDGE_REF (cs);
  ipa_polymorphic_call_context ctx;
  ipa_polymorphic_call_context *edge_ctx
    = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;

  if (edge_ctx && !edge_ctx->useless_p ())
    ctx = *edge_ctx;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_polymorphic_call_context srcctx;
      int srcidx;
      bool type_preserved = true;
      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  /* An arithmetic pass-through destroys any context information.  */
	  if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
	    return ctx;
	  type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
	  srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
	}
      else
	{
	  type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
	  srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
	}
      if (info->ipcp_orig_node)
	{
	  /* For a clone, use the contexts recorded when it was created.  */
	  if (info->known_contexts.exists ())
	    srcctx = info->known_contexts[srcidx];
	}
      else
	{
	  if (!info->lattices
	      || srcidx >= ipa_get_param_count (info))
	    return ctx;
	  ipcp_lattice<ipa_polymorphic_call_context> *lat;
	  lat = ipa_get_poly_ctx_lat (info, srcidx);
	  if (!lat->is_single_const ())
	    return ctx;
	  srcctx = lat->values->value;
	}
      if (srcctx.useless_p ())
	return ctx;
      if (jfunc->type == IPA_JF_ANCESTOR)
	srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
      if (!type_preserved)
	srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
      srcctx.combine_with (ctx);
      return srcctx;
    }

  return ctx;
}

/*
 If checking is enabled, verify that no lattice is in the TOP state, i.e. not
   bottom, not containing a variable component and without any known value at
   the same time.  */

DEBUG_FUNCTION void
ipcp_verify_propagated_values (void)
{
  struct cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      struct ipa_node_params *info = IPA_NODE_REF (node);
      int i, count = ipa_get_param_count (info);

      for (i = 0; i < count; i++)
	{
	  ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);

	  /* TOP after propagation means the lattice was never updated,
	     which indicates a propagation bug.  */
	  if (!lat->bottom
	      && !lat->contains_variable
	      && lat->values_count == 0)
	    {
	      if (dump_file)
		{
		  symtab->dump (dump_file);
		  fprintf (dump_file, "\nIPA lattices after constant "
			   "propagation, before gcc_unreachable:\n");
		  print_all_lattices (dump_file, true, false);
		}

	      gcc_unreachable ();
	    }
	}
    }
}

/* Return true iff X and Y should be considered equal values by IPA-CP.  */

static bool
values_equal_for_ipcp_p (tree x, tree y)
{
  gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);

  if (x == y)
    return true;

  /* Addresses of CONST_DECLs are compared by their initializers because
     equal constants may be represented by distinct CONST_DECLs.  */
  if (TREE_CODE (x) == ADDR_EXPR
      && TREE_CODE (y) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
      && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
    return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
			    DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
  else
    return operand_equal_p (x, y, 0);
}

/* Return true iff X and Y should be considered equal contexts by IPA-CP.
*/

static bool
values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
			 ipa_polymorphic_call_context y)
{
  return x.equal_to (y);
}


/* Add a new value source to the value represented by THIS, marking that a
   value comes from edge CS and (if the underlying jump function is a
   pass-through or an ancestor one) from a caller value SRC_VAL of a caller
   parameter described by SRC_INDEX.  OFFSET is negative if the source was the
   scalar value of the parameter itself or the offset within an aggregate.  */

template <typename valtype>
void
ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
				 int src_idx, HOST_WIDE_INT offset)
{
  ipcp_value_source<valtype> *src;

  src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
  src->offset = offset;
  src->cs = cs;
  src->val = src_val;
  src->index = src_idx;

  /* Prepend to the singly-linked list of sources.  */
  src->next = sources;
  sources = src;
}

/* Allocate a new ipcp_value holding a tree constant, initialize its value to
   SOURCE and clear all other fields.  */

static ipcp_value<tree> *
allocate_and_init_ipcp_value (tree source)
{
  ipcp_value<tree> *val;

  val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
  val->value = source;
  return val;
}

/* Allocate a new ipcp_value holding a polymorphic context, initialize its
   value to SOURCE and clear all other fields.  */

static ipcp_value<ipa_polymorphic_call_context> *
allocate_and_init_ipcp_value (ipa_polymorphic_call_context source)
{
  ipcp_value<ipa_polymorphic_call_context> *val;

  // TODO
  val = new (ipcp_poly_ctx_values_pool.allocate ())
    ipcp_value<ipa_polymorphic_call_context>();
  val->value = source;
  return val;
}

/* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it.
   CS, SRC_VAL, SRC_INDEX and OFFSET are meant for add_source and have the
   same meaning.  OFFSET -1 means the source is scalar and not a part of an
   aggregate.  */

template <typename valtype>
bool
ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
				  ipcp_value<valtype> *src_val,
				  int src_idx, HOST_WIDE_INT offset)
{
  ipcp_value<valtype> *val;

  if (bottom)
    return false;

  for (val = values; val; val = val->next)
    if (values_equal_for_ipcp_p (val->value, newval))
      {
	/* Within an SCC do not record a second source for the same edge,
	   which would otherwise accumulate on repeated propagation.  */
	if (ipa_edge_within_scc (cs))
	  {
	    ipcp_value_source<valtype> *s;
	    for (s = val->sources; s; s = s->next)
	      if (s->cs == cs)
		break;
	    if (s)
	      return false;
	  }

	val->add_source (cs, src_val, src_idx, offset);
	return false;
      }

  if (values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE))
    {
      /* We can only free sources, not the values themselves, because sources
	 of other values in this SCC might point to them.  */
      for (val = values; val; val = val->next)
	{
	  while (val->sources)
	    {
	      ipcp_value_source<valtype> *src = val->sources;
	      val->sources = src->next;
	      /* NOTE(review): the cast presumably matches the element type
		 ipcp_sources_pool was declared with — confirm against the
		 pool's declaration.  */
	      ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
	    }
	}

      values = NULL;
      return set_to_bottom ();
    }

  values_count++;
  val = allocate_and_init_ipcp_value (newval);
  val->add_source (cs, src_val, src_idx, offset);
  /* Prepend the new value to the list.  */
  val->next = values;
  values = val;
  return true;
}

/* Propagate values through a pass-through jump function JFUNC associated with
   edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
   is the index of the source parameter.  PARM_TYPE is the type of the
   parameter to which the result is passed.
*/

static bool
propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
				    ipcp_lattice<tree> *src_lat,
				    ipcp_lattice<tree> *dest_lat, int src_idx,
				    tree parm_type)
{
  ipcp_value<tree> *src_val;
  bool ret = false;

  /* Do not create new values when propagating within an SCC because if there
     are arithmetic functions with circular dependencies, there is infinite
     number of them and we would just make lattices bottom.  If this condition
     is ever relaxed we have to detect self-feeding recursive calls in
     cgraph_edge_brings_value_p in a smarter way.  */
  if ((ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
      && ipa_edge_within_scc (cs))
    ret = dest_lat->set_contains_variable ();
  else
    for (src_val = src_lat->values; src_val; src_val = src_val->next)
      {
	tree cstval = ipa_get_jf_pass_through_result (jfunc, src_val->value,
						      parm_type);

	if (cstval)
	  ret |= dest_lat->add_value (cstval, cs, src_val, src_idx);
	else
	  /* Values the operation cannot fold become "variable".  */
	  ret |= dest_lat->set_contains_variable ();
      }

  return ret;
}

/* Propagate values through an ancestor jump function JFUNC associated with
   edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
   is the index of the source parameter.
*/

static bool
propagate_vals_across_ancestor (struct cgraph_edge *cs,
				struct ipa_jump_func *jfunc,
				ipcp_lattice<tree> *src_lat,
				ipcp_lattice<tree> *dest_lat, int src_idx)
{
  ipcp_value<tree> *src_val;
  bool ret = false;

  /* As in propagate_vals_across_pass_through, no new values may be created
     when propagating within an SCC.  */
  if (ipa_edge_within_scc (cs))
    return dest_lat->set_contains_variable ();

  for (src_val = src_lat->values; src_val; src_val = src_val->next)
    {
      tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);

      if (t)
	ret |= dest_lat->add_value (t, cs, src_val, src_idx);
      else
	ret |= dest_lat->set_contains_variable ();
    }

  return ret;
}

/* Propagate scalar values across jump function JFUNC that is associated with
   edge CS and put the values into DEST_LAT.  PARAM_TYPE is the type of the
   parameter to which the result is passed.  */

static bool
propagate_scalar_across_jump_function (struct cgraph_edge *cs,
				       struct ipa_jump_func *jfunc,
				       ipcp_lattice<tree> *dest_lat,
				       tree param_type)
{
  if (dest_lat->bottom)
    return false;

  if (jfunc->type == IPA_JF_CONST)
    {
      tree val = ipa_get_jf_constant (jfunc);
      return dest_lat->add_value (val, cs, NULL, 0);
    }
  else if (jfunc->type == IPA_JF_PASS_THROUGH
	   || jfunc->type == IPA_JF_ANCESTOR)
    {
      struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      ipcp_lattice<tree> *src_lat;
      int src_idx;
      bool ret;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
	src_idx = ipa_get_jf_ancestor_formal_id (jfunc);

      src_lat = ipa_get_scalar_lat (caller_info, src_idx);
      if (src_lat->bottom)
	return dest_lat->set_contains_variable ();

      /* If we would need to clone the caller and cannot, do not
	 propagate.  */
      if (!ipcp_versionable_function_p (cs->caller)
	  && (src_lat->contains_variable
	      || (src_lat->values_count > 1)))
	return dest_lat->set_contains_variable ();

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
						  dest_lat, src_idx,
						  param_type);
      else
	ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
					      src_idx);

      if (src_lat->contains_variable)
	ret |= dest_lat->set_contains_variable ();

      return ret;
    }

  /* TODO: We currently do not handle member method pointers in IPA-CP (we only
     use it for indirect inlining), we should propagate them too.  */
  return dest_lat->set_contains_variable ();
}

/* Propagate polymorphic contexts across jump function JFUNC that is
   associated with edge CS and describes argument IDX and put the values into
   DEST_LAT.  */

static bool
propagate_context_across_jump_function (cgraph_edge *cs,
			  ipa_jump_func *jfunc, int idx,
			  ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
{
  ipa_edge_args *args = IPA_EDGE_REF (cs);
  if (dest_lat->bottom)
    return false;
  bool ret = false;
  bool added_sth = false;
  bool type_preserved = true;

  ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
    = ipa_get_ith_polymorhic_call_context (args, idx);

  if (edge_ctx_ptr)
    edge_ctx = *edge_ctx_ptr;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      int src_idx;
      ipcp_lattice<ipa_polymorphic_call_context> *src_lat;

      /* TODO: Once we figure out how to propagate speculations, it will
	 probably be a good idea to switch to speculation if type_preserved is
	 not set instead of punting.  */
      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
	    goto prop_fail;
	  type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
	  src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
	}
      else
	{
	  type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
	  src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
	}

      src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
      /* If we would need to clone the caller and cannot, do not
	 propagate.  */
      if (!ipcp_versionable_function_p (cs->caller)
	  && (src_lat->contains_variable
	      || (src_lat->values_count > 1)))
	goto prop_fail;

      ipcp_value<ipa_polymorphic_call_context> *src_val;
      for (src_val = src_lat->values; src_val; src_val = src_val->next)
	{
	  ipa_polymorphic_call_context cur = src_val->value;

	  if (!type_preserved)
	    cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
	  if (jfunc->type == IPA_JF_ANCESTOR)
	    cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
	  /* TODO: In cases we know how the context is going to be used,
	     we can improve the result by passing proper OTR_TYPE.  */
	  cur.combine_with (edge_ctx);
	  if (!cur.useless_p ())
	    {
	      if (src_lat->contains_variable
		  && !edge_ctx.equal_to (cur))
		ret |= dest_lat->set_contains_variable ();
	      ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
	      added_sth = true;
	    }
	}

    }

 prop_fail:
  if (!added_sth)
    {
      /* Fall back to whatever the edge itself says about the context.  */
      if (!edge_ctx.useless_p ())
	ret |= dest_lat->add_value (edge_ctx, cs);
      else
	ret |= dest_lat->set_contains_variable ();
    }

  return ret;
}

/* Propagate bits across jfunc that is associated with
   edge cs and update dest_lattice accordingly.
*/ 1789 1790 bool 1791 propagate_bits_across_jump_function (cgraph_edge *cs, int idx, 1792 ipa_jump_func *jfunc, 1793 ipcp_bits_lattice *dest_lattice) 1794 { 1795 if (dest_lattice->bottom_p ()) 1796 return false; 1797 1798 enum availability availability; 1799 cgraph_node *callee = cs->callee->function_symbol (&availability); 1800 struct ipa_node_params *callee_info = IPA_NODE_REF (callee); 1801 tree parm_type = ipa_get_type (callee_info, idx); 1802 1803 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the 1804 transform for these cases. Similarly, we can have bad type mismatches 1805 with LTO, avoid doing anything with those too. */ 1806 if (!parm_type 1807 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type))) 1808 { 1809 if (dump_file && (dump_flags & TDF_DETAILS)) 1810 fprintf (dump_file, "Setting dest_lattice to bottom, because type of " 1811 "param %i of %s is NULL or unsuitable for bits propagation\n", 1812 idx, cs->callee->name ()); 1813 1814 return dest_lattice->set_to_bottom (); 1815 } 1816 1817 unsigned precision = TYPE_PRECISION (parm_type); 1818 signop sgn = TYPE_SIGN (parm_type); 1819 1820 if (jfunc->type == IPA_JF_PASS_THROUGH 1821 || jfunc->type == IPA_JF_ANCESTOR) 1822 { 1823 struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); 1824 tree operand = NULL_TREE; 1825 enum tree_code code; 1826 unsigned src_idx; 1827 1828 if (jfunc->type == IPA_JF_PASS_THROUGH) 1829 { 1830 code = ipa_get_jf_pass_through_operation (jfunc); 1831 src_idx = ipa_get_jf_pass_through_formal_id (jfunc); 1832 if (code != NOP_EXPR) 1833 operand = ipa_get_jf_pass_through_operand (jfunc); 1834 } 1835 else 1836 { 1837 code = POINTER_PLUS_EXPR; 1838 src_idx = ipa_get_jf_ancestor_formal_id (jfunc); 1839 unsigned HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT; 1840 operand = build_int_cstu (size_type_node, offset); 1841 } 1842 1843 struct ipcp_param_lattices *src_lats 1844 = ipa_get_parm_lattices (caller_info, src_idx); 
1845 1846 /* Try to propagate bits if src_lattice is bottom, but jfunc is known. 1847 for eg consider: 1848 int f(int x) 1849 { 1850 g (x & 0xff); 1851 } 1852 Assume lattice for x is bottom, however we can still propagate 1853 result of x & 0xff == 0xff, which gets computed during ccp1 pass 1854 and we store it in jump function during analysis stage. */ 1855 1856 if (src_lats->bits_lattice.bottom_p () 1857 && jfunc->bits) 1858 return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask, 1859 precision); 1860 else 1861 return dest_lattice->meet_with (src_lats->bits_lattice, precision, sgn, 1862 code, operand); 1863 } 1864 1865 else if (jfunc->type == IPA_JF_ANCESTOR) 1866 return dest_lattice->set_to_bottom (); 1867 else if (jfunc->bits) 1868 return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask, 1869 precision); 1870 else 1871 return dest_lattice->set_to_bottom (); 1872 } 1873 1874 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to 1875 DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if 1876 the result is a range or an anti-range. */ 1877 1878 static bool 1879 ipa_vr_operation_and_type_effects (value_range_base *dst_vr, 1880 value_range_base *src_vr, 1881 enum tree_code operation, 1882 tree dst_type, tree src_type) 1883 { 1884 extract_range_from_unary_expr (dst_vr, operation, dst_type, 1885 src_vr, src_type); 1886 if (dst_vr->varying_p () || dst_vr->undefined_p ()) 1887 return false; 1888 return true; 1889 } 1890 1891 /* Propagate value range across jump function JFUNC that is associated with 1892 edge CS with param of callee of PARAM_TYPE and update DEST_PLATS 1893 accordingly. 
*/

static bool
propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
				   struct ipcp_param_lattices *dest_plats,
				   tree param_type)
{
  ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;

  if (dest_lat->bottom_p ())
    return false;

  /* Value ranges are only maintained for integral and pointer
     parameters.  */
  if (!param_type
      || (!INTEGRAL_TYPE_P (param_type)
	  && !POINTER_TYPE_P (param_type)))
    return dest_lat->set_to_bottom ();

  if (jfunc->type == IPA_JF_PASS_THROUGH)
    {
      enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);

      if (TREE_CODE_CLASS (operation) == tcc_unary)
	{
	  struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
	  int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
	  tree operand_type = ipa_get_type (caller_info, src_idx);
	  struct ipcp_param_lattices *src_lats
	    = ipa_get_parm_lattices (caller_info, src_idx);

	  if (src_lats->m_value_range.bottom_p ())
	    return dest_lat->set_to_bottom ();
	  value_range_base vr;
	  if (ipa_vr_operation_and_type_effects (&vr,
						 &src_lats->m_value_range.m_vr,
						 operation, param_type,
						 operand_type))
	    return dest_lat->meet_with (&vr);
	}
    }
  else if (jfunc->type == IPA_JF_CONST)
    {
      tree val = ipa_get_jf_constant (jfunc);
      if (TREE_CODE (val) == INTEGER_CST)
	{
	  val = fold_convert (param_type, val);
	  if (TREE_OVERFLOW_P (val))
	    val = drop_tree_overflow (val);

	  /* A known constant is the singleton range [val, val].  */
	  value_range_base tmpvr (VR_RANGE, val, val);
	  return dest_lat->meet_with (&tmpvr);
	}
    }

  /* Otherwise fall back to the range recorded in the jump function
     itself.  */
  value_range_base vr;
  if (jfunc->m_vr
      && ipa_vr_operation_and_type_effects (&vr, jfunc->m_vr, NOP_EXPR,
					    param_type,
					    jfunc->m_vr->type ()))
    return dest_lat->meet_with (&vr);
  else
    return dest_lat->set_to_bottom ();
}

/* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
   NEW_AGGS_BY_REF and if not, mark all aggs as
   bottoms and return true (in all other cases, return false).  If there are
   no aggregate items, set aggs_by_ref to NEW_AGGS_BY_REF.  */

static bool
set_check_aggs_by_ref (struct ipcp_param_lattices *dest_plats,
		       bool new_aggs_by_ref)
{
  if (dest_plats->aggs)
    {
      /* A by-reference/by-value mismatch makes the lattices unusable.  */
      if (dest_plats->aggs_by_ref != new_aggs_by_ref)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return true;
	}
    }
  else
    dest_plats->aggs_by_ref = new_aggs_by_ref;
  return false;
}

/* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
   already existing lattice for the given OFFSET and SIZE, marking all skipped
   lattices as containing variable and checking for overlaps.  If there is no
   already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
   it with offset, size and contains_variable to PRE_EXISTING, and return true,
   unless there are too many already.  If there are too many, return false.  If
   there are overlaps turn whole DEST_PLATS to bottom and return false.  If any
   skipped lattices were newly marked as containing variable, set *CHANGE to
   true.
*/

static bool
merge_agg_lats_step (struct ipcp_param_lattices *dest_plats,
		     HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
		     struct ipcp_agg_lattice ***aglat,
		     bool pre_existing, bool *change)
{
  gcc_checking_assert (offset >= 0);

  /* Skip lattices that end at or before OFFSET, marking them as
     variable.  */
  while (**aglat && (**aglat)->offset < offset)
    {
      if ((**aglat)->offset + (**aglat)->size > offset)
	{
	  /* A preceding lattice overlaps into OFFSET — give up
	     entirely.  */
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      *change |= (**aglat)->set_contains_variable ();
      *aglat = &(**aglat)->next;
    }

  if (**aglat && (**aglat)->offset == offset)
    {
      /* A lattice at exactly OFFSET exists; it must match in size and not
	 be overlapped by its successor.  */
      if ((**aglat)->size != val_size
	  || ((**aglat)->next
	      && (**aglat)->next->offset < offset + val_size))
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      gcc_checking_assert (!(**aglat)->next
			   || (**aglat)->next->offset >= offset + val_size);
      return true;
    }
  else
    {
      struct ipcp_agg_lattice *new_al;

      if (**aglat && (**aglat)->offset < offset + val_size)
	{
	  /* The new lattice would overlap the following one.  */
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      if (dest_plats->aggs_count == PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS))
	return false;
      dest_plats->aggs_count++;
      new_al = ipcp_agg_lattice_pool.allocate ();
      memset (new_al, 0, sizeof (*new_al));

      new_al->offset = offset;
      new_al->size = val_size;
      new_al->contains_variable = pre_existing;

      /* Link the new lattice in front of **AGLAT.  */
      new_al->next = **aglat;
      **aglat = new_al;
      return true;
    }
}

/* Set all AGLAT and all other aggregate lattices reachable by next pointers as
   containing an unknown value.
 */

static bool
set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
{
  bool ret = false;
  while (aglat)
    {
      ret |= aglat->set_contains_variable ();
      aglat = aglat->next;
    }
  return ret;
}

/* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
   OFFSET_DELTA.  CS is the call graph edge and SRC_IDX the index of the source
   parameter used for lattice value sources.  Return true if DEST_PLATS changed
   in any way.  */

static bool
merge_aggregate_lattices (struct cgraph_edge *cs,
			  struct ipcp_param_lattices *dest_plats,
			  struct ipcp_param_lattices *src_plats,
			  int src_idx, HOST_WIDE_INT offset_delta)
{
  bool pre_existing = dest_plats->aggs != NULL;
  struct ipcp_agg_lattice **dst_aglat;
  bool ret = false;

  /* A by-reference mismatch already sends everything to bottom.  */
  if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
    return true;
  if (src_plats->aggs_bottom)
    return set_agg_lats_contain_variable (dest_plats);
  if (src_plats->aggs_contain_variable)
    ret |= set_agg_lats_contain_variable (dest_plats);
  dst_aglat = &dest_plats->aggs;

  for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
       src_aglat;
       src_aglat = src_aglat->next)
    {
      HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;

      /* Items that would land before the start of the destination aggregate
	 are simply dropped.  */
      if (new_offset < 0)
	continue;
      if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
			       &dst_aglat, pre_existing, &ret))
	{
	  struct ipcp_agg_lattice *new_al = *dst_aglat;

	  dst_aglat = &(*dst_aglat)->next;
	  if (src_aglat->bottom)
	    {
	      ret |= new_al->set_contains_variable ();
	      continue;
	    }
	  if (src_aglat->contains_variable)
	    ret |= new_al->set_contains_variable ();
	  /* Record each source value with its provenance (edge, source value,
	     parameter index and original offset).  */
	  for (ipcp_value<tree> *val = src_aglat->values;
	       val;
	       val = val->next)
	    ret |= new_al->add_value (val->value, cs, val, src_idx,
				      src_aglat->offset);
	}
      else if (dest_plats->aggs_bottom)
	return true;
    }
  /* Whatever remains past the last merged item becomes variable.  */
  ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
  return ret;
}

/* Determine whether there is anything to propagate FROM SRC_PLATS through a
   pass-through JFUNC and if so, whether it conforms to the rules about
   propagating values passed by reference.  */

static bool
agg_pass_through_permissible_p (struct ipcp_param_lattices *src_plats,
				struct ipa_jump_func *jfunc)
{
  return src_plats->aggs
	 && (!src_plats->aggs_by_ref
	     || ipa_get_jf_pass_through_agg_preserved (jfunc));
}

/* Propagate aggregate values across jump function JFUNC that is associated
   with edge CS and put the values into DEST_PLATS.  */

static bool
propagate_aggs_across_jump_function (struct cgraph_edge *cs,
				     struct ipa_jump_func *jfunc,
				     struct ipcp_param_lattices *dest_plats)
{
  bool ret = false;

  if (dest_plats->aggs_bottom)
    return false;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    {
      /* Simple pass-through: merge the caller parameter's own aggregate
	 lattices, if the by-reference rules permit it.  */
      struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      struct ipcp_param_lattices *src_plats;

      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (agg_pass_through_permissible_p (src_plats, jfunc))
	{
	  /* Currently we do not produce clobber aggregate jump
	     functions, replace with merging when we do.  */
	  gcc_assert (!jfunc->agg.items);
	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
					   src_idx, 0);
	}
      else
	ret |= set_agg_lats_contain_variable (dest_plats);
    }
  else if (jfunc->type == IPA_JF_ANCESTOR
	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
      /* Ancestor jump function: merge with the ancestor offset subtracted.  */
      struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
      struct ipcp_param_lattices *src_plats;

      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (src_plats->aggs && src_plats->aggs_by_ref)
	{
	  /* Currently we do not produce clobber aggregate jump
	     functions, replace with merging when we do.  */
	  gcc_assert (!jfunc->agg.items);
	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
					   ipa_get_jf_ancestor_offset (jfunc));
	}
      else if (!src_plats->aggs_by_ref)
	ret |= set_agg_lats_to_bottom (dest_plats);
      else
	ret |= set_agg_lats_contain_variable (dest_plats);
    }
  else if (jfunc->agg.items)
    {
      /* Explicit aggregate items carried by the jump function itself.  */
      bool pre_existing = dest_plats->aggs != NULL;
      struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
      struct ipa_agg_jf_item *item;
      int i;

      if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
	return true;

      FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
	{
	  HOST_WIDE_INT val_size;

	  if (item->offset < 0)
	    continue;
	  gcc_checking_assert (is_gimple_ip_invariant (item->value));
	  val_size = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (item->value)));

	  if (merge_agg_lats_step (dest_plats, item->offset, val_size,
				   &aglat, pre_existing, &ret))
	    {
	      ret |= (*aglat)->add_value (item->value, cs, NULL, 0, 0);
	      aglat = &(*aglat)->next;
	    }
	  else if (dest_plats->aggs_bottom)
	    return true;
	}

      ret |= set_chain_of_aglats_contains_variable (*aglat);
    }
  else
    ret |= set_agg_lats_contain_variable (dest_plats);

  return ret;
}

/* Return true if on the way from CS->caller to the final (non-alias and
   non-thunk) destination, the call passes through a thunk.  */

static bool
call_passes_through_thunk_p (cgraph_edge *cs)
{
  cgraph_node *alias_or_thunk = cs->callee;
  while (alias_or_thunk->alias)
    alias_or_thunk = alias_or_thunk->get_alias_target ();
  return alias_or_thunk->thunk.thunk_p;
}

/* Propagate constants from the caller to the callee of CS.  Return true if
   any lattice of the callee changed.  */

static bool
propagate_constants_across_call (struct cgraph_edge *cs)
{
  struct ipa_node_params *callee_info;
  enum availability availability;
  cgraph_node *callee;
  struct ipa_edge_args *args;
  bool ret = false;
  int i, args_count, parms_count;

  callee = cs->callee->function_symbol (&availability);
  if (!callee->definition)
    return false;
  gcc_checking_assert (callee->has_gimple_body_p ());
  callee_info = IPA_NODE_REF (callee);

  args = IPA_EDGE_REF (cs);
  args_count = ipa_get_cs_argument_count (args);
  parms_count = ipa_get_param_count (callee_info);
  if (parms_count == 0)
    return false;

  /* If this call goes through a thunk we must not propagate to the first (0th)
     parameter.  However, we might need to uncover a thunk from below a series
     of aliases first.  */
  if (call_passes_through_thunk_p (cs))
    {
      ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
							       0));
      i = 1;
    }
  else
    i = 0;

  for (; (i < args_count) && (i < parms_count); i++)
    {
      struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
      struct ipcp_param_lattices *dest_plats;
      tree param_type = ipa_get_type (callee_info, i);

      dest_plats = ipa_get_parm_lattices (callee_info, i);
      if (availability == AVAIL_INTERPOSABLE)
	/* An interposable callee may be replaced at link time, so nothing
	   can be assumed about its incoming values.  */
	ret |= set_all_contains_variable (dest_plats);
      else
	{
	  ret |= propagate_scalar_across_jump_function (cs, jump_func,
							&dest_plats->itself,
							param_type);
	  ret |= propagate_context_across_jump_function (cs, jump_func, i,
							 &dest_plats->ctxlat);
	  ret
	    |= propagate_bits_across_jump_function (cs, i, jump_func,
						    &dest_plats->bits_lattice);
	  ret |= propagate_aggs_across_jump_function (cs, jump_func,
						      dest_plats);
	  if (opt_for_fn (callee->decl, flag_ipa_vrp))
	    ret |= propagate_vr_across_jump_function (cs, jump_func,
						      dest_plats, param_type);
	  else
	    ret |= dest_plats->m_value_range.set_to_bottom ();
	}
    }
  /* Parameters for which no argument was passed become variable.  */
  for (; i < parms_count; i++)
    ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));

  return ret;
}

/* If an indirect edge IE can be turned into a direct one based on KNOWN_CSTS,
   KNOWN_CONTEXTS, KNOWN_AGGS or AGG_REPS return the destination.  The latter
   three can be NULL.  If AGG_REPS is not NULL, KNOWN_AGGS is ignored.
 */

static tree
ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
				vec<tree> known_csts,
				vec<ipa_polymorphic_call_context> known_contexts,
				vec<ipa_agg_jump_function_p> known_aggs,
				struct ipa_agg_replacement_value *agg_reps,
				bool *speculative)
{
  int param_index = ie->indirect_info->param_index;
  HOST_WIDE_INT anc_offset;
  tree t;
  tree target = NULL;

  *speculative = false;

  /* Without a known constant for the parameter the call goes through,
     nothing can be determined.  */
  if (param_index == -1
      || known_csts.length () <= (unsigned int) param_index)
    return NULL_TREE;

  if (!ie->indirect_info->polymorphic)
    {
      tree t;

      if (ie->indirect_info->agg_contents)
	{
	  /* The callee address is loaded from an aggregate; first look for a
	     matching aggregate replacement, then fall back to known
	     aggregate jump-function values.  */
	  t = NULL;
	  if (agg_reps && ie->indirect_info->guaranteed_unmodified)
	    {
	      while (agg_reps)
		{
		  if (agg_reps->index == param_index
		      && agg_reps->offset == ie->indirect_info->offset
		      && agg_reps->by_ref == ie->indirect_info->by_ref)
		    {
		      t = agg_reps->value;
		      break;
		    }
		  agg_reps = agg_reps->next;
		}
	    }
	  if (!t)
	    {
	      struct ipa_agg_jump_function *agg;
	      if (known_aggs.length () > (unsigned int) param_index)
		agg = known_aggs[param_index];
	      else
		agg = NULL;
	      bool from_global_constant;
	      t = ipa_find_agg_cst_for_param (agg, known_csts[param_index],
					      ie->indirect_info->offset,
					      ie->indirect_info->by_ref,
					      &from_global_constant);
	      /* A value that may have been modified before the call is only
		 usable if it comes from a global constant.  */
	      if (t
		  && !from_global_constant
		  && !ie->indirect_info->guaranteed_unmodified)
		t = NULL_TREE;
	    }
	}
      else
	t = known_csts[param_index];

      /* Only a direct address of a function is a usable target.  */
      if (t
	  && TREE_CODE (t) == ADDR_EXPR
	  && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
	return TREE_OPERAND (t, 0);
      else
	return NULL_TREE;
    }

  /* The rest of the function handles polymorphic (virtual) calls.  */
  if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
    return NULL_TREE;

  gcc_assert (!ie->indirect_info->agg_contents);
  anc_offset = ie->indirect_info->offset;

  t = NULL;

  /* Try to work out value of virtual table pointer value in replacements.  */
  if (!t && agg_reps && !ie->indirect_info->by_ref)
    {
      while (agg_reps)
	{
	  if (agg_reps->index == param_index
	      && agg_reps->offset == ie->indirect_info->offset
	      && agg_reps->by_ref)
	    {
	      t = agg_reps->value;
	      break;
	    }
	  agg_reps = agg_reps->next;
	}
    }

  /* Try to work out value of virtual table pointer value in known
     aggregate values.  */
  if (!t && known_aggs.length () > (unsigned int) param_index
      && !ie->indirect_info->by_ref)
    {
      struct ipa_agg_jump_function *agg;
      agg = known_aggs[param_index];
      t = ipa_find_agg_cst_for_param (agg, known_csts[param_index],
				      ie->indirect_info->offset, true);
    }

  /* If we found the virtual table pointer, lookup the target.  */
  if (t)
    {
      tree vtable;
      unsigned HOST_WIDE_INT offset;
      if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
	{
	  bool can_refer;
	  target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
						      vtable, offset, &can_refer);
	  if (can_refer)
	    {
	      if (!target
		  || (TREE_CODE (TREE_TYPE (target)) == FUNCTION_TYPE
		      && DECL_FUNCTION_CODE (target) == BUILT_IN_UNREACHABLE)
		  || !possible_polymorphic_call_target_p
			 (ie, cgraph_node::get (target)))
		{
		  /* Do not speculate builtin_unreachable, it is stupid!  */
		  if (ie->indirect_info->vptr_changed)
		    return NULL;
		  target = ipa_impossible_devirt_target (ie, target);
		}
	      *speculative = ie->indirect_info->vptr_changed;
	      if (!*speculative)
		return target;
	    }
	}
    }

  /* Do we know the constant value of pointer?  */
  if (!t)
    t = known_csts[param_index];

  gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);

  /* Build a polymorphic call context from whatever is known, combining a
     known context with a known constant pointer when both exist.  */
  ipa_polymorphic_call_context context;
  if (known_contexts.length () > (unsigned int) param_index)
    {
      context = known_contexts[param_index];
      context.offset_by (anc_offset);
      if (ie->indirect_info->vptr_changed)
	context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
					      ie->indirect_info->otr_type);
      if (t)
	{
	  ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
	    (t, ie->indirect_info->otr_type, anc_offset);
	  if (!ctx2.useless_p ())
	    context.combine_with (ctx2, ie->indirect_info->otr_type);
	}
    }
  else if (t)
    {
      context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
					      anc_offset);
      if (ie->indirect_info->vptr_changed)
	context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
					      ie->indirect_info->otr_type);
    }
  else
    return NULL_TREE;

  vec <cgraph_node *>targets;
  bool final;

  targets = possible_polymorphic_call_targets
    (ie->indirect_info->otr_type,
     ie->indirect_info->otr_token,
     context, &final);
  if (!final || targets.length () > 1)
    {
      /* The target set is not conclusive; possibly fall back to speculative
	 devirtualization.  */
      struct cgraph_node *node;
      if (*speculative)
	return target;
      if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
	  || ie->speculative || !ie->maybe_hot_p ())
	return NULL;
      node = try_speculative_devirtualization (ie->indirect_info->otr_type,
					       ie->indirect_info->otr_token,
					       context);
      if (node)
	{
	  *speculative = true;
	  target = node->decl;
	}
      else
	return NULL;
    }
  else
    {
      *speculative = false;
      if (targets.length () == 1)
	target = targets[0]->decl;
      else
	target = ipa_impossible_devirt_target (ie, NULL_TREE);
    }

  if (target && !possible_polymorphic_call_target_p (ie,
						     cgraph_node::get
						     (target)))
    {
      if (*speculative)
	return NULL;
      target = ipa_impossible_devirt_target (ie, target);
    }

  return target;
}


/* If an indirect edge IE can be turned into a direct one based on KNOWN_CSTS,
   KNOWN_CONTEXTS (which can be vNULL) or KNOWN_AGGS (which also can be vNULL)
   return the destination.  */

tree
ipa_get_indirect_edge_target (struct cgraph_edge *ie,
			      vec<tree> known_csts,
			      vec<ipa_polymorphic_call_context> known_contexts,
			      vec<ipa_agg_jump_function_p> known_aggs,
			      bool *speculative)
{
  return ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
					 known_aggs, NULL, speculative);
}

/* Calculate devirtualization time bonus for NODE, assuming we know KNOWN_CSTS
   and KNOWN_CONTEXTS.  */

static int
devirtualization_time_bonus (struct cgraph_node *node,
			     vec<tree> known_csts,
			     vec<ipa_polymorphic_call_context> known_contexts,
			     vec<ipa_agg_jump_function_p> known_aggs)
{
  struct cgraph_edge *ie;
  int res = 0;

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    {
      struct cgraph_node *callee;
      struct ipa_fn_summary *isummary;
      enum availability avail;
      tree target;
      bool speculative;

      target = ipa_get_indirect_edge_target (ie, known_csts, known_contexts,
					     known_aggs, &speculative);
      if (!target)
	continue;

      /* Only bare minimum benefit for clearly un-inlineable targets.  */
      res += 1;
      callee = cgraph_node::get (target);
      if (!callee || !callee->definition)
	continue;
      callee = callee->function_symbol (&avail);
      if (avail < AVAIL_AVAILABLE)
	continue;
      isummary = ipa_fn_summaries->get (callee);
      if (!isummary || !isummary->inlinable)
	continue;

      /* FIXME: The values below need re-considering and perhaps also
	 integrating into the cost metrics, at least in some very basic way.  */
      if (isummary->size <= MAX_INLINE_INSNS_AUTO / 4)
	res += 31 / ((int)speculative + 1);
      else if (isummary->size <= MAX_INLINE_INSNS_AUTO / 2)
	res += 15 / ((int)speculative + 1);
      else if (isummary->size <= MAX_INLINE_INSNS_AUTO
	       || DECL_DECLARED_INLINE_P (callee->decl))
	res += 7 / ((int)speculative + 1);
    }

  return res;
}

/* Return time bonus incurred because of HINTS.  */

static int
hint_time_bonus (ipa_hints hints)
{
  int result = 0;
  if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
    result += PARAM_VALUE (PARAM_IPA_CP_LOOP_HINT_BONUS);
  if (hints & INLINE_HINT_array_index)
    result += PARAM_VALUE (PARAM_IPA_CP_ARRAY_INDEX_HINT_BONUS);
  return result;
}

/* If there is a reason to penalize the function described by INFO in the
   cloning goodness evaluation, do so.  */

static inline int64_t
incorporate_penalties (ipa_node_params *info, int64_t evaluation)
{
  /* Recursion (being part of an SCC) makes cloning less profitable.  */
  if (info->node_within_scc)
    evaluation = (evaluation
		  * (100 - PARAM_VALUE (PARAM_IPA_CP_RECURSION_PENALTY))) / 100;

  /* So does calling a function only through a single call site.  */
  if (info->node_calling_single_call)
    evaluation = (evaluation
		  * (100 - PARAM_VALUE (PARAM_IPA_CP_SINGLE_CALL_PENALTY)))
      / 100;

  return evaluation;
}

/* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
   and SIZE_COST and with the sum of frequencies of incoming edges to the
   potential new clone in FREQ_SUM.
 */

static bool
good_cloning_opportunity_p (struct cgraph_node *node, int time_benefit,
			    int freq_sum, profile_count count_sum, int size_cost)
{
  if (time_benefit == 0
      || !opt_for_fn (node->decl, flag_ipa_cp_clone)
      || node->optimize_for_size_p ())
    return false;

  gcc_assert (size_cost > 0);

  struct ipa_node_params *info = IPA_NODE_REF (node);
  if (max_count > profile_count::zero ())
    {
      /* Profile counts are available; weigh the time benefit by the share
	 of the hottest count this clone would receive.  */
      int factor = RDIV (count_sum.probability_in
			 (max_count).to_reg_br_prob_base ()
			 * 1000, REG_BR_PROB_BASE);
      int64_t evaluation = (((int64_t) time_benefit * factor)
			    / size_cost);
      evaluation = incorporate_penalties (info, evaluation);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " good_cloning_opportunity_p (time: %i, "
		   "size: %i, count_sum: ", time_benefit, size_cost);
	  count_sum.dump (dump_file);
	  fprintf (dump_file, "%s%s) -> evaluation: " "%" PRId64
		   ", threshold: %i\n",
		   info->node_within_scc ? ", scc" : "",
		   info->node_calling_single_call ? ", single_call" : "",
		   evaluation, PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD));
	}

      return evaluation >= PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD);
    }
  else
    {
      /* No profile; fall back to statically estimated frequencies.  */
      int64_t evaluation = (((int64_t) time_benefit * freq_sum)
			    / size_cost);
      evaluation = incorporate_penalties (info, evaluation);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " good_cloning_opportunity_p (time: %i, "
		 "size: %i, freq_sum: %i%s%s) -> evaluation: "
		 "%" PRId64 ", threshold: %i\n",
		 time_benefit, size_cost, freq_sum,
		 info->node_within_scc ? ", scc" : "",
		 info->node_calling_single_call ? ", single_call" : "",
		 evaluation, PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD));

      return evaluation >= PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD);
    }
}

/* Return all context independent values from aggregate lattices in PLATS in a
   vector.  Return NULL if there are none.  */

static vec<ipa_agg_jf_item, va_gc> *
context_independent_aggregate_values (struct ipcp_param_lattices *plats)
{
  vec<ipa_agg_jf_item, va_gc> *res = NULL;

  if (plats->aggs_bottom
      || plats->aggs_contain_variable
      || plats->aggs_count == 0)
    return NULL;

  for (struct ipcp_agg_lattice *aglat = plats->aggs;
       aglat;
       aglat = aglat->next)
    if (aglat->is_single_const ())
      {
	struct ipa_agg_jf_item item;
	item.offset = aglat->offset;
	item.value = aglat->values->value;
	vec_safe_push (res, item);
      }
  return res;
}

/* Allocate KNOWN_CSTS, KNOWN_CONTEXTS and, if non-NULL, KNOWN_AGGS and
   populate them with values of parameters that are known independent of the
   context.  INFO describes the function.  If REMOVABLE_PARAMS_COST is
   non-NULL, the movement cost of all removable parameters will be stored in
   it.
 */

static bool
gather_context_independent_values (struct ipa_node_params *info,
				   vec<tree> *known_csts,
				   vec<ipa_polymorphic_call_context>
				   *known_contexts,
				   vec<ipa_agg_jump_function> *known_aggs,
				   int *removable_params_cost)
{
  int i, count = ipa_get_param_count (info);
  bool ret = false;

  known_csts->create (0);
  known_contexts->create (0);
  known_csts->safe_grow_cleared (count);
  known_contexts->safe_grow_cleared (count);
  if (known_aggs)
    {
      known_aggs->create (0);
      known_aggs->safe_grow_cleared (count);
    }

  if (removable_params_cost)
    *removable_params_cost = 0;

  for (i = 0; i < count; i++)
    {
      struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;

      if (lat->is_single_const ())
	{
	  /* Exactly one constant is known for this parameter in every
	     context; record it and its removal benefit.  */
	  ipcp_value<tree> *val = lat->values;
	  gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
	  (*known_csts)[i] = val->value;
	  if (removable_params_cost)
	    *removable_params_cost
	      += estimate_move_cost (TREE_TYPE (val->value), false);
	  ret = true;
	}
      else if (removable_params_cost
	       && !ipa_is_param_used (info, i))
	*removable_params_cost
	  += ipa_get_param_move_cost (info, i);

      if (!ipa_is_param_used (info, i))
	continue;

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      /* Do not account known context as reason for cloning.  We can see
	 if it permits devirtualization.  */
      if (ctxlat->is_single_const ())
	(*known_contexts)[i] = ctxlat->values->value;

      if (known_aggs)
	{
	  vec<ipa_agg_jf_item, va_gc> *agg_items;
	  struct ipa_agg_jump_function *ajf;

	  agg_items = context_independent_aggregate_values (plats);
	  ajf = &(*known_aggs)[i];
	  ajf->items = agg_items;
	  ajf->by_ref = plats->aggs_by_ref;
	  ret |= agg_items != NULL;
	}
    }

  return ret;
}

/* The current interface in ipa-inline-analysis requires a pointer vector.
   Create it.

   FIXME: That interface should be re-worked, this is slightly silly.  Still,
   I'd like to discuss how to change it first and this demonstrates the
   issue.  */

static vec<ipa_agg_jump_function_p>
agg_jmp_p_vec_for_t_vec (vec<ipa_agg_jump_function> known_aggs)
{
  vec<ipa_agg_jump_function_p> ret;
  struct ipa_agg_jump_function *ajf;
  int i;

  ret.create (known_aggs.length ());
  FOR_EACH_VEC_ELT (known_aggs, i, ajf)
    ret.quick_push (ajf);
  return ret;
}

/* Perform time and size measurement of NODE with the context given in
   KNOWN_CSTS, KNOWN_CONTEXTS and KNOWN_AGGS, calculate the benefit and cost
   given BASE_TIME of the node without specialization, REMOVABLE_PARAMS_COST of
   all context-independent removable parameters and EST_MOVE_COST of estimated
   movement of the considered parameter and store it into VAL.  */

static void
perform_estimation_of_a_value (cgraph_node *node, vec<tree> known_csts,
			       vec<ipa_polymorphic_call_context> known_contexts,
			       vec<ipa_agg_jump_function_p> known_aggs_ptrs,
			       int removable_params_cost,
			       int est_move_cost, ipcp_value_base *val)
{
  int size, time_benefit;
  sreal time, base_time;
  ipa_hints hints;

  estimate_ipcp_clone_size_and_time (node, known_csts, known_contexts,
				     known_aggs_ptrs, &size, &time,
				     &base_time, &hints);
  /* The benefit is the specialization's saving over the unspecialized time,
     capped so it fits the int fields below.  */
  base_time -= time;
  if (base_time > 65535)
    base_time = 65535;

  /* Extern inline functions have no cloning local time benefits because they
     will be inlined anyway.  The only reason to clone them is if it enables
     optimization in any of the functions they call.  */
  if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
    time_benefit = 0;
  else
    time_benefit = base_time.to_int ()
      + devirtualization_time_bonus (node, known_csts, known_contexts,
				     known_aggs_ptrs)
      + hint_time_bonus (hints)
      + removable_params_cost + est_move_cost;

  gcc_checking_assert (size >= 0);
  /* The inliner-heuristics based estimates may think that in certain
     contexts some functions do not have any size at all but we want
     all specializations to have at least a tiny cost, not least not to
     divide by zero.  */
  if (size == 0)
    size = 1;

  val->local_time_benefit = time_benefit;
  val->local_size_cost = size;
}

/* Iterate over known values of parameters of NODE and estimate the local
   effects in terms of time and size they have.  */
static void
estimate_local_effects (struct cgraph_node *node)
{
  struct ipa_node_params *info = IPA_NODE_REF (node);
  int i, count = ipa_get_param_count (info);
  vec<tree> known_csts;
  vec<ipa_polymorphic_call_context> known_contexts;
  vec<ipa_agg_jump_function> known_aggs;
  vec<ipa_agg_jump_function_p> known_aggs_ptrs;
  bool always_const;
  int removable_params_cost;

  if (!count || !ipcp_versionable_function_p (node))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());

  always_const = gather_context_independent_values (info, &known_csts,
						    &known_contexts, &known_aggs,
						    &removable_params_cost);
  known_aggs_ptrs = agg_jmp_p_vec_for_t_vec (known_aggs);
  int devirt_bonus = devirtualization_time_bonus (node, known_csts,
						  known_contexts,
						  known_aggs_ptrs);

  /* First, consider specializing for all contexts at once, which is
     worthwhile if some values are known everywhere or parameters can be
     removed.  */
  if (always_const || devirt_bonus
      || (removable_params_cost && node->local.can_change_signature))
    {
      struct caller_statistics stats;
      ipa_hints hints;
      sreal time, base_time;
      int size;

      init_caller_stats (&stats);
      node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
						false);
      estimate_ipcp_clone_size_and_time (node, known_csts, known_contexts,
					 known_aggs_ptrs, &size, &time,
					 &base_time, &hints);
      time -= devirt_bonus;
      time -= hint_time_bonus (hints);
      time -= removable_params_cost;
      size -= stats.n_calls * removable_params_cost;

      if (dump_file)
	fprintf (dump_file, " - context independent values, size: %i, "
		 "time_benefit: %f\n", size, (base_time - time).to_double ());

      if (size <= 0 || node->local.local)
	{
	  info->do_clone_for_all_contexts = true;

	  if (dump_file)
	    fprintf (dump_file, " Decided to specialize for all "
		     "known contexts, code not going to grow.\n");
	}
      else if (good_cloning_opportunity_p (node,
					   MIN ((base_time - time).to_int (),
						65536),
					   stats.freq_sum, stats.count_sum,
					   size))
	{
	  if (size + overall_size <= max_new_size)
	    {
	      info->do_clone_for_all_contexts = true;
	      overall_size += size;

	      if (dump_file)
		fprintf (dump_file, " Decided to specialize for all "
			 "known contexts, growth deemed beneficial.\n");
	    }
	  else if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " Not cloning for all contexts because "
		     "max_new_size would be reached with %li.\n",
		     size + overall_size);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " Not cloning for all contexts because "
		 "!good_cloning_opportunity_p.\n");

    }

  /* Second, estimate the effect of each individual known scalar value,
     temporarily plugging it into KNOWN_CSTS.  */
  for (i = 0; i < count; i++)
    {
      struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_value<tree> *val;

      if (lat->bottom
	  || !lat->values
	  || known_csts[i])
	continue;

      for (val = lat->values; val; val = val->next)
	{
	  gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
	  known_csts[i] = val->value;

	  int emc = estimate_move_cost (TREE_TYPE (val->value), true);
	  perform_estimation_of_a_value (node, known_csts, known_contexts,
					 known_aggs_ptrs,
					 removable_params_cost, emc, val);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " - estimates for value ");
	      print_ipcp_constant_value (dump_file, val->value);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, ": time_benefit: %i, size: %i\n",
		       val->local_time_benefit, val->local_size_cost);
	    }
	}
      known_csts[i] = NULL_TREE;
    }

  /* Third, the same for each known polymorphic context, but only for
     parameters actually used in a virtual call.  */
  for (i = 0; i < count; i++)
    {
      struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);

      if (!plats->virt_call)
	continue;

      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      ipcp_value<ipa_polymorphic_call_context> *val;

      if (ctxlat->bottom
	  || !ctxlat->values
	  || !known_contexts[i].useless_p ())
	continue;

      for (val = ctxlat->values; val; val = val->next)
	{
	  known_contexts[i] = val->value;
	  perform_estimation_of_a_value (node, known_csts, known_contexts,
					 known_aggs_ptrs,
					 removable_params_cost, 0, val);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " - estimates for polymorphic context ");
	      print_ipcp_constant_value (dump_file, val->value);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, ": time_benefit: %i, size: %i\n",
		       val->local_time_benefit, val->local_size_cost);
	    }
	}
      known_contexts[i] = ipa_polymorphic_call_context ();
    }

  /* Fourth, estimate each known aggregate value, temporarily pushing it
     into the corresponding known aggregate jump function.  */
  for (i = 0; i < count; i++)
    {
      struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      struct ipa_agg_jump_function *ajf;
      struct ipcp_agg_lattice *aglat;

      if (plats->aggs_bottom || !plats->aggs)
	continue;

      ajf = &known_aggs[i];
      for (aglat = plats->aggs; aglat; aglat = aglat->next)
	{
	  ipcp_value<tree> *val;
	  if (aglat->bottom || !aglat->values
	      /* If the following is true, the one value is in known_aggs.  */
	      || (!plats->aggs_contain_variable
		  && aglat->is_single_const ()))
	    continue;

	  for (val = aglat->values; val; val = val->next)
	    {
	      struct ipa_agg_jf_item item;

	      item.offset = aglat->offset;
	      item.value = val->value;
	      vec_safe_push (ajf->items, item);

	      perform_estimation_of_a_value (node, known_csts, known_contexts,
					     known_aggs_ptrs,
					     removable_params_cost, 0, val);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " - estimates for value ");
		  print_ipcp_constant_value (dump_file, val->value);
		  fprintf (dump_file, " for ");
		  ipa_dump_param (dump_file, info, i);
		  fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
			   "]: time_benefit: %i, size: %i\n",
			   plats->aggs_by_ref ? "ref " : "",
			   aglat->offset,
			   val->local_time_benefit, val->local_size_cost);
		}

	      ajf->items->pop ();
	    }
	}
    }

  for (i = 0; i < count; i++)
    vec_free (known_aggs[i].items);

  known_csts.release ();
  known_contexts.release ();
  known_aggs.release ();
  known_aggs_ptrs.release ();
}


/* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
   topological sort of values.  */
*/

template <typename valtype>
void
value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
{
  ipcp_value_source<valtype> *src;

  /* A non-zero DFS number means the value has already been visited.  */
  if (cur_val->dfs)
    return;

  /* This is Tarjan's SCC algorithm over the value dependency graph: assign
     the DFS number and the initial low-link, and push the value on the
     walk stack.  */
  dfs_counter++;
  cur_val->dfs = dfs_counter;
  cur_val->low_link = dfs_counter;

  cur_val->topo_next = stack;
  stack = cur_val;
  cur_val->on_stack = true;

  /* Recurse into the values this one depends on (its sources) and update
     the low-link accordingly.  */
  for (src = cur_val->sources; src; src = src->next)
    if (src->val)
      {
	if (src->val->dfs == 0)
	  {
	    add_val (src->val);
	    if (src->val->low_link < cur_val->low_link)
	      cur_val->low_link = src->val->low_link;
	  }
	else if (src->val->on_stack
		 && src->val->dfs < cur_val->low_link)
	  cur_val->low_link = src->val->dfs;
      }

  /* CUR_VAL is the root of an SCC: pop the whole component off the walk
     stack, chain its members via scc_next, and prepend the component to
     the topological order in values_topo.  */
  if (cur_val->dfs == cur_val->low_link)
    {
      ipcp_value<valtype> *v, *scc_list = NULL;

      do
	{
	  v = stack;
	  stack = v->topo_next;
	  v->on_stack = false;

	  v->scc_next = scc_list;
	  scc_list = v;
	}
      while (v != cur_val);

      cur_val->topo_next = values_topo;
      values_topo = cur_val;
    }
}

/* Add all values in lattices associated with NODE to the topological sort if
   they are not there yet.
*/

static void
add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
{
  struct ipa_node_params *info = IPA_NODE_REF (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      struct ipcp_agg_lattice *aglat;

      /* Scalar constant values of the parameter itself.  */
      if (!lat->bottom)
	{
	  ipcp_value<tree> *val;
	  for (val = lat->values; val; val = val->next)
	    topo->constants.add_val (val);
	}

      /* Values of aggregate parts passed by reference or value.  */
      if (!plats->aggs_bottom)
	for (aglat = plats->aggs; aglat; aglat = aglat->next)
	  if (!aglat->bottom)
	    {
	      ipcp_value<tree> *val;
	      for (val = aglat->values; val; val = val->next)
		topo->constants.add_val (val);
	    }

      /* Polymorphic call contexts go into a separate topological sort.  */
      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      if (!ctxlat->bottom)
	{
	  ipcp_value<ipa_polymorphic_call_context> *ctxval;
	  for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
	    topo->contexts.add_val (ctxval);
	}
    }
}

/* One pass of constants propagation along the call graph edges, from callers
   to callees (requires topological ordering in TOPO), iterate over strongly
   connected components.  */

static void
propagate_constants_topo (struct ipa_topo_info *topo)
{
  int i;

  /* Process the topological order backwards so that callers are visited
     before their callees.  */
  for (i = topo->nnodes - 1; i >= 0; i--)
    {
      unsigned j;
      struct cgraph_node *v, *node = topo->order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);

      /* First, iteratively propagate within the strongly connected component
	 until all lattices stabilize.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	if (v->has_gimple_body_p ())
	  push_node_to_stack (topo, v);

      /* Worklist loop: a callee whose lattices changed is pushed back on the
	 stack so it is re-examined until a fixed point is reached.  */
      v = pop_node_from_stack (topo);
      while (v)
	{
	  struct cgraph_edge *cs;

	  for (cs = v->callees; cs; cs = cs->next_callee)
	    if (ipa_edge_within_scc (cs))
	      {
		IPA_NODE_REF (v)->node_within_scc = true;
		if (propagate_constants_across_call (cs))
		  push_node_to_stack (topo, cs->callee->function_symbol ());
	      }
	  v = pop_node_from_stack (topo);
	}

      /* Afterwards, propagate along edges leading out of the SCC, calculate
	 the local effects of the discovered constants and add all valid
	 values to their topological sort.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	if (v->has_gimple_body_p ())
	  {
	    struct cgraph_edge *cs;

	    estimate_local_effects (v);
	    add_all_node_vals_to_toposort (v, topo);
	    for (cs = v->callees; cs; cs = cs->next_callee)
	      if (!ipa_edge_within_scc (cs))
		propagate_constants_across_call (cs);
	  }
      cycle_nodes.release ();
    }
}


/* Return the sum of A and B if none of them is bigger than INT_MAX/2,
   otherwise return the bigger one.  This saturating addition avoids signed
   overflow when accumulating time/size estimates.  */

static int
safe_add (int a, int b)
{
  if (a > INT_MAX/2 || b > INT_MAX/2)
    return a > b ? a : b;
  else
    return a + b;
}


/* Propagate the estimated effects of individual values along the topological
   order from the dependent values to those they depend on.
*/

template <typename valtype>
void
value_topo_info<valtype>::propagate_effects ()
{
  /* Walk the SCCs in topological order.  For each SCC, first total up the
     local and already-propagated time benefits and size costs of all of its
     member values, then credit those totals to every value the members
     depend on, provided the corresponding call graph edge is maybe hot.  */
  for (ipcp_value<valtype> *scc = values_topo; scc; scc = scc->topo_next)
    {
      int scc_time = 0;
      int scc_size = 0;

      /* Accumulate the combined effect of the whole SCC (members are
	 chained through scc_next).  */
      for (ipcp_value<valtype> *v = scc; v; v = v->scc_next)
	{
	  scc_time = safe_add (scc_time,
			       v->local_time_benefit + v->prop_time_benefit);
	  scc_size = safe_add (scc_size,
			       v->local_size_cost + v->prop_size_cost);
	}

      /* Hand the totals down to the source values feeding this SCC.  */
      for (ipcp_value<valtype> *v = scc; v; v = v->scc_next)
	for (ipcp_value_source<valtype> *s = v->sources; s; s = s->next)
	  if (s->val && s->cs->maybe_hot_p ())
	    {
	      s->val->prop_time_benefit
		= safe_add (scc_time, s->val->prop_time_benefit);
	      s->val->prop_size_cost
		= safe_add (scc_size, s->val->prop_size_cost);
	    }
    }
}


/* Propagate constants, polymorphic contexts and their effects from the
   summaries interprocedurally.
*/

static void
ipcp_propagate_stage (struct ipa_topo_info *topo)
{
  struct cgraph_node *node;

  if (dump_file)
    fprintf (dump_file, "\n Propagating constants:\n\n");

  max_count = profile_count::uninitialized ();

  /* Set up per-node lattices and gather the unit-wide totals (overall size,
     maximum IPA profile count) the cloning heuristics depend on.  */
  FOR_EACH_DEFINED_FUNCTION (node)
  {
    struct ipa_node_params *info = IPA_NODE_REF (node);

    determine_versionability (node, info);
    if (node->has_gimple_body_p ())
      {
	/* XCNEWVEC zero-initializes, so every lattice starts out in its
	   default state before initialize_node_lattices adjusts it.  */
	info->lattices = XCNEWVEC (struct ipcp_param_lattices,
				   ipa_get_param_count (info));
	initialize_node_lattices (node);
      }
    ipa_fn_summary *s = ipa_fn_summaries->get (node);
    if (node->definition && !node->alias && s != NULL)
      overall_size += s->self_size;
    max_count = max_count.max (node->count.ipa ());
  }

  /* Compute the allowed unit growth: start from the current size (but at
     least PARAM_LARGE_UNIT_INSNS) and allow PARAM_IPCP_UNIT_GROWTH percent
     on top, plus one so the budget is never zero.  */
  max_new_size = overall_size;
  if (max_new_size < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
    max_new_size = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
  max_new_size += max_new_size * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1;

  if (dump_file)
    fprintf (dump_file, "\noverall_size: %li, max_new_size: %li\n",
	     overall_size, max_new_size);

  /* Propagate values over the call graph, then distribute the estimated
     effects along the topological order of values.  */
  propagate_constants_topo (topo);
  if (flag_checking)
    ipcp_verify_propagated_values ();
  topo->constants.propagate_effects ();
  topo->contexts.propagate_effects ();

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA lattices after all propagation:\n");
      print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
    }
}

/* Discover newly direct outgoing edges from NODE which is a new clone with
   known KNOWN_CSTS and make them direct.
*/

static void
ipcp_discover_new_direct_edges (struct cgraph_node *node,
				vec<tree> known_csts,
				vec<ipa_polymorphic_call_context>
				known_contexts,
				struct ipa_agg_replacement_value *aggvals)
{
  struct cgraph_edge *ie, *next_ie;
  bool found = false;

  for (ie = node->indirect_calls; ie; ie = next_ie)
    {
      tree target;
      bool speculative;

      /* Remember the next edge up front; making IE direct removes it from
	 the indirect_calls list.  */
      next_ie = ie->next_callee;
      target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
					       vNULL, aggvals, &speculative);
      if (target)
	{
	  bool agg_contents = ie->indirect_info->agg_contents;
	  bool polymorphic = ie->indirect_info->polymorphic;
	  int param_index = ie->indirect_info->param_index;
	  struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
								   speculative);
	  found = true;

	  if (cs && !agg_contents && !polymorphic)
	    {
	      struct ipa_node_params *info = IPA_NODE_REF (node);
	      int c = ipa_get_controlled_uses (info, param_index);
	      if (c != IPA_UNDESCRIBED_USE)
		{
		  struct ipa_ref *to_del;

		  /* One controlled use of the parameter has just been turned
		     into a direct call, so decrement the counter and, when
		     it drops to zero, also drop the reference created for
		     the clone.  */
		  c--;
		  ipa_set_controlled_uses (info, param_index, c);
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " controlled uses count of param "
			     "%i bumped down to %i\n", param_index, c);
		  if (c == 0
		      && (to_del = node->find_reference (cs->callee, NULL, 0)))
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file, " and even removing its "
				 "cloning-created reference\n");
		      to_del->remove_reference ();
		    }
		}
	    }
	}
    }
  /* Turning calls to direct calls will improve overall summary.  */
  if (found)
    ipa_update_overall_fn_summary (node);
}

class edge_clone_summary;
static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;

/* Edge clone summary.  Links all clones of a call graph edge into a doubly
   linked list.  */

struct edge_clone_summary
{
  /* Default constructor.  */
  edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}

  /* Default destructor.  Unlinks this edge from the clone chain.  */
  ~edge_clone_summary ()
  {
    if (prev_clone)
      edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
    if (next_clone)
      edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
  }

  cgraph_edge *prev_clone;
  cgraph_edge *next_clone;
};

class edge_clone_summary_t:
  public call_summary <edge_clone_summary *>
{
public:
  edge_clone_summary_t (symbol_table *symtab):
    call_summary <edge_clone_summary *> (symtab)
  {
    /* Request that the duplicate hook below runs for newly cloned edges.  */
    m_initialize_when_cloning = true;
  }

  virtual void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
			  edge_clone_summary *src_data,
			  edge_clone_summary *dst_data);
};

/* Edge duplication hook.  Inserts DST_EDGE into the clone list immediately
   after SRC_EDGE.  */

void
edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
				 edge_clone_summary *src_data,
				 edge_clone_summary *dst_data)
{
  if (src_data->next_clone)
    edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
  dst_data->prev_clone = src_edge;
  dst_data->next_clone = src_data->next_clone;
  src_data->next_clone = dst_edge;
}

/* See if NODE is a clone with a known aggregate value at a given OFFSET of a
   parameter with the given INDEX.  Return the value or NULL_TREE if there is
   none.  */

static tree
get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset,
		     int index)
{
  struct ipa_agg_replacement_value *aggval;

  aggval = ipa_get_agg_replacements_for_node (node);
  while (aggval)
    {
      if (aggval->offset == offset
	  && aggval->index == index)
	return aggval->value;
      aggval = aggval->next;
    }
  return NULL_TREE;
}

/* Return true if NODE is DEST or its clone for all contexts.
*/

static bool
same_node_or_its_all_contexts_clone_p (cgraph_node *node, cgraph_node *dest)
{
  if (node == dest)
    return true;

  struct ipa_node_params *info = IPA_NODE_REF (node);
  return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
}

/* Return true if edge CS does bring about the value described by SRC to
   DEST_VAL of node DEST or its clone for all contexts.  */

static bool
cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
			    cgraph_node *dest, ipcp_value<tree> *dest_val)
{
  struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
  enum availability availability;
  cgraph_node *real_dest = cs->callee->function_symbol (&availability);

  if (!same_node_or_its_all_contexts_clone_p (real_dest, dest)
      || availability <= AVAIL_INTERPOSABLE
      || caller_info->node_dead)
    return false;

  /* A source without a value is considered to always bring the value.  */
  if (!src->val)
    return true;

  if (caller_info->ipcp_orig_node)
    {
      /* The caller is itself an IPA-CP clone: check the value against what
	 has already been fixed for that clone (scalar or aggregate).  */
      tree t;
      if (src->offset == -1)
	t = caller_info->known_csts[src->index];
      else
	t = get_clone_agg_value (cs->caller, src->offset, src->index);
      return (t != NULL_TREE
	      && values_equal_for_ipcp_p (src->val->value, t));
    }
  else
    {
      /* At the moment we do not propagate over arithmetic jump functions in
	 SCCs, so it is safe to detect self-feeding recursive calls in this
	 way.  */
      if (src->val == dest_val)
	return true;

      /* Otherwise require the caller's lattice for the source parameter to
	 hold exactly this single constant.  */
      struct ipcp_agg_lattice *aglat;
      struct ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
								 src->index);
      if (src->offset == -1)
	return (plats->itself.is_single_const ()
		&& values_equal_for_ipcp_p (src->val->value,
					    plats->itself.values->value));
      else
	{
	  if (plats->aggs_bottom || plats->aggs_contain_variable)
	    return false;
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    if (aglat->offset == src->offset)
	      return (aglat->is_single_const ()
		      && values_equal_for_ipcp_p (src->val->value,
						  aglat->values->value));
	}
      return false;
    }
}

/* Return true if edge CS does bring about the value described by SRC to
   DST_VAL of node DEST or its clone for all contexts.  */

static bool
cgraph_edge_brings_value_p (cgraph_edge *cs,
			    ipcp_value_source<ipa_polymorphic_call_context> *src,
			    cgraph_node *dest,
			    ipcp_value<ipa_polymorphic_call_context> *)
{
  struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
  cgraph_node *real_dest = cs->callee->function_symbol ();

  if (!same_node_or_its_all_contexts_clone_p (real_dest, dest)
      || caller_info->node_dead)
    return false;
  if (!src->val)
    return true;

  /* For a caller that is itself a clone, compare against the context fixed
     for the clone; otherwise require a single-constant context lattice.  */
  if (caller_info->ipcp_orig_node)
    return (caller_info->known_contexts.length () > (unsigned) src->index)
      && values_equal_for_ipcp_p (src->val->value,
				  caller_info->known_contexts[src->index]);

  struct ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
							     src->index);
  return plats->ctxlat.is_single_const ()
    && values_equal_for_ipcp_p (src->val->value,
				plats->ctxlat.values->value);
}

/* Get the next clone in the linked list of clones of an edge.
*/ 3541 3542 static inline struct cgraph_edge * 3543 get_next_cgraph_edge_clone (struct cgraph_edge *cs) 3544 { 3545 edge_clone_summary *s = edge_clone_summaries->get (cs); 3546 return s != NULL ? s->next_clone : NULL; 3547 } 3548 3549 /* Given VAL that is intended for DEST, iterate over all its sources and if any 3550 of them is viable and hot, return true. In that case, for those that still 3551 hold, add their edge frequency and their number into *FREQUENCY and 3552 *CALLER_COUNT respectively. */ 3553 3554 template <typename valtype> 3555 static bool 3556 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest, 3557 int *freq_sum, 3558 profile_count *count_sum, int *caller_count) 3559 { 3560 ipcp_value_source<valtype> *src; 3561 int freq = 0, count = 0; 3562 profile_count cnt = profile_count::zero (); 3563 bool hot = false; 3564 bool non_self_recursive = false; 3565 3566 for (src = val->sources; src; src = src->next) 3567 { 3568 struct cgraph_edge *cs = src->cs; 3569 while (cs) 3570 { 3571 if (cgraph_edge_brings_value_p (cs, src, dest, val)) 3572 { 3573 count++; 3574 freq += cs->frequency (); 3575 if (cs->count.ipa ().initialized_p ()) 3576 cnt += cs->count.ipa (); 3577 hot |= cs->maybe_hot_p (); 3578 if (cs->caller != dest) 3579 non_self_recursive = true; 3580 } 3581 cs = get_next_cgraph_edge_clone (cs); 3582 } 3583 } 3584 3585 /* If the only edges bringing a value are self-recursive ones, do not bother 3586 evaluating it. */ 3587 if (!non_self_recursive) 3588 return false; 3589 3590 *freq_sum = freq; 3591 *count_sum = cnt; 3592 *caller_count = count; 3593 return hot; 3594 } 3595 3596 /* Return a vector of incoming edges that do bring value VAL to node DEST. It 3597 is assumed their number is known and equal to CALLER_COUNT. 
*/

template <typename valtype>
static vec<cgraph_edge *>
gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
			int caller_count)
{
  ipcp_value_source<valtype> *src;
  vec<cgraph_edge *> ret;

  /* CALLER_COUNT is exact, so the quick_push calls below cannot overflow
     the preallocated vector.  */
  ret.create (caller_count);
  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, dest, val))
	    ret.quick_push (cs);
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  return ret;
}

/* Construct a replacement map for a known VALUE for a formal parameter
   PARM_NUM.  Return it or NULL if for some reason it cannot be created.  */

static struct ipa_replace_map *
get_replacement_map (struct ipa_node_params *info, tree value, int parm_num)
{
  struct ipa_replace_map *replace_map;


  replace_map = ggc_alloc<ipa_replace_map> ();
  if (dump_file)
    {
      fprintf (dump_file, " replacing ");
      ipa_dump_param (dump_file, info, parm_num);

      fprintf (dump_file, " with const ");
      print_generic_expr (dump_file, value);
      fprintf (dump_file, "\n");
    }
  replace_map->old_tree = NULL;
  replace_map->parm_num = parm_num;
  replace_map->new_tree = value;
  replace_map->replace_p = true;
  replace_map->ref_p = false;

  return replace_map;
}

/* Dump new profiling counts of NODE's and its specialized clone's callees.  */

static void
dump_profile_updates (struct cgraph_node *orig_node,
		      struct cgraph_node *new_node)
{
  struct cgraph_edge *cs;

  fprintf (dump_file, " setting count of the specialized node to ");
  new_node->count.dump (dump_file);
  fprintf (dump_file, "\n");
  for (cs = new_node->callees; cs; cs = cs->next_callee)
    {
      fprintf (dump_file, " edge to %s has count ",
	       cs->callee->name ());
      cs->count.dump (dump_file);
      fprintf (dump_file, "\n");
    }

  fprintf (dump_file, " setting count of the original node to ");
  orig_node->count.dump (dump_file);
  fprintf (dump_file, "\n");
  for (cs = orig_node->callees; cs; cs = cs->next_callee)
    {
      fprintf (dump_file, " edge to %s is left with ",
	       cs->callee->name ());
      cs->count.dump (dump_file);
      fprintf (dump_file, "\n");
    }
}

/* After a specialized NEW_NODE version of ORIG_NODE has been created, update
   their profile information to reflect this.  */

static void
update_profiling_info (struct cgraph_node *orig_node,
		       struct cgraph_node *new_node)
{
  struct cgraph_edge *cs;
  struct caller_statistics stats;
  profile_count new_sum, orig_sum;
  profile_count remainder, orig_node_count = orig_node->count;

  /* Nothing to do without a positive IPA profile count.  */
  if (!(orig_node_count.ipa () > profile_count::zero ()))
    return;

  /* Sum incoming call counts of the original node and of the new clone
     separately.  */
  init_caller_stats (&stats);
  orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
						 false);
  orig_sum = stats.count_sum;
  init_caller_stats (&stats);
  new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
						 false);
  new_sum = stats.count_sum;

  if (orig_node_count < orig_sum + new_sum)
    {
      /* The recorded node count is inconsistently lower than the sum of
	 incoming edge counts; pretend the node count was 20% above that
	 sum.  */
      if (dump_file)
	{
	  fprintf (dump_file, " Problem: node %s has too low count ",
		   orig_node->dump_name ());
	  orig_node_count.dump (dump_file);
	  fprintf (dump_file, "while the sum of incoming count is ");
	  (orig_sum + new_sum).dump (dump_file);
	  fprintf (dump_file, "\n");
	}

      orig_node_count = (orig_sum + new_sum).apply_scale (12, 10);
      if (dump_file)
	{
	  fprintf (dump_file, " proceeding by pretending it was ");
	  orig_node_count.dump (dump_file);
	  fprintf (dump_file, "\n");
	}
    }

  /* The remainder stays with the original node; scale its outgoing edge
     counts and those of the clone accordingly.  NOTE(review): new_node's own
     count is not assigned here — presumably it is set during clone creation;
     verify.  */
  remainder = orig_node_count.combine_with_ipa_count (orig_node_count.ipa ()
						      - new_sum.ipa ());
  new_sum = orig_node_count.combine_with_ipa_count (new_sum);
  orig_node->count = remainder;

  profile_count::adjust_for_ipa_scaling (&new_sum, &orig_node_count);
  for (cs = new_node->callees; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (new_sum, orig_node_count);

  profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
  for (cs = orig_node->callees; cs; cs = cs->next_callee)
    cs->count = cs->count.apply_scale (remainder, orig_node_count);

  if (dump_file)
    dump_profile_updates (orig_node, new_node);
}

/* Update the respective profile of specialized NEW_NODE and the original
   ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
   have been redirected to the specialized version.  */

static void
update_specialized_profile (struct cgraph_node *new_node,
			    struct cgraph_node *orig_node,
			    profile_count redirected_sum)
{
  struct cgraph_edge *cs;
  profile_count new_node_count, orig_node_count = orig_node->count;

  if (dump_file)
    {
      fprintf (dump_file, " the sum of counts of redirected edges is ");
      redirected_sum.dump (dump_file);
      fprintf (dump_file, "\n");
    }
  if (!(orig_node_count > profile_count::zero ()))
    return;

  gcc_assert (orig_node_count >= redirected_sum);

  /* Move REDIRECTED_SUM from the original node to the clone and scale the
     outgoing edge counts of both proportionally.  */
  new_node_count = new_node->count;
  new_node->count += redirected_sum;
  orig_node->count -= redirected_sum;

  for (cs = new_node->callees; cs; cs = cs->next_callee)
    cs->count += cs->count.apply_scale (redirected_sum, new_node_count);

  for (cs = orig_node->callees; cs; cs = cs->next_callee)
    {
      profile_count dec = cs->count.apply_scale (redirected_sum,
						 orig_node_count);
      cs->count -= dec;
    }

  if (dump_file)
    dump_profile_updates (orig_node, new_node);
}

/* Create a specialized version of NODE with known constants in KNOWN_CSTS,
   known contexts in KNOWN_CONTEXTS and known aggregate
values in AGGVALS and
   redirect all edges in CALLERS to it.  Return the new node.  */

static struct cgraph_node *
create_specialized_node (struct cgraph_node *node,
			 vec<tree> known_csts,
			 vec<ipa_polymorphic_call_context> known_contexts,
			 struct ipa_agg_replacement_value *aggvals,
			 vec<cgraph_edge *> callers)
{
  struct ipa_node_params *new_info, *info = IPA_NODE_REF (node);
  vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
  struct ipa_agg_replacement_value *av;
  struct cgraph_node *new_node;
  int i, count = ipa_get_param_count (info);
  bitmap args_to_skip;

  gcc_assert (!info->ipcp_orig_node);

  /* Parameters that are known constants or unused can be dropped from the
     clone's signature, but only if the signature may change at all.  */
  if (node->local.can_change_signature)
    {
      args_to_skip = BITMAP_GGC_ALLOC ();
      for (i = 0; i < count; i++)
	{
	  tree t = known_csts[i];

	  if (t || !ipa_is_param_used (info, i))
	    bitmap_set_bit (args_to_skip, i);
	}
    }
  else
    {
      args_to_skip = NULL;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " cannot change function signature\n");
    }

  /* Build replacement maps substituting known constants for parameter
     uses in the clone's body.  */
  for (i = 0; i < count; i++)
    {
      tree t = known_csts[i];
      if (t)
	{
	  struct ipa_replace_map *replace_map;

	  gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
	  replace_map = get_replacement_map (info, t, i);
	  if (replace_map)
	    vec_safe_push (replace_trees, replace_map);
	}
    }

  /* Pull self-recursive calls out of CALLERS; they are handled separately
     after cloning, below.  */
  auto_vec<cgraph_edge *, 2> self_recursive_calls;
  for (i = callers.length () - 1; i >= 0; i--)
    {
      cgraph_edge *cs = callers[i];
      if (cs->caller == node)
	{
	  self_recursive_calls.safe_push (cs);
	  callers.unordered_remove (i);
	}
    }

  /* Give each clone of the same function a distinct numbered suffix.  */
  unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
			       IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
				 node->decl)));
  new_node = node->create_virtual_clone (callers, replace_trees,
					 args_to_skip, "constprop",
					 suffix_counter);
  suffix_counter++;

  /* Redirect the clones of the self-recursive edges so that recursion in
     the specialized node stays within the specialized node.  */
  bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
  for (unsigned j = 0; j < self_recursive_calls.length (); j++)
    {
      cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
      /* Cloned edges can disappear during cloning as speculation can be
	 resolved, check that we have one and that it comes from the last
	 cloning.  */
      if (cs && cs->caller == new_node)
	cs->redirect_callee_duplicating_thunks (new_node);
      /* Any future code that would make more than one clone of an outgoing
	 edge would confuse this mechanism, so let's check that does not
	 happen.  */
      gcc_checking_assert (!cs
			   || !get_next_cgraph_edge_clone (cs)
			   || get_next_cgraph_edge_clone (cs)->caller != new_node);
    }
  if (have_self_recursive_calls)
    new_node->expand_all_artificial_thunks ();

  /* Attach the aggregate replacement values and create references the
     replacements need.  */
  ipa_set_node_agg_value_chain (new_node, aggvals);
  for (av = aggvals; av; av = av->next)
    new_node->maybe_create_reference (av->value, NULL);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
      if (known_contexts.exists ())
	{
	  for (i = 0; i < count; i++)
	    if (!known_contexts[i].useless_p ())
	      {
		fprintf (dump_file, " known ctx %i is ", i);
		known_contexts[i].dump (dump_file);
	      }
	}
      if (aggvals)
	ipa_dump_agg_replacement_values (dump_file, aggvals);
    }
  ipa_check_create_node_params ();
  update_profiling_info (node, new_node);
  new_info = IPA_NODE_REF (new_node);
  new_info->ipcp_orig_node = node;
  new_info->known_csts = known_csts;
  new_info->known_contexts = known_contexts;

  ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts, aggvals);

  callers.release ();
  return new_node;
}

/* Return true if JFUNC, which describes an i-th parameter of call CS, is a
   simple no-operation pass-through function to
itself.  */

static bool
self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i)
{
  enum availability availability;
  /* The call is a direct self-recursion (not merely an interposable match)
     and the jump function passes parameter I through unmodified to the same
     parameter position.  */
  if (cs->caller == cs->callee->function_symbol (&availability)
      && availability > AVAIL_INTERPOSABLE
      && jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR
      && ipa_get_jf_pass_through_formal_id (jfunc) == i)
    return true;
  return false;
}

/* Given a NODE, and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CSTS with constants that are also known for all of the CALLERS.  */

static void
find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
					    vec<tree> known_csts,
					    vec<cgraph_edge *> callers)
{
  struct ipa_node_params *info = IPA_NODE_REF (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      struct cgraph_edge *cs;
      tree newval = NULL_TREE;
      int j;
      bool first = true;
      tree type = ipa_get_type (info, i);

      /* Skip parameters whose lattice is bottom or that already have a
	 known constant.  */
      if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
	continue;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  struct ipa_jump_func *jump_func;
	  tree t;

	  if (IPA_NODE_REF (cs->caller)->node_dead)
	    continue;

	  /* Give up when a caller passes fewer arguments or the first
	     argument travels through a thunk.  */
	  if (i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs))
	      || (i == 0
		  && call_passes_through_thunk_p (cs)))
	    {
	      newval = NULL_TREE;
	      break;
	    }
	  jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);
	  /* Self-recursive pass-through edges cannot veto a value that all
	     other callers agree on.  */
	  if (self_recursive_pass_through_p (cs, jump_func, i))
	    continue;

	  /* All callers must agree on one and the same constant.  */
	  t = ipa_value_from_jfunc (IPA_NODE_REF (cs->caller), jump_func, type);
	  if (!t
	      || (newval
		  && !values_equal_for_ipcp_p (t, newval))
	      || (!first && !newval))
	    {
	      newval = NULL_TREE;
	      break;
	    }
	  else
	    newval = t;
	  first = false;
	}

      if (newval)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " adding an extra known scalar value ");
	      print_ipcp_constant_value (dump_file, newval);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, "\n");
	    }

	  known_csts[i] = newval;
	}
    }
}

/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
   CALLERS.  */

static void
find_more_contexts_for_caller_subset (cgraph_node *node,
				      vec<ipa_polymorphic_call_context>
				      *known_contexts,
				      vec<cgraph_edge *> callers)
{
  ipa_node_params *info = IPA_NODE_REF (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      cgraph_edge *cs;

      /* Skip parameters with a bottom context lattice or an already known
	 useful context.  */
      if (ipa_get_poly_ctx_lat (info, i)->bottom
	  || (known_contexts->exists ()
	      && !(*known_contexts)[i].useless_p ()))
	continue;

      ipa_polymorphic_call_context newval;
      bool first = true;
      int j;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  if (i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs)))
	    return;
	  ipa_jump_func *jfunc = ipa_get_ith_jump_func (IPA_EDGE_REF (cs),
							i);
	  ipa_polymorphic_call_context ctx;
	  ctx = ipa_context_from_jfunc (IPA_NODE_REF (cs->caller), cs, i,
					jfunc);
	  /* Meet the contexts of all callers; stop early once the meet
	     becomes useless.  */
	  if (first)
	    {
	      newval = ctx;
	      first = false;
	    }
	  else
	    newval.meet_with (ctx);
	  if (newval.useless_p ())
	    break;
	}

      if (!newval.useless_p ())
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " adding an extra known polymorphic "
		       "context ");
	      print_ipcp_constant_value (dump_file, newval);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, "\n");
	    }

	  if (!known_contexts->exists ())
	    known_contexts->safe_grow_cleared (ipa_get_param_count (info));
	  (*known_contexts)[i] = newval;
	}

    }
}

/* Go through PLATS and create a vector of values consisting of values and
   offsets (minus OFFSET) of lattices that contain only a single value.  */

static vec<ipa_agg_jf_item>
copy_plats_to_inter (struct ipcp_param_lattices *plats, HOST_WIDE_INT offset)
{
  vec<ipa_agg_jf_item> res = vNULL;

  if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
    return vNULL;

  for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
    if (aglat->is_single_const ())
      {
	struct ipa_agg_jf_item ti;
	ti.offset = aglat->offset - offset;
	ti.value = aglat->values->value;
	res.safe_push (ti);
      }
  return res;
}

/* Intersect all values in INTER with single value lattices in PLATS (while
   subtracting OFFSET).  Items with no matching single-constant lattice get
   their value cleared to NULL_TREE.  */

static void
intersect_with_plats (struct ipcp_param_lattices *plats,
		      vec<ipa_agg_jf_item> *inter,
		      HOST_WIDE_INT offset)
{
  struct ipcp_agg_lattice *aglat;
  struct ipa_agg_jf_item *item;
  int k;

  if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
    {
      inter->release ();
      return;
    }

  /* Both INTER and the aggregate lattice list are ordered by offset, so a
     single forward scan of the lattices suffices.  */
  aglat = plats->aggs;
  FOR_EACH_VEC_ELT (*inter, k, item)
    {
      bool found = false;
      if (!item->value)
	continue;
      while (aglat)
	{
	  if (aglat->offset - offset > item->offset)
	    break;
	  if (aglat->offset - offset == item->offset)
	    {
	      gcc_checking_assert (item->value);
	      if (aglat->is_single_const ()
		  && values_equal_for_ipcp_p (item->value,
					      aglat->values->value))
		found = true;
	      break;
	    }
	  aglat = aglat->next;
	}
      if (!found)
	item->value = NULL_TREE;
    }
}

/* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the
vector result while subtracting OFFSET from the individual value offsets. */ 4126 4127 static vec<ipa_agg_jf_item> 4128 agg_replacements_to_vector (struct cgraph_node *node, int index, 4129 HOST_WIDE_INT offset) 4130 { 4131 struct ipa_agg_replacement_value *av; 4132 vec<ipa_agg_jf_item> res = vNULL; 4133 4134 for (av = ipa_get_agg_replacements_for_node (node); av; av = av->next) 4135 if (av->index == index 4136 && (av->offset - offset) >= 0) 4137 { 4138 struct ipa_agg_jf_item item; 4139 gcc_checking_assert (av->value); 4140 item.offset = av->offset - offset; 4141 item.value = av->value; 4142 res.safe_push (item); 4143 } 4144 4145 return res; 4146 } 4147 4148 /* Intersect all values in INTER with those that we have already scheduled to 4149 be replaced in parameter number INDEX of NODE, which is an IPA-CP clone 4150 (while subtracting OFFSET). */ 4151 4152 static void 4153 intersect_with_agg_replacements (struct cgraph_node *node, int index, 4154 vec<ipa_agg_jf_item> *inter, 4155 HOST_WIDE_INT offset) 4156 { 4157 struct ipa_agg_replacement_value *srcvals; 4158 struct ipa_agg_jf_item *item; 4159 int i; 4160 4161 srcvals = ipa_get_agg_replacements_for_node (node); 4162 if (!srcvals) 4163 { 4164 inter->release (); 4165 return; 4166 } 4167 4168 FOR_EACH_VEC_ELT (*inter, i, item) 4169 { 4170 struct ipa_agg_replacement_value *av; 4171 bool found = false; 4172 if (!item->value) 4173 continue; 4174 for (av = srcvals; av; av = av->next) 4175 { 4176 gcc_checking_assert (av->value); 4177 if (av->index == index 4178 && av->offset - offset == item->offset) 4179 { 4180 if (values_equal_for_ipcp_p (item->value, av->value)) 4181 found = true; 4182 break; 4183 } 4184 } 4185 if (!found) 4186 item->value = NULL_TREE; 4187 } 4188 } 4189 4190 /* Intersect values in INTER with aggregate values that come along edge CS to 4191 parameter number INDEX and return it. If INTER does not actually exist yet, 4192 copy all incoming values to it. 
If we determine we ended up with no values 4193 whatsoever, return a released vector. */ 4194 4195 static vec<ipa_agg_jf_item> 4196 intersect_aggregates_with_edge (struct cgraph_edge *cs, int index, 4197 vec<ipa_agg_jf_item> inter) 4198 { 4199 struct ipa_jump_func *jfunc; 4200 jfunc = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), index); 4201 if (jfunc->type == IPA_JF_PASS_THROUGH 4202 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR) 4203 { 4204 struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); 4205 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc); 4206 4207 if (caller_info->ipcp_orig_node) 4208 { 4209 struct cgraph_node *orig_node = caller_info->ipcp_orig_node; 4210 struct ipcp_param_lattices *orig_plats; 4211 orig_plats = ipa_get_parm_lattices (IPA_NODE_REF (orig_node), 4212 src_idx); 4213 if (agg_pass_through_permissible_p (orig_plats, jfunc)) 4214 { 4215 if (!inter.exists ()) 4216 inter = agg_replacements_to_vector (cs->caller, src_idx, 0); 4217 else 4218 intersect_with_agg_replacements (cs->caller, src_idx, 4219 &inter, 0); 4220 } 4221 else 4222 { 4223 inter.release (); 4224 return vNULL; 4225 } 4226 } 4227 else 4228 { 4229 struct ipcp_param_lattices *src_plats; 4230 src_plats = ipa_get_parm_lattices (caller_info, src_idx); 4231 if (agg_pass_through_permissible_p (src_plats, jfunc)) 4232 { 4233 /* Currently we do not produce clobber aggregate jump 4234 functions, adjust when we do. 
*/ 4235 gcc_checking_assert (!jfunc->agg.items); 4236 if (!inter.exists ()) 4237 inter = copy_plats_to_inter (src_plats, 0); 4238 else 4239 intersect_with_plats (src_plats, &inter, 0); 4240 } 4241 else 4242 { 4243 inter.release (); 4244 return vNULL; 4245 } 4246 } 4247 } 4248 else if (jfunc->type == IPA_JF_ANCESTOR 4249 && ipa_get_jf_ancestor_agg_preserved (jfunc)) 4250 { 4251 struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); 4252 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc); 4253 struct ipcp_param_lattices *src_plats; 4254 HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc); 4255 4256 if (caller_info->ipcp_orig_node) 4257 { 4258 if (!inter.exists ()) 4259 inter = agg_replacements_to_vector (cs->caller, src_idx, delta); 4260 else 4261 intersect_with_agg_replacements (cs->caller, src_idx, &inter, 4262 delta); 4263 } 4264 else 4265 { 4266 src_plats = ipa_get_parm_lattices (caller_info, src_idx); 4267 /* Currently we do not produce clobber aggregate jump 4268 functions, adjust when we do. 
*/ 4269 gcc_checking_assert (!src_plats->aggs || !jfunc->agg.items); 4270 if (!inter.exists ()) 4271 inter = copy_plats_to_inter (src_plats, delta); 4272 else 4273 intersect_with_plats (src_plats, &inter, delta); 4274 } 4275 } 4276 else if (jfunc->agg.items) 4277 { 4278 struct ipa_agg_jf_item *item; 4279 int k; 4280 4281 if (!inter.exists ()) 4282 for (unsigned i = 0; i < jfunc->agg.items->length (); i++) 4283 inter.safe_push ((*jfunc->agg.items)[i]); 4284 else 4285 FOR_EACH_VEC_ELT (inter, k, item) 4286 { 4287 int l = 0; 4288 bool found = false; 4289 4290 if (!item->value) 4291 continue; 4292 4293 while ((unsigned) l < jfunc->agg.items->length ()) 4294 { 4295 struct ipa_agg_jf_item *ti; 4296 ti = &(*jfunc->agg.items)[l]; 4297 if (ti->offset > item->offset) 4298 break; 4299 if (ti->offset == item->offset) 4300 { 4301 gcc_checking_assert (ti->value); 4302 if (values_equal_for_ipcp_p (item->value, 4303 ti->value)) 4304 found = true; 4305 break; 4306 } 4307 l++; 4308 } 4309 if (!found) 4310 item->value = NULL; 4311 } 4312 } 4313 else 4314 { 4315 inter.release (); 4316 return vec<ipa_agg_jf_item>(); 4317 } 4318 return inter; 4319 } 4320 4321 /* Look at edges in CALLERS and collect all known aggregate values that arrive 4322 from all of them. 
*/ 4323 4324 static struct ipa_agg_replacement_value * 4325 find_aggregate_values_for_callers_subset (struct cgraph_node *node, 4326 vec<cgraph_edge *> callers) 4327 { 4328 struct ipa_node_params *dest_info = IPA_NODE_REF (node); 4329 struct ipa_agg_replacement_value *res; 4330 struct ipa_agg_replacement_value **tail = &res; 4331 struct cgraph_edge *cs; 4332 int i, j, count = ipa_get_param_count (dest_info); 4333 4334 FOR_EACH_VEC_ELT (callers, j, cs) 4335 { 4336 int c = ipa_get_cs_argument_count (IPA_EDGE_REF (cs)); 4337 if (c < count) 4338 count = c; 4339 } 4340 4341 for (i = 0; i < count; i++) 4342 { 4343 struct cgraph_edge *cs; 4344 vec<ipa_agg_jf_item> inter = vNULL; 4345 struct ipa_agg_jf_item *item; 4346 struct ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i); 4347 int j; 4348 4349 /* Among other things, the following check should deal with all by_ref 4350 mismatches. */ 4351 if (plats->aggs_bottom) 4352 continue; 4353 4354 FOR_EACH_VEC_ELT (callers, j, cs) 4355 { 4356 struct ipa_jump_func *jfunc 4357 = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i); 4358 if (self_recursive_pass_through_p (cs, jfunc, i) 4359 && (!plats->aggs_by_ref 4360 || ipa_get_jf_pass_through_agg_preserved (jfunc))) 4361 continue; 4362 inter = intersect_aggregates_with_edge (cs, i, inter); 4363 4364 if (!inter.exists ()) 4365 goto next_param; 4366 } 4367 4368 FOR_EACH_VEC_ELT (inter, j, item) 4369 { 4370 struct ipa_agg_replacement_value *v; 4371 4372 if (!item->value) 4373 continue; 4374 4375 v = ggc_alloc<ipa_agg_replacement_value> (); 4376 v->index = i; 4377 v->offset = item->offset; 4378 v->value = item->value; 4379 v->by_ref = plats->aggs_by_ref; 4380 *tail = v; 4381 tail = &v->next; 4382 } 4383 4384 next_param: 4385 if (inter.exists ()) 4386 inter.release (); 4387 } 4388 *tail = NULL; 4389 return res; 4390 } 4391 4392 /* Determine whether CS also brings all scalar values that the NODE is 4393 specialized for. 
*/ 4394 4395 static bool 4396 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs, 4397 struct cgraph_node *node) 4398 { 4399 struct ipa_node_params *dest_info = IPA_NODE_REF (node); 4400 int count = ipa_get_param_count (dest_info); 4401 struct ipa_node_params *caller_info; 4402 struct ipa_edge_args *args; 4403 int i; 4404 4405 caller_info = IPA_NODE_REF (cs->caller); 4406 args = IPA_EDGE_REF (cs); 4407 for (i = 0; i < count; i++) 4408 { 4409 struct ipa_jump_func *jump_func; 4410 tree val, t; 4411 4412 val = dest_info->known_csts[i]; 4413 if (!val) 4414 continue; 4415 4416 if (i >= ipa_get_cs_argument_count (args)) 4417 return false; 4418 jump_func = ipa_get_ith_jump_func (args, i); 4419 t = ipa_value_from_jfunc (caller_info, jump_func, 4420 ipa_get_type (dest_info, i)); 4421 if (!t || !values_equal_for_ipcp_p (val, t)) 4422 return false; 4423 } 4424 return true; 4425 } 4426 4427 /* Determine whether CS also brings all aggregate values that NODE is 4428 specialized for. */ 4429 static bool 4430 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs, 4431 struct cgraph_node *node) 4432 { 4433 struct ipa_node_params *orig_caller_info = IPA_NODE_REF (cs->caller); 4434 struct ipa_node_params *orig_node_info; 4435 struct ipa_agg_replacement_value *aggval; 4436 int i, ec, count; 4437 4438 aggval = ipa_get_agg_replacements_for_node (node); 4439 if (!aggval) 4440 return true; 4441 4442 count = ipa_get_param_count (IPA_NODE_REF (node)); 4443 ec = ipa_get_cs_argument_count (IPA_EDGE_REF (cs)); 4444 if (ec < count) 4445 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next) 4446 if (aggval->index >= ec) 4447 return false; 4448 4449 orig_node_info = IPA_NODE_REF (IPA_NODE_REF (node)->ipcp_orig_node); 4450 if (orig_caller_info->ipcp_orig_node) 4451 orig_caller_info = IPA_NODE_REF (orig_caller_info->ipcp_orig_node); 4452 4453 for (i = 0; i < count; i++) 4454 { 4455 struct ipcp_param_lattices *plats; 4456 bool interesting = false; 4457 for 
(struct ipa_agg_replacement_value *av = aggval; av; av = av->next) 4458 if (aggval->index == i) 4459 { 4460 interesting = true; 4461 break; 4462 } 4463 if (!interesting) 4464 continue; 4465 4466 plats = ipa_get_parm_lattices (orig_node_info, aggval->index); 4467 if (plats->aggs_bottom) 4468 return false; 4469 4470 vec<ipa_agg_jf_item> values 4471 = intersect_aggregates_with_edge (cs, i, vNULL); 4472 if (!values.exists ()) 4473 return false; 4474 4475 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next) 4476 if (aggval->index == i) 4477 { 4478 struct ipa_agg_jf_item *item; 4479 int j; 4480 bool found = false; 4481 FOR_EACH_VEC_ELT (values, j, item) 4482 if (item->value 4483 && item->offset == av->offset 4484 && values_equal_for_ipcp_p (item->value, av->value)) 4485 { 4486 found = true; 4487 break; 4488 } 4489 if (!found) 4490 { 4491 values.release (); 4492 return false; 4493 } 4494 } 4495 values.release (); 4496 } 4497 return true; 4498 } 4499 4500 /* Given an original NODE and a VAL for which we have already created a 4501 specialized clone, look whether there are incoming edges that still lead 4502 into the old node but now also bring the requested value and also conform to 4503 all other criteria such that they can be redirected the special node. 4504 This function can therefore redirect the final edge in a SCC. 
*/

template <typename valtype>
static void
perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
{
  ipcp_value_source<valtype> *src;
  profile_count redirected_sum = profile_count::zero ();

  /* Walk every edge (and its clones) that contributed VAL and redirect
     those which now satisfy all specialization criteria.  */
  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, node, val)
	      && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
	      && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
	    {
	      if (dump_file)
		fprintf (dump_file, " - adding an extra caller %s of %s\n",
			 cs->caller->dump_name (),
			 val->spec_node->dump_name ());

	      cs->redirect_callee_duplicating_thunks (val->spec_node);
	      val->spec_node->expand_all_artificial_thunks ();
	      /* Accumulate the profile count being moved to the clone.  */
	      if (cs->count.ipa ().initialized_p ())
		redirected_sum = redirected_sum + cs->count.ipa ();
	    }
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  if (redirected_sum.nonzero_p ())
    update_specialized_profile (val->spec_node, node, redirected_sum);
}

/* Return true if KNOWN_CONTEXTS contain at least one useful context.  */

static bool
known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
{
  ipa_polymorphic_call_context *ctx;
  int i;

  FOR_EACH_VEC_ELT (known_contexts, i, ctx)
    if (!ctx->useless_p ())
      return true;
  return false;
}

/* Return a copy of KNOWN_CONTEXTS if it contains at least one useful context,
   otherwise return vNULL.  */

static vec<ipa_polymorphic_call_context>
copy_useful_known_contexts (vec<ipa_polymorphic_call_context> known_contexts)
{
  if (known_contexts_useful_p (known_contexts))
    return known_contexts.copy ();
  else
    return vNULL;
}

/* Copy KNOWN_CSTS and modify the copy according to VAL and INDEX.  If
   non-empty, replace KNOWN_CONTEXTS with its copy too.  */

static void
modify_known_vectors_with_val (vec<tree> *known_csts,
			       vec<ipa_polymorphic_call_context> *known_contexts,
			       ipcp_value<tree> *val,
			       int index)
{
  *known_csts = known_csts->copy ();
  *known_contexts = copy_useful_known_contexts (*known_contexts);
  (*known_csts)[index] = val->value;
}

/* Replace KNOWN_CSTS with its copy.  Also copy KNOWN_CONTEXTS and modify the
   copy according to VAL and INDEX.  */

static void
modify_known_vectors_with_val (vec<tree> *known_csts,
			       vec<ipa_polymorphic_call_context> *known_contexts,
			       ipcp_value<ipa_polymorphic_call_context> *val,
			       int index)
{
  *known_csts = known_csts->copy ();
  *known_contexts = known_contexts->copy ();
  (*known_contexts)[index] = val->value;
}

/* Return true if OFFSET indicates this was not an aggregate value or there is
   a replacement equivalent to VALUE, INDEX and OFFSET among those in the
   AGGVALS list.  */

DEBUG_FUNCTION bool
ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *aggvals,
			       int index, HOST_WIDE_INT offset, tree value)
{
  /* OFFSET of -1 marks a scalar (non-aggregate) value.  */
  if (offset == -1)
    return true;

  while (aggvals)
    {
      if (aggvals->index == index
	  && aggvals->offset == offset
	  && values_equal_for_ipcp_p (aggvals->value, value))
	return true;
      aggvals = aggvals->next;
    }
  return false;
}

/* Return true if offset is minus one because source of a polymorphic context
   cannot be an aggregate value.  */

DEBUG_FUNCTION bool
ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *,
			       int , HOST_WIDE_INT offset,
			       ipa_polymorphic_call_context)
{
  return offset == -1;
}

/* Decide whether to create a special version of NODE for value VAL of parameter
   at the given INDEX.
If OFFSET is -1, the value is for the parameter itself,
   otherwise it is stored at the given OFFSET of the parameter.  KNOWN_CSTS,
   KNOWN_CONTEXTS and KNOWN_AGGS describe the other already known values.  */

template <typename valtype>
static bool
decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
		    ipcp_value<valtype> *val, vec<tree> known_csts,
		    vec<ipa_polymorphic_call_context> known_contexts)
{
  struct ipa_agg_replacement_value *aggvals;
  int freq_sum, caller_count;
  profile_count count_sum;
  vec<cgraph_edge *> callers;

  if (val->spec_node)
    {
      /* A clone for this value already exists; only try to attract more
	 callers to it.  */
      perhaps_add_new_callers (node, val);
      return false;
    }
  else if (val->local_size_cost + overall_size > max_new_size)
    {
      /* Respect the overall unit-growth budget.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " Ignoring candidate value because "
		 "max_new_size would be reached with %li.\n",
		 val->local_size_cost + overall_size);
      return false;
    }
  else if (!get_info_about_necessary_edges (val, node, &freq_sum, &count_sum,
					    &caller_count))
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " - considering value ");
      print_ipcp_constant_value (dump_file, val->value);
      fprintf (dump_file, " for ");
      ipa_dump_param (dump_file, IPA_NODE_REF (node), index);
      if (offset != -1)
	fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
      fprintf (dump_file, " (caller_count: %i)\n", caller_count);
    }

  /* Evaluate the cloning tradeoff both with only local benefits and with
     benefits propagated from callees included.  */
  if (!good_cloning_opportunity_p (node, val->local_time_benefit,
				   freq_sum, count_sum,
				   val->local_size_cost)
      && !good_cloning_opportunity_p (node,
				      val->local_time_benefit
				      + val->prop_time_benefit,
				      freq_sum, count_sum,
				      val->local_size_cost
				      + val->prop_size_cost))
    return false;

  if (dump_file)
    fprintf (dump_file, " Creating a specialized node of %s.\n",
	     node->dump_name ());

  callers = gather_edges_for_value (val, node, caller_count);
  /* Create private copies of the known-value vectors because
     create_specialized_node takes ownership of them.  */
  if (offset == -1)
    modify_known_vectors_with_val (&known_csts, &known_contexts, val, index);
  else
    {
      known_csts = known_csts.copy ();
      known_contexts = copy_useful_known_contexts (known_contexts);
    }
  find_more_scalar_values_for_callers_subset (node, known_csts, callers);
  find_more_contexts_for_caller_subset (node, &known_contexts, callers);
  aggvals = find_aggregate_values_for_callers_subset (node, callers);
  gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
						      offset, val->value));
  val->spec_node = create_specialized_node (node, known_csts, known_contexts,
					    aggvals, callers);
  overall_size += val->local_size_cost;

  /* TODO: If for some lattice there is only one other known value
     left, make a special node for it too. */

  return true;
}

/* Decide whether and what specialized clones of NODE should be created.  */

static bool
decide_whether_version_node (struct cgraph_node *node)
{
  struct ipa_node_params *info = IPA_NODE_REF (node);
  int i, count = ipa_get_param_count (info);
  vec<tree> known_csts;
  vec<ipa_polymorphic_call_context> known_contexts;
  vec<ipa_agg_jump_function> known_aggs = vNULL;
  bool ret = false;

  if (count == 0)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
	     node->dump_name ());

  /* Aggregates are only needed when a clone for all contexts may be
     created below.  */
  gather_context_independent_values (info, &known_csts, &known_contexts,
				     info->do_clone_for_all_contexts
				     ? &known_aggs : NULL, NULL);

  for (i = 0; i < count; i++)
    {
      struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;

      /* Scalar constants; skip values already known in all contexts.  */
      if (!lat->bottom
	  && !known_csts[i])
	{
	  ipcp_value<tree> *val;
	  for (val = lat->values; val; val = val->next)
	    ret |= decide_about_value (node, i, -1, val, known_csts,
				       known_contexts);
	}

      /* Aggregate constants stored at known offsets of the parameter.  */
      if (!plats->aggs_bottom)
	{
	  struct ipcp_agg_lattice *aglat;
	  ipcp_value<tree> *val;
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    if (!aglat->bottom && aglat->values
		/* If the following is false, the one value is in
		   known_aggs.  */
		&& (plats->aggs_contain_variable
		    || !aglat->is_single_const ()))
	      for (val = aglat->values; val; val = val->next)
		ret |= decide_about_value (node, i, aglat->offset, val,
					   known_csts, known_contexts);
	}

      /* Polymorphic contexts.  */
      if (!ctxlat->bottom
	  && known_contexts[i].useless_p ())
	{
	  ipcp_value<ipa_polymorphic_call_context> *val;
	  for (val = ctxlat->values; val; val = val->next)
	    ret |= decide_about_value (node, i, -1, val, known_csts,
				       known_contexts);
	}

      /* decide_about_value may have created clones, invalidating INFO;
	 re-fetch it.  */
      info = IPA_NODE_REF (node);
    }

  if (info->do_clone_for_all_contexts)
    {
      struct cgraph_node *clone;
      vec<cgraph_edge *> callers;

      if (dump_file)
	fprintf (dump_file, " - Creating a specialized node of %s "
		 "for all known contexts.\n", node->dump_name ());

      callers = node->collect_callers ();
      find_more_scalar_values_for_callers_subset (node, known_csts, callers);
      find_more_contexts_for_caller_subset (node, &known_contexts, callers);
      ipa_agg_replacement_value *aggvals
	= find_aggregate_values_for_callers_subset (node, callers);

      if (!known_contexts_useful_p (known_contexts))
	{
	  known_contexts.release ();
	  known_contexts = vNULL;
	}
      clone = create_specialized_node (node, known_csts, known_contexts,
				       aggvals, callers);
      info = IPA_NODE_REF (node);
      info->do_clone_for_all_contexts = false;
      IPA_NODE_REF (clone)->is_all_contexts_clone = true;
      for (i = 0; i < count; i++)
	vec_free (known_aggs[i].items);
      known_aggs.release ();
      ret = true;
    }
  else
    {
      /* Ownership of the vectors was not passed on; free them here.  */
      known_csts.release ();
      known_contexts.release ();
    }

  return ret;
}

/* Transitively mark all callees of NODE within the same SCC as not dead.  */

static void
spread_undeadness (struct cgraph_node *node)
{
  struct cgraph_edge *cs;

  for (cs = node->callees; cs; cs = cs->next_callee)
    if (ipa_edge_within_scc (cs))
      {
	struct cgraph_node *callee;
	struct ipa_node_params *info;

	callee = cs->callee->function_symbol (NULL);
	info = IPA_NODE_REF (callee);

	if (info->node_dead)
	  {
	    info->node_dead = 0;
	    spread_undeadness (callee);
	  }
      }
}

/* Return true if NODE has a caller from outside of its SCC that is not
   dead.  Worker callback for cgraph_for_node_and_aliases.  */

static bool
has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
				      void *data ATTRIBUTE_UNUSED)
{
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    /* Thunk callers are looked through recursively.  */
    if (cs->caller->thunk.thunk_p
	&& cs->caller->call_for_symbol_thunks_and_aliases
	   (has_undead_caller_from_outside_scc_p, NULL, true))
      return true;
    else if (!ipa_edge_within_scc (cs)
	     && !IPA_NODE_REF (cs->caller)->node_dead)
      return true;
  return false;
}


/* Identify nodes within the same SCC as NODE which are no longer needed
   because of new clones and will be removed as unreachable.
*/ 4859 4860 static void 4861 identify_dead_nodes (struct cgraph_node *node) 4862 { 4863 struct cgraph_node *v; 4864 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle) 4865 if (v->local.local 4866 && !v->call_for_symbol_thunks_and_aliases 4867 (has_undead_caller_from_outside_scc_p, NULL, true)) 4868 IPA_NODE_REF (v)->node_dead = 1; 4869 4870 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle) 4871 if (!IPA_NODE_REF (v)->node_dead) 4872 spread_undeadness (v); 4873 4874 if (dump_file && (dump_flags & TDF_DETAILS)) 4875 { 4876 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle) 4877 if (IPA_NODE_REF (v)->node_dead) 4878 fprintf (dump_file, " Marking node as dead: %s.\n", v->dump_name ()); 4879 } 4880 } 4881 4882 /* The decision stage. Iterate over the topological order of call graph nodes 4883 TOPO and make specialized clones if deemed beneficial. */ 4884 4885 static void 4886 ipcp_decision_stage (struct ipa_topo_info *topo) 4887 { 4888 int i; 4889 4890 if (dump_file) 4891 fprintf (dump_file, "\nIPA decision stage:\n\n"); 4892 4893 for (i = topo->nnodes - 1; i >= 0; i--) 4894 { 4895 struct cgraph_node *node = topo->order[i]; 4896 bool change = false, iterate = true; 4897 4898 while (iterate) 4899 { 4900 struct cgraph_node *v; 4901 iterate = false; 4902 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle) 4903 if (v->has_gimple_body_p () 4904 && ipcp_versionable_function_p (v)) 4905 iterate |= decide_whether_version_node (v); 4906 4907 change |= iterate; 4908 } 4909 if (change) 4910 identify_dead_nodes (node); 4911 } 4912 } 4913 4914 /* Look up all the bits information that we have discovered and copy it over 4915 to the transformation summary. 
*/ 4916 4917 static void 4918 ipcp_store_bits_results (void) 4919 { 4920 cgraph_node *node; 4921 4922 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) 4923 { 4924 ipa_node_params *info = IPA_NODE_REF (node); 4925 bool dumped_sth = false; 4926 bool found_useful_result = false; 4927 4928 if (!opt_for_fn (node->decl, flag_ipa_bit_cp)) 4929 { 4930 if (dump_file) 4931 fprintf (dump_file, "Not considering %s for ipa bitwise propagation " 4932 "; -fipa-bit-cp: disabled.\n", 4933 node->name ()); 4934 continue; 4935 } 4936 4937 if (info->ipcp_orig_node) 4938 info = IPA_NODE_REF (info->ipcp_orig_node); 4939 4940 unsigned count = ipa_get_param_count (info); 4941 for (unsigned i = 0; i < count; i++) 4942 { 4943 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); 4944 if (plats->bits_lattice.constant_p ()) 4945 { 4946 found_useful_result = true; 4947 break; 4948 } 4949 } 4950 4951 if (!found_useful_result) 4952 continue; 4953 4954 ipcp_transformation_initialize (); 4955 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node); 4956 vec_safe_reserve_exact (ts->bits, count); 4957 4958 for (unsigned i = 0; i < count; i++) 4959 { 4960 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); 4961 ipa_bits *jfbits; 4962 4963 if (plats->bits_lattice.constant_p ()) 4964 jfbits 4965 = ipa_get_ipa_bits_for_value (plats->bits_lattice.get_value (), 4966 plats->bits_lattice.get_mask ()); 4967 else 4968 jfbits = NULL; 4969 4970 ts->bits->quick_push (jfbits); 4971 if (!dump_file || !jfbits) 4972 continue; 4973 if (!dumped_sth) 4974 { 4975 fprintf (dump_file, "Propagated bits info for function %s:\n", 4976 node->dump_name ()); 4977 dumped_sth = true; 4978 } 4979 fprintf (dump_file, " param %i: value = ", i); 4980 print_hex (jfbits->value, dump_file); 4981 fprintf (dump_file, ", mask = "); 4982 print_hex (jfbits->mask, dump_file); 4983 fprintf (dump_file, "\n"); 4984 } 4985 } 4986 } 4987 4988 /* Look up all VR information that we have discovered and copy it over 4989 to the 
transformation summary. */ 4990 4991 static void 4992 ipcp_store_vr_results (void) 4993 { 4994 cgraph_node *node; 4995 4996 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) 4997 { 4998 ipa_node_params *info = IPA_NODE_REF (node); 4999 bool found_useful_result = false; 5000 5001 if (!opt_for_fn (node->decl, flag_ipa_vrp)) 5002 { 5003 if (dump_file) 5004 fprintf (dump_file, "Not considering %s for VR discovery " 5005 "and propagate; -fipa-ipa-vrp: disabled.\n", 5006 node->name ()); 5007 continue; 5008 } 5009 5010 if (info->ipcp_orig_node) 5011 info = IPA_NODE_REF (info->ipcp_orig_node); 5012 5013 unsigned count = ipa_get_param_count (info); 5014 for (unsigned i = 0; i < count; i++) 5015 { 5016 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); 5017 if (!plats->m_value_range.bottom_p () 5018 && !plats->m_value_range.top_p ()) 5019 { 5020 found_useful_result = true; 5021 break; 5022 } 5023 } 5024 if (!found_useful_result) 5025 continue; 5026 5027 ipcp_transformation_initialize (); 5028 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node); 5029 vec_safe_reserve_exact (ts->m_vr, count); 5030 5031 for (unsigned i = 0; i < count; i++) 5032 { 5033 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); 5034 ipa_vr vr; 5035 5036 if (!plats->m_value_range.bottom_p () 5037 && !plats->m_value_range.top_p ()) 5038 { 5039 vr.known = true; 5040 vr.type = plats->m_value_range.m_vr.kind (); 5041 vr.min = wi::to_wide (plats->m_value_range.m_vr.min ()); 5042 vr.max = wi::to_wide (plats->m_value_range.m_vr.max ()); 5043 } 5044 else 5045 { 5046 vr.known = false; 5047 vr.type = VR_VARYING; 5048 vr.min = vr.max = wi::zero (INT_TYPE_SIZE); 5049 } 5050 ts->m_vr->quick_push (vr); 5051 } 5052 } 5053 } 5054 5055 /* The IPCP driver. 
*/

static unsigned int
ipcp_driver (void)
{
  struct ipa_topo_info topo;

  if (edge_clone_summaries == NULL)
    edge_clone_summaries = new edge_clone_summary_t (symtab);

  ipa_check_create_node_params ();
  ipa_check_create_edge_args ();
  clone_num_suffixes = new hash_map<const char *, unsigned>;

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA structures before propagation:\n");
      if (dump_flags & TDF_DETAILS)
	ipa_print_all_params (dump_file);
      ipa_print_all_jump_functions (dump_file);
    }

  /* Topological sort.  */
  build_toporder_info (&topo);
  /* Do the interprocedural propagation.  */
  ipcp_propagate_stage (&topo);
  /* Decide what constant propagation and cloning should be performed.  */
  ipcp_decision_stage (&topo);
  /* Store results of bits propagation.  */
  ipcp_store_bits_results ();
  /* Store results of value range propagation.  */
  ipcp_store_vr_results ();

  /* Free all IPCP structures.  */
  delete clone_num_suffixes;
  free_toporder_info (&topo);
  delete edge_clone_summaries;
  edge_clone_summaries = NULL;
  ipa_free_all_structures_after_ipa_cp ();
  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation end\n");
  return 0;
}

/* Initialization and computation of IPCP data structures.  This is the initial
   intraprocedural analysis of functions, which gathers information to be
   propagated later on.  */

static void
ipcp_generate_summary (void)
{
  struct cgraph_node *node;

  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation start:\n");
  ipa_register_cgraph_hooks ();

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    ipa_analyze_node (node);
}

/* Write the ipcp summary (the computed jump functions) for all analyzed
   nodes.  */

static void
ipcp_write_summary (void)
{
  ipa_prop_write_jump_functions ();
}

/* Read ipcp summary.  */

static void
ipcp_read_summary (void)
{
  ipa_prop_read_jump_functions ();
}

namespace {

const pass_data pass_data_ipa_cp =
{
  IPA_PASS, /* type */
  "cp", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_CONSTANT_PROP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};

class pass_ipa_cp : public ipa_opt_pass_d
{
public:
  pass_ipa_cp (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
		      ipcp_generate_summary, /* generate_summary */
		      ipcp_write_summary, /* write_summary */
		      ipcp_read_summary, /* read_summary */
		      ipcp_write_transformation_summaries, /*
		      write_optimization_summary */
		      ipcp_read_transformation_summaries, /*
		      read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      ipcp_transform_function, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    /* FIXME: We should remove the optimize check after we ensure we never run
       IPA passes when not optimizing.  */
    return (flag_ipa_cp && optimize) || in_lto_p;
  }

  virtual unsigned int execute (function *) { return ipcp_driver (); }

}; // class pass_ipa_cp

} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_cp (gcc::context *ctxt)
{
  return new pass_ipa_cp (ctxt);
}

/* Reset all state within ipa-cp.c so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_cp_c_finalize (void)
{
  max_count = profile_count::uninitialized ();
  overall_size = 0;
  max_new_size = 0;
  ipcp_free_transformation_sum ();
}