/* Interprocedural constant propagation
   Copyright (C) 2005-2020 Free Software Foundation, Inc.

   Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
   <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Interprocedural constant propagation (IPA-CP).

   The goal of this transformation is to

   1) discover functions which are always invoked with some arguments with the
      same known constant values and modify the functions so that the
      subsequent optimizations can take advantage of the knowledge, and

   2) partial specialization - create specialized versions of functions
      transformed in this way if some parameters are known constants only in
      certain contexts but the estimated tradeoff between the speedup and the
      size cost is deemed good.

   The algorithm also propagates types and attempts to perform type based
   devirtualization.  Types are propagated much like constants.

   The algorithm basically consists of three stages.  In the first, functions
   are analyzed one at a time and jump functions are constructed for all known
   call-sites.  In the second phase, the pass propagates information from the
   jump functions across the call graph to reveal what values are available at
   what call sites, performs estimations of effects of known values on
   functions and their callees, and finally decides what specialized extra
   versions should be created.  In the third, the special versions materialize
   and appropriate calls are redirected.

   The algorithm used is to a certain extent based on "Interprocedural Constant
   Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
   Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
   Cooper, Mary W. Hall, and Ken Kennedy.


   First stage - intraprocedural analysis
   =======================================

   This phase computes jump_function and modification flags.

   A jump function for a call-site represents the values passed as actual
   arguments of a given call-site.  In principle, there are three types of
   values:

   Pass through - the caller's formal parameter is passed as an actual
		  argument, possibly with an operation performed on it.
   Constant - a constant is passed as an actual argument.
   Unknown - neither of the above.

   All jump function types are described in detail in ipa-prop.h, together with
   the data structures that represent them and methods of accessing them.

   ipcp_generate_summary() is the main function of the first stage.
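
   As a small illustration (hypothetical code, not taken from any particular
   testcase), consider:

     int callee (int a, int b);

     int caller (int c)
     {
       return callee (c + 4, 7);
     }

   When analyzing caller, the call-site gets a pass-through jump function
   with operation PLUS_EXPR and operand 4 for the first argument and a
   constant jump function with value 7 for the second one.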

   Second stage - interprocedural analysis
   ========================================

   This stage is itself divided into two phases.  In the first, we propagate
   known values over the call graph, in the second, we make cloning decisions.
   It uses a different algorithm from the one described in Callahan's original
   paper.

   First, we traverse the functions topologically from callers to callees and,
   for each strongly connected component (SCC), we propagate constants
   according to previously computed jump functions.  We also record what known
   values depend on other known values and estimate local effects.  Finally, we
   propagate cumulative information about these effects from dependent values
   to those on which they depend.

   Second, we again traverse the call graph in the same topological order and
   make clones for functions which we know are called with the same values in
   all contexts and decide about extra specialized clones of functions just for
   some contexts - these decisions are based on both local estimates and
   cumulative estimates propagated from callees.

   ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
   second stage.

   Third stage - materialization of clones, call statement updates.
   ================================================================

   This stage is currently performed by call graph code (mainly in cgraphunit.c
   and tree-inline.c) according to instructions inserted to the call graph by
   the second stage.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple-expr.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "gimple-fold.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "tree-pretty-print.h"
#include "tree-inline.h"
#include "ipa-fnsummary.h"
#include "ipa-utils.h"
#include "tree-ssa-ccp.h"
#include "stringpool.h"
#include "attribs.h"

template <typename valtype> class ipcp_value;

/* Describes a particular source for an IPA-CP value.  */

template <typename valtype>
struct ipcp_value_source
{
public:
  /* Aggregate offset of the source, negative if the source is scalar value of
     the argument itself.  */
  HOST_WIDE_INT offset;
  /* The incoming edge that brought the value.  */
  cgraph_edge *cs;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the ipcp_value of the caller from which the described
     value has been derived.  Otherwise it is NULL.  */
  ipcp_value<valtype> *val;
  /* Next pointer in a linked list of sources of a value.  */
  ipcp_value_source *next;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the index of the parameter of the caller the jump
     function references.  */
  int index;
};

/* Common ancestor for all ipcp_value instantiations.  */

class ipcp_value_base
{
public:
  /* Time benefit and size cost that specializing the function for this value
     would bring about in this function alone.  */
  int local_time_benefit, local_size_cost;
  /* Time benefit and size cost that specializing the function for this value
     can bring about in its callees (transitively).  */
  int prop_time_benefit, prop_size_cost;

  ipcp_value_base ()
    : local_time_benefit (0), local_size_cost (0),
      prop_time_benefit (0), prop_size_cost (0) {}
};

/* Describes one particular value stored in struct ipcp_lattice.  */

template <typename valtype>
class ipcp_value : public ipcp_value_base
{
public:
  /* The actual value for the given parameter.  */
  valtype value;
  /* The list of sources from which this value originates.  */
  ipcp_value_source <valtype> *sources;
  /* Next pointers in a linked list of all values in a lattice.  */
  ipcp_value *next;
  /* Next pointers in a linked list of values in a strongly connected component
     of values.  */
  ipcp_value *scc_next;
  /* Next pointers in a linked list of SCCs of values sorted topologically
     according to their sources.  */
  ipcp_value *topo_next;
  /* A specialized node created for this value, NULL if none has been (so far)
     created.  */
  cgraph_node *spec_node;
  /* Depth first search number and low link for topological sorting of
     values.  */
  int dfs, low_link;
  /* True if this value is currently on the topo-sort stack.  */
  bool on_stack;

  ipcp_value()
    : sources (0), next (0), scc_next (0), topo_next (0),
      spec_node (0), dfs (0), low_link (0), on_stack (false) {}

  void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
		   HOST_WIDE_INT offset);
};

/* Lattice describing potential values of a formal parameter of a function, or
   a part of an aggregate.  TOP is represented by a lattice with zero values
   and with contains_variable and bottom flags cleared.  BOTTOM is represented
   by a lattice with the bottom flag set.  In that case, values and
   contains_variable flag should be disregarded.  */

template <typename valtype>
struct ipcp_lattice
{
public:
  /* The list of known values and types in this lattice.  Note that values are
     not deallocated if a lattice is set to bottom because there may be value
     sources referencing them.  */
  ipcp_value<valtype> *values;
  /* Number of known values and types in this lattice.  */
  int values_count;
  /* The lattice contains a variable component (in addition to values).  */
  bool contains_variable;
  /* The value of the lattice is bottom (i.e. variable and unusable for any
     propagation).  */
  bool bottom;

  inline bool is_single_const ();
  inline bool set_to_bottom ();
  inline bool set_contains_variable ();
  bool add_value (valtype newval, cgraph_edge *cs,
		  ipcp_value<valtype> *src_val = NULL,
		  int src_idx = 0, HOST_WIDE_INT offset = -1,
		  ipcp_value<valtype> **val_p = NULL,
		  bool unlimited = false);
  void print (FILE * f, bool dump_sources, bool dump_benefits);
};

/* Lattice of tree values with an offset to describe a part of an
   aggregate.  */

struct ipcp_agg_lattice : public ipcp_lattice<tree>
{
public:
  /* Offset that is being described by this lattice.  */
  HOST_WIDE_INT offset;
  /* Size so that we don't have to re-compute it every time we traverse the
     list.  Must correspond to TYPE_SIZE of all lat values.  */
  HOST_WIDE_INT size;
  /* Next element of the linked list.  */
  struct ipcp_agg_lattice *next;
};
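
/* For illustration (a hypothetical example, not taken from the sources
   above): for a parameter of type

     struct S { int a; int b; };

   passed by value with both fields known, the list of aggregate lattices
   would have two elements, one with offset 0 and one with offset 32
   (assuming 32-bit int; offsets and sizes are in bits), each with size 32
   and its own list of known values.  */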

/* Lattice of known bits, only capable of holding one value.
   Bitwise constant propagation propagates which bits of a
   value are constant.
   For example:

     int f(int x)
     {
       return some_op (x);
     }

     int f1(int y)
     {
       if (cond)
	 return f (y & 0xff);
       else
	 return f (y & 0xf);
     }

   In the above case, the param 'x' will always have all
   the bits (except the lowest eight) set to 0.
   Hence the mask of 'x' would be 0xff.  The mask
   reflects that the low eight bits are unknown.
   The actual propagated value is given by m_value & ~m_mask.  */

class ipcp_bits_lattice
{
public:
  bool bottom_p () const { return m_lattice_val == IPA_BITS_VARYING; }
  bool top_p () const { return m_lattice_val == IPA_BITS_UNDEFINED; }
  bool constant_p () const { return m_lattice_val == IPA_BITS_CONSTANT; }
  bool set_to_bottom ();
  bool set_to_constant (widest_int, widest_int);
  bool known_nonzero_p () const;

  widest_int get_value () const { return m_value; }
  widest_int get_mask () const { return m_mask; }

  bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
		  enum tree_code, tree, bool);

  bool meet_with (widest_int, widest_int, unsigned);

  void print (FILE *);

private:
  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING }
    m_lattice_val;

  /* Similar to ccp_lattice_t, mask represents which bits of value are
     constant.  If a bit in mask is set to 0, then the corresponding bit in
     value is known to be constant.  */
  widest_int m_value, m_mask;

  bool meet_with_1 (widest_int, widest_int, unsigned, bool);
  void get_value_and_mask (tree, widest_int *, widest_int *);
};
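
/* As a small numeric illustration (hypothetical values, not from the example
   above): m_value = 0x10, m_mask = 0xf describes a number whose low four
   bits are unknown and whose remaining bits are known to be those of 0x10,
   i.e. any value in {0x10, 0x11, ..., 0x1f}.  The invariant m_value ==
   (m_value & ~m_mask) is maintained by the setters below.  */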

/* Lattice of value ranges.  */

class ipcp_vr_lattice
{
public:
  value_range m_vr;

  inline bool bottom_p () const;
  inline bool top_p () const;
  inline bool set_to_bottom ();
  bool meet_with (const value_range *p_vr);
  bool meet_with (const ipcp_vr_lattice &other);
  void init () { gcc_assert (m_vr.undefined_p ()); }
  void print (FILE * f);

private:
  bool meet_with_1 (const value_range *other_vr);
};

/* Structure containing lattices for a parameter itself and for pieces of
   aggregates that are passed in the parameter or by a reference in a parameter
   plus some other useful flags.  */

class ipcp_param_lattices
{
public:
  /* Lattice describing the value of the parameter itself.  */
  ipcp_lattice<tree> itself;
  /* Lattice describing the polymorphic contexts of a parameter.  */
  ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
  /* Lattices describing aggregate parts.  */
  ipcp_agg_lattice *aggs;
  /* Lattice describing known bits.  */
  ipcp_bits_lattice bits_lattice;
  /* Lattice describing value range.  */
  ipcp_vr_lattice m_value_range;
  /* Number of aggregate lattices.  */
  int aggs_count;
  /* True if aggregate data were passed by reference (as opposed to by
     value).  */
  bool aggs_by_ref;
  /* All aggregate lattices contain a variable component (in addition to
     values).  */
  bool aggs_contain_variable;
  /* The value of all aggregate lattices is bottom (i.e. variable and unusable
     for any propagation).  */
  bool aggs_bottom;

  /* There is a virtual call based on this parameter.  */
  bool virt_call;
};

/* Allocation pools for values and their sources in ipa-cp.  */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA_CP aggregate lattices");

/* Maximal count found in program.  */

static profile_count max_count;

/* Original overall size of the program.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;

/* Return the param lattices structure corresponding to the Ith formal
   parameter of the function described by INFO.  */
static inline class ipcp_param_lattices *
ipa_get_parm_lattices (class ipa_node_params *info, int i)
{
  gcc_assert (i >= 0 && i < ipa_get_param_count (info));
  gcc_checking_assert (!info->ipcp_orig_node);
  gcc_checking_assert (info->lattices);
  return &(info->lattices[i]);
}

/* Return the lattice corresponding to the scalar value of the Ith formal
   parameter of the function described by INFO.  */
static inline ipcp_lattice<tree> *
ipa_get_scalar_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->itself;
}

/* Return the lattice corresponding to the polymorphic contexts of the Ith
   formal parameter of the function described by INFO.  */
static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}

/* Return whether LAT is a lattice with a single constant and without an
   undefined value.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::is_single_const ()
{
  if (bottom || contains_variable || values_count != 1)
    return false;
  else
    return true;
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, tree v)
{
  if (TREE_CODE (v) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
    {
      fprintf (f, "& ");
      print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
    }
  else
    print_generic_expr (f, v);
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump(f, false);
}

/* Print a lattice LAT to F.  */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
	fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      if (dump_benefits && prev)
	fprintf (f, "               ");
      else if (!dump_benefits && prev)
	fprintf (f, ", ");
      else
	prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
	{
	  ipcp_value_source<valtype> *s;

	  fprintf (f, " [from:");
	  for (s = val->sources; s; s = s->next)
	    fprintf (f, " %i(%f)", s->cs->caller->order,
		     s->cs->sreal_frequency ().to_double ());
	  fprintf (f, "]");
	}

      if (dump_benefits)
	fprintf (f, " [loc_time: %i, loc_size: %i, "
		 "prop_time: %i, prop_size: %i]\n",
		 val->local_time_benefit, val->local_size_cost,
		 val->prop_time_benefit, val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}

void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, "         Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, "         Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, "         Bits: value = "); print_hex (get_value (), f);
      fprintf (f, ", mask = "); print_hex (get_mask (), f);
      fprintf (f, "\n");
    }
}

/* Print value range lattice to F.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  dump_value_range (f, &m_vr);
}

/* Print all ipcp_lattices of all functions to F.  */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info;

      info = IPA_NODE_REF (node);
      /* Skip unoptimized functions and constprop clones since we don't make
	 lattices for them.  */
      if (!info || info->ipcp_orig_node)
	continue;
      fprintf (f, "  Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
	{
	  struct ipcp_agg_lattice *aglat;
	  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  fprintf (f, "    param [%d]: ", i);
	  plats->itself.print (f, dump_sources, dump_benefits);
	  fprintf (f, "         ctxs: ");
	  plats->ctxlat.print (f, dump_sources, dump_benefits);
	  plats->bits_lattice.print (f);
	  fprintf (f, "         ");
	  plats->m_value_range.print (f);
	  fprintf (f, "\n");
	  if (plats->virt_call)
	    fprintf (f, "        virt_call flag set\n");

	  if (plats->aggs_bottom)
	    {
	      fprintf (f, "        AGGS BOTTOM\n");
	      continue;
	    }
	  if (plats->aggs_contain_variable)
	    fprintf (f, "        AGGS VARIABLE\n");
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    {
	      fprintf (f, "        %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
		       plats->aggs_by_ref ? "ref " : "", aglat->offset);
	      aglat->print (f, dump_sources, dump_benefits);
	    }
	}
    }
}

/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with NODE.  */

static void
determine_versionability (struct cgraph_node *node,
			  class ipa_node_params *info)
{
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk.thunk_p)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
	   || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
	 vector copies of them, so IPA-cp and SIMD clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
	 copies of them, so IPA-cp and target clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; it breaks, and for C++
     decloned constructors, inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
	 callers are inside of a comdat group.  */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
	 edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
	{
	  if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
	    reason = "external function which calls va_arg_pack";
	  if (DECL_FUNCTION_CODE (edge->callee->decl)
	      == BUILT_IN_VA_ARG_PACK_LEN)
	    reason = "external function which calls va_arg_pack_len";
	}

  if (reason && dump_file && !node->alias && !node->thunk.thunk_p)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
	     node->dump_name (), reason);

  info->versionable = (reason == NULL);
}

/* Return true if it is at all technically possible to create clones of
   NODE.  */

static bool
ipcp_versionable_function_p (struct cgraph_node *node)
{
  return IPA_NODE_REF (node) && IPA_NODE_REF (node)->versionable;
}

/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
{
  profile_count count_sum;
  int n_calls, n_hot_calls, freq_sum;
};

/* Initialize fields of STAT to zeroes.  */

static inline void
init_caller_stats (struct caller_statistics *stats)
{
  stats->count_sum = profile_count::zero ();
  stats->n_calls = 0;
  stats->n_hot_calls = 0;
  stats->freq_sum = 0;
}

/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.  */

static bool
gather_caller_stats (struct cgraph_node *node, void *data)
{
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk.thunk_p)
      {
	if (cs->count.ipa ().initialized_p ())
	  stats->count_sum += cs->count.ipa ();
	stats->freq_sum += cs->frequency ();
	stats->n_calls++;
	if (cs->maybe_hot_p ())
	  stats->n_hot_calls++;
      }
  return false;
}

/* Return true if this NODE is a viable candidate for cloning.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "-fipa-cp-clone disabled.\n",
		 node->dump_name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "optimizing it for size.\n",
		 node->dump_name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
					    false);

  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
		 node->dump_name ());
      return true;
    }

  /* When profile is available and function is hot, propagate into it even if
     calls seem cold; constant propagation can improve the function's speed
     significantly.  */
  if (max_count > profile_count::zero ())
    {
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
	{
	  if (dump_file)
	    fprintf (dump_file, "Considering %s for cloning; "
		     "usually called directly.\n",
		     node->dump_name ());
	  return true;
	}
    }
  if (!stats.n_hot_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
		 node->dump_name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
	     node->dump_name ());
  return true;
}

template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort.  */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
};

/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order.  */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
		     constants ()
  {}
};

/* Skip edges from and to nodes without ipa_cp enabled.
   Ignore not available symbols.  */

static bool
ignore_edge_p (cgraph_edge *e)
{
  enum availability avail;
  cgraph_node *ultimate_target
    = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);

  return (avail <= AVAIL_INTERPOSABLE
	  || !opt_for_fn (ultimate_target->decl, optimize)
	  || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
}

/* Allocate the arrays in TOPO and topologically sort the nodes into order.  */

static void
build_toporder_info (class ipa_topo_info *topo)
{
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true,
					ignore_edge_p);
}

/* Free information about strongly connected components and the arrays in
   TOPO.  */

static void
free_toporder_info (class ipa_topo_info *topo)
{
  ipa_free_postorder_info ();
  free (topo->order);
  free (topo->stack);
}

/* Add NODE to the stack in TOPO, unless it is already there.  */

static inline void
push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
{
  class ipa_node_params *info = IPA_NODE_REF (node);
  if (info->node_enqueued)
    return;
  info->node_enqueued = 1;
  topo->stack[topo->stack_top++] = node;
}

/* Pop a node from the stack in TOPO and return it or return NULL if the stack
   is empty.  */

static struct cgraph_node *
pop_node_from_stack (class ipa_topo_info *topo)
{
  if (topo->stack_top)
    {
      struct cgraph_node *node;
      topo->stack_top--;
      node = topo->stack[topo->stack_top];
      IPA_NODE_REF (node)->node_enqueued = 0;
      return node;
    }
  else
    return NULL;
}

/* Set lattice LAT to bottom and return true if it previously was not set as
   such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_to_bottom ()
{
  bool ret = !bottom;
  bottom = true;
  return ret;
}

/* Mark lattice as containing an unknown value and return true if it previously
   was not marked as such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_contains_variable ()
{
  bool ret = !contains_variable;
  contains_variable = true;
  return ret;
}

/* Set all aggregate lattices in PLATS to bottom and return true if they were
   not previously set as such.  */

static inline bool
set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_bottom;
  plats->aggs_bottom = true;
  return ret;
}

/* Mark all aggregate lattices in PLATS as containing an unknown value and
   return true if they were not previously marked as such.  */

static inline bool
set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_contain_variable;
  plats->aggs_contain_variable = true;
  return ret;
}

bool
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
{
  return meet_with_1 (&other.m_vr);
}

/* Meet the current value of the lattice with value range described by VR
   lattice.  */

bool
ipcp_vr_lattice::meet_with (const value_range *p_vr)
{
  return meet_with_1 (p_vr);
}

/* Meet the current value of the lattice with value range described by
   OTHER_VR lattice.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with_1 (const value_range *other_vr)
{
  if (bottom_p ())
    return false;

  if (other_vr->varying_p ())
    return set_to_bottom ();

  value_range save (m_vr);
  m_vr.union_ (other_vr);
  return !m_vr.equal_p (save);
}

/* Return true if value range information in the lattice is yet unknown.  */

bool
ipcp_vr_lattice::top_p () const
{
  return m_vr.undefined_p ();
}

/* Return true if value range information in the lattice is known to be
   unusable.  */

bool
ipcp_vr_lattice::bottom_p () const
{
  return m_vr.varying_p ();
}

/* Set value range information in the lattice to bottom.  Return true if it
   previously was in a different state.  */

bool
ipcp_vr_lattice::set_to_bottom ()
{
  if (m_vr.varying_p ())
    return false;
  /* ?? We create all sorts of VARYING ranges for floats, structures,
     and other types which we cannot handle as ranges.  We should
     probably avoid handling them throughout the pass, but it's easier
     to create a sensible VARYING here and let the lattice
     propagate.  */
  m_vr.set_varying (integer_type_node);
  return true;
}

/* Set lattice value to bottom, if it already isn't the case.  */

bool
ipcp_bits_lattice::set_to_bottom ()
{
  if (bottom_p ())
    return false;
  m_lattice_val = IPA_BITS_VARYING;
  m_value = 0;
  m_mask = -1;
  return true;
}

/* Set to constant if it isn't already.  Only meant to be called
   when switching state from TOP.  */

bool
ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
{
  gcc_assert (top_p ());
  m_lattice_val = IPA_BITS_CONSTANT;
  m_value = wi::bit_and (wi::bit_not (mask), value);
  m_mask = mask;
  return true;
}

/* Return true if any of the known bits are non-zero.  */

bool
ipcp_bits_lattice::known_nonzero_p () const
{
  if (!constant_p ())
    return false;
  return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
}

/* Convert operand to value, mask form.  */

void
ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep,
				       widest_int *maskp)
{
  if (TREE_CODE (operand) == INTEGER_CST)
    {
      *valuep = wi::to_widest (operand);
      *maskp = 0;
    }
  else
    {
      *valuep = 0;
      *maskp = -1;
    }
}

/* Meet operation, similar to ccp_lattice_meet: we xor the values; wherever
   this->m_value and VALUE differ at the same bit position, that bit is
   dropped to varying.  Return true if the mask changed.  This function
   assumes that the lattice value is in CONSTANT state.  If DROP_ALL_ONES,
   mask out any known bits with value one afterwards.  */

bool
ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
				unsigned precision, bool drop_all_ones)
{
  gcc_assert (constant_p ());

  widest_int old_mask = m_mask;
  m_mask = (m_mask | mask) | (m_value ^ value);
  if (drop_all_ones)
    m_mask |= m_value;
  m_value &= ~m_mask;

  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  return m_mask != old_mask;
}

/* Meet the bits lattice with operand
   described by <value, mask, sgn, precision>.  */

bool
ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
			      unsigned precision)
{
  if (bottom_p ())
    return false;

  if (top_p ())
    {
      if (wi::sext (mask, precision) == -1)
	return set_to_bottom ();
      return set_to_constant (value, mask);
    }

  return meet_with_1 (value, mask, precision, false);
}

/* Meet bits lattice with the result of bit_value_binop (other, operand)
   if code is binary operation or bit_value_unop (other) if code is unary op.
   In the case when code is nop_expr, no adjustment is required.  If
   DROP_ALL_ONES, mask out any known bits with value one afterwards.  */

bool
ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
			      signop sgn, enum tree_code code, tree operand,
			      bool drop_all_ones)
{
  if (other.bottom_p ())
    return set_to_bottom ();

  if (bottom_p () || other.top_p ())
    return false;

  widest_int adjusted_value, adjusted_mask;

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      tree type = TREE_TYPE (operand);
      widest_int o_value, o_mask;
      get_value_and_mask (operand, &o_value, &o_mask);

      bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
		       sgn, precision, other.get_value (), other.get_mask (),
		       TYPE_SIGN (type), TYPE_PRECISION (type), o_value,
		       o_mask);

      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
    }

  else if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      bit_value_unop (code, sgn, precision, &adjusted_value,
		      &adjusted_mask, sgn, precision, other.get_value (),
		      other.get_mask ());

      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
    }

  else
    return set_to_bottom ();

  if (top_p ())
    {
      if (drop_all_ones)
	{
	  adjusted_mask |= adjusted_value;
	  adjusted_value &= ~adjusted_mask;
	}
      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
      return set_to_constant (adjusted_value, adjusted_mask);
    }
  else
    return meet_with_1 (adjusted_value, adjusted_mask, precision,
			drop_all_ones);
}
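
/* A small worked example of the meet above (hypothetical numbers): meeting a
   CONSTANT lattice with m_value = 0x2, m_mask = 0x1 with an incoming pair
   value = 0x4, mask = 0x1 yields m_mask = (0x1 | 0x1) | (0x2 ^ 0x4) = 0x7
   and m_value = 0, i.e. only the bits above the lowest three remain known
   (and zero).  */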

/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far.  */

static inline bool
set_all_contains_variable (class ipcp_param_lattices *plats)
{
  bool ret;
  ret = plats->itself.set_contains_variable ();
  ret |= plats->ctxlat.set_contains_variable ();
  ret |= set_agg_lats_contain_variable (plats);
  ret |= plats->bits_lattice.set_to_bottom ();
  ret |= plats->m_value_range.set_to_bottom ();
  return ret;
}

/* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
   points to by the number of callers to NODE.  */

static bool
count_callers (cgraph_node *node, void *data)
{
  int *caller_count = (int *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    /* Local thunks can be handled transparently, but if the thunk cannot
       be optimized out, count it as a real use.  */
    if (!cs->caller->thunk.thunk_p || !cs->caller->local)
      ++*caller_count;
  return false;
}

/* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
   the one caller of some other node.  Set the caller's corresponding flag.  */

static bool
set_single_call_flag (cgraph_node *node, void *)
{
  cgraph_edge *cs = node->callers;
  /* Local thunks can be handled transparently, skip them.  */
  while (cs && cs->caller->thunk.thunk_p && cs->caller->local)
    cs = cs->next_caller;
  if (cs && IPA_NODE_REF (cs->caller))
    {
      IPA_NODE_REF (cs->caller)->node_calling_single_call = true;
      return true;
    }
  return false;
}

/* Initialize ipcp_lattices.  */

static void
initialize_node_lattices (struct cgraph_node *node)
{
  class ipa_node_params *info = IPA_NODE_REF (node);
  struct cgraph_edge *ie;
  bool disable = false, variable = false;
  int i;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!ipa_get_param_count (info))
    disable = true;
  else if (node->local)
    {
      int caller_count = 0;
      node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
						true);
      gcc_checking_assert (caller_count > 0);
      if (caller_count == 1)
	node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
						  NULL, true);
    }
  else
    {
      /* When cloning is allowed, we can assume that externally visible
	 functions are not called.  We will compensate this by cloning
	 later.  */
      if (ipcp_versionable_function_p (node)
	  && ipcp_cloning_candidate_p (node))
	variable = true;
      else
	disable = true;
    }

  if (dump_file && (dump_flags & TDF_DETAILS)
      && !node->alias && !node->thunk.thunk_p)
    {
      fprintf (dump_file, "Initializing lattices of %s\n",
	       node->dump_name ());
      if (disable || variable)
	fprintf (dump_file, " Marking all lattices as %s\n",
		 disable ? "BOTTOM" : "VARIABLE");
    }

  auto_vec<bool, 16> surviving_params;
  bool pre_modified = false;
  if (!disable && node->clone.param_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
	 parameters of the prevailing decl as the m_always_copy_start.
	 Handling any other value would complicate the code below, so for the
	 time being let's only assert it is so.  */
      gcc_assert ((node->clone.param_adjustments->m_always_copy_start
		   == ipa_get_param_count (info))
		  || node->clone.param_adjustments->m_always_copy_start < 0);

      pre_modified = true;
      node->clone.param_adjustments->get_surviving_params (&surviving_params);

      if (dump_file && (dump_flags & TDF_DETAILS)
	  && !node->alias && !node->thunk.thunk_p)
	{
	  bool first = true;
	  for (int j = 0; j < ipa_get_param_count (info); j++)
	    {
	      if (j < (int) surviving_params.length ()
		  && surviving_params[j])
		continue;
	      if (first)
		{
		  fprintf (dump_file,
			   " The following parameters are dead on arrival:");
		  first = false;
		}
	      fprintf (dump_file, " %u", j);
	    }
	  if (!first)
	    fprintf (dump_file, "\n");
	}
    }

  for (i = 0; i < ipa_get_param_count (info); i++)
    {
      ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      if (disable
	  || (pre_modified && (surviving_params.length () <= (unsigned) i
			       || !surviving_params[i])))
	{
	  plats->itself.set_to_bottom ();
	  plats->ctxlat.set_to_bottom ();
	  set_agg_lats_to_bottom (plats);
	  plats->bits_lattice.set_to_bottom ();
	  plats->m_value_range.set_to_bottom ();
	}
      else
	{
	  plats->m_value_range.init ();
	  if (variable)
	    set_all_contains_variable (plats);
	}
    }

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    if (ie->indirect_info->polymorphic
	&& ie->indirect_info->param_index >= 0)
      {
	gcc_checking_assert (ie->indirect_info->param_index >= 0);
	ipa_get_parm_lattices (info,
			       ie->indirect_info->param_index)->virt_call = 1;
      }
}

/* Return the result of a (possibly arithmetic) operation on the constant
   value INPUT.  OPERAND is 2nd operand for binary operation.  RES_TYPE is
   the type of the parameter to which the result is passed.  Return
   NULL_TREE if that cannot be determined or be considered an
   interprocedural invariant.  */

static tree
ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
			 tree res_type)
{
  tree res;

  if (opcode == NOP_EXPR)
    return input;
  if (!is_gimple_ip_invariant (input))
    return NULL_TREE;

  if (!res_type)
    {
      if (TREE_CODE_CLASS (opcode) == tcc_comparison)
	res_type = boolean_type_node;
      else if (expr_type_first_operand_type_p (opcode))
	res_type = TREE_TYPE (input);
      else
	return NULL_TREE;
    }

  if (TREE_CODE_CLASS (opcode) == tcc_unary)
    res = fold_unary (opcode, res_type, input);
  else
    res = fold_binary (opcode, res_type, input, operand);

  if (res && !is_gimple_ip_invariant (res))
    return NULL_TREE;

  return res;
}

/* Return the result of a (possibly arithmetic) pass through jump function
   JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
   to which the result is passed.  Return NULL_TREE if that cannot be
   determined or be considered an interprocedural invariant.  */

static tree
ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
				tree res_type)
{
  return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
				  input,
				  ipa_get_jf_pass_through_operand (jfunc),
				  res_type);
}
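
/* For instance (a hypothetical invocation, not from the sources above),
   evaluating a pass-through jump function with operation MULT_EXPR and
   operand 4 on the known constant 3 folds 3 * 4 and yields the tree for 12,
   while a plain pass-through (NOP_EXPR) returns the input tree 3
   unchanged.  */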

/* Return the result of an ancestor jump function JFUNC on the constant value
   INPUT.  Return NULL_TREE if that cannot be determined.  */

static tree
ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
  gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
  if (TREE_CODE (input) == ADDR_EXPR)
    {
      gcc_checking_assert (is_gimple_ip_invariant_address (input));
      poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
      if (known_eq (off, 0))
	return input;
      poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
      return build1 (ADDR_EXPR, TREE_TYPE (input),
		     fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)),
				  input,
				  build_int_cst (ptr_type_node, byte_offset)));
    }
  else if (ipa_get_jf_ancestor_keep_null (jfunc)
	   && zerop (input))
    return input;
  else
    return NULL_TREE;
}

/* Determine whether JFUNC evaluates to a single known constant value and if
   so, return it.  Otherwise return NULL.  INFO describes the caller node or
   the one it is inlined to, so that pass-through jump functions can be
   evaluated.  PARM_TYPE is the type of the parameter to which the result is
   passed.  */

tree
ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
		      tree parm_type)
{
  if (jfunc->type == IPA_JF_CONST)
    return ipa_get_jf_constant (jfunc);
  else if (jfunc->type == IPA_JF_PASS_THROUGH
	   || jfunc->type == IPA_JF_ANCESTOR)
    {
      tree input;
      int idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
	idx = ipa_get_jf_ancestor_formal_id (jfunc);

      if (info->ipcp_orig_node)
	input = info->known_csts[idx];
      else
	{
	  ipcp_lattice<tree> *lat;

	  if (!info->lattices
	      || idx >= ipa_get_param_count (info))
	    return NULL_TREE;
	  lat = ipa_get_scalar_lat (info, idx);
	  if (!lat->is_single_const ())
	    return NULL_TREE;
	  input = lat->values->value;
	}

      if (!input)
	return NULL_TREE;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
      else
	return ipa_get_jf_ancestor_result (jfunc, input);
    }
  else
    return NULL_TREE;
}
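
/* As an illustration of the ancestor case (hypothetical types and values):
   if the known constant is &s, where s has a struct type with a member at
   offset 32 bits, an ancestor jump function with that offset evaluates to
   the address of that member, built as the address of a MEM_REF of &s with
   a 4-byte offset.  */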

/* Determine whether JFUNC evaluates to single known polymorphic context, given
   that INFO describes the caller node or the one it is inlined to, CS is the
   call graph edge corresponding to JFUNC and CSIDX index of the described
   parameter.  */

ipa_polymorphic_call_context
ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
			ipa_jump_func *jfunc)
{
  ipa_edge_args *args = IPA_EDGE_REF (cs);
  ipa_polymorphic_call_context ctx;
  ipa_polymorphic_call_context *edge_ctx
    = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;

  if (edge_ctx && !edge_ctx->useless_p ())
    ctx = *edge_ctx;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_polymorphic_call_context srcctx;
      int srcidx;
      bool type_preserved = true;
      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
	    return ctx;
	  type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
	  srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
	}
      else
	{
	  type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
	  srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
	}
      if (info->ipcp_orig_node)
	{
	  if (info->known_contexts.exists ())
	    srcctx = info->known_contexts[srcidx];
	}
      else
	{
	  if (!info->lattices
	      || srcidx >= ipa_get_param_count (info))
	    return ctx;
	  ipcp_lattice<ipa_polymorphic_call_context> *lat;
	  lat = ipa_get_poly_ctx_lat (info, srcidx);
	  if (!lat->is_single_const ())
	    return ctx;
	  srcctx = lat->values->value;
	}
      if (srcctx.useless_p ())
	return ctx;
      if (jfunc->type == IPA_JF_ANCESTOR)
	srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
      if (!type_preserved)
	srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
      srcctx.combine_with (ctx);
      return srcctx;
    }

  return ctx;
}

/* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
   DST_TYPE on value range in SRC_VR and store it to DST_VR.  Return true if
   the result is a range or an anti-range.  */

static bool
ipa_vr_operation_and_type_effects (value_range *dst_vr,
				   value_range *src_vr,
				   enum tree_code operation,
				   tree dst_type, tree src_type)
{
  range_fold_unary_expr (dst_vr, operation, dst_type, src_vr, src_type);
  if (dst_vr->varying_p () || dst_vr->undefined_p ())
    return false;
  return true;
}

/* Determine value_range of JFUNC given that INFO describes the caller node or
   the one it is inlined to, CS is the call graph edge corresponding to JFUNC
   and PARM_TYPE of the parameter.  */

value_range
ipa_value_range_from_jfunc (ipa_node_params *info, cgraph_edge *cs,
			    ipa_jump_func *jfunc, tree parm_type)
{
  value_range vr;
  if (jfunc->m_vr)
    ipa_vr_operation_and_type_effects (&vr,
				       jfunc->m_vr,
				       NOP_EXPR, parm_type,
				       jfunc->m_vr->type ());
  if (vr.singleton_p ())
    return vr;
  if (jfunc->type == IPA_JF_PASS_THROUGH)
    {
      int idx;
      ipcp_transformation *sum
	= ipcp_get_transformation_summary (cs->caller->inlined_to
					   ? cs->caller->inlined_to
					   : cs->caller);
      if (!sum || !sum->m_vr)
	return vr;

      idx = ipa_get_jf_pass_through_formal_id (jfunc);

      if (!(*sum->m_vr)[idx].known)
	return vr;
      tree vr_type = ipa_get_type (info, idx);
      value_range srcvr (wide_int_to_tree (vr_type, (*sum->m_vr)[idx].min),
			 wide_int_to_tree (vr_type, (*sum->m_vr)[idx].max),
			 (*sum->m_vr)[idx].type);

      enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);

      if (TREE_CODE_CLASS (operation) == tcc_unary)
	{
	  value_range res;

	  if (ipa_vr_operation_and_type_effects (&res,
						 &srcvr,
						 operation, parm_type,
						 vr_type))
	    vr.intersect (res);
	}
      else
	{
	  value_range op_res, res;
	  tree op = ipa_get_jf_pass_through_operand (jfunc);
	  value_range op_vr (op, op);

	  range_fold_binary_expr (&op_res, operation, vr_type, &srcvr, &op_vr);
	  if (ipa_vr_operation_and_type_effects (&res,
						 &op_res,
						 NOP_EXPR, parm_type,
						 vr_type))
	    vr.intersect (res);
	}
    }
  return vr;
}

/* See if NODE is a clone with a known aggregate value at a given OFFSET of a
   parameter with the given INDEX.  */

static tree
get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset,
		     int index)
{
  struct ipa_agg_replacement_value *aggval;

  aggval = ipa_get_agg_replacements_for_node (node);
  while (aggval)
    {
      if (aggval->offset == offset
	  && aggval->index == index)
	return aggval->value;
      aggval = aggval->next;
    }
  return NULL_TREE;
}
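
/* A worked example for the pass-through case above (hypothetical ranges):
   if the caller's parameter is known to lie in [0, 9] and the jump function
   performs PLUS_EXPR with operand 1, folding the ranges gives [1, 10] for
   the callee's parameter, which is then intersected with whatever range the
   jump function itself recorded.  */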

/* Determine whether ITEM, jump function for an aggregate part, evaluates to a
   single known constant value and if so, return it.  Otherwise return NULL.
   NODE and INFO describe the caller node or the one it is inlined to, and
   its related info.  */

static tree
ipa_agg_value_from_node (class ipa_node_params *info,
			 struct cgraph_node *node,
			 struct ipa_agg_jf_item *item)
{
  tree value = NULL_TREE;
  int src_idx;

  if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
    return NULL_TREE;

  if (item->jftype == IPA_JF_CONST)
    return item->value.constant;

  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
		       || item->jftype == IPA_JF_LOAD_AGG);

  src_idx = item->value.pass_through.formal_id;

  if (info->ipcp_orig_node)
    {
      if (item->jftype == IPA_JF_PASS_THROUGH)
	value = info->known_csts[src_idx];
      else
	value = get_clone_agg_value (node, item->value.load_agg.offset,
				     src_idx);
    }
  else if (info->lattices)
    {
      class ipcp_param_lattices *src_plats
	= ipa_get_parm_lattices (info, src_idx);

      if (item->jftype == IPA_JF_PASS_THROUGH)
	{
	  struct ipcp_lattice<tree> *lat = &src_plats->itself;

	  if (!lat->is_single_const ())
	    return NULL_TREE;

	  value = lat->values->value;
	}
      else if (src_plats->aggs
	       && !src_plats->aggs_bottom
	       && !src_plats->aggs_contain_variable
	       && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
	{
	  struct ipcp_agg_lattice *aglat;

	  for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
	    {
	      if (aglat->offset > item->value.load_agg.offset)
		break;

	      if (aglat->offset == item->value.load_agg.offset)
		{
		  if (aglat->is_single_const ())
		    value = aglat->values->value;
		  break;
		}
	    }
	}
    }

  if (!value)
    return NULL_TREE;

  if (item->jftype == IPA_JF_LOAD_AGG)
    {
      tree load_type = item->value.load_agg.type;
      tree value_type = TREE_TYPE (value);

      /* Ensure value type is compatible with load type.  */
      if (!useless_type_conversion_p (load_type, value_type))
	return NULL_TREE;
    }

  return ipa_get_jf_arith_result (item->value.pass_through.operation,
				  value,
				  item->value.pass_through.operand,
				  item->type);
}

/* Determine whether AGG_JFUNC evaluates to a set of known constant values for
   an aggregate and if so, return it.  Otherwise return an empty set.  NODE
   and INFO describe the caller node or the one it is inlined to, and its
   related info.  */

struct ipa_agg_value_set
ipa_agg_value_set_from_jfunc (class ipa_node_params *info, cgraph_node *node,
			      struct ipa_agg_jump_function *agg_jfunc)
{
  struct ipa_agg_value_set agg;
  struct ipa_agg_jf_item *item;
  int i;

  agg.items = vNULL;
  agg.by_ref = agg_jfunc->by_ref;

  FOR_EACH_VEC_SAFE_ELT (agg_jfunc->items, i, item)
    {
      tree value = ipa_agg_value_from_node (info, node, item);

      if (value)
	{
	  struct ipa_agg_value value_item;

	  value_item.offset = item->offset;
	  value_item.value = value;

	  agg.items.safe_push (value_item);
	}
    }
  return agg;
}
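
/* Continuing the struct S illustration from above (hypothetical values): if
   a caller passes { .a = 1, .b = 2 } by value, the returned set would
   contain two ipa_agg_value entries, (offset 0, value 1) and (offset 32,
   value 2).  */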

/* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
   bottom, not containing a variable component and without any known value at
   the same time.  */

DEBUG_FUNCTION void
ipcp_verify_propagated_values (void)
{
  struct cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info = IPA_NODE_REF (node);
      if (!opt_for_fn (node->decl, flag_ipa_cp)
	  || !opt_for_fn (node->decl, optimize))
	continue;
      int i, count = ipa_get_param_count (info);

      for (i = 0; i < count; i++)
	{
	  ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);

	  if (!lat->bottom
	      && !lat->contains_variable
	      && lat->values_count == 0)
	    {
	      if (dump_file)
		{
		  symtab->dump (dump_file);
		  fprintf (dump_file, "\nIPA lattices after constant "
			   "propagation, before gcc_unreachable:\n");
		  print_all_lattices (dump_file, true, false);
		}

	      gcc_unreachable ();
	    }
	}
    }
}

/* Return true iff X and Y should be considered equal values by IPA-CP.  */

static bool
values_equal_for_ipcp_p (tree x, tree y)
{
  gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);

  if (x == y)
    return true;

  if (TREE_CODE (x) == ADDR_EXPR
      && TREE_CODE (y) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
      && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
    return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
			    DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
  else
    return operand_equal_p (x, y, 0);
}

/* Return true iff X and Y should be considered equal contexts by IPA-CP.  */

static bool
values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
			 ipa_polymorphic_call_context y)
{
  return x.equal_to (y);
}


/* Add a new value source to the value represented by THIS, marking that a
   value comes from edge CS and (if the underlying jump function is a
   pass-through or an ancestor one) from a caller value SRC_VAL of a caller
   parameter described by SRC_INDEX.  OFFSET is negative if the source was the
   scalar value of the parameter itself or the offset within an aggregate.  */

template <typename valtype>
void
ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
				 int src_idx, HOST_WIDE_INT offset)
{
  ipcp_value_source<valtype> *src;

  src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
  src->offset = offset;
  src->cs = cs;
  src->val = src_val;
  src->index = src_idx;

  src->next = sources;
  sources = src;
}

/* Allocate a new ipcp_value holding a tree constant, initialize its value to
   SOURCE and clear all other fields.  */

static ipcp_value<tree> *
allocate_and_init_ipcp_value (tree source)
{
  ipcp_value<tree> *val;

  val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
  val->value = source;
  return val;
}

/* Allocate a new ipcp_value holding a polymorphic context, initialize its
   value to SOURCE and clear all other fields.  */

static ipcp_value<ipa_polymorphic_call_context> *
allocate_and_init_ipcp_value (ipa_polymorphic_call_context source)
{
  ipcp_value<ipa_polymorphic_call_context> *val;

  val = new (ipcp_poly_ctx_values_pool.allocate ())
    ipcp_value<ipa_polymorphic_call_context>();
  val->value = source;
  return val;
}
   CS, SRC_VAL, SRC_IDX and OFFSET are meant for add_source and have the same
   meaning.  OFFSET -1 means the source is scalar and not a part of an
   aggregate.  If non-NULL, VAL_P records the address of the existing or newly
   added ipcp_value.  If UNLIMITED is true, the number of values is not
   limited by PARAM_IPA_CP_VALUE_LIST_SIZE.  */

template <typename valtype>
bool
ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
				  ipcp_value<valtype> *src_val,
				  int src_idx, HOST_WIDE_INT offset,
				  ipcp_value<valtype> **val_p,
				  bool unlimited)
{
  ipcp_value<valtype> *val, *last_val = NULL;

  if (val_p)
    *val_p = NULL;

  if (bottom)
    return false;

  for (val = values; val; last_val = val, val = val->next)
    if (values_equal_for_ipcp_p (val->value, newval))
      {
	if (val_p)
	  *val_p = val;

	if (ipa_edge_within_scc (cs))
	  {
	    ipcp_value_source<valtype> *s;
	    for (s = val->sources; s; s = s->next)
	      if (s->cs == cs && s->val == src_val)
		break;
	    if (s)
	      return false;
	  }

	val->add_source (cs, src_val, src_idx, offset);
	return false;
      }

  if (!unlimited && values_count == opt_for_fn (cs->caller->decl,
						param_ipa_cp_value_list_size))
    {
      /* We can only free sources, not the values themselves, because sources
	 of other values in this SCC might point to them.  */
      for (val = values; val; val = val->next)
	{
	  while (val->sources)
	    {
	      ipcp_value_source<valtype> *src = val->sources;
	      val->sources = src->next;
	      ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
	    }
	}
      values = NULL;
      return set_to_bottom ();
    }

  values_count++;
  val = allocate_and_init_ipcp_value (newval);
  val->add_source (cs, src_val, src_idx, offset);
  val->next = NULL;

  /* Add the new value to the end of the value list, which can reduce
     iterations of the propagation stage for recursive functions.  */
  if (last_val)
    last_val->next = val;
  else
    values = val;

  if (val_p)
    *val_p = val;

  return true;
}

/* Return true if the ipcp_value VAL originated from a parameter value of a
   self-feeding recursive function via some kind of pass-through jump
   function.
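   For instance, in a hypothetical function like

     int
     foo (int i)
     {
       if (i < 8)
	 return foo (i + 1);
       return i;
     }

   the constants tracked for parameter I on the recursive edge are each
   derived from another value of I in FOO itself through the I + 1
   pass-through jump function, so all of them except the externally supplied
   seed are considered self-recursively generated.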
*/ 1931 1932 static bool 1933 self_recursively_generated_p (ipcp_value<tree> *val) 1934 { 1935 class ipa_node_params *info = NULL; 1936 1937 for (ipcp_value_source<tree> *src = val->sources; src; src = src->next) 1938 { 1939 cgraph_edge *cs = src->cs; 1940 1941 if (!src->val || cs->caller != cs->callee->function_symbol ()) 1942 return false; 1943 1944 if (src->val == val) 1945 continue; 1946 1947 if (!info) 1948 info = IPA_NODE_REF (cs->caller); 1949 1950 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, 1951 src->index); 1952 ipcp_lattice<tree> *src_lat; 1953 ipcp_value<tree> *src_val; 1954 1955 if (src->offset == -1) 1956 src_lat = &plats->itself; 1957 else 1958 { 1959 struct ipcp_agg_lattice *src_aglat; 1960 1961 for (src_aglat = plats->aggs; src_aglat; src_aglat = src_aglat->next) 1962 if (src_aglat->offset == src->offset) 1963 break; 1964 1965 if (!src_aglat) 1966 return false; 1967 1968 src_lat = src_aglat; 1969 } 1970 1971 for (src_val = src_lat->values; src_val; src_val = src_val->next) 1972 if (src_val == val) 1973 break; 1974 1975 if (!src_val) 1976 return false; 1977 } 1978 1979 return true; 1980 } 1981 1982 /* A helper function that returns result of operation specified by OPCODE on 1983 the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the 1984 value of SRC_VAL. If the operation is binary, OPND2 is a constant value 1985 acting as its second operand. If non-NULL, RES_TYPE is expected type of 1986 the result. */ 1987 1988 static tree 1989 get_val_across_arith_op (enum tree_code opcode, 1990 tree opnd1_type, 1991 tree opnd2, 1992 ipcp_value<tree> *src_val, 1993 tree res_type) 1994 { 1995 tree opnd1 = src_val->value; 1996 1997 /* Skip source values that is incompatible with specified type. */ 1998 if (opnd1_type 1999 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1))) 2000 return NULL_TREE; 2001 2002 return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type); 2003 } 2004 2005 /* Propagate values through an arithmetic transformation described by a jump 2006 function associated with edge CS, taking values from SRC_LAT and putting 2007 them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT. 2008 OPND2 is a constant value if transformation is a binary operation. 2009 SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of 2010 a part of the aggregate. SRC_IDX is the index of the source parameter. 2011 RES_TYPE is the value type of result being propagated into. Return true if 2012 DEST_LAT changed. */ 2013 2014 static bool 2015 propagate_vals_across_arith_jfunc (cgraph_edge *cs, 2016 enum tree_code opcode, 2017 tree opnd1_type, 2018 tree opnd2, 2019 ipcp_lattice<tree> *src_lat, 2020 ipcp_lattice<tree> *dest_lat, 2021 HOST_WIDE_INT src_offset, 2022 int src_idx, 2023 tree res_type) 2024 { 2025 ipcp_value<tree> *src_val; 2026 bool ret = false; 2027 2028 /* Due to circular dependencies, propagating within an SCC through arithmetic 2029 transformation would create infinite number of values. But for 2030 self-feeding recursive function, we could allow propagation in a limited 2031 count, and this can enable a simple kind of recursive function versioning. 2032 For other scenario, we would just make lattices bottom. 
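   For example, if a self-recursive call passes I + 1 and the externally
   visible callers only pass the constant 1, up to
   param_ipa_cp_max_recursive_depth further values (2, 3, 4, ...) are
   generated for the recursive edge below, which is what later permits
   building a chain of specialized clones for the recursion.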
*/ 2033 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs)) 2034 { 2035 int i; 2036 2037 int max_recursive_depth = opt_for_fn(cs->caller->decl, 2038 param_ipa_cp_max_recursive_depth); 2039 if (src_lat != dest_lat || max_recursive_depth < 1) 2040 return dest_lat->set_contains_variable (); 2041 2042 /* No benefit if recursive execution is in low probability. */ 2043 if (cs->sreal_frequency () * 100 2044 <= ((sreal) 1) * opt_for_fn (cs->caller->decl, 2045 param_ipa_cp_min_recursive_probability)) 2046 return dest_lat->set_contains_variable (); 2047 2048 auto_vec<ipcp_value<tree> *, 8> val_seeds; 2049 2050 for (src_val = src_lat->values; src_val; src_val = src_val->next) 2051 { 2052 /* Now we do not use self-recursively generated value as propagation 2053 source, this is absolutely conservative, but could avoid explosion 2054 of lattice's value space, especially when one recursive function 2055 calls another recursive. */ 2056 if (self_recursively_generated_p (src_val)) 2057 { 2058 ipcp_value_source<tree> *s; 2059 2060 /* If the lattice has already been propagated for the call site, 2061 no need to do that again. */ 2062 for (s = src_val->sources; s; s = s->next) 2063 if (s->cs == cs) 2064 return dest_lat->set_contains_variable (); 2065 } 2066 else 2067 val_seeds.safe_push (src_val); 2068 } 2069 2070 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size); 2071 2072 /* Recursively generate lattice values with a limited count. */ 2073 FOR_EACH_VEC_ELT (val_seeds, i, src_val) 2074 { 2075 for (int j = 1; j < max_recursive_depth; j++) 2076 { 2077 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, 2078 src_val, res_type); 2079 if (!cstval) 2080 break; 2081 2082 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, 2083 src_offset, &src_val, true); 2084 gcc_checking_assert (src_val); 2085 } 2086 } 2087 ret |= dest_lat->set_contains_variable (); 2088 } 2089 else 2090 for (src_val = src_lat->values; src_val; src_val = src_val->next) 2091 { 2092 /* Now we do not use self-recursively generated value as propagation 2093 source, otherwise it is easy to make value space of normal lattice 2094 overflow. */ 2095 if (self_recursively_generated_p (src_val)) 2096 { 2097 ret |= dest_lat->set_contains_variable (); 2098 continue; 2099 } 2100 2101 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, 2102 src_val, res_type); 2103 if (cstval) 2104 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, 2105 src_offset); 2106 else 2107 ret |= dest_lat->set_contains_variable (); 2108 } 2109 2110 return ret; 2111 } 2112 2113 /* Propagate values through a pass-through jump function JFUNC associated with 2114 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX 2115 is the index of the source parameter. PARM_TYPE is the type of the 2116 parameter to which the result is passed. */ 2117 2118 static bool 2119 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc, 2120 ipcp_lattice<tree> *src_lat, 2121 ipcp_lattice<tree> *dest_lat, int src_idx, 2122 tree parm_type) 2123 { 2124 return propagate_vals_across_arith_jfunc (cs, 2125 ipa_get_jf_pass_through_operation (jfunc), 2126 NULL_TREE, 2127 ipa_get_jf_pass_through_operand (jfunc), 2128 src_lat, dest_lat, -1, src_idx, parm_type); 2129 } 2130 2131 /* Propagate values through an ancestor jump function JFUNC associated with 2132 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX 2133 is the index of the source parameter. 
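   An ancestor jump function typically describes passing on the address of a
   member of an object the parameter points to, as in this purely
   illustrative example:

     struct S { int a; int b; };
     extern void bar (int *p);

     void
     foo (struct S *s)
     {
       bar (&s->b);
     }

   Every known constant value of S then yields a value for the parameter of
   BAR offset by the position of field B.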
*/

static bool
propagate_vals_across_ancestor (struct cgraph_edge *cs,
				struct ipa_jump_func *jfunc,
				ipcp_lattice<tree> *src_lat,
				ipcp_lattice<tree> *dest_lat, int src_idx)
{
  ipcp_value<tree> *src_val;
  bool ret = false;

  if (ipa_edge_within_scc (cs))
    return dest_lat->set_contains_variable ();

  for (src_val = src_lat->values; src_val; src_val = src_val->next)
    {
      tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);

      if (t)
	ret |= dest_lat->add_value (t, cs, src_val, src_idx);
      else
	ret |= dest_lat->set_contains_variable ();
    }

  return ret;
}

/* Propagate scalar values across jump function JFUNC that is associated with
   edge CS and put the values into DEST_LAT.  PARAM_TYPE is the type of the
   parameter to which the result is passed.  */

static bool
propagate_scalar_across_jump_function (struct cgraph_edge *cs,
				       struct ipa_jump_func *jfunc,
				       ipcp_lattice<tree> *dest_lat,
				       tree param_type)
{
  if (dest_lat->bottom)
    return false;

  if (jfunc->type == IPA_JF_CONST)
    {
      tree val = ipa_get_jf_constant (jfunc);
      return dest_lat->add_value (val, cs, NULL, 0);
    }
  else if (jfunc->type == IPA_JF_PASS_THROUGH
	   || jfunc->type == IPA_JF_ANCESTOR)
    {
      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      ipcp_lattice<tree> *src_lat;
      int src_idx;
      bool ret;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
	src_idx = ipa_get_jf_ancestor_formal_id (jfunc);

      src_lat = ipa_get_scalar_lat (caller_info, src_idx);
      if (src_lat->bottom)
	return dest_lat->set_contains_variable ();

      /* If we would need to clone the caller and cannot, do not propagate.  */
      if (!ipcp_versionable_function_p (cs->caller)
	  && (src_lat->contains_variable
	      || (src_lat->values_count > 1)))
	return dest_lat->set_contains_variable ();

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
						  dest_lat, src_idx,
						  param_type);
      else
	ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
					      src_idx);

      if (src_lat->contains_variable)
	ret |= dest_lat->set_contains_variable ();

      return ret;
    }

  /* TODO: We currently do not handle member method pointers in IPA-CP (we
     only use them for indirect inlining); we should propagate them too.  */
  return dest_lat->set_contains_variable ();
}

/* Propagate polymorphic contexts across jump function JFUNC that is
   associated with edge CS and describes argument IDX, and put the contexts
   into DEST_LAT.
*/ 2221 2222 static bool 2223 propagate_context_across_jump_function (cgraph_edge *cs, 2224 ipa_jump_func *jfunc, int idx, 2225 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat) 2226 { 2227 ipa_edge_args *args = IPA_EDGE_REF (cs); 2228 if (dest_lat->bottom) 2229 return false; 2230 bool ret = false; 2231 bool added_sth = false; 2232 bool type_preserved = true; 2233 2234 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr 2235 = ipa_get_ith_polymorhic_call_context (args, idx); 2236 2237 if (edge_ctx_ptr) 2238 edge_ctx = *edge_ctx_ptr; 2239 2240 if (jfunc->type == IPA_JF_PASS_THROUGH 2241 || jfunc->type == IPA_JF_ANCESTOR) 2242 { 2243 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); 2244 int src_idx; 2245 ipcp_lattice<ipa_polymorphic_call_context> *src_lat; 2246 2247 /* TODO: Once we figure out how to propagate speculations, it will 2248 probably be a good idea to switch to speculation if type_preserved is 2249 not set instead of punting. */ 2250 if (jfunc->type == IPA_JF_PASS_THROUGH) 2251 { 2252 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR) 2253 goto prop_fail; 2254 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc); 2255 src_idx = ipa_get_jf_pass_through_formal_id (jfunc); 2256 } 2257 else 2258 { 2259 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc); 2260 src_idx = ipa_get_jf_ancestor_formal_id (jfunc); 2261 } 2262 2263 src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx); 2264 /* If we would need to clone the caller and cannot, do not propagate. */ 2265 if (!ipcp_versionable_function_p (cs->caller) 2266 && (src_lat->contains_variable 2267 || (src_lat->values_count > 1))) 2268 goto prop_fail; 2269 2270 ipcp_value<ipa_polymorphic_call_context> *src_val; 2271 for (src_val = src_lat->values; src_val; src_val = src_val->next) 2272 { 2273 ipa_polymorphic_call_context cur = src_val->value; 2274 2275 if (!type_preserved) 2276 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor); 2277 if (jfunc->type == IPA_JF_ANCESTOR) 2278 cur.offset_by (ipa_get_jf_ancestor_offset (jfunc)); 2279 /* TODO: In cases we know how the context is going to be used, 2280 we can improve the result by passing proper OTR_TYPE. */ 2281 cur.combine_with (edge_ctx); 2282 if (!cur.useless_p ()) 2283 { 2284 if (src_lat->contains_variable 2285 && !edge_ctx.equal_to (cur)) 2286 ret |= dest_lat->set_contains_variable (); 2287 ret |= dest_lat->add_value (cur, cs, src_val, src_idx); 2288 added_sth = true; 2289 } 2290 } 2291 } 2292 2293 prop_fail: 2294 if (!added_sth) 2295 { 2296 if (!edge_ctx.useless_p ()) 2297 ret |= dest_lat->add_value (edge_ctx, cs); 2298 else 2299 ret |= dest_lat->set_contains_variable (); 2300 } 2301 2302 return ret; 2303 } 2304 2305 /* Propagate bits across jfunc that is associated with 2306 edge cs and update dest_lattice accordingly. */ 2307 2308 bool 2309 propagate_bits_across_jump_function (cgraph_edge *cs, int idx, 2310 ipa_jump_func *jfunc, 2311 ipcp_bits_lattice *dest_lattice) 2312 { 2313 if (dest_lattice->bottom_p ()) 2314 return false; 2315 2316 enum availability availability; 2317 cgraph_node *callee = cs->callee->function_symbol (&availability); 2318 class ipa_node_params *callee_info = IPA_NODE_REF (callee); 2319 tree parm_type = ipa_get_type (callee_info, idx); 2320 2321 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the 2322 transform for these cases. Similarly, we can have bad type mismatches 2323 with LTO, avoid doing anything with those too. 
*/
  if (!parm_type
      || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
		 "param %i of %s is NULL or unsuitable for bits propagation\n",
		 idx, cs->callee->dump_name ());

      return dest_lattice->set_to_bottom ();
    }

  unsigned precision = TYPE_PRECISION (parm_type);
  signop sgn = TYPE_SIGN (parm_type);

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      tree operand = NULL_TREE;
      enum tree_code code;
      unsigned src_idx;
      bool keep_null = false;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  code = ipa_get_jf_pass_through_operation (jfunc);
	  src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
	  if (code != NOP_EXPR)
	    operand = ipa_get_jf_pass_through_operand (jfunc);
	}
      else
	{
	  code = POINTER_PLUS_EXPR;
	  src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
	  unsigned HOST_WIDE_INT offset
	    = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
	  keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
	  operand = build_int_cstu (size_type_node, offset);
	}

      class ipcp_param_lattices *src_lats
	= ipa_get_parm_lattices (caller_info, src_idx);

      /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
	 For example, consider:

	   int f(int x)
	   {
	     g (x & 0xff);
	   }

	 Assume the lattice for x is bottom.  We can still propagate the fact
	 that all bits of the argument other than the low eight are zero: the
	 result of x & 0xff gets computed during the ccp1 pass and we store it
	 in the jump function during the analysis stage.  */

      if (!src_lats->bits_lattice.bottom_p ())
	{
	  bool drop_all_ones
	    = keep_null && !src_lats->bits_lattice.known_nonzero_p ();

	  return dest_lattice->meet_with (src_lats->bits_lattice, precision,
					  sgn, code, operand, drop_all_ones);
	}
    }

  if (jfunc->bits)
    return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
				    precision);
  else
    return dest_lattice->set_to_bottom ();
}

/* Propagate the value range of a parameter across jump function JFUNC that
   is associated with edge CS, whose callee parameter has type PARAM_TYPE,
   and update DEST_PLATS accordingly.
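   As a purely illustrative example, if the analyzed caller contains

     void
     quux (int x)
     {
       if (x >= 0 && x <= 9)
	 baz (x + 1);
     }

   the jump function for the argument of BAZ records the X + 1 operation and
   the range [0, 9] known for X at the call site, from which the lattice of
   the corresponding parameter of BAZ can be narrowed to [1, 10].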
*/ 2397 2398 static bool 2399 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc, 2400 class ipcp_param_lattices *dest_plats, 2401 tree param_type) 2402 { 2403 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range; 2404 2405 if (dest_lat->bottom_p ()) 2406 return false; 2407 2408 if (!param_type 2409 || (!INTEGRAL_TYPE_P (param_type) 2410 && !POINTER_TYPE_P (param_type))) 2411 return dest_lat->set_to_bottom (); 2412 2413 if (jfunc->type == IPA_JF_PASS_THROUGH) 2414 { 2415 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc); 2416 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); 2417 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc); 2418 class ipcp_param_lattices *src_lats 2419 = ipa_get_parm_lattices (caller_info, src_idx); 2420 tree operand_type = ipa_get_type (caller_info, src_idx); 2421 2422 if (src_lats->m_value_range.bottom_p ()) 2423 return dest_lat->set_to_bottom (); 2424 2425 value_range vr; 2426 if (TREE_CODE_CLASS (operation) == tcc_unary) 2427 ipa_vr_operation_and_type_effects (&vr, 2428 &src_lats->m_value_range.m_vr, 2429 operation, param_type, 2430 operand_type); 2431 /* A crude way to prevent unbounded number of value range updates 2432 in SCC components. We should allow limited number of updates within 2433 SCC, too. */ 2434 else if (!ipa_edge_within_scc (cs)) 2435 { 2436 tree op = ipa_get_jf_pass_through_operand (jfunc); 2437 value_range op_vr (op, op); 2438 value_range op_res,res; 2439 2440 range_fold_binary_expr (&op_res, operation, operand_type, 2441 &src_lats->m_value_range.m_vr, &op_vr); 2442 ipa_vr_operation_and_type_effects (&vr, 2443 &op_res, 2444 NOP_EXPR, param_type, 2445 operand_type); 2446 } 2447 if (!vr.undefined_p () && !vr.varying_p ()) 2448 { 2449 if (jfunc->m_vr) 2450 { 2451 value_range jvr; 2452 if (ipa_vr_operation_and_type_effects (&jvr, jfunc->m_vr, 2453 NOP_EXPR, 2454 param_type, 2455 jfunc->m_vr->type ())) 2456 vr.intersect (jvr); 2457 } 2458 return dest_lat->meet_with (&vr); 2459 } 2460 } 2461 else if (jfunc->type == IPA_JF_CONST) 2462 { 2463 tree val = ipa_get_jf_constant (jfunc); 2464 if (TREE_CODE (val) == INTEGER_CST) 2465 { 2466 val = fold_convert (param_type, val); 2467 if (TREE_OVERFLOW_P (val)) 2468 val = drop_tree_overflow (val); 2469 2470 value_range tmpvr (val, val); 2471 return dest_lat->meet_with (&tmpvr); 2472 } 2473 } 2474 2475 value_range vr; 2476 if (jfunc->m_vr 2477 && ipa_vr_operation_and_type_effects (&vr, jfunc->m_vr, NOP_EXPR, 2478 param_type, 2479 jfunc->m_vr->type ())) 2480 return dest_lat->meet_with (&vr); 2481 else 2482 return dest_lat->set_to_bottom (); 2483 } 2484 2485 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches 2486 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all 2487 other cases, return false). If there are no aggregate items, set 2488 aggs_by_ref to NEW_AGGS_BY_REF. */ 2489 2490 static bool 2491 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats, 2492 bool new_aggs_by_ref) 2493 { 2494 if (dest_plats->aggs) 2495 { 2496 if (dest_plats->aggs_by_ref != new_aggs_by_ref) 2497 { 2498 set_agg_lats_to_bottom (dest_plats); 2499 return true; 2500 } 2501 } 2502 else 2503 dest_plats->aggs_by_ref = new_aggs_by_ref; 2504 return false; 2505 } 2506 2507 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an 2508 already existing lattice for the given OFFSET and SIZE, marking all skipped 2509 lattices as containing variable and checking for overlaps. 
   If there is no
   already existing lattice for the OFFSET and VAL_SIZE, create one,
   initialize it with offset, size and contains_variable set to PRE_EXISTING,
   and return true, unless there are too many already.  If there are too
   many, return false.  If there are overlaps, turn the whole DEST_PLATS to
   bottom and return false.  If any skipped lattices were newly marked as
   containing variable, set *CHANGE to true.  MAX_AGG_ITEMS is the maximum
   number of lattices.  */

static bool
merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
		     HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
		     struct ipcp_agg_lattice ***aglat,
		     bool pre_existing, bool *change, int max_agg_items)
{
  gcc_checking_assert (offset >= 0);

  while (**aglat && (**aglat)->offset < offset)
    {
      if ((**aglat)->offset + (**aglat)->size > offset)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      *change |= (**aglat)->set_contains_variable ();
      *aglat = &(**aglat)->next;
    }

  if (**aglat && (**aglat)->offset == offset)
    {
      if ((**aglat)->size != val_size)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      gcc_assert (!(**aglat)->next
		  || (**aglat)->next->offset >= offset + val_size);
      return true;
    }
  else
    {
      struct ipcp_agg_lattice *new_al;

      if (**aglat && (**aglat)->offset < offset + val_size)
	{
	  set_agg_lats_to_bottom (dest_plats);
	  return false;
	}
      if (dest_plats->aggs_count == max_agg_items)
	return false;
      dest_plats->aggs_count++;
      new_al = ipcp_agg_lattice_pool.allocate ();
      memset (new_al, 0, sizeof (*new_al));

      new_al->offset = offset;
      new_al->size = val_size;
      new_al->contains_variable = pre_existing;

      new_al->next = **aglat;
      **aglat = new_al;
      return true;
    }
}

/* Set all AGLAT and all other aggregate lattices reachable by next pointers
   as containing an unknown value.  */

static bool
set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
{
  bool ret = false;
  while (aglat)
    {
      ret |= aglat->set_contains_variable ();
      aglat = aglat->next;
    }
  return ret;
}

/* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
   OFFSET_DELTA.  CS is the call graph edge and SRC_IDX the index of the
   source parameter used for lattice value sources.  Return true if
   DEST_PLATS changed in any way.
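   For example, if SRC_PLATS tracks constants at bit offsets 0 and 64 and
   OFFSET_DELTA is 64 (the callee only sees a trailing part of the
   aggregate), the source lattice at offset 64 is merged to destination
   offset 0, while the one whose adjusted offset would be negative is simply
   skipped.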
*/

static bool
merge_aggregate_lattices (struct cgraph_edge *cs,
			  class ipcp_param_lattices *dest_plats,
			  class ipcp_param_lattices *src_plats,
			  int src_idx, HOST_WIDE_INT offset_delta)
{
  bool pre_existing = dest_plats->aggs != NULL;
  struct ipcp_agg_lattice **dst_aglat;
  bool ret = false;

  if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
    return true;
  if (src_plats->aggs_bottom)
    return set_agg_lats_contain_variable (dest_plats);
  if (src_plats->aggs_contain_variable)
    ret |= set_agg_lats_contain_variable (dest_plats);
  dst_aglat = &dest_plats->aggs;

  int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
				  param_ipa_max_agg_items);
  for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
       src_aglat;
       src_aglat = src_aglat->next)
    {
      HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;

      if (new_offset < 0)
	continue;
      if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
			       &dst_aglat, pre_existing, &ret, max_agg_items))
	{
	  struct ipcp_agg_lattice *new_al = *dst_aglat;

	  dst_aglat = &(*dst_aglat)->next;
	  if (src_aglat->bottom)
	    {
	      ret |= new_al->set_contains_variable ();
	      continue;
	    }
	  if (src_aglat->contains_variable)
	    ret |= new_al->set_contains_variable ();
	  for (ipcp_value<tree> *val = src_aglat->values;
	       val;
	       val = val->next)
	    ret |= new_al->add_value (val->value, cs, val, src_idx,
				      src_aglat->offset);
	}
      else if (dest_plats->aggs_bottom)
	return true;
    }
  ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
  return ret;
}

/* Determine whether there is anything to propagate from SRC_PLATS through a
   pass-through JFUNC and if so, whether it conforms to the rules about
   propagating values passed by reference.  */

static bool
agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
				struct ipa_jump_func *jfunc)
{
  return src_plats->aggs
	 && (!src_plats->aggs_by_ref
	     || ipa_get_jf_pass_through_agg_preserved (jfunc));
}

/* Propagate values through ITEM, a jump function for a part of an aggregate,
   into the corresponding aggregate lattice AGLAT.  CS is the call graph edge
   associated with the jump function.  Return true if AGLAT changed in any
   way.
*/ 2663 2664 static bool 2665 propagate_aggregate_lattice (struct cgraph_edge *cs, 2666 struct ipa_agg_jf_item *item, 2667 struct ipcp_agg_lattice *aglat) 2668 { 2669 class ipa_node_params *caller_info; 2670 class ipcp_param_lattices *src_plats; 2671 struct ipcp_lattice<tree> *src_lat; 2672 HOST_WIDE_INT src_offset; 2673 int src_idx; 2674 tree load_type; 2675 bool ret; 2676 2677 if (item->jftype == IPA_JF_CONST) 2678 { 2679 tree value = item->value.constant; 2680 2681 gcc_checking_assert (is_gimple_ip_invariant (value)); 2682 return aglat->add_value (value, cs, NULL, 0); 2683 } 2684 2685 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH 2686 || item->jftype == IPA_JF_LOAD_AGG); 2687 2688 caller_info = IPA_NODE_REF (cs->caller); 2689 src_idx = item->value.pass_through.formal_id; 2690 src_plats = ipa_get_parm_lattices (caller_info, src_idx); 2691 2692 if (item->jftype == IPA_JF_PASS_THROUGH) 2693 { 2694 load_type = NULL_TREE; 2695 src_lat = &src_plats->itself; 2696 src_offset = -1; 2697 } 2698 else 2699 { 2700 HOST_WIDE_INT load_offset = item->value.load_agg.offset; 2701 struct ipcp_agg_lattice *src_aglat; 2702 2703 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next) 2704 if (src_aglat->offset >= load_offset) 2705 break; 2706 2707 load_type = item->value.load_agg.type; 2708 if (!src_aglat 2709 || src_aglat->offset > load_offset 2710 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type)) 2711 || src_plats->aggs_by_ref != item->value.load_agg.by_ref) 2712 return aglat->set_contains_variable (); 2713 2714 src_lat = src_aglat; 2715 src_offset = load_offset; 2716 } 2717 2718 if (src_lat->bottom 2719 || (!ipcp_versionable_function_p (cs->caller) 2720 && !src_lat->is_single_const ())) 2721 return aglat->set_contains_variable (); 2722 2723 ret = propagate_vals_across_arith_jfunc (cs, 2724 item->value.pass_through.operation, 2725 load_type, 2726 item->value.pass_through.operand, 2727 src_lat, aglat, 2728 src_offset, 2729 src_idx, 2730 item->type); 2731 2732 if (src_lat->contains_variable) 2733 ret |= aglat->set_contains_variable (); 2734 2735 return ret; 2736 } 2737 2738 /* Propagate scalar values across jump function JFUNC that is associated with 2739 edge CS and put the values into DEST_LAT. */ 2740 2741 static bool 2742 propagate_aggs_across_jump_function (struct cgraph_edge *cs, 2743 struct ipa_jump_func *jfunc, 2744 class ipcp_param_lattices *dest_plats) 2745 { 2746 bool ret = false; 2747 2748 if (dest_plats->aggs_bottom) 2749 return false; 2750 2751 if (jfunc->type == IPA_JF_PASS_THROUGH 2752 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR) 2753 { 2754 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); 2755 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc); 2756 class ipcp_param_lattices *src_plats; 2757 2758 src_plats = ipa_get_parm_lattices (caller_info, src_idx); 2759 if (agg_pass_through_permissible_p (src_plats, jfunc)) 2760 { 2761 /* Currently we do not produce clobber aggregate jump 2762 functions, replace with merging when we do. 
*/
	  gcc_assert (!jfunc->agg.items);
	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
					   src_idx, 0);
	}
      else
	ret |= set_agg_lats_contain_variable (dest_plats);
    }
  else if (jfunc->type == IPA_JF_ANCESTOR
	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
      class ipcp_param_lattices *src_plats;

      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (src_plats->aggs && src_plats->aggs_by_ref)
	{
	  /* Currently we do not produce clobber aggregate jump
	     functions, replace with merging when we do.  */
	  gcc_assert (!jfunc->agg.items);
	  ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
					   ipa_get_jf_ancestor_offset (jfunc));
	}
      else if (!src_plats->aggs_by_ref)
	ret |= set_agg_lats_to_bottom (dest_plats);
      else
	ret |= set_agg_lats_contain_variable (dest_plats);
    }
  else if (jfunc->agg.items)
    {
      bool pre_existing = dest_plats->aggs != NULL;
      struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
      struct ipa_agg_jf_item *item;
      int i;

      if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
	return true;

      int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
				      param_ipa_max_agg_items);
      FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
	{
	  HOST_WIDE_INT val_size;

	  if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
	    continue;
	  val_size = tree_to_shwi (TYPE_SIZE (item->type));

	  if (merge_agg_lats_step (dest_plats, item->offset, val_size,
				   &aglat, pre_existing, &ret, max_agg_items))
	    {
	      ret |= propagate_aggregate_lattice (cs, item, *aglat);
	      aglat = &(*aglat)->next;
	    }
	  else if (dest_plats->aggs_bottom)
	    return true;
	}

      ret |= set_chain_of_aglats_contains_variable (*aglat);
    }
  else
    ret |= set_agg_lats_contain_variable (dest_plats);

  return ret;
}

/* Return true if on the way from CS->caller to the final (non-alias and
   non-thunk) destination, the call passes through a thunk.  */

static bool
call_passes_through_thunk_p (cgraph_edge *cs)
{
  cgraph_node *alias_or_thunk = cs->callee;
  while (alias_or_thunk->alias)
    alias_or_thunk = alias_or_thunk->get_alias_target ();
  return alias_or_thunk->thunk.thunk_p;
}

/* Propagate constants from the caller to the callee of CS.
*/ 2843 2844 static bool 2845 propagate_constants_across_call (struct cgraph_edge *cs) 2846 { 2847 class ipa_node_params *callee_info; 2848 enum availability availability; 2849 cgraph_node *callee; 2850 class ipa_edge_args *args; 2851 bool ret = false; 2852 int i, args_count, parms_count; 2853 2854 callee = cs->callee->function_symbol (&availability); 2855 if (!callee->definition) 2856 return false; 2857 gcc_checking_assert (callee->has_gimple_body_p ()); 2858 callee_info = IPA_NODE_REF (callee); 2859 if (!callee_info) 2860 return false; 2861 2862 args = IPA_EDGE_REF (cs); 2863 parms_count = ipa_get_param_count (callee_info); 2864 if (parms_count == 0) 2865 return false; 2866 if (!args 2867 || !opt_for_fn (cs->caller->decl, flag_ipa_cp) 2868 || !opt_for_fn (cs->caller->decl, optimize)) 2869 { 2870 for (i = 0; i < parms_count; i++) 2871 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, 2872 i)); 2873 return ret; 2874 } 2875 args_count = ipa_get_cs_argument_count (args); 2876 2877 /* If this call goes through a thunk we must not propagate to the first (0th) 2878 parameter. However, we might need to uncover a thunk from below a series 2879 of aliases first. */ 2880 if (call_passes_through_thunk_p (cs)) 2881 { 2882 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, 2883 0)); 2884 i = 1; 2885 } 2886 else 2887 i = 0; 2888 2889 for (; (i < args_count) && (i < parms_count); i++) 2890 { 2891 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i); 2892 class ipcp_param_lattices *dest_plats; 2893 tree param_type = ipa_get_type (callee_info, i); 2894 2895 dest_plats = ipa_get_parm_lattices (callee_info, i); 2896 if (availability == AVAIL_INTERPOSABLE) 2897 ret |= set_all_contains_variable (dest_plats); 2898 else 2899 { 2900 ret |= propagate_scalar_across_jump_function (cs, jump_func, 2901 &dest_plats->itself, 2902 param_type); 2903 ret |= propagate_context_across_jump_function (cs, jump_func, i, 2904 &dest_plats->ctxlat); 2905 ret 2906 |= propagate_bits_across_jump_function (cs, i, jump_func, 2907 &dest_plats->bits_lattice); 2908 ret |= propagate_aggs_across_jump_function (cs, jump_func, 2909 dest_plats); 2910 if (opt_for_fn (callee->decl, flag_ipa_vrp)) 2911 ret |= propagate_vr_across_jump_function (cs, jump_func, 2912 dest_plats, param_type); 2913 else 2914 ret |= dest_plats->m_value_range.set_to_bottom (); 2915 } 2916 } 2917 for (; i < parms_count; i++) 2918 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i)); 2919 2920 return ret; 2921 } 2922 2923 /* If an indirect edge IE can be turned into a direct one based on KNOWN_VALS 2924 KNOWN_CONTEXTS, KNOWN_AGGS or AGG_REPS return the destination. The latter 2925 three can be NULL. If AGG_REPS is not NULL, KNOWN_AGGS is ignored. 
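   As a purely illustrative example, when the analyzed function is

     static void
     foo (void (*fp) (int))
     {
       fp (5);
     }

   and KNOWN_CSTS records that FP is always &bar, the indirect edge for the
   call through FP can be resolved to a direct call to BAR.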
*/ 2926 2927 static tree 2928 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie, 2929 vec<tree> known_csts, 2930 vec<ipa_polymorphic_call_context> known_contexts, 2931 vec<ipa_agg_value_set> known_aggs, 2932 struct ipa_agg_replacement_value *agg_reps, 2933 bool *speculative) 2934 { 2935 int param_index = ie->indirect_info->param_index; 2936 HOST_WIDE_INT anc_offset; 2937 tree t = NULL; 2938 tree target = NULL; 2939 2940 *speculative = false; 2941 2942 if (param_index == -1) 2943 return NULL_TREE; 2944 2945 if (!ie->indirect_info->polymorphic) 2946 { 2947 tree t = NULL; 2948 2949 if (ie->indirect_info->agg_contents) 2950 { 2951 t = NULL; 2952 if (agg_reps && ie->indirect_info->guaranteed_unmodified) 2953 { 2954 while (agg_reps) 2955 { 2956 if (agg_reps->index == param_index 2957 && agg_reps->offset == ie->indirect_info->offset 2958 && agg_reps->by_ref == ie->indirect_info->by_ref) 2959 { 2960 t = agg_reps->value; 2961 break; 2962 } 2963 agg_reps = agg_reps->next; 2964 } 2965 } 2966 if (!t) 2967 { 2968 struct ipa_agg_value_set *agg; 2969 if (known_aggs.length () > (unsigned int) param_index) 2970 agg = &known_aggs[param_index]; 2971 else 2972 agg = NULL; 2973 bool from_global_constant; 2974 t = ipa_find_agg_cst_for_param (agg, 2975 (unsigned) param_index 2976 < known_csts.length () 2977 ? known_csts[param_index] 2978 : NULL, 2979 ie->indirect_info->offset, 2980 ie->indirect_info->by_ref, 2981 &from_global_constant); 2982 if (t 2983 && !from_global_constant 2984 && !ie->indirect_info->guaranteed_unmodified) 2985 t = NULL_TREE; 2986 } 2987 } 2988 else if ((unsigned) param_index < known_csts.length ()) 2989 t = known_csts[param_index]; 2990 2991 if (t 2992 && TREE_CODE (t) == ADDR_EXPR 2993 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL) 2994 return TREE_OPERAND (t, 0); 2995 else 2996 return NULL_TREE; 2997 } 2998 2999 if (!opt_for_fn (ie->caller->decl, flag_devirtualize)) 3000 return NULL_TREE; 3001 3002 gcc_assert (!ie->indirect_info->agg_contents); 3003 anc_offset = ie->indirect_info->offset; 3004 3005 t = NULL; 3006 3007 /* Try to work out value of virtual table pointer value in replacements. */ 3008 if (!t && agg_reps && !ie->indirect_info->by_ref) 3009 { 3010 while (agg_reps) 3011 { 3012 if (agg_reps->index == param_index 3013 && agg_reps->offset == ie->indirect_info->offset 3014 && agg_reps->by_ref) 3015 { 3016 t = agg_reps->value; 3017 break; 3018 } 3019 agg_reps = agg_reps->next; 3020 } 3021 } 3022 3023 /* Try to work out value of virtual table pointer value in known 3024 aggregate values. */ 3025 if (!t && known_aggs.length () > (unsigned int) param_index 3026 && !ie->indirect_info->by_ref) 3027 { 3028 struct ipa_agg_value_set *agg = &known_aggs[param_index]; 3029 t = ipa_find_agg_cst_for_param (agg, 3030 (unsigned) param_index 3031 < known_csts.length () 3032 ? known_csts[param_index] : NULL, 3033 ie->indirect_info->offset, true); 3034 } 3035 3036 /* If we found the virtual table pointer, lookup the target. */ 3037 if (t) 3038 { 3039 tree vtable; 3040 unsigned HOST_WIDE_INT offset; 3041 if (vtable_pointer_value_to_vtable (t, &vtable, &offset)) 3042 { 3043 bool can_refer; 3044 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token, 3045 vtable, offset, &can_refer); 3046 if (can_refer) 3047 { 3048 if (!target 3049 || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE) 3050 || !possible_polymorphic_call_target_p 3051 (ie, cgraph_node::get (target))) 3052 { 3053 /* Do not speculate builtin_unreachable, it is stupid! 
*/ 3054 if (ie->indirect_info->vptr_changed) 3055 return NULL; 3056 target = ipa_impossible_devirt_target (ie, target); 3057 } 3058 *speculative = ie->indirect_info->vptr_changed; 3059 if (!*speculative) 3060 return target; 3061 } 3062 } 3063 } 3064 3065 /* Do we know the constant value of pointer? */ 3066 if (!t && (unsigned) param_index < known_csts.length ()) 3067 t = known_csts[param_index]; 3068 3069 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO); 3070 3071 ipa_polymorphic_call_context context; 3072 if (known_contexts.length () > (unsigned int) param_index) 3073 { 3074 context = known_contexts[param_index]; 3075 context.offset_by (anc_offset); 3076 if (ie->indirect_info->vptr_changed) 3077 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor, 3078 ie->indirect_info->otr_type); 3079 if (t) 3080 { 3081 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context 3082 (t, ie->indirect_info->otr_type, anc_offset); 3083 if (!ctx2.useless_p ()) 3084 context.combine_with (ctx2, ie->indirect_info->otr_type); 3085 } 3086 } 3087 else if (t) 3088 { 3089 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type, 3090 anc_offset); 3091 if (ie->indirect_info->vptr_changed) 3092 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor, 3093 ie->indirect_info->otr_type); 3094 } 3095 else 3096 return NULL_TREE; 3097 3098 vec <cgraph_node *>targets; 3099 bool final; 3100 3101 targets = possible_polymorphic_call_targets 3102 (ie->indirect_info->otr_type, 3103 ie->indirect_info->otr_token, 3104 context, &final); 3105 if (!final || targets.length () > 1) 3106 { 3107 struct cgraph_node *node; 3108 if (*speculative) 3109 return target; 3110 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively) 3111 || ie->speculative || !ie->maybe_hot_p ()) 3112 return NULL; 3113 node = try_speculative_devirtualization (ie->indirect_info->otr_type, 3114 ie->indirect_info->otr_token, 3115 context); 3116 if (node) 3117 { 3118 *speculative = true; 3119 target = node->decl; 3120 } 3121 else 3122 return NULL; 3123 } 3124 else 3125 { 3126 *speculative = false; 3127 if (targets.length () == 1) 3128 target = targets[0]->decl; 3129 else 3130 target = ipa_impossible_devirt_target (ie, NULL_TREE); 3131 } 3132 3133 if (target && !possible_polymorphic_call_target_p (ie, 3134 cgraph_node::get (target))) 3135 { 3136 if (*speculative) 3137 return NULL; 3138 target = ipa_impossible_devirt_target (ie, target); 3139 } 3140 3141 return target; 3142 } 3143 3144 3145 /* If an indirect edge IE can be turned into a direct one based on KNOWN_CSTS, 3146 KNOWN_CONTEXTS (which can be vNULL) or KNOWN_AGGS (which also can be vNULL) 3147 return the destination. */ 3148 3149 tree 3150 ipa_get_indirect_edge_target (struct cgraph_edge *ie, 3151 vec<tree> known_csts, 3152 vec<ipa_polymorphic_call_context> known_contexts, 3153 vec<ipa_agg_value_set> known_aggs, 3154 bool *speculative) 3155 { 3156 return ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts, 3157 known_aggs, NULL, speculative); 3158 } 3159 3160 /* Calculate devirtualization time bonus for NODE, assuming we know KNOWN_CSTS 3161 and KNOWN_CONTEXTS. 
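   Every indirect call that the known values resolve to a direct target
   contributes at least 1; if the target is moreover available and deemed
   inlinable, the bonus grows to roughly 31, 15 or 7 depending on whether its
   size is below a quarter of, half of, or the whole
   param_max_inline_insns_auto limit, halved again for merely speculative
   targets.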
*/

static int
devirtualization_time_bonus (struct cgraph_node *node,
			     vec<tree> known_csts,
			     vec<ipa_polymorphic_call_context> known_contexts,
			     vec<ipa_agg_value_set> known_aggs)
{
  struct cgraph_edge *ie;
  int res = 0;

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    {
      struct cgraph_node *callee;
      class ipa_fn_summary *isummary;
      enum availability avail;
      tree target;
      bool speculative;

      target = ipa_get_indirect_edge_target (ie, known_csts, known_contexts,
					     known_aggs, &speculative);
      if (!target)
	continue;

      /* Only bare minimum benefit for clearly un-inlineable targets.  */
      res += 1;
      callee = cgraph_node::get (target);
      if (!callee || !callee->definition)
	continue;
      callee = callee->function_symbol (&avail);
      if (avail < AVAIL_AVAILABLE)
	continue;
      isummary = ipa_fn_summaries->get (callee);
      if (!isummary || !isummary->inlinable)
	continue;

      int size = ipa_size_summaries->get (callee)->size;
      /* FIXME: The values below need re-considering and perhaps also
	 integrating into the cost metrics, at least in some very basic
	 way.  */
      int max_inline_insns_auto
	= opt_for_fn (callee->decl, param_max_inline_insns_auto);
      if (size <= max_inline_insns_auto / 4)
	res += 31 / ((int)speculative + 1);
      else if (size <= max_inline_insns_auto / 2)
	res += 15 / ((int)speculative + 1);
      else if (size <= max_inline_insns_auto
	       || DECL_DECLARED_INLINE_P (callee->decl))
	res += 7 / ((int)speculative + 1);
    }

  return res;
}

/* Return the time bonus awarded because of HINTS.  */

static int
hint_time_bonus (cgraph_node *node, ipa_hints hints)
{
  int result = 0;
  if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
    result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
  return result;
}

/* If there is a reason to penalize the function described by INFO in the
   cloning goodness evaluation, do so.  */

static inline int64_t
incorporate_penalties (cgraph_node *node, ipa_node_params *info,
		       int64_t evaluation)
{
  if (info->node_within_scc && !info->node_is_self_scc)
    evaluation = (evaluation
		  * (100 - opt_for_fn (node->decl,
				       param_ipa_cp_recursion_penalty))) / 100;

  if (info->node_calling_single_call)
    evaluation = (evaluation
		  * (100 - opt_for_fn (node->decl,
				       param_ipa_cp_single_call_penalty)))
      / 100;

  return evaluation;
}

/* Return true if cloning NODE is a good idea, given the estimated
   TIME_BENEFIT and SIZE_COST and with the sum of frequencies of incoming
   edges to the potential new clone in FREQ_SUM and the sum of their profile
   counts in COUNT_SUM.
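   Without profile feedback, the core of the evaluation boils down to

     evaluation = time_benefit * freq_sum / size_cost

   scaled down by the recursion and single-call penalties.  For instance, a
   hypothetical time_benefit of 100, freq_sum of 6 and size_cost of 20 yield
   an evaluation of 30, which is then compared against
   param_ipa_cp_eval_threshold.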
*/ 3249 3250 static bool 3251 good_cloning_opportunity_p (struct cgraph_node *node, int time_benefit, 3252 int freq_sum, profile_count count_sum, int size_cost) 3253 { 3254 if (time_benefit == 0 3255 || !opt_for_fn (node->decl, flag_ipa_cp_clone) 3256 || node->optimize_for_size_p ()) 3257 return false; 3258 3259 gcc_assert (size_cost > 0); 3260 3261 class ipa_node_params *info = IPA_NODE_REF (node); 3262 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold); 3263 if (max_count > profile_count::zero ()) 3264 { 3265 int factor = RDIV (count_sum.probability_in 3266 (max_count).to_reg_br_prob_base () 3267 * 1000, REG_BR_PROB_BASE); 3268 int64_t evaluation = (((int64_t) time_benefit * factor) 3269 / size_cost); 3270 evaluation = incorporate_penalties (node, info, evaluation); 3271 3272 if (dump_file && (dump_flags & TDF_DETAILS)) 3273 { 3274 fprintf (dump_file, " good_cloning_opportunity_p (time: %i, " 3275 "size: %i, count_sum: ", time_benefit, size_cost); 3276 count_sum.dump (dump_file); 3277 fprintf (dump_file, "%s%s) -> evaluation: " "%" PRId64 3278 ", threshold: %i\n", 3279 info->node_within_scc 3280 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "", 3281 info->node_calling_single_call ? ", single_call" : "", 3282 evaluation, eval_threshold); 3283 } 3284 3285 return evaluation >= eval_threshold; 3286 } 3287 else 3288 { 3289 int64_t evaluation = (((int64_t) time_benefit * freq_sum) 3290 / size_cost); 3291 evaluation = incorporate_penalties (node, info, evaluation); 3292 3293 if (dump_file && (dump_flags & TDF_DETAILS)) 3294 fprintf (dump_file, " good_cloning_opportunity_p (time: %i, " 3295 "size: %i, freq_sum: %i%s%s) -> evaluation: " 3296 "%" PRId64 ", threshold: %i\n", 3297 time_benefit, size_cost, freq_sum, 3298 info->node_within_scc 3299 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "", 3300 info->node_calling_single_call ? ", single_call" : "", 3301 evaluation, eval_threshold); 3302 3303 return evaluation >= eval_threshold; 3304 } 3305 } 3306 3307 /* Return all context independent values from aggregate lattices in PLATS in a 3308 vector. Return NULL if there are none. */ 3309 3310 static vec<ipa_agg_value> 3311 context_independent_aggregate_values (class ipcp_param_lattices *plats) 3312 { 3313 vec<ipa_agg_value> res = vNULL; 3314 3315 if (plats->aggs_bottom 3316 || plats->aggs_contain_variable 3317 || plats->aggs_count == 0) 3318 return vNULL; 3319 3320 for (struct ipcp_agg_lattice *aglat = plats->aggs; 3321 aglat; 3322 aglat = aglat->next) 3323 if (aglat->is_single_const ()) 3324 { 3325 struct ipa_agg_value item; 3326 item.offset = aglat->offset; 3327 item.value = aglat->values->value; 3328 res.safe_push (item); 3329 } 3330 return res; 3331 } 3332 3333 /* Allocate KNOWN_CSTS, KNOWN_CONTEXTS and, if non-NULL, KNOWN_AGGS and 3334 populate them with values of parameters that are known independent of the 3335 context. INFO describes the function. If REMOVABLE_PARAMS_COST is 3336 non-NULL, the movement cost of all removable parameters will be stored in 3337 it. 
*/ 3338 3339 static bool 3340 gather_context_independent_values (class ipa_node_params *info, 3341 vec<tree> *known_csts, 3342 vec<ipa_polymorphic_call_context> 3343 *known_contexts, 3344 vec<ipa_agg_value_set> *known_aggs, 3345 int *removable_params_cost) 3346 { 3347 int i, count = ipa_get_param_count (info); 3348 bool ret = false; 3349 3350 known_csts->create (0); 3351 known_contexts->create (0); 3352 known_csts->safe_grow_cleared (count); 3353 known_contexts->safe_grow_cleared (count); 3354 if (known_aggs) 3355 { 3356 known_aggs->create (0); 3357 known_aggs->safe_grow_cleared (count); 3358 } 3359 3360 if (removable_params_cost) 3361 *removable_params_cost = 0; 3362 3363 for (i = 0; i < count; i++) 3364 { 3365 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); 3366 ipcp_lattice<tree> *lat = &plats->itself; 3367 3368 if (lat->is_single_const ()) 3369 { 3370 ipcp_value<tree> *val = lat->values; 3371 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO); 3372 (*known_csts)[i] = val->value; 3373 if (removable_params_cost) 3374 *removable_params_cost 3375 += estimate_move_cost (TREE_TYPE (val->value), false); 3376 ret = true; 3377 } 3378 else if (removable_params_cost 3379 && !ipa_is_param_used (info, i)) 3380 *removable_params_cost 3381 += ipa_get_param_move_cost (info, i); 3382 3383 if (!ipa_is_param_used (info, i)) 3384 continue; 3385 3386 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat; 3387 /* Do not account known context as reason for cloning. We can see 3388 if it permits devirtualization. */ 3389 if (ctxlat->is_single_const ()) 3390 (*known_contexts)[i] = ctxlat->values->value; 3391 3392 if (known_aggs) 3393 { 3394 vec<ipa_agg_value> agg_items; 3395 struct ipa_agg_value_set *agg; 3396 3397 agg_items = context_independent_aggregate_values (plats); 3398 agg = &(*known_aggs)[i]; 3399 agg->items = agg_items; 3400 agg->by_ref = plats->aggs_by_ref; 3401 ret |= !agg_items.is_empty (); 3402 } 3403 } 3404 3405 return ret; 3406 } 3407 3408 /* Perform time and size measurement of NODE with the context given in 3409 KNOWN_CSTS, KNOWN_CONTEXTS and KNOWN_AGGS, calculate the benefit and cost 3410 given BASE_TIME of the node without specialization, REMOVABLE_PARAMS_COST of 3411 all context-independent removable parameters and EST_MOVE_COST of estimated 3412 movement of the considered parameter and store it into VAL. */ 3413 3414 static void 3415 perform_estimation_of_a_value (cgraph_node *node, vec<tree> known_csts, 3416 vec<ipa_polymorphic_call_context> known_contexts, 3417 vec<ipa_agg_value_set> known_aggs, 3418 int removable_params_cost, 3419 int est_move_cost, ipcp_value_base *val) 3420 { 3421 int size, time_benefit; 3422 sreal time, base_time; 3423 ipa_hints hints; 3424 3425 estimate_ipcp_clone_size_and_time (node, known_csts, known_contexts, 3426 known_aggs, &size, &time, 3427 &base_time, &hints); 3428 base_time -= time; 3429 if (base_time > 65535) 3430 base_time = 65535; 3431 3432 /* Extern inline functions have no cloning local time benefits because they 3433 will be inlined anyway. The only reason to clone them is if it enables 3434 optimization in any of the functions they call. 
*/
  if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
    time_benefit = 0;
  else
    time_benefit = base_time.to_int ()
      + devirtualization_time_bonus (node, known_csts, known_contexts,
				     known_aggs)
      + hint_time_bonus (node, hints)
      + removable_params_cost + est_move_cost;

  gcc_checking_assert (size >= 0);
  /* The inliner-heuristics based estimates may think that in certain
     contexts some functions do not have any size at all, but we want
     all specializations to have at least a tiny cost, not least not to
     divide by zero.  */
  if (size == 0)
    size = 1;

  val->local_time_benefit = time_benefit;
  val->local_size_cost = size;
}

/* Get the overall limit of growth based on parameters extracted from NODE.
   It does not really make sense to mix functions with different overall
   growth limits, but it is possible, and if it happens, we do not want to
   select one limit at random.  */

static long
get_max_overall_size (cgraph_node *node)
{
  long max_new_size = orig_overall_size;
  long large_unit = opt_for_fn (node->decl, param_large_unit_insns);
  if (max_new_size < large_unit)
    max_new_size = large_unit;
  int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
  max_new_size += max_new_size * unit_growth / 100 + 1;
  return max_new_size;
}

/* Iterate over known values of parameters of NODE and estimate the local
   effects in terms of time and size they have.  */

static void
estimate_local_effects (struct cgraph_node *node)
{
  class ipa_node_params *info = IPA_NODE_REF (node);
  int i, count = ipa_get_param_count (info);
  vec<tree> known_csts;
  vec<ipa_polymorphic_call_context> known_contexts;
  vec<ipa_agg_value_set> known_aggs;
  bool always_const;
  int removable_params_cost;

  if (!count || !ipcp_versionable_function_p (node))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());

  always_const = gather_context_independent_values (info, &known_csts,
						    &known_contexts,
						    &known_aggs,
						    &removable_params_cost);
  int devirt_bonus = devirtualization_time_bonus (node, known_csts,
						  known_contexts, known_aggs);
  if (always_const || devirt_bonus
      || (removable_params_cost && node->can_change_signature))
    {
      struct caller_statistics stats;
      ipa_hints hints;
      sreal time, base_time;
      int size;

      init_caller_stats (&stats);
      node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
						false);
      estimate_ipcp_clone_size_and_time (node, known_csts, known_contexts,
					 known_aggs, &size, &time,
					 &base_time, &hints);
      time -= devirt_bonus;
      time -= hint_time_bonus (node, hints);
      time -= removable_params_cost;
      size -= stats.n_calls * removable_params_cost;

      if (dump_file)
	fprintf (dump_file, " - context independent values, size: %i, "
		 "time_benefit: %f\n", size, (base_time - time).to_double ());

      if (size <= 0 || node->local)
	{
	  info->do_clone_for_all_contexts = true;

	  if (dump_file)
	    fprintf (dump_file, "    Decided to specialize for all "
		     "known contexts, code not going to grow.\n");
	}
      else if (good_cloning_opportunity_p (node,
					   MIN ((base_time - time).to_int (),
						65536),
					   stats.freq_sum, stats.count_sum,
size)) 3534 { 3535 if (size + overall_size <= get_max_overall_size (node)) 3536 { 3537 info->do_clone_for_all_contexts = true; 3538 overall_size += size; 3539 3540 if (dump_file) 3541 fprintf (dump_file, " Decided to specialize for all " 3542 "known contexts, growth deemed beneficial.\n"); 3543 } 3544 else if (dump_file && (dump_flags & TDF_DETAILS)) 3545 fprintf (dump_file, " Not cloning for all contexts because " 3546 "maximum unit size would be reached with %li.\n", 3547 size + overall_size); 3548 } 3549 else if (dump_file && (dump_flags & TDF_DETAILS)) 3550 fprintf (dump_file, " Not cloning for all contexts because " 3551 "!good_cloning_opportunity_p.\n"); 3552 3553 } 3554 3555 for (i = 0; i < count; i++) 3556 { 3557 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); 3558 ipcp_lattice<tree> *lat = &plats->itself; 3559 ipcp_value<tree> *val; 3560 3561 if (lat->bottom 3562 || !lat->values 3563 || known_csts[i]) 3564 continue; 3565 3566 for (val = lat->values; val; val = val->next) 3567 { 3568 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO); 3569 known_csts[i] = val->value; 3570 3571 int emc = estimate_move_cost (TREE_TYPE (val->value), true); 3572 perform_estimation_of_a_value (node, known_csts, known_contexts, 3573 known_aggs, 3574 removable_params_cost, emc, val); 3575 3576 if (dump_file && (dump_flags & TDF_DETAILS)) 3577 { 3578 fprintf (dump_file, " - estimates for value "); 3579 print_ipcp_constant_value (dump_file, val->value); 3580 fprintf (dump_file, " for "); 3581 ipa_dump_param (dump_file, info, i); 3582 fprintf (dump_file, ": time_benefit: %i, size: %i\n", 3583 val->local_time_benefit, val->local_size_cost); 3584 } 3585 } 3586 known_csts[i] = NULL_TREE; 3587 } 3588 3589 for (i = 0; i < count; i++) 3590 { 3591 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); 3592 3593 if (!plats->virt_call) 3594 continue; 3595 3596 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat; 3597 ipcp_value<ipa_polymorphic_call_context> *val; 3598 3599 if (ctxlat->bottom 3600 || !ctxlat->values 3601 || !known_contexts[i].useless_p ()) 3602 continue; 3603 3604 for (val = ctxlat->values; val; val = val->next) 3605 { 3606 known_contexts[i] = val->value; 3607 perform_estimation_of_a_value (node, known_csts, known_contexts, 3608 known_aggs, 3609 removable_params_cost, 0, val); 3610 3611 if (dump_file && (dump_flags & TDF_DETAILS)) 3612 { 3613 fprintf (dump_file, " - estimates for polymorphic context "); 3614 print_ipcp_constant_value (dump_file, val->value); 3615 fprintf (dump_file, " for "); 3616 ipa_dump_param (dump_file, info, i); 3617 fprintf (dump_file, ": time_benefit: %i, size: %i\n", 3618 val->local_time_benefit, val->local_size_cost); 3619 } 3620 } 3621 known_contexts[i] = ipa_polymorphic_call_context (); 3622 } 3623 3624 for (i = 0; i < count; i++) 3625 { 3626 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); 3627 struct ipa_agg_value_set *agg; 3628 struct ipcp_agg_lattice *aglat; 3629 3630 if (plats->aggs_bottom || !plats->aggs) 3631 continue; 3632 3633 agg = &known_aggs[i]; 3634 for (aglat = plats->aggs; aglat; aglat = aglat->next) 3635 { 3636 ipcp_value<tree> *val; 3637 if (aglat->bottom || !aglat->values 3638 /* If the following is true, the one value is in known_aggs. 
*/ 3639 || (!plats->aggs_contain_variable 3640 && aglat->is_single_const ())) 3641 continue; 3642 3643 for (val = aglat->values; val; val = val->next) 3644 { 3645 struct ipa_agg_value item; 3646 3647 item.offset = aglat->offset; 3648 item.value = val->value; 3649 agg->items.safe_push (item); 3650 3651 perform_estimation_of_a_value (node, known_csts, known_contexts, 3652 known_aggs, 3653 removable_params_cost, 0, val); 3654 3655 if (dump_file && (dump_flags & TDF_DETAILS)) 3656 { 3657 fprintf (dump_file, " - estimates for value "); 3658 print_ipcp_constant_value (dump_file, val->value); 3659 fprintf (dump_file, " for "); 3660 ipa_dump_param (dump_file, info, i); 3661 fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC 3662 "]: time_benefit: %i, size: %i\n", 3663 plats->aggs_by_ref ? "ref " : "", 3664 aglat->offset, 3665 val->local_time_benefit, val->local_size_cost); 3666 } 3667 3668 agg->items.pop (); 3669 } 3670 } 3671 } 3672 3673 known_csts.release (); 3674 known_contexts.release (); 3675 ipa_release_agg_values (known_aggs); 3676 } 3677 3678 3679 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the 3680 topological sort of values. */ 3681 3682 template <typename valtype> 3683 void 3684 value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val) 3685 { 3686 ipcp_value_source<valtype> *src; 3687 3688 if (cur_val->dfs) 3689 return; 3690 3691 dfs_counter++; 3692 cur_val->dfs = dfs_counter; 3693 cur_val->low_link = dfs_counter; 3694 3695 cur_val->topo_next = stack; 3696 stack = cur_val; 3697 cur_val->on_stack = true; 3698 3699 for (src = cur_val->sources; src; src = src->next) 3700 if (src->val) 3701 { 3702 if (src->val->dfs == 0) 3703 { 3704 add_val (src->val); 3705 if (src->val->low_link < cur_val->low_link) 3706 cur_val->low_link = src->val->low_link; 3707 } 3708 else if (src->val->on_stack 3709 && src->val->dfs < cur_val->low_link) 3710 cur_val->low_link = src->val->dfs; 3711 } 3712 3713 if (cur_val->dfs == cur_val->low_link) 3714 { 3715 ipcp_value<valtype> *v, *scc_list = NULL; 3716 3717 do 3718 { 3719 v = stack; 3720 stack = v->topo_next; 3721 v->on_stack = false; 3722 3723 v->scc_next = scc_list; 3724 scc_list = v; 3725 } 3726 while (v != cur_val); 3727 3728 cur_val->topo_next = values_topo; 3729 values_topo = cur_val; 3730 } 3731 } 3732 3733 /* Add all values in lattices associated with NODE to the topological sort if 3734 they are not there yet. 
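Note that the add_val method above is an instance of Tarjan's strongly connected component algorithm applied to the graph of dependencies between values: the dfs and low_link numbers detect cycles, e.g. values of a self-recursive function that are derived from one another, and each detected SCC is chained through scc_next and hung as a single entry onto values_topo, so that propagate_effects can later process a whole cycle at once.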
*/ 3735 3736 static void 3737 add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo) 3738 { 3739 class ipa_node_params *info = IPA_NODE_REF (node); 3740 int i, count = ipa_get_param_count (info); 3741 3742 for (i = 0; i < count; i++) 3743 { 3744 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); 3745 ipcp_lattice<tree> *lat = &plats->itself; 3746 struct ipcp_agg_lattice *aglat; 3747 3748 if (!lat->bottom) 3749 { 3750 ipcp_value<tree> *val; 3751 for (val = lat->values; val; val = val->next) 3752 topo->constants.add_val (val); 3753 } 3754 3755 if (!plats->aggs_bottom) 3756 for (aglat = plats->aggs; aglat; aglat = aglat->next) 3757 if (!aglat->bottom) 3758 { 3759 ipcp_value<tree> *val; 3760 for (val = aglat->values; val; val = val->next) 3761 topo->constants.add_val (val); 3762 } 3763 3764 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat; 3765 if (!ctxlat->bottom) 3766 { 3767 ipcp_value<ipa_polymorphic_call_context> *ctxval; 3768 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next) 3769 topo->contexts.add_val (ctxval); 3770 } 3771 } 3772 } 3773 3774 /* One pass of constant propagation along the call graph edges, from callers 3775 to callees (requires topological ordering in TOPO); iterates over strongly 3776 connected components. */ 3777 3778 static void 3779 propagate_constants_topo (class ipa_topo_info *topo) 3780 { 3781 int i; 3782 3783 for (i = topo->nnodes - 1; i >= 0; i--) 3784 { 3785 unsigned j; 3786 struct cgraph_node *v, *node = topo->order[i]; 3787 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node); 3788 3789 /* First, iteratively propagate within the strongly connected component 3790 until all lattices stabilize. */ 3791 FOR_EACH_VEC_ELT (cycle_nodes, j, v) 3792 if (v->has_gimple_body_p ()) 3793 { 3794 if (opt_for_fn (v->decl, flag_ipa_cp) 3795 && opt_for_fn (v->decl, optimize)) 3796 push_node_to_stack (topo, v); 3797 /* When V is not optimized, we cannot push it to the stack, but 3798 we still need to set the lattices of all its callees to bottom. */ 3799 else 3800 { 3801 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee) 3802 propagate_constants_across_call (cs); 3803 } 3804 } 3805 3806 v = pop_node_from_stack (topo); 3807 while (v) 3808 { 3809 struct cgraph_edge *cs; 3810 class ipa_node_params *info = NULL; 3811 bool self_scc = true; 3812 3813 for (cs = v->callees; cs; cs = cs->next_callee) 3814 if (ipa_edge_within_scc (cs)) 3815 { 3816 cgraph_node *callee = cs->callee->function_symbol (); 3817 3818 if (v != callee) 3819 self_scc = false; 3820 3821 if (!info) 3822 { 3823 info = IPA_NODE_REF (v); 3824 info->node_within_scc = true; 3825 } 3826 3827 if (propagate_constants_across_call (cs)) 3828 push_node_to_stack (topo, callee); 3829 } 3830 3831 if (info) 3832 info->node_is_self_scc = self_scc; 3833 3834 v = pop_node_from_stack (topo); 3835 } 3836 3837 /* Afterwards, propagate along edges leading out of the SCC, calculate 3838 the local effects of the discovered constants and add all valid values 3839 to the topological sort.
*/ 3840 FOR_EACH_VEC_ELT (cycle_nodes, j, v) 3841 if (v->has_gimple_body_p () 3842 && opt_for_fn (v->decl, flag_ipa_cp) 3843 && opt_for_fn (v->decl, optimize)) 3844 { 3845 struct cgraph_edge *cs; 3846 3847 estimate_local_effects (v); 3848 add_all_node_vals_to_toposort (v, topo); 3849 for (cs = v->callees; cs; cs = cs->next_callee) 3850 if (!ipa_edge_within_scc (cs)) 3851 propagate_constants_across_call (cs); 3852 } 3853 cycle_nodes.release (); 3854 } 3855 } 3856 3857 3858 /* Return the sum of A and B if neither of them is bigger than INT_MAX/2, 3859 otherwise return the bigger one. For example, safe_add (INT_MAX, 1) saturates and yields INT_MAX instead of overflowing. */ 3860 3861 static int 3862 safe_add (int a, int b) 3863 { 3864 if (a > INT_MAX/2 || b > INT_MAX/2) 3865 return a > b ? a : b; 3866 else 3867 return a + b; 3868 } 3869 3870 3871 /* Propagate the estimated effects of individual values along the topological 3872 order from the dependent values to those they depend on. */ 3873 3874 template <typename valtype> 3875 void 3876 value_topo_info<valtype>::propagate_effects () 3877 { 3878 ipcp_value<valtype> *base; 3879 3880 for (base = values_topo; base; base = base->topo_next) 3881 { 3882 ipcp_value_source<valtype> *src; 3883 ipcp_value<valtype> *val; 3884 int time = 0, size = 0; 3885 3886 for (val = base; val; val = val->scc_next) 3887 { 3888 time = safe_add (time, 3889 val->local_time_benefit + val->prop_time_benefit); 3890 size = safe_add (size, val->local_size_cost + val->prop_size_cost); 3891 } 3892 3893 for (val = base; val; val = val->scc_next) 3894 for (src = val->sources; src; src = src->next) 3895 if (src->val 3896 && src->cs->maybe_hot_p ()) 3897 { 3898 src->val->prop_time_benefit = safe_add (time, 3899 src->val->prop_time_benefit); 3900 src->val->prop_size_cost = safe_add (size, 3901 src->val->prop_size_cost); 3902 } 3903 } 3904 } 3905 3906 3907 /* Propagate constants, polymorphic contexts and their effects from the 3908 summaries interprocedurally. */ 3909 3910 static void 3911 ipcp_propagate_stage (class ipa_topo_info *topo) 3912 { 3913 struct cgraph_node *node; 3914 3915 if (dump_file) 3916 fprintf (dump_file, "\n Propagating constants:\n\n"); 3917 3918 max_count = profile_count::uninitialized (); 3919 3920 FOR_EACH_DEFINED_FUNCTION (node) 3921 { 3922 if (node->has_gimple_body_p () 3923 && opt_for_fn (node->decl, flag_ipa_cp) 3924 && opt_for_fn (node->decl, optimize)) 3925 { 3926 class ipa_node_params *info = IPA_NODE_REF (node); 3927 determine_versionability (node, info); 3928 info->lattices = XCNEWVEC (class ipcp_param_lattices, 3929 ipa_get_param_count (info)); 3930 initialize_node_lattices (node); 3931 } 3932 ipa_size_summary *s = ipa_size_summaries->get (node); 3933 if (node->definition && !node->alias && s != NULL) 3934 overall_size += s->self_size; 3935 max_count = max_count.max (node->count.ipa ()); 3936 } 3937 3938 orig_overall_size = overall_size; 3939 3940 if (dump_file) 3941 fprintf (dump_file, "\noverall_size: %li\n", overall_size); 3942 3943 propagate_constants_topo (topo); 3944 if (flag_checking) 3945 ipcp_verify_propagated_values (); 3946 topo->constants.propagate_effects (); 3947 topo->contexts.propagate_effects (); 3948 3949 if (dump_file) 3950 { 3951 fprintf (dump_file, "\nIPA lattices after all propagation:\n"); 3952 print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true); 3953 } 3954 } 3955 3956 /* Discover newly direct outgoing edges from NODE which is a new clone with 3957 known KNOWN_CSTS and make them direct.
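For example (a hypothetical situation), if NODE was specialized for a function-pointer parameter known to be &foo, an indirect call through that parameter can now be resolved by ipa_get_indirect_edge_target_1 below and turned into a direct, possibly speculative, call to foo.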
*/ 3958 3959 static void 3960 ipcp_discover_new_direct_edges (struct cgraph_node *node, 3961 vec<tree> known_csts, 3962 vec<ipa_polymorphic_call_context> 3963 known_contexts, 3964 struct ipa_agg_replacement_value *aggvals) 3965 { 3966 struct cgraph_edge *ie, *next_ie; 3967 bool found = false; 3968 3969 for (ie = node->indirect_calls; ie; ie = next_ie) 3970 { 3971 tree target; 3972 bool speculative; 3973 3974 next_ie = ie->next_callee; 3975 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts, 3976 vNULL, aggvals, &speculative); 3977 if (target) 3978 { 3979 bool agg_contents = ie->indirect_info->agg_contents; 3980 bool polymorphic = ie->indirect_info->polymorphic; 3981 int param_index = ie->indirect_info->param_index; 3982 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target, 3983 speculative); 3984 found = true; 3985 3986 if (cs && !agg_contents && !polymorphic) 3987 { 3988 class ipa_node_params *info = IPA_NODE_REF (node); 3989 int c = ipa_get_controlled_uses (info, param_index); 3990 if (c != IPA_UNDESCRIBED_USE) 3991 { 3992 struct ipa_ref *to_del; 3993 3994 c--; 3995 ipa_set_controlled_uses (info, param_index, c); 3996 if (dump_file && (dump_flags & TDF_DETAILS)) 3997 fprintf (dump_file, " controlled uses count of param " 3998 "%i bumped down to %i\n", param_index, c); 3999 if (c == 0 4000 && (to_del = node->find_reference (cs->callee, NULL, 0))) 4001 { 4002 if (dump_file && (dump_flags & TDF_DETAILS)) 4003 fprintf (dump_file, " and even removing its " 4004 "cloning-created reference\n"); 4005 to_del->remove_reference (); 4006 } 4007 } 4008 } 4009 } 4010 } 4011 /* Turning indirect calls into direct calls will improve the overall summary. */ 4012 if (found) 4013 ipa_update_overall_fn_summary (node); 4014 } 4015 4016 class edge_clone_summary; 4017 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL; 4018 4019 /* Edge clone summary. */ 4020 4021 class edge_clone_summary 4022 { 4023 public: 4024 /* Default constructor. */ 4025 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {} 4026 4027 /* Default destructor. */ 4028 ~edge_clone_summary () 4029 { 4030 if (prev_clone) 4031 edge_clone_summaries->get (prev_clone)->next_clone = next_clone; 4032 if (next_clone) 4033 edge_clone_summaries->get (next_clone)->prev_clone = prev_clone; 4034 } 4035 4036 cgraph_edge *prev_clone; 4037 cgraph_edge *next_clone; 4038 }; 4039 4040 class edge_clone_summary_t: 4041 public call_summary <edge_clone_summary *> 4042 { 4043 public: 4044 edge_clone_summary_t (symbol_table *symtab): 4045 call_summary <edge_clone_summary *> (symtab) 4046 { 4047 m_initialize_when_cloning = true; 4048 } 4049 4050 virtual void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge, 4051 edge_clone_summary *src_data, 4052 edge_clone_summary *dst_data); 4053 }; 4054 4055 /* Edge duplication hook. */ 4056 4057 void 4058 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge, 4059 edge_clone_summary *src_data, 4060 edge_clone_summary *dst_data) 4061 { 4062 if (src_data->next_clone) 4063 edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge; 4064 dst_data->prev_clone = src_edge; 4065 dst_data->next_clone = src_data->next_clone; 4066 src_data->next_clone = dst_edge; 4067 } 4068 4069 /* Return true if CS calls DEST or its clone for all contexts. When 4070 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive 4071 edges from/to an all-context clone.
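For illustration with hypothetical nodes: an edge from bar to DEST itself qualifies, and so does an edge from bar to an all-context clone of DEST, because the clone's ipcp_orig_node is DEST; a self-recursive edge of such a clone, however, qualifies only when ALLOW_RECURSION_TO_CLONE is true, and any edge whose callee may be interposed at link time never does.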
*/ 4072 4073 static bool 4074 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest, 4075 bool allow_recursion_to_clone) 4076 { 4077 enum availability availability; 4078 cgraph_node *callee = cs->callee->function_symbol (&availability); 4079 4080 if (availability <= AVAIL_INTERPOSABLE) 4081 return false; 4082 if (callee == dest) 4083 return true; 4084 if (!allow_recursion_to_clone && cs->caller == callee) 4085 return false; 4086 4087 class ipa_node_params *info = IPA_NODE_REF (callee); 4088 return info->is_all_contexts_clone && info->ipcp_orig_node == dest; 4089 } 4090 4091 /* Return true if edge CS does bring about the value described by SRC to 4092 DEST_VAL of node DEST or its clone for all contexts. */ 4093 4094 static bool 4095 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src, 4096 cgraph_node *dest, ipcp_value<tree> *dest_val) 4097 { 4098 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); 4099 4100 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val) 4101 || caller_info->node_dead) 4102 return false; 4103 4104 if (!src->val) 4105 return true; 4106 4107 if (caller_info->ipcp_orig_node) 4108 { 4109 tree t; 4110 if (src->offset == -1) 4111 t = caller_info->known_csts[src->index]; 4112 else 4113 t = get_clone_agg_value (cs->caller, src->offset, src->index); 4114 return (t != NULL_TREE 4115 && values_equal_for_ipcp_p (src->val->value, t)); 4116 } 4117 else 4118 { 4119 if (src->val == dest_val) 4120 return true; 4121 4122 struct ipcp_agg_lattice *aglat; 4123 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info, 4124 src->index); 4125 if (src->offset == -1) 4126 return (plats->itself.is_single_const () 4127 && values_equal_for_ipcp_p (src->val->value, 4128 plats->itself.values->value)); 4129 else 4130 { 4131 if (plats->aggs_bottom || plats->aggs_contain_variable) 4132 return false; 4133 for (aglat = plats->aggs; aglat; aglat = aglat->next) 4134 if (aglat->offset == src->offset) 4135 return (aglat->is_single_const () 4136 && values_equal_for_ipcp_p (src->val->value, 4137 aglat->values->value)); 4138 } 4139 return false; 4140 } 4141 } 4142 4143 /* Return true if edge CS does bring about the value described by SRC to 4144 DST_VAL of node DEST or its clone for all contexts. */ 4145 4146 static bool 4147 cgraph_edge_brings_value_p (cgraph_edge *cs, 4148 ipcp_value_source<ipa_polymorphic_call_context> *src, 4149 cgraph_node *dest, 4150 ipcp_value<ipa_polymorphic_call_context> *) 4151 { 4152 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); 4153 4154 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true) 4155 || caller_info->node_dead) 4156 return false; 4157 if (!src->val) 4158 return true; 4159 4160 if (caller_info->ipcp_orig_node) 4161 return (caller_info->known_contexts.length () > (unsigned) src->index) 4162 && values_equal_for_ipcp_p (src->val->value, 4163 caller_info->known_contexts[src->index]); 4164 4165 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info, 4166 src->index); 4167 return plats->ctxlat.is_single_const () 4168 && values_equal_for_ipcp_p (src->val->value, 4169 plats->ctxlat.values->value); 4170 } 4171 4172 /* Get the next clone in the linked list of clones of an edge. */ 4173 4174 static inline struct cgraph_edge * 4175 get_next_cgraph_edge_clone (struct cgraph_edge *cs) 4176 { 4177 edge_clone_summary *s = edge_clone_summaries->get (cs); 4178 return s != NULL ? 
s->next_clone : NULL; 4179 } 4180 4181 /* Given VAL that is intended for DEST, iterate over all its sources and if any 4182 of them is viable and hot, return true. In that case, for those that still 4183 hold, add their edge frequencies, their counts and their number into 4184 *FREQ_SUM, *COUNT_SUM and *CALLER_COUNT respectively. */ 4185 4186 template <typename valtype> 4187 static bool 4188 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest, 4189 int *freq_sum, 4190 profile_count *count_sum, int *caller_count) 4191 { 4192 ipcp_value_source<valtype> *src; 4193 int freq = 0, count = 0; 4194 profile_count cnt = profile_count::zero (); 4195 bool hot = false; 4196 bool non_self_recursive = false; 4197 4198 for (src = val->sources; src; src = src->next) 4199 { 4200 struct cgraph_edge *cs = src->cs; 4201 while (cs) 4202 { 4203 if (cgraph_edge_brings_value_p (cs, src, dest, val)) 4204 { 4205 count++; 4206 freq += cs->frequency (); 4207 if (cs->count.ipa ().initialized_p ()) 4208 cnt += cs->count.ipa (); 4209 hot |= cs->maybe_hot_p (); 4210 if (cs->caller != dest) 4211 non_self_recursive = true; 4212 } 4213 cs = get_next_cgraph_edge_clone (cs); 4214 } 4215 } 4216 4217 /* If the only edges bringing a value are self-recursive ones, do not bother 4218 evaluating it. */ 4219 if (!non_self_recursive) 4220 return false; 4221 4222 *freq_sum = freq; 4223 *count_sum = cnt; 4224 *caller_count = count; 4225 4226 if (!hot && IPA_NODE_REF (dest)->node_within_scc) 4227 { 4228 struct cgraph_edge *cs; 4229 4230 /* A cold non-SCC source edge could trigger hot recursive execution of 4231 the function. Consider the case as hot and rely on the following cost 4232 model computation to further select the right one. */ 4233 for (cs = dest->callers; cs; cs = cs->next_caller) 4234 if (cs->caller == dest && cs->maybe_hot_p ()) 4235 return true; 4236 } 4237 4238 return hot; 4239 } 4240 4241 /* Given a NODE, and a set of its CALLERS, try to adjust the order of the 4242 callers to let a non-self-recursive caller be the first element. Thus, we 4243 can simplify intersecting operations on values that arrive from all of these 4244 callers, especially when a self-recursive call exists. Return true if 4245 this kind of adjustment is possible. */ 4246 4247 static bool 4248 adjust_callers_for_value_intersection (vec<cgraph_edge *> callers, 4249 cgraph_node *node) 4250 { 4251 for (unsigned i = 0; i < callers.length (); i++) 4252 { 4253 cgraph_edge *cs = callers[i]; 4254 4255 if (cs->caller != node) 4256 { 4257 if (i > 0) 4258 { 4259 callers[i] = callers[0]; 4260 callers[0] = cs; 4261 } 4262 return true; 4263 } 4264 } 4265 return false; 4266 } 4267 4268 /* Return a vector of incoming edges that do bring value VAL to node DEST. It 4269 is assumed their number is known and equal to CALLER_COUNT. */ 4270 4271 template <typename valtype> 4272 static vec<cgraph_edge *> 4273 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest, 4274 int caller_count) 4275 { 4276 ipcp_value_source<valtype> *src; 4277 vec<cgraph_edge *> ret; 4278 4279 ret.create (caller_count); 4280 for (src = val->sources; src; src = src->next) 4281 { 4282 struct cgraph_edge *cs = src->cs; 4283 while (cs) 4284 { 4285 if (cgraph_edge_brings_value_p (cs, src, dest, val)) 4286 ret.quick_push (cs); 4287 cs = get_next_cgraph_edge_clone (cs); 4288 } 4289 } 4290 4291 if (caller_count > 1) 4292 adjust_callers_for_value_intersection (ret, dest); 4293 4294 return ret; 4295 } 4296 4297 /* Construct a replacement map for a known VALUE of the formal parameter with number PARM_NUM.
4298 Return it or NULL if for some reason it cannot be created. */ 4299 4300 static struct ipa_replace_map * 4301 get_replacement_map (class ipa_node_params *info, tree value, int parm_num) 4302 { 4303 struct ipa_replace_map *replace_map; 4304 4305 replace_map = ggc_alloc<ipa_replace_map> (); 4306 if (dump_file) 4307 { 4308 fprintf (dump_file, " replacing "); 4309 ipa_dump_param (dump_file, info, parm_num); 4310 4311 fprintf (dump_file, " with const "); 4312 print_generic_expr (dump_file, value); 4313 fprintf (dump_file, "\n"); 4314 } 4315 replace_map->parm_num = parm_num; 4316 replace_map->new_tree = value; 4317 return replace_map; 4318 } 4319 4320 /* Dump new profiling counts. */ 4321 4322 static void 4323 dump_profile_updates (struct cgraph_node *orig_node, 4324 struct cgraph_node *new_node) 4325 { 4326 struct cgraph_edge *cs; 4327 4328 fprintf (dump_file, " setting count of the specialized node to "); 4329 new_node->count.dump (dump_file); 4330 fprintf (dump_file, "\n"); 4331 for (cs = new_node->callees; cs; cs = cs->next_callee) 4332 { 4333 fprintf (dump_file, " edge to %s has count ", 4334 cs->callee->dump_name ()); 4335 cs->count.dump (dump_file); 4336 fprintf (dump_file, "\n"); 4337 } 4338 4339 fprintf (dump_file, " setting count of the original node to "); 4340 orig_node->count.dump (dump_file); 4341 fprintf (dump_file, "\n"); 4342 for (cs = orig_node->callees; cs; cs = cs->next_callee) 4343 { 4344 fprintf (dump_file, " edge to %s is left with ", 4345 cs->callee->dump_name ()); 4346 cs->count.dump (dump_file); 4347 fprintf (dump_file, "\n"); 4348 } 4349 } 4350 4351 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update 4352 their profile information to reflect this. */ 4353 4354 static void 4355 update_profiling_info (struct cgraph_node *orig_node, 4356 struct cgraph_node *new_node) 4357 { 4358 struct cgraph_edge *cs; 4359 struct caller_statistics stats; 4360 profile_count new_sum, orig_sum; 4361 profile_count remainder, orig_node_count = orig_node->count; 4362 profile_count orig_new_node_count = new_node->count; 4363 4364 if (!(orig_node_count.ipa () > profile_count::zero ())) 4365 return; 4366 4367 init_caller_stats (&stats); 4368 orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, 4369 false); 4370 orig_sum = stats.count_sum; 4371 init_caller_stats (&stats); 4372 new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, 4373 false); 4374 new_sum = stats.count_sum; 4375 4376 if (orig_node_count < orig_sum + new_sum) 4377 { 4378 if (dump_file) 4379 { 4380 fprintf (dump_file, " Problem: node %s has too low count ", 4381 orig_node->dump_name ()); 4382 orig_node_count.dump (dump_file); 4383 fprintf (dump_file, " while the sum of incoming counts is "); 4384 (orig_sum + new_sum).dump (dump_file); 4385 fprintf (dump_file, "\n"); 4386 } 4387 4388 orig_node_count = (orig_sum + new_sum).apply_scale (12, 10); 4389 if (dump_file) 4390 { 4391 fprintf (dump_file, " proceeding by pretending it was "); 4392 orig_node_count.dump (dump_file); 4393 fprintf (dump_file, "\n"); 4394 } 4395 } 4396 4397 remainder = orig_node_count.combine_with_ipa_count (orig_node_count.ipa () 4398 - new_sum.ipa ()); 4399 4400 /* With a partial training run we do not want to assume that the original's 4401 count is zero whenever we redirect all executed edges to the clone. 4402 Simply drop the profile to a local one in this case.
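For illustration with made-up counts: if ORIG_NODE had an IPA count of 1000 and the callers now invoking NEW_NODE contributed 700 of it, NEW_NODE receives a count of 700, ORIG_NODE keeps the remaining 300, and the counts of the outgoing edges of both nodes are scaled proportionally below; had all 1000 been redirected with -fprofile-partial-training in effect, the zero remainder would be demoted to a guessed local profile instead.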
*/ 4403 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p () 4404 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p () 4405 && flag_profile_partial_training) 4406 remainder = remainder.guessed_local (); 4407 4408 new_sum = orig_node_count.combine_with_ipa_count (new_sum); 4409 new_node->count = new_sum; 4410 orig_node->count = remainder; 4411 4412 profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count); 4413 for (cs = new_node->callees; cs; cs = cs->next_callee) 4414 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count); 4415 for (cs = new_node->indirect_calls; cs; cs = cs->next_callee) 4416 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count); 4417 4418 profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count); 4419 for (cs = orig_node->callees; cs; cs = cs->next_callee) 4420 cs->count = cs->count.apply_scale (remainder, orig_node_count); 4421 for (cs = orig_node->indirect_calls; cs; cs = cs->next_callee) 4422 cs->count = cs->count.apply_scale (remainder, orig_node_count); 4423 4424 if (dump_file) 4425 dump_profile_updates (orig_node, new_node); 4426 } 4427 4428 /* Update the respective profile of specialized NEW_NODE and the original 4429 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM 4430 have been redirected to the specialized version. */ 4431 4432 static void 4433 update_specialized_profile (struct cgraph_node *new_node, 4434 struct cgraph_node *orig_node, 4435 profile_count redirected_sum) 4436 { 4437 struct cgraph_edge *cs; 4438 profile_count new_node_count, orig_node_count = orig_node->count; 4439 4440 if (dump_file) 4441 { 4442 fprintf (dump_file, " the sum of counts of redirected edges is "); 4443 redirected_sum.dump (dump_file); 4444 fprintf (dump_file, "\n"); 4445 } 4446 if (!(orig_node_count > profile_count::zero ())) 4447 return; 4448 4449 gcc_assert (orig_node_count >= redirected_sum); 4450 4451 new_node_count = new_node->count; 4452 new_node->count += redirected_sum; 4453 orig_node->count -= redirected_sum; 4454 4455 for (cs = new_node->callees; cs; cs = cs->next_callee) 4456 cs->count += cs->count.apply_scale (redirected_sum, new_node_count); 4457 4458 for (cs = orig_node->callees; cs; cs = cs->next_callee) 4459 { 4460 profile_count dec = cs->count.apply_scale (redirected_sum, 4461 orig_node_count); 4462 cs->count -= dec; 4463 } 4464 4465 if (dump_file) 4466 dump_profile_updates (orig_node, new_node); 4467 } 4468 4469 /* Return true if we would like to remove a parameter from NODE when cloning it 4470 with KNOWN_CSTS scalar constants. */ 4471 4472 static bool 4473 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts) 4474 { 4475 auto_vec<bool, 16> surviving; 4476 bool filled_vec = false; 4477 ipa_node_params *info = IPA_NODE_REF (node); 4478 int i, count = ipa_get_param_count (info); 4479 4480 for (i = 0; i < count; i++) 4481 { 4482 if (!known_csts[i] && ipa_is_param_used (info, i)) 4483 continue; 4484 4485 if (!filled_vec) 4486 { 4487 if (!node->clone.param_adjustments) 4488 return true; 4489 node->clone.param_adjustments->get_surviving_params (&surviving); 4490 filled_vec = true; 4491 } 4492 if (surviving.length () > (unsigned) i && surviving[i]) 4493 return true; 4494 } 4495 return false; 4496 } 4497 4498 /* Create a specialized version of NODE with known constants in KNOWN_CSTS, 4499 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and 4500 redirect all edges in CALLERS to it.
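For illustration (a hypothetical function): if every edge in CALLERS passes the constant 4 as the second argument of int f (int a, int b) { return a + b; } then KNOWN_CSTS is {NULL, 4} and, provided the signature may change, the virtual clone created below is roughly equivalent to int f.constprop (int a) { return a + 4; } with every edge in CALLERS redirected to it.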
*/ 4501 4502 static struct cgraph_node * 4503 create_specialized_node (struct cgraph_node *node, 4504 vec<tree> known_csts, 4505 vec<ipa_polymorphic_call_context> known_contexts, 4506 struct ipa_agg_replacement_value *aggvals, 4507 vec<cgraph_edge *> callers) 4508 { 4509 class ipa_node_params *new_info, *info = IPA_NODE_REF (node); 4510 vec<ipa_replace_map *, va_gc> *replace_trees = NULL; 4511 vec<ipa_adjusted_param, va_gc> *new_params = NULL; 4512 struct ipa_agg_replacement_value *av; 4513 struct cgraph_node *new_node; 4514 int i, count = ipa_get_param_count (info); 4515 ipa_param_adjustments *old_adjustments = node->clone.param_adjustments; 4516 ipa_param_adjustments *new_adjustments; 4517 gcc_assert (!info->ipcp_orig_node); 4518 gcc_assert (node->can_change_signature 4519 || !old_adjustments); 4520 4521 if (old_adjustments) 4522 { 4523 /* At the moment all IPA optimizations should use the number of 4524 parameters of the prevailing decl as the m_always_copy_start. 4525 Handling any other value would complicate the code below, so for the 4526 time being let's only assert it is so. */ 4527 gcc_assert (old_adjustments->m_always_copy_start == count 4528 || old_adjustments->m_always_copy_start < 0); 4529 int old_adj_count = vec_safe_length (old_adjustments->m_adj_params); 4530 for (i = 0; i < old_adj_count; i++) 4531 { 4532 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i]; 4533 if (!node->can_change_signature 4534 || old_adj->op != IPA_PARAM_OP_COPY 4535 || (!known_csts[old_adj->base_index] 4536 && ipa_is_param_used (info, old_adj->base_index))) 4537 { 4538 ipa_adjusted_param new_adj = *old_adj; 4539 4540 new_adj.prev_clone_adjustment = true; 4541 new_adj.prev_clone_index = i; 4542 vec_safe_push (new_params, new_adj); 4543 } 4544 } 4545 bool skip_return = old_adjustments->m_skip_return; 4546 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ()) 4547 ipa_param_adjustments (new_params, count, 4548 skip_return)); 4549 } 4550 else if (node->can_change_signature 4551 && want_remove_some_param_p (node, known_csts)) 4552 { 4553 ipa_adjusted_param adj; 4554 memset (&adj, 0, sizeof (adj)); 4555 adj.op = IPA_PARAM_OP_COPY; 4556 for (i = 0; i < count; i++) 4557 if (!known_csts[i] && ipa_is_param_used (info, i)) 4558 { 4559 adj.base_index = i; 4560 adj.prev_clone_index = i; 4561 vec_safe_push (new_params, adj); 4562 } 4563 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ()) 4564 ipa_param_adjustments (new_params, count, false)); 4565 } 4566 else 4567 new_adjustments = NULL; 4568 4569 replace_trees = vec_safe_copy (node->clone.tree_map); 4570 for (i = 0; i < count; i++) 4571 { 4572 tree t = known_csts[i]; 4573 if (t) 4574 { 4575 struct ipa_replace_map *replace_map; 4576 4577 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO); 4578 replace_map = get_replacement_map (info, t, i); 4579 if (replace_map) 4580 vec_safe_push (replace_trees, replace_map); 4581 } 4582 } 4583 auto_vec<cgraph_edge *, 2> self_recursive_calls; 4584 for (i = callers.length () - 1; i >= 0; i--) 4585 { 4586 cgraph_edge *cs = callers[i]; 4587 if (cs->caller == node) 4588 { 4589 self_recursive_calls.safe_push (cs); 4590 callers.unordered_remove (i); 4591 } 4592 } 4593 4594 unsigned &suffix_counter = clone_num_suffixes->get_or_insert ( 4595 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME ( 4596 node->decl))); 4597 new_node = node->create_virtual_clone (callers, replace_trees, 4598 new_adjustments, "constprop", 4599 suffix_counter); 4600 suffix_counter++; 4601 4602 bool have_self_recursive_calls =
!self_recursive_calls.is_empty (); 4603 for (unsigned j = 0; j < self_recursive_calls.length (); j++) 4604 { 4605 cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]); 4606 /* Cloned edges can disappear during cloning as speculation can be 4607 resolved; check that we have one and that it comes from the last 4608 cloning. */ 4609 if (cs && cs->caller == new_node) 4610 cs->redirect_callee_duplicating_thunks (new_node); 4611 /* Any future code that would make more than one clone of an outgoing 4612 edge would confuse this mechanism, so let's check that does not 4613 happen. */ 4614 gcc_checking_assert (!cs 4615 || !get_next_cgraph_edge_clone (cs) 4616 || get_next_cgraph_edge_clone (cs)->caller != new_node); 4617 } 4618 if (have_self_recursive_calls) 4619 new_node->expand_all_artificial_thunks (); 4620 4621 ipa_set_node_agg_value_chain (new_node, aggvals); 4622 for (av = aggvals; av; av = av->next) 4623 new_node->maybe_create_reference (av->value, NULL); 4624 4625 if (dump_file && (dump_flags & TDF_DETAILS)) 4626 { 4627 fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ()); 4628 if (known_contexts.exists ()) 4629 { 4630 for (i = 0; i < count; i++) 4631 if (!known_contexts[i].useless_p ()) 4632 { 4633 fprintf (dump_file, " known ctx %i is ", i); 4634 known_contexts[i].dump (dump_file); 4635 } 4636 } 4637 if (aggvals) 4638 ipa_dump_agg_replacement_values (dump_file, aggvals); 4639 } 4640 ipa_check_create_node_params (); 4641 update_profiling_info (node, new_node); 4642 new_info = IPA_NODE_REF (new_node); 4643 new_info->ipcp_orig_node = node; 4644 new_node->ipcp_clone = true; 4645 new_info->known_csts = known_csts; 4646 new_info->known_contexts = known_contexts; 4647 4648 ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts, aggvals); 4649 4650 callers.release (); 4651 return new_node; 4652 } 4653 4654 /* Return true if JFUNC, which describes the i-th parameter of call CS, is a 4655 pass-through function to itself when the cgraph_node involved is not an 4656 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple 4657 no-operation pass-through. */ 4658 4659 static bool 4660 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i, 4661 bool simple = true) 4662 { 4663 enum availability availability; 4664 if (cs->caller == cs->callee->function_symbol (&availability) 4665 && availability > AVAIL_INTERPOSABLE 4666 && jfunc->type == IPA_JF_PASS_THROUGH 4667 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR) 4668 && ipa_get_jf_pass_through_formal_id (jfunc) == i 4669 && IPA_NODE_REF (cs->caller) 4670 && !IPA_NODE_REF (cs->caller)->ipcp_orig_node) 4671 return true; 4672 return false; 4673 } 4674 4675 /* Return true if JFUNC, which describes a part of an aggregate represented or 4676 pointed to by the i-th parameter of call CS, is a pass-through function to 4677 itself when the cgraph_node involved is not an IPA-CP clone. When 4678 SIMPLE is true, further check if JFUNC is a simple no-operation 4679 pass-through.
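For example (a hypothetical function), in fn (int *p) { int i = *p; fn (&i); } the value loaded from *p is stored unchanged at the same offset of the aggregate passed to the recursive call, which the checks below recognize as an IPA_JF_LOAD_AGG jump function that is a simple no-operation pass-through.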
*/ 4680 4681 static bool 4682 self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc, 4683 int i, bool simple = true) 4684 { 4685 enum availability availability; 4686 if (cs->caller == cs->callee->function_symbol (&availability) 4687 && availability > AVAIL_INTERPOSABLE 4688 && jfunc->jftype == IPA_JF_LOAD_AGG 4689 && jfunc->offset == jfunc->value.load_agg.offset 4690 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR) 4691 && jfunc->value.pass_through.formal_id == i 4692 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type) 4693 && IPA_NODE_REF (cs->caller) 4694 && !IPA_NODE_REF (cs->caller)->ipcp_orig_node) 4695 return true; 4696 return false; 4697 } 4698 4699 /* Given a NODE, and a subset of its CALLERS, try to populate blank slots in 4700 KNOWN_CSTS with constants that are also known for all of the CALLERS. */ 4701 4702 static void 4703 find_more_scalar_values_for_callers_subset (struct cgraph_node *node, 4704 vec<tree> known_csts, 4705 vec<cgraph_edge *> callers) 4706 { 4707 class ipa_node_params *info = IPA_NODE_REF (node); 4708 int i, count = ipa_get_param_count (info); 4709 4710 for (i = 0; i < count; i++) 4711 { 4712 struct cgraph_edge *cs; 4713 tree newval = NULL_TREE; 4714 int j; 4715 bool first = true; 4716 tree type = ipa_get_type (info, i); 4717 4718 if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i]) 4719 continue; 4720 4721 FOR_EACH_VEC_ELT (callers, j, cs) 4722 { 4723 struct ipa_jump_func *jump_func; 4724 tree t; 4725 4726 if (!IPA_EDGE_REF (cs) 4727 || i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs)) 4728 || (i == 0 4729 && call_passes_through_thunk_p (cs))) 4730 { 4731 newval = NULL_TREE; 4732 break; 4733 } 4734 jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i); 4735 4736 /* Besides a simple pass-through jump function, an arithmetic jump 4737 function can also introduce an argument-direct-pass-through for a 4738 self-feeding recursive call. For example, 4739 4740 fn (int i) 4741 { 4742 fn (i & 1); 4743 } 4744 4745 Given that i is 0, recursive propagation via (i & 1) also gets 4746 0. */ 4747 if (self_recursive_pass_through_p (cs, jump_func, i, false)) 4748 { 4749 gcc_assert (newval); 4750 t = ipa_get_jf_arith_result ( 4751 ipa_get_jf_pass_through_operation (jump_func), 4752 newval, 4753 ipa_get_jf_pass_through_operand (jump_func), 4754 type); 4755 } 4756 else 4757 t = ipa_value_from_jfunc (IPA_NODE_REF (cs->caller), jump_func, 4758 type); 4759 if (!t 4760 || (newval 4761 && !values_equal_for_ipcp_p (t, newval)) 4762 || (!first && !newval)) 4763 { 4764 newval = NULL_TREE; 4765 break; 4766 } 4767 else 4768 newval = t; 4769 first = false; 4770 } 4771 4772 if (newval) 4773 { 4774 if (dump_file && (dump_flags & TDF_DETAILS)) 4775 { 4776 fprintf (dump_file, " adding an extra known scalar value "); 4777 print_ipcp_constant_value (dump_file, newval); 4778 fprintf (dump_file, " for "); 4779 ipa_dump_param (dump_file, info, i); 4780 fprintf (dump_file, "\n"); 4781 } 4782 4783 known_csts[i] = newval; 4784 } 4785 } 4786 } 4787 4788 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in 4789 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the 4790 CALLERS.
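For illustration: if every caller in the subset passes an object whose dynamic type it knows, say an instance of the same class it has just constructed, the meet of the individual contexts computed below retains that knowledge and the corresponding slot becomes useful for later devirtualization; the loop over CALLERS stops early as soon as the meet degenerates into a useless context.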
*/ 4791 4792 static void 4793 find_more_contexts_for_caller_subset (cgraph_node *node, 4794 vec<ipa_polymorphic_call_context> 4795 *known_contexts, 4796 vec<cgraph_edge *> callers) 4797 { 4798 ipa_node_params *info = IPA_NODE_REF (node); 4799 int i, count = ipa_get_param_count (info); 4800 4801 for (i = 0; i < count; i++) 4802 { 4803 cgraph_edge *cs; 4804 4805 if (ipa_get_poly_ctx_lat (info, i)->bottom 4806 || (known_contexts->exists () 4807 && !(*known_contexts)[i].useless_p ())) 4808 continue; 4809 4810 ipa_polymorphic_call_context newval; 4811 bool first = true; 4812 int j; 4813 4814 FOR_EACH_VEC_ELT (callers, j, cs) 4815 { 4816 if (!IPA_EDGE_REF (cs) 4817 || i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs))) 4818 return; 4819 ipa_jump_func *jfunc = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), 4820 i); 4821 ipa_polymorphic_call_context ctx; 4822 ctx = ipa_context_from_jfunc (IPA_NODE_REF (cs->caller), cs, i, 4823 jfunc); 4824 if (first) 4825 { 4826 newval = ctx; 4827 first = false; 4828 } 4829 else 4830 newval.meet_with (ctx); 4831 if (newval.useless_p ()) 4832 break; 4833 } 4834 4835 if (!newval.useless_p ()) 4836 { 4837 if (dump_file && (dump_flags & TDF_DETAILS)) 4838 { 4839 fprintf (dump_file, " adding an extra known polymorphic " 4840 "context "); 4841 print_ipcp_constant_value (dump_file, newval); 4842 fprintf (dump_file, " for "); 4843 ipa_dump_param (dump_file, info, i); 4844 fprintf (dump_file, "\n"); 4845 } 4846 4847 if (!known_contexts->exists ()) 4848 known_contexts->safe_grow_cleared (ipa_get_param_count (info)); 4849 (*known_contexts)[i] = newval; 4850 } 4851 4852 } 4853 } 4854 4855 /* Go through PLATS and create a vector of values consisting of values and 4856 offsets (minus OFFSET) of lattices that contain only a single value. */ 4857 4858 static vec<ipa_agg_value> 4859 copy_plats_to_inter (class ipcp_param_lattices *plats, HOST_WIDE_INT offset) 4860 { 4861 vec<ipa_agg_value> res = vNULL; 4862 4863 if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom) 4864 return vNULL; 4865 4866 for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next) 4867 if (aglat->is_single_const ()) 4868 { 4869 struct ipa_agg_value ti; 4870 ti.offset = aglat->offset - offset; 4871 ti.value = aglat->values->value; 4872 res.safe_push (ti); 4873 } 4874 return res; 4875 } 4876 4877 /* Intersect all values in INTER with single value lattices in PLATS (while 4878 subtracting OFFSET). */ 4879 4880 static void 4881 intersect_with_plats (class ipcp_param_lattices *plats, 4882 vec<ipa_agg_value> *inter, 4883 HOST_WIDE_INT offset) 4884 { 4885 struct ipcp_agg_lattice *aglat; 4886 struct ipa_agg_value *item; 4887 int k; 4888 4889 if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom) 4890 { 4891 inter->release (); 4892 return; 4893 } 4894 4895 aglat = plats->aggs; 4896 FOR_EACH_VEC_ELT (*inter, k, item) 4897 { 4898 bool found = false; 4899 if (!item->value) 4900 continue; 4901 while (aglat) 4902 { 4903 if (aglat->offset - offset > item->offset) 4904 break; 4905 if (aglat->offset - offset == item->offset) 4906 { 4907 if (aglat->is_single_const ()) 4908 { 4909 tree value = aglat->values->value; 4910 4911 if (values_equal_for_ipcp_p (item->value, value)) 4912 found = true; 4913 } 4914 break; 4915 } 4916 aglat = aglat->next; 4917 } 4918 if (!found) 4919 item->value = NULL_TREE; 4920 } 4921 } 4922 4923 /* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the 4924 vector result while subtracting OFFSET from the individual value offsets. 
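For example with made-up numbers, a replacement for parameter INDEX at offset 96 becomes, when OFFSET is 64, a vector entry at offset 32, whereas replacements whose adjusted offset would be negative are left out.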
*/ 4925 4926 static vec<ipa_agg_value> 4927 agg_replacements_to_vector (struct cgraph_node *node, int index, 4928 HOST_WIDE_INT offset) 4929 { 4930 struct ipa_agg_replacement_value *av; 4931 vec<ipa_agg_value> res = vNULL; 4932 4933 for (av = ipa_get_agg_replacements_for_node (node); av; av = av->next) 4934 if (av->index == index 4935 && (av->offset - offset) >= 0) 4936 { 4937 struct ipa_agg_value item; 4938 gcc_checking_assert (av->value); 4939 item.offset = av->offset - offset; 4940 item.value = av->value; 4941 res.safe_push (item); 4942 } 4943 4944 return res; 4945 } 4946 4947 /* Intersect all values in INTER with those that we have already scheduled to 4948 be replaced in parameter number INDEX of NODE, which is an IPA-CP clone 4949 (while subtracting OFFSET). */ 4950 4951 static void 4952 intersect_with_agg_replacements (struct cgraph_node *node, int index, 4953 vec<ipa_agg_value> *inter, 4954 HOST_WIDE_INT offset) 4955 { 4956 struct ipa_agg_replacement_value *srcvals; 4957 struct ipa_agg_value *item; 4958 int i; 4959 4960 srcvals = ipa_get_agg_replacements_for_node (node); 4961 if (!srcvals) 4962 { 4963 inter->release (); 4964 return; 4965 } 4966 4967 FOR_EACH_VEC_ELT (*inter, i, item) 4968 { 4969 struct ipa_agg_replacement_value *av; 4970 bool found = false; 4971 if (!item->value) 4972 continue; 4973 for (av = srcvals; av; av = av->next) 4974 { 4975 gcc_checking_assert (av->value); 4976 if (av->index == index 4977 && av->offset - offset == item->offset) 4978 { 4979 if (values_equal_for_ipcp_p (item->value, av->value)) 4980 found = true; 4981 break; 4982 } 4983 } 4984 if (!found) 4985 item->value = NULL_TREE; 4986 } 4987 } 4988 4989 /* Intersect values in INTER with aggregate values that come along edge CS to 4990 parameter number INDEX and return it. If INTER does not actually exist yet, 4991 copy all incoming values to it. If we determine we ended up with no values 4992 whatsoever, return a released vector. */ 4993 4994 static vec<ipa_agg_value> 4995 intersect_aggregates_with_edge (struct cgraph_edge *cs, int index, 4996 vec<ipa_agg_value> inter) 4997 { 4998 struct ipa_jump_func *jfunc; 4999 jfunc = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), index); 5000 if (jfunc->type == IPA_JF_PASS_THROUGH 5001 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR) 5002 { 5003 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); 5004 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc); 5005 5006 if (caller_info->ipcp_orig_node) 5007 { 5008 struct cgraph_node *orig_node = caller_info->ipcp_orig_node; 5009 class ipcp_param_lattices *orig_plats; 5010 orig_plats = ipa_get_parm_lattices (IPA_NODE_REF (orig_node), 5011 src_idx); 5012 if (agg_pass_through_permissible_p (orig_plats, jfunc)) 5013 { 5014 if (!inter.exists ()) 5015 inter = agg_replacements_to_vector (cs->caller, src_idx, 0); 5016 else 5017 intersect_with_agg_replacements (cs->caller, src_idx, 5018 &inter, 0); 5019 } 5020 else 5021 { 5022 inter.release (); 5023 return vNULL; 5024 } 5025 } 5026 else 5027 { 5028 class ipcp_param_lattices *src_plats; 5029 src_plats = ipa_get_parm_lattices (caller_info, src_idx); 5030 if (agg_pass_through_permissible_p (src_plats, jfunc)) 5031 { 5032 /* Currently we do not produce clobber aggregate jump 5033 functions, adjust when we do. 
*/ 5034 gcc_checking_assert (!jfunc->agg.items); 5035 if (!inter.exists ()) 5036 inter = copy_plats_to_inter (src_plats, 0); 5037 else 5038 intersect_with_plats (src_plats, &inter, 0); 5039 } 5040 else 5041 { 5042 inter.release (); 5043 return vNULL; 5044 } 5045 } 5046 } 5047 else if (jfunc->type == IPA_JF_ANCESTOR 5048 && ipa_get_jf_ancestor_agg_preserved (jfunc)) 5049 { 5050 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); 5051 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc); 5052 class ipcp_param_lattices *src_plats; 5053 HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc); 5054 5055 if (caller_info->ipcp_orig_node) 5056 { 5057 if (!inter.exists ()) 5058 inter = agg_replacements_to_vector (cs->caller, src_idx, delta); 5059 else 5060 intersect_with_agg_replacements (cs->caller, src_idx, &inter, 5061 delta); 5062 } 5063 else 5064 { 5065 src_plats = ipa_get_parm_lattices (caller_info, src_idx); 5066 /* Currently we do not produce clobber aggregate jump 5067 functions, adjust when we do. */ 5068 gcc_checking_assert (!src_plats->aggs || !jfunc->agg.items); 5069 if (!inter.exists ()) 5070 inter = copy_plats_to_inter (src_plats, delta); 5071 else 5072 intersect_with_plats (src_plats, &inter, delta); 5073 } 5074 } 5075 else if (jfunc->agg.items) 5076 { 5077 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); 5078 struct ipa_agg_value *item; 5079 int k; 5080 5081 if (!inter.exists ()) 5082 for (unsigned i = 0; i < jfunc->agg.items->length (); i++) 5083 { 5084 struct ipa_agg_jf_item *agg_item = &(*jfunc->agg.items)[i]; 5085 tree value = ipa_agg_value_from_node (caller_info, cs->caller, 5086 agg_item); 5087 if (value) 5088 { 5089 struct ipa_agg_value agg_value; 5090 5091 agg_value.value = value; 5092 agg_value.offset = agg_item->offset; 5093 inter.safe_push (agg_value); 5094 } 5095 } 5096 else 5097 FOR_EACH_VEC_ELT (inter, k, item) 5098 { 5099 int l = 0; 5100 bool found = false; 5101 5102 if (!item->value) 5103 continue; 5104 5105 while ((unsigned) l < jfunc->agg.items->length ()) 5106 { 5107 struct ipa_agg_jf_item *ti; 5108 ti = &(*jfunc->agg.items)[l]; 5109 if (ti->offset > item->offset) 5110 break; 5111 if (ti->offset == item->offset) 5112 { 5113 tree value; 5114 5115 /* Besides a simple pass-through aggregate jump function, 5116 an arithmetic aggregate jump function can also bring 5117 the same aggregate value as the passed-in parameter for a 5118 self-feeding recursive call. For example, 5119 5120 fn (int *i) 5121 { 5122 int j = *i & 1; 5123 fn (&j); 5124 } 5125 5126 Given that *i is 0, recursive propagation via (*i & 1) 5127 also gets 0. */ 5128 if (self_recursive_agg_pass_through_p (cs, ti, index, 5129 false)) 5130 value = ipa_get_jf_arith_result ( 5131 ti->value.pass_through.operation, 5132 item->value, 5133 ti->value.pass_through.operand, 5134 ti->type); 5135 else 5136 value = ipa_agg_value_from_node (caller_info, 5137 cs->caller, ti); 5138 5139 if (value && values_equal_for_ipcp_p (item->value, value)) 5140 found = true; 5141 break; 5142 } 5143 l++; 5144 } 5145 if (!found) 5146 item->value = NULL; 5147 } 5148 } 5149 else 5150 { 5151 inter.release (); 5152 return vNULL; 5153 } 5154 return inter; 5155 } 5156 5157 /* Look at edges in CALLERS and collect all known aggregate values that arrive 5158 from all of them.
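For illustration with made-up values: if one caller passes an aggregate containing 5 at offset 0 and 7 at offset 32 while another passes only 5 at offset 0, the intersection computed below keeps just the 5 at offset 0 and only that replacement is recorded for the eventual clone.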
*/ 5159 5160 static struct ipa_agg_replacement_value * 5161 find_aggregate_values_for_callers_subset (struct cgraph_node *node, 5162 vec<cgraph_edge *> callers) 5163 { 5164 class ipa_node_params *dest_info = IPA_NODE_REF (node); 5165 struct ipa_agg_replacement_value *res; 5166 struct ipa_agg_replacement_value **tail = &res; 5167 struct cgraph_edge *cs; 5168 int i, j, count = ipa_get_param_count (dest_info); 5169 5170 FOR_EACH_VEC_ELT (callers, j, cs) 5171 { 5172 if (!IPA_EDGE_REF (cs)) 5173 { 5174 count = 0; 5175 break; 5176 } 5177 int c = ipa_get_cs_argument_count (IPA_EDGE_REF (cs)); 5178 if (c < count) 5179 count = c; 5180 } 5181 5182 for (i = 0; i < count; i++) 5183 { 5184 struct cgraph_edge *cs; 5185 vec<ipa_agg_value> inter = vNULL; 5186 struct ipa_agg_value *item; 5187 class ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i); 5188 int j; 5189 5190 /* Among other things, the following check should deal with all by_ref 5191 mismatches. */ 5192 if (plats->aggs_bottom) 5193 continue; 5194 5195 FOR_EACH_VEC_ELT (callers, j, cs) 5196 { 5197 struct ipa_jump_func *jfunc 5198 = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i); 5199 if (self_recursive_pass_through_p (cs, jfunc, i) 5200 && (!plats->aggs_by_ref 5201 || ipa_get_jf_pass_through_agg_preserved (jfunc))) 5202 continue; 5203 inter = intersect_aggregates_with_edge (cs, i, inter); 5204 5205 if (!inter.exists ()) 5206 goto next_param; 5207 } 5208 5209 FOR_EACH_VEC_ELT (inter, j, item) 5210 { 5211 struct ipa_agg_replacement_value *v; 5212 5213 if (!item->value) 5214 continue; 5215 5216 v = ggc_alloc<ipa_agg_replacement_value> (); 5217 v->index = i; 5218 v->offset = item->offset; 5219 v->value = item->value; 5220 v->by_ref = plats->aggs_by_ref; 5221 *tail = v; 5222 tail = &v->next; 5223 } 5224 5225 next_param: 5226 if (inter.exists ()) 5227 inter.release (); 5228 } 5229 *tail = NULL; 5230 return res; 5231 } 5232 5233 /* Determine whether CS also brings all scalar values that the NODE is 5234 specialized for. */ 5235 5236 static bool 5237 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs, 5238 struct cgraph_node *node) 5239 { 5240 class ipa_node_params *dest_info = IPA_NODE_REF (node); 5241 int count = ipa_get_param_count (dest_info); 5242 class ipa_node_params *caller_info; 5243 class ipa_edge_args *args; 5244 int i; 5245 5246 caller_info = IPA_NODE_REF (cs->caller); 5247 args = IPA_EDGE_REF (cs); 5248 for (i = 0; i < count; i++) 5249 { 5250 struct ipa_jump_func *jump_func; 5251 tree val, t; 5252 5253 val = dest_info->known_csts[i]; 5254 if (!val) 5255 continue; 5256 5257 if (i >= ipa_get_cs_argument_count (args)) 5258 return false; 5259 jump_func = ipa_get_ith_jump_func (args, i); 5260 t = ipa_value_from_jfunc (caller_info, jump_func, 5261 ipa_get_type (dest_info, i)); 5262 if (!t || !values_equal_for_ipcp_p (val, t)) 5263 return false; 5264 } 5265 return true; 5266 } 5267 5268 /* Determine whether CS also brings all aggregate values that NODE is 5269 specialized for. 
*/ 5270 static bool 5271 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs, 5272 struct cgraph_node *node) 5273 { 5274 class ipa_node_params *orig_node_info; 5275 struct ipa_agg_replacement_value *aggval; 5276 int i, ec, count; 5277 5278 aggval = ipa_get_agg_replacements_for_node (node); 5279 if (!aggval) 5280 return true; 5281 5282 count = ipa_get_param_count (IPA_NODE_REF (node)); 5283 ec = ipa_get_cs_argument_count (IPA_EDGE_REF (cs)); 5284 if (ec < count) 5285 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next) 5286 if (av->index >= ec) 5287 return false; 5288 5289 orig_node_info = IPA_NODE_REF (IPA_NODE_REF (node)->ipcp_orig_node); 5290 5291 for (i = 0; i < count; i++) 5292 { 5293 class ipcp_param_lattices *plats; 5294 bool interesting = false; 5295 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next) 5296 if (av->index == i) 5297 { 5298 interesting = true; 5299 break; 5300 } 5301 if (!interesting) 5302 continue; 5303 5304 plats = ipa_get_parm_lattices (orig_node_info, i); 5305 if (plats->aggs_bottom) 5306 return false; 5307 5308 vec<ipa_agg_value> values = intersect_aggregates_with_edge (cs, i, vNULL); 5309 if (!values.exists ()) 5310 return false; 5311 5312 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next) 5313 if (av->index == i) 5314 { 5315 struct ipa_agg_value *item; 5316 int j; 5317 bool found = false; 5318 FOR_EACH_VEC_ELT (values, j, item) 5319 if (item->value 5320 && item->offset == av->offset 5321 && values_equal_for_ipcp_p (item->value, av->value)) 5322 { 5323 found = true; 5324 break; 5325 } 5326 if (!found) 5327 { 5328 values.release (); 5329 return false; 5330 } 5331 } 5332 values.release (); 5333 } 5334 return true; 5335 } 5336 5337 /* Given an original NODE and a VAL for which we have already created a 5338 specialized clone, look whether there are incoming edges that still lead 5339 into the old node but now also bring the requested value and also conform to 5340 all other criteria such that they can be redirected to the special node. 5341 This function can therefore redirect the final edge in an SCC. */ 5342 5343 template <typename valtype> 5344 static void 5345 perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val) 5346 { 5347 ipcp_value_source<valtype> *src; 5348 profile_count redirected_sum = profile_count::zero (); 5349 5350 for (src = val->sources; src; src = src->next) 5351 { 5352 struct cgraph_edge *cs = src->cs; 5353 while (cs) 5354 { 5355 if (cgraph_edge_brings_value_p (cs, src, node, val) 5356 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node) 5357 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node)) 5358 { 5359 if (dump_file) 5360 fprintf (dump_file, " - adding an extra caller %s of %s\n", 5361 cs->caller->dump_name (), 5362 val->spec_node->dump_name ()); 5363 5364 cs->redirect_callee_duplicating_thunks (val->spec_node); 5365 val->spec_node->expand_all_artificial_thunks (); 5366 if (cs->count.ipa ().initialized_p ()) 5367 redirected_sum = redirected_sum + cs->count.ipa (); 5368 } 5369 cs = get_next_cgraph_edge_clone (cs); 5370 } 5371 } 5372 5373 if (redirected_sum.nonzero_p ()) 5374 update_specialized_profile (val->spec_node, node, redirected_sum); 5375 } 5376 5377 /* Return true if KNOWN_CONTEXTS contains at least one useful context.
*/ 5378 5379 static bool 5380 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts) 5381 { 5382 ipa_polymorphic_call_context *ctx; 5383 int i; 5384 5385 FOR_EACH_VEC_ELT (known_contexts, i, ctx) 5386 if (!ctx->useless_p ()) 5387 return true; 5388 return false; 5389 } 5390 5391 /* Return a copy of KNOWN_CONTEXTS if it contains at least one useful context, otherwise return vNULL. */ 5392 5393 static vec<ipa_polymorphic_call_context> 5394 copy_useful_known_contexts (vec<ipa_polymorphic_call_context> known_contexts) 5395 { 5396 if (known_contexts_useful_p (known_contexts)) 5397 return known_contexts.copy (); 5398 else 5399 return vNULL; 5400 } 5401 5402 /* Copy KNOWN_CSTS and modify the copy according to VAL and INDEX. If 5403 non-empty, replace KNOWN_CONTEXTS with its copy too. */ 5404 5405 static void 5406 modify_known_vectors_with_val (vec<tree> *known_csts, 5407 vec<ipa_polymorphic_call_context> *known_contexts, 5408 ipcp_value<tree> *val, 5409 int index) 5410 { 5411 *known_csts = known_csts->copy (); 5412 *known_contexts = copy_useful_known_contexts (*known_contexts); 5413 (*known_csts)[index] = val->value; 5414 } 5415 5416 /* Replace KNOWN_CSTS with its copy. Also copy KNOWN_CONTEXTS and modify the 5417 copy according to VAL and INDEX. */ 5418 5419 static void 5420 modify_known_vectors_with_val (vec<tree> *known_csts, 5421 vec<ipa_polymorphic_call_context> *known_contexts, 5422 ipcp_value<ipa_polymorphic_call_context> *val, 5423 int index) 5424 { 5425 *known_csts = known_csts->copy (); 5426 *known_contexts = known_contexts->copy (); 5427 (*known_contexts)[index] = val->value; 5428 } 5429 5430 /* Return true if OFFSET indicates this was not an aggregate value or there is 5431 a replacement equivalent to VALUE, INDEX and OFFSET among those in the 5432 AGGVALS list. */ 5433 5434 DEBUG_FUNCTION bool 5435 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *aggvals, 5436 int index, HOST_WIDE_INT offset, tree value) 5437 { 5438 if (offset == -1) 5439 return true; 5440 5441 while (aggvals) 5442 { 5443 if (aggvals->index == index 5444 && aggvals->offset == offset 5445 && values_equal_for_ipcp_p (aggvals->value, value)) 5446 return true; 5447 aggvals = aggvals->next; 5448 } 5449 return false; 5450 } 5451 5452 /* Return true if OFFSET is minus one, because the source of a polymorphic 5453 context cannot be an aggregate value. */ 5454 5455 DEBUG_FUNCTION bool 5456 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *, 5457 int , HOST_WIDE_INT offset, 5458 ipa_polymorphic_call_context) 5459 { 5460 return offset == -1; 5461 } 5462 5463 /* Decide whether to create a special version of NODE for value VAL of parameter 5464 at the given INDEX. If OFFSET is -1, the value is for the parameter itself, 5465 otherwise it is stored at the given OFFSET of the parameter. KNOWN_CSTS and 5466 KNOWN_CONTEXTS describe the other already known values.
*/ 5467 5468 template <typename valtype> 5469 static bool 5470 decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset, 5471 ipcp_value<valtype> *val, vec<tree> known_csts, 5472 vec<ipa_polymorphic_call_context> known_contexts) 5473 { 5474 struct ipa_agg_replacement_value *aggvals; 5475 int freq_sum, caller_count; 5476 profile_count count_sum; 5477 vec<cgraph_edge *> callers; 5478 5479 if (val->spec_node) 5480 { 5481 perhaps_add_new_callers (node, val); 5482 return false; 5483 } 5484 else if (val->local_size_cost + overall_size > get_max_overall_size (node)) 5485 { 5486 if (dump_file && (dump_flags & TDF_DETAILS)) 5487 fprintf (dump_file, " Ignoring candidate value because " 5488 "maximum unit size would be reached with %li.\n", 5489 val->local_size_cost + overall_size); 5490 return false; 5491 } 5492 else if (!get_info_about_necessary_edges (val, node, &freq_sum, &count_sum, 5493 &caller_count)) 5494 return false; 5495 5496 if (dump_file && (dump_flags & TDF_DETAILS)) 5497 { 5498 fprintf (dump_file, " - considering value "); 5499 print_ipcp_constant_value (dump_file, val->value); 5500 fprintf (dump_file, " for "); 5501 ipa_dump_param (dump_file, IPA_NODE_REF (node), index); 5502 if (offset != -1) 5503 fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset); 5504 fprintf (dump_file, " (caller_count: %i)\n", caller_count); 5505 } 5506 5507 if (!good_cloning_opportunity_p (node, val->local_time_benefit, 5508 freq_sum, count_sum, 5509 val->local_size_cost) 5510 && !good_cloning_opportunity_p (node, 5511 val->local_time_benefit 5512 + val->prop_time_benefit, 5513 freq_sum, count_sum, 5514 val->local_size_cost 5515 + val->prop_size_cost)) 5516 return false; 5517 5518 if (dump_file) 5519 fprintf (dump_file, " Creating a specialized node of %s.\n", 5520 node->dump_name ()); 5521 5522 callers = gather_edges_for_value (val, node, caller_count); 5523 if (offset == -1) 5524 modify_known_vectors_with_val (&known_csts, &known_contexts, val, index); 5525 else 5526 { 5527 known_csts = known_csts.copy (); 5528 known_contexts = copy_useful_known_contexts (known_contexts); 5529 } 5530 find_more_scalar_values_for_callers_subset (node, known_csts, callers); 5531 find_more_contexts_for_caller_subset (node, &known_contexts, callers); 5532 aggvals = find_aggregate_values_for_callers_subset (node, callers); 5533 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index, 5534 offset, val->value)); 5535 val->spec_node = create_specialized_node (node, known_csts, known_contexts, 5536 aggvals, callers); 5537 overall_size += val->local_size_cost; 5538 5539 /* TODO: If for some lattice there is only one other known value 5540 left, make a special node for it too. */ 5541 5542 return true; 5543 } 5544 5545 /* Decide whether and what specialized clones of NODE should be created. 
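For an overview of the flow below: decide_about_value above is invoked for every constant and every polymorphic context in the lattices of every parameter of NODE, each invocation possibly creating one clone for a sufficiently profitable value; afterwards, if the propagation stage has set do_clone_for_all_contexts for NODE, one more clone specialized for all context-independent values is created.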
/* Decide whether and what specialized clones of NODE should be created.  */

static bool
decide_whether_version_node (struct cgraph_node *node)
{
  class ipa_node_params *info = IPA_NODE_REF (node);
  int i, count = ipa_get_param_count (info);
  vec<tree> known_csts;
  vec<ipa_polymorphic_call_context> known_contexts;
  bool ret = false;

  if (count == 0)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
	     node->dump_name ());

  gather_context_independent_values (info, &known_csts, &known_contexts,
				     NULL, NULL);

  for (i = 0; i < count; i++)
    {
      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;

      if (!lat->bottom
	  && !known_csts[i])
	{
	  ipcp_value<tree> *val;
	  for (val = lat->values; val; val = val->next)
	    ret |= decide_about_value (node, i, -1, val, known_csts,
				       known_contexts);
	}

      if (!plats->aggs_bottom)
	{
	  struct ipcp_agg_lattice *aglat;
	  ipcp_value<tree> *val;
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    if (!aglat->bottom && aglat->values
		/* If the following is false, the one value is in
		   known_aggs.  */
		&& (plats->aggs_contain_variable
		    || !aglat->is_single_const ()))
	      for (val = aglat->values; val; val = val->next)
		ret |= decide_about_value (node, i, aglat->offset, val,
					   known_csts, known_contexts);
	}

      if (!ctxlat->bottom
	  && known_contexts[i].useless_p ())
	{
	  ipcp_value<ipa_polymorphic_call_context> *val;
	  for (val = ctxlat->values; val; val = val->next)
	    ret |= decide_about_value (node, i, -1, val, known_csts,
				       known_contexts);
	}

      info = IPA_NODE_REF (node);
    }

  if (info->do_clone_for_all_contexts)
    {
      struct cgraph_node *clone;
      vec<cgraph_edge *> callers = node->collect_callers ();

      for (int i = callers.length () - 1; i >= 0; i--)
	{
	  cgraph_edge *cs = callers[i];
	  class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);

	  if (caller_info && caller_info->node_dead)
	    callers.unordered_remove (i);
	}

      if (!adjust_callers_for_value_intersection (callers, node))
	{
	  /* If node is not called by anyone, or all its caller edges are
	     self-recursive, the node is not really in use, no need to do
	     cloning.  */
	  callers.release ();
	  known_csts.release ();
	  known_contexts.release ();
	  info->do_clone_for_all_contexts = false;
	  return ret;
	}

      if (dump_file)
	fprintf (dump_file, " - Creating a specialized node of %s "
		 "for all known contexts.\n", node->dump_name ());

      find_more_scalar_values_for_callers_subset (node, known_csts, callers);
      find_more_contexts_for_caller_subset (node, &known_contexts, callers);
      ipa_agg_replacement_value *aggvals
	= find_aggregate_values_for_callers_subset (node, callers);

      if (!known_contexts_useful_p (known_contexts))
	{
	  known_contexts.release ();
	  known_contexts = vNULL;
	}
      clone = create_specialized_node (node, known_csts, known_contexts,
				       aggvals, callers);
      info = IPA_NODE_REF (node);
      info->do_clone_for_all_contexts = false;
      IPA_NODE_REF (clone)->is_all_contexts_clone = true;
      ret = true;
    }
  else
    {
      known_csts.release ();
      known_contexts.release ();
    }

  return ret;
}

/* Transitively mark all callees of NODE within the same SCC as not dead.  */

static void
spread_undeadness (struct cgraph_node *node)
{
  struct cgraph_edge *cs;

  for (cs = node->callees; cs; cs = cs->next_callee)
    if (ipa_edge_within_scc (cs))
      {
	struct cgraph_node *callee;
	class ipa_node_params *info;

	callee = cs->callee->function_symbol (NULL);
	info = IPA_NODE_REF (callee);

	if (info && info->node_dead)
	  {
	    info->node_dead = 0;
	    spread_undeadness (callee);
	  }
      }
}

/* Return true if NODE has a caller from outside of its SCC that is not
   dead.  Worker callback for cgraph_for_node_and_aliases.  */

static bool
has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
				      void *data ATTRIBUTE_UNUSED)
{
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller->thunk.thunk_p
	&& cs->caller->call_for_symbol_thunks_and_aliases
	     (has_undead_caller_from_outside_scc_p, NULL, true))
      return true;
    else if (!ipa_edge_within_scc (cs)
	     && (!IPA_NODE_REF (cs->caller) /* Unoptimized caller.  */
		 || !IPA_NODE_REF (cs->caller)->node_dead))
      return true;
  return false;
}


/* Identify nodes within the same SCC as NODE which are no longer needed
   because of new clones and will be removed as unreachable.  */

static void
identify_dead_nodes (struct cgraph_node *node)
{
  struct cgraph_node *v;
  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
    if (v->local
	&& IPA_NODE_REF (v)
	&& !v->call_for_symbol_thunks_and_aliases
	      (has_undead_caller_from_outside_scc_p, NULL, true))
      IPA_NODE_REF (v)->node_dead = 1;

  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
    if (IPA_NODE_REF (v) && !IPA_NODE_REF (v)->node_dead)
      spread_undeadness (v);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
	if (IPA_NODE_REF (v) && IPA_NODE_REF (v)->node_dead)
	  fprintf (dump_file, " Marking node as dead: %s.\n",
		   v->dump_name ());
    }
}
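/* An illustrative note, not part of the pass itself: continuing the mul/f1
   sketch above, if f1 were the only caller of the static function mul, then
   after its call is redirected to the clone the original mul has no undead
   caller from outside its SCC, identify_dead_nodes marks it node_dead, and
   it is subsequently removed as unreachable (see TODO_remove_functions in
   the pass descriptor below).  */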
/* The decision stage.  Iterate over the topological order of call graph
   nodes TOPO and make specialized clones if deemed beneficial.  */

static void
ipcp_decision_stage (class ipa_topo_info *topo)
{
  int i;

  if (dump_file)
    fprintf (dump_file, "\nIPA decision stage:\n\n");

  for (i = topo->nnodes - 1; i >= 0; i--)
    {
      struct cgraph_node *node = topo->order[i];
      bool change = false, iterate = true;

      while (iterate)
	{
	  struct cgraph_node *v;
	  iterate = false;
	  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
	    if (v->has_gimple_body_p ()
		&& ipcp_versionable_function_p (v))
	      iterate |= decide_whether_version_node (v);

	  change |= iterate;
	}
      if (change)
	identify_dead_nodes (node);
    }
}

/* Look up all the bits information that we have discovered and copy it over
   to the transformation summary.  */

static void
ipcp_store_bits_results (void)
{
  cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = IPA_NODE_REF (node);
      bool dumped_sth = false;
      bool found_useful_result = false;

      if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info)
	{
	  if (dump_file)
	    fprintf (dump_file, "Not considering %s for ipa bitwise "
		     "propagation; -fipa-bit-cp is disabled.\n",
		     node->dump_name ());
	  continue;
	}

      if (info->ipcp_orig_node)
	info = IPA_NODE_REF (info->ipcp_orig_node);
      if (!info->lattices)
	/* Newly expanded artificial thunks do not have lattices.  */
	continue;

      unsigned count = ipa_get_param_count (info);
      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  if (plats->bits_lattice.constant_p ())
	    {
	      found_useful_result = true;
	      break;
	    }
	}

      if (!found_useful_result)
	continue;

      ipcp_transformation_initialize ();
      ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
      vec_safe_reserve_exact (ts->bits, count);

      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  ipa_bits *jfbits;

	  if (plats->bits_lattice.constant_p ())
	    jfbits
	      = ipa_get_ipa_bits_for_value (plats->bits_lattice.get_value (),
					    plats->bits_lattice.get_mask ());
	  else
	    jfbits = NULL;

	  ts->bits->quick_push (jfbits);
	  if (!dump_file || !jfbits)
	    continue;
	  if (!dumped_sth)
	    {
	      fprintf (dump_file, "Propagated bits info for function %s:\n",
		       node->dump_name ());
	      dumped_sth = true;
	    }
	  fprintf (dump_file, " param %i: value = ", i);
	  print_hex (jfbits->value, dump_file);
	  fprintf (dump_file, ", mask = ");
	  print_hex (jfbits->mask, dump_file);
	  fprintf (dump_file, "\n");
	}
    }
}
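/* An illustrative note, not part of the pass itself: in the VALUE/MASK pairs
   stored above, a zero bit in MASK means the corresponding bit of the
   parameter is known to equal the corresponding bit of VALUE in every
   context.  For instance, if all call sites pass a pointer with 4-byte
   alignment, the dump produced above would show something like

     param 0: value = 0x0, mask = 0xfffffffffffffffc

   i.e. the two least significant bits are known to be zero.  */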
/* Look up all VR information that we have discovered and copy it over
   to the transformation summary.  */

static void
ipcp_store_vr_results (void)
{
  cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = IPA_NODE_REF (node);
      bool found_useful_result = false;

      if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
	{
	  if (dump_file)
	    fprintf (dump_file, "Not considering %s for VR discovery "
		     "and propagation; -fipa-vrp is disabled.\n",
		     node->dump_name ());
	  continue;
	}

      if (info->ipcp_orig_node)
	info = IPA_NODE_REF (info->ipcp_orig_node);
      if (!info->lattices)
	/* Newly expanded artificial thunks do not have lattices.  */
	continue;

      unsigned count = ipa_get_param_count (info);
      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  if (!plats->m_value_range.bottom_p ()
	      && !plats->m_value_range.top_p ())
	    {
	      found_useful_result = true;
	      break;
	    }
	}
      if (!found_useful_result)
	continue;

      ipcp_transformation_initialize ();
      ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
      vec_safe_reserve_exact (ts->m_vr, count);

      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  ipa_vr vr;

	  if (!plats->m_value_range.bottom_p ()
	      && !plats->m_value_range.top_p ())
	    {
	      vr.known = true;
	      vr.type = plats->m_value_range.m_vr.kind ();
	      vr.min = wi::to_wide (plats->m_value_range.m_vr.min ());
	      vr.max = wi::to_wide (plats->m_value_range.m_vr.max ());
	    }
	  else
	    {
	      vr.known = false;
	      vr.type = VR_VARYING;
	      vr.min = vr.max = wi::zero (INT_TYPE_SIZE);
	    }
	  ts->m_vr->quick_push (vr);
	}
    }
}
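/* An illustrative note, not part of the pass itself: if every call site
   passes the first parameter a value from, say, the interval [1, 8], the
   range recorded above lets the later intraprocedural value range passes
   fold tests such as `param > 0` inside the callee without any further
   interprocedural knowledge.  */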
/* The IPCP driver.  */

static unsigned int
ipcp_driver (void)
{
  class ipa_topo_info topo;

  if (edge_clone_summaries == NULL)
    edge_clone_summaries = new edge_clone_summary_t (symtab);

  ipa_check_create_node_params ();
  ipa_check_create_edge_args ();
  clone_num_suffixes = new hash_map<const char *, unsigned>;

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA structures before propagation:\n");
      if (dump_flags & TDF_DETAILS)
	ipa_print_all_params (dump_file);
      ipa_print_all_jump_functions (dump_file);
    }

  /* Topological sort.  */
  build_toporder_info (&topo);
  /* Do the interprocedural propagation.  */
  ipcp_propagate_stage (&topo);
  /* Decide what constant propagation and cloning should be performed.  */
  ipcp_decision_stage (&topo);
  /* Store results of bits propagation.  */
  ipcp_store_bits_results ();
  /* Store results of value range propagation.  */
  ipcp_store_vr_results ();

  /* Free all IPCP structures.  */
  delete clone_num_suffixes;
  free_toporder_info (&topo);
  delete edge_clone_summaries;
  edge_clone_summaries = NULL;
  ipa_free_all_structures_after_ipa_cp ();
  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation end\n");
  return 0;
}

/* Initialization and computation of IPCP data structures.  This is the
   initial intraprocedural analysis of functions, which gathers information
   to be propagated later on.  */

static void
ipcp_generate_summary (void)
{
  struct cgraph_node *node;

  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation start:\n");
  ipa_register_cgraph_hooks ();

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    ipa_analyze_node (node);
}

namespace {

const pass_data pass_data_ipa_cp =
{
  IPA_PASS, /* type */
  "cp", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_CONSTANT_PROP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};

class pass_ipa_cp : public ipa_opt_pass_d
{
public:
  pass_ipa_cp (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
		      ipcp_generate_summary, /* generate_summary */
		      NULL, /* write_summary */
		      NULL, /* read_summary */
		      ipcp_write_transformation_summaries, /* write_optimization_summary */
		      ipcp_read_transformation_summaries, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      ipcp_transform_function, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    /* FIXME: We should remove the optimize check after we ensure we never run
       IPA passes when not optimizing.  */
    return (flag_ipa_cp && optimize) || in_lto_p;
  }

  virtual unsigned int execute (function *) { return ipcp_driver (); }

}; // class pass_ipa_cp

} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_cp (gcc::context *ctxt)
{
  return new pass_ipa_cp (ctxt);
}

/* Reset all state within ipa-cp.c so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_cp_c_finalize (void)
{
  max_count = profile_count::uninitialized ();
  overall_size = 0;
  orig_overall_size = 0;
  ipcp_free_transformation_sum ();
}
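/* A usage note rather than part of the implementation: the effect of this
   pass can be inspected with, e.g.,

     gcc -O2 -fdump-ipa-cp-details file.c

   which dumps the lattices, the cloning decisions made above and the
   propagated bits and value range information.  The unit size growth the
   decision stage tolerates is bounded by parameters such as
   --param ipa-cp-unit-growth, consulted via get_max_overall_size.  */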