/* Full and partial redundancy elimination and code hoisting on SSA GIMPLE.
   Copyright (C) 2001-2020 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org> and Steven Bosscher
   <stevenb@suse.de>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-ssa-sccvn.h"
#include "tree-scalar-evolution.h"
#include "dbgcnt.h"
#include "domwalk.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-dce.h"
#include "tree-cfgcleanup.h"
#include "alias.h"

/* Even though this file is called tree-ssa-pre.c, we actually
   implement a bit more than just PRE here.  All of them piggy-back
   on GVN which is implemented in tree-ssa-sccvn.c.

     1. Full Redundancy Elimination (FRE)
	This is the elimination phase of GVN.

     2. Partial Redundancy Elimination (PRE)
	This adds computation of AVAIL_OUT and ANTIC_IN and performs
	expression insertion to form GVN-PRE.

     3. Code hoisting
	This optimization uses the ANTIC_IN sets computed for PRE
	to move expressions further up than PRE would do, to make
	multiple computations of the same value fully redundant.
	This pass is explained below (after the explanation of the
	basic algorithm for PRE).  */

/* TODO:

   1. Avail sets can be shared by making an avail_find_leader that
      walks up the dominator tree and looks in those avail sets.
      This might affect code optimality, it's unclear right now.
      Currently the AVAIL_OUT sets are the remaining source of
      quadratic memory consumption in GVN-PRE.
   2. Strength reduction can be performed by anticipating expressions
      we can repair later on.
   3. We can do back-substitution or smarter value numbering to catch
      commutative expressions split up over multiple statements.  */

/* For ease of terminology, "expression node" in the below refers to
   every expression node but GIMPLE_ASSIGN, because GIMPLE_ASSIGNs
   represent the actual statement containing the expressions we care about,
   and we cache the value number by putting it in the expression.  */

/* Basic algorithm for Partial Redundancy Elimination:

   First we walk the statements to generate the AVAIL sets, the
   EXP_GEN sets, and the TMP_GEN sets.  EXP_GEN sets represent the
   generation of values/expressions by a given block.
   We use them when computing the ANTIC sets.  The AVAIL sets consist of
   SSA_NAMEs that represent values, so we know what values are
   available in what blocks.  AVAIL is a forward dataflow problem.  In
   SSA, values are never killed, so we don't need a kill set, or a
   fixpoint iteration, in order to calculate the AVAIL sets.  In
   traditional parlance, AVAIL sets tell us the downsafety of the
   expressions/values.

   Next, we generate the ANTIC sets.  These sets represent the
   anticipatable expressions.  ANTIC is a backwards dataflow
   problem.  An expression is anticipatable in a given block if it could
   be generated in that block.  This means that if we had to perform
   an insertion in that block, of the value of that expression, we
   could.  Calculating the ANTIC sets requires phi translation of
   expressions, because the flow goes backwards through phis.  We must
   iterate to a fixpoint of the ANTIC sets, because we have a kill
   set.  Even in SSA form, values are not live over the entire
   function, only from their definition point onwards.  So we have to
   remove values from the ANTIC set once we go past the definition
   point of the leaders that make them up.
   compute_antic/compute_antic_aux performs this computation.

   Third, we perform insertions to make partially redundant
   expressions fully redundant.

   An expression is partially redundant (excluding partial
   anticipation) if:

     1. It is AVAIL in some, but not all, of the predecessors of a
	given block.
     2. It is ANTIC in all the predecessors.

   In order to make it fully redundant, we insert the expression into
   the predecessors where it is not available, but is ANTIC.

   When optimizing for size, we only eliminate the partial redundancy
   if we need to insert in only one predecessor.  This avoids almost
   completely the code size increase that PRE usually causes.

   For the partial anticipation case, we only perform insertion if it
   is partially anticipated in some block, and fully available in all
   of the predecessors.

   do_pre_regular_insertion/do_pre_partial_partial_insertion
   perform these steps, driven by insert/insert_aux.

   Fourth, we eliminate fully redundant expressions.
   This is a simple statement walk that replaces redundant
   calculations with the now available values.  */
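
/* As an illustrative sketch (not part of the original commentary), the
   regular insertion case transforms a diamond like

	if (cond)
	  x = a + b;	<- a + b is AVAIL out of only one predecessor
	...
	y = a + b;	<- partially redundant: ANTIC in both
			   predecessors, AVAIL in just one

   by inserting a computation of a + b on the edge where it is missing
   and merging the two computations through a PHI, after which the
   elimination phase can reuse the PHI result for y.  */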

/* Basic algorithm for Code Hoisting:

   Code hoisting is: Moving value computations up in the control flow
   graph to make multiple copies redundant.  Typically this is a size
   optimization, but there are cases where it also is helpful for speed.

   A simple code hoisting algorithm is implemented that piggy-backs on
   the PRE infrastructure.  For code hoisting, we have to know ANTIC_OUT
   which is effectively ANTIC_IN - AVAIL_OUT.  The latter two have to be
   computed for PRE, and we can use them to perform a limited version of
   code hoisting, too.

   For the purpose of this implementation, a value is hoistable to a basic
   block B if the following properties are met:

   1. The value is in ANTIC_IN(B) -- the value will be computed on all
      paths from B to function exit and it can be computed in B;

   2. The value is not in AVAIL_OUT(B) -- there would be no need to
      compute the value again and make it available twice;

   3. All successors of B are dominated by B -- makes sure that inserting
      a computation of the value in B will make the remaining
      computations fully redundant;

   4. At least one successor has the value in AVAIL_OUT -- to avoid
      hoisting values up too far;

   5. There are at least two successors of B -- hoisting in straight
      line code is pointless.

   The third condition is not strictly necessary, but it would complicate
   the hoisting pass a lot.  In fact, I don't know of any code hoisting
   algorithm that does not have this requirement.  Fortunately, experiments
   have shown that most candidate hoistable values are in regions that meet
   this condition (e.g. diamond-shape regions).

   The fourth condition is necessary to avoid hoisting things up too far
   away from the uses of the value.  Nothing else limits the algorithm
   from hoisting everything up as far as ANTIC_IN allows.  Experiments
   with SPEC and CSiBE have shown that hoisting up too far results in more
   spilling, less benefits for code size, and worse benchmark scores.
   Fortunately, in practice most of the interesting hoisting opportunities
   are caught despite this limitation.

   For hoistable values that meet all conditions, expressions are inserted
   to make the calculation of the hoistable value fully redundant.  We
   perform code hoisting insertions after each round of PRE insertions,
   because code hoisting never exposes new PRE opportunities, but PRE can
   create new code hoisting opportunities.

   The code hoisting algorithm is implemented in do_hoist_insert, driven
   by insert/insert_aux.  */
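
/* A minimal illustration (not part of the original commentary):

		  B
		 / \
	x = a + b;   y = a + b;
		 \ /
		  C

   Here a + b is in ANTIC_IN(B) but not in AVAIL_OUT(B), both successors
   of B are dominated by B, and both have the value in AVAIL_OUT.
   Inserting a single computation of a + b in B makes both computations
   in the successors fully redundant.  */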

/* Representations of value numbers:

   Value numbers are represented by a representative SSA_NAME.  We
   will create fake SSA_NAME's in situations where we need a
   representative but do not have one (because it is a complex
   expression).  In order to facilitate storing the value numbers in
   bitmaps, and keep the number of wasted SSA_NAME's down, we also
   associate a value_id with each value number, and create full blown
   ssa_name's only where we actually need them (IE in operands of
   existing expressions).

   Theoretically you could replace all the value_id's with
   SSA_NAME_VERSION, but this would allocate a large number of
   SSA_NAME's (which are each > 30 bytes) just to get a 4 byte number.
   It would also require an additional indirection at each point we
   use the value id.  */

/* Representation of expressions on value numbers:

   Expressions consisting of value numbers are represented the same
   way as our VN internally represents them, with an additional
   "pre_expr" wrapping around them in order to facilitate storing all
   of the expressions in the same sets.  */

/* Representation of sets:

   The dataflow sets do not need to be sorted in any particular order
   for the majority of their lifetime, and are simply represented as two
   bitmaps, one that keeps track of values present in the set, and one
   that keeps track of expressions present in the set.

   When we need them in topological order, we produce it on demand by
   transforming the bitmap into an array and sorting it into topo
   order.  */

/* Type of expression, used to know which member of the PRE_EXPR union
   is valid.  */

enum pre_expr_kind
{
    NAME,
    NARY,
    REFERENCE,
    CONSTANT
};

union pre_expr_union
{
  tree name;
  tree constant;
  vn_nary_op_t nary;
  vn_reference_t reference;
};

typedef struct pre_expr_d : nofree_ptr_hash <pre_expr_d>
{
  enum pre_expr_kind kind;
  unsigned int id;
  location_t loc;
  pre_expr_union u;

  /* hash_table support.  */
  static inline hashval_t hash (const pre_expr_d *);
  static inline int equal (const pre_expr_d *, const pre_expr_d *);
} *pre_expr;

#define PRE_EXPR_NAME(e) (e)->u.name
#define PRE_EXPR_NARY(e) (e)->u.nary
#define PRE_EXPR_REFERENCE(e) (e)->u.reference
#define PRE_EXPR_CONSTANT(e) (e)->u.constant

/* Compare E1 and E2 for equality.  */

inline int
pre_expr_d::equal (const pre_expr_d *e1, const pre_expr_d *e2)
{
  if (e1->kind != e2->kind)
    return false;

  switch (e1->kind)
    {
    case CONSTANT:
      return vn_constant_eq_with_type (PRE_EXPR_CONSTANT (e1),
				       PRE_EXPR_CONSTANT (e2));
    case NAME:
      return PRE_EXPR_NAME (e1) == PRE_EXPR_NAME (e2);
    case NARY:
      return vn_nary_op_eq (PRE_EXPR_NARY (e1), PRE_EXPR_NARY (e2));
    case REFERENCE:
      return vn_reference_eq (PRE_EXPR_REFERENCE (e1),
			      PRE_EXPR_REFERENCE (e2));
    default:
      gcc_unreachable ();
    }
}

/* Hash E.  */

inline hashval_t
pre_expr_d::hash (const pre_expr_d *e)
{
  switch (e->kind)
    {
    case CONSTANT:
      return vn_hash_constant_with_type (PRE_EXPR_CONSTANT (e));
    case NAME:
      return SSA_NAME_VERSION (PRE_EXPR_NAME (e));
    case NARY:
      return PRE_EXPR_NARY (e)->hashcode;
    case REFERENCE:
      return PRE_EXPR_REFERENCE (e)->hashcode;
    default:
      gcc_unreachable ();
    }
}

/* Next global expression id number.  */
static unsigned int next_expression_id;

/* Mapping from expression to id number we can use in bitmap sets.  */
static vec<pre_expr> expressions;
static hash_table<pre_expr_d> *expression_to_id;
static vec<unsigned> name_to_id;

/* Allocate an expression id for EXPR.  */

static inline unsigned int
alloc_expression_id (pre_expr expr)
{
  struct pre_expr_d **slot;
  /* Make sure we won't overflow.  */
  gcc_assert (next_expression_id + 1 > next_expression_id);
  expr->id = next_expression_id++;
  expressions.safe_push (expr);
  if (expr->kind == NAME)
    {
      unsigned version = SSA_NAME_VERSION (PRE_EXPR_NAME (expr));
      /* vec::safe_grow_cleared allocates no headroom.  Avoid frequent
	 re-allocations by using vec::reserve upfront.  */
      unsigned old_len = name_to_id.length ();
      name_to_id.reserve (num_ssa_names - old_len);
      name_to_id.quick_grow_cleared (num_ssa_names);
      gcc_assert (name_to_id[version] == 0);
      name_to_id[version] = expr->id;
    }
  else
    {
      slot = expression_to_id->find_slot (expr, INSERT);
      gcc_assert (!*slot);
      *slot = expr;
    }
  return next_expression_id - 1;
}

/* Return the expression id of EXPR.  */

static inline unsigned int
get_expression_id (const pre_expr expr)
{
  return expr->id;
}

/* Return the existing expression id for EXPR, or 0 if it has not been
   assigned one yet.  */

static inline unsigned int
lookup_expression_id (const pre_expr expr)
{
  struct pre_expr_d **slot;

  if (expr->kind == NAME)
    {
      unsigned version = SSA_NAME_VERSION (PRE_EXPR_NAME (expr));
      if (name_to_id.length () <= version)
	return 0;
      return name_to_id[version];
    }
  else
    {
      slot = expression_to_id->find_slot (expr, NO_INSERT);
      if (!slot)
	return 0;
      return ((pre_expr)*slot)->id;
    }
}

/* Return the existing expression id for EXPR, or create one if one
   does not exist yet.  */

static inline unsigned int
get_or_alloc_expression_id (pre_expr expr)
{
  unsigned int id = lookup_expression_id (expr);
  if (id == 0)
    return alloc_expression_id (expr);
  return expr->id = id;
}

/* Return the expression that has expression id ID.  */

static inline pre_expr
expression_for_id (unsigned int id)
{
  return expressions[id];
}

static object_allocator<pre_expr_d> pre_expr_pool ("pre_expr nodes");

/* Given an SSA_NAME NAME, get or create a pre_expr to represent it.  */

static pre_expr
get_or_alloc_expr_for_name (tree name)
{
  struct pre_expr_d expr;
  pre_expr result;
  unsigned int result_id;

  expr.kind = NAME;
  expr.id = 0;
  PRE_EXPR_NAME (&expr) = name;
  result_id = lookup_expression_id (&expr);
  if (result_id != 0)
    return expression_for_id (result_id);

  result = pre_expr_pool.allocate ();
  result->kind = NAME;
  result->loc = UNKNOWN_LOCATION;
  PRE_EXPR_NAME (result) = name;
  alloc_expression_id (result);
  return result;
}

/* An unordered bitmap set.  One bitmap tracks values, the other,
   expressions.  */
typedef class bitmap_set
{
public:
  bitmap_head expressions;
  bitmap_head values;
} *bitmap_set_t;

#define FOR_EACH_EXPR_ID_IN_SET(set, id, bi)				\
  EXECUTE_IF_SET_IN_BITMAP (&(set)->expressions, 0, (id), (bi))

#define FOR_EACH_VALUE_ID_IN_SET(set, id, bi)				\
  EXECUTE_IF_SET_IN_BITMAP (&(set)->values, 0, (id), (bi))

/* Mapping from value id to expressions with that value_id.  */
static vec<bitmap> value_expressions;

/* Sets that we need to keep track of.  */
typedef struct bb_bitmap_sets
{
  /* The EXP_GEN set, which represents expressions/values generated in
     a basic block.  */
  bitmap_set_t exp_gen;

  /* The PHI_GEN set, which represents PHI results generated in a
     basic block.  */
  bitmap_set_t phi_gen;

  /* The TMP_GEN set, which represents results/temporaries generated
     in a basic block.  IE the LHS of an expression.  */
  bitmap_set_t tmp_gen;

  /* The AVAIL_OUT set, which represents which values are available in
     a given basic block.  */
  bitmap_set_t avail_out;

  /* The ANTIC_IN set, which represents which values are anticipatable
     in a given basic block.  */
  bitmap_set_t antic_in;

  /* The PA_IN set, which represents which values are
     partially anticipatable in a given basic block.  */
  bitmap_set_t pa_in;

  /* The NEW_SETS set, which is used during insertion to augment the
     AVAIL_OUT set of blocks with the new insertions performed during
     the current iteration.  */
  bitmap_set_t new_sets;

  /* A cache for value_dies_in_block_x.  */
  bitmap expr_dies;

  /* The live virtual operand on successor edges.  */
  tree vop_on_exit;

  /* True if we have visited this block during ANTIC calculation.  */
  unsigned int visited : 1;

  /* True when the block contains a call that might not return.  */
  unsigned int contains_may_not_return_call : 1;
} *bb_value_sets_t;

#define EXP_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->exp_gen
#define PHI_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->phi_gen
#define TMP_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->tmp_gen
#define AVAIL_OUT(BB) ((bb_value_sets_t) ((BB)->aux))->avail_out
#define ANTIC_IN(BB) ((bb_value_sets_t) ((BB)->aux))->antic_in
#define PA_IN(BB) ((bb_value_sets_t) ((BB)->aux))->pa_in
#define NEW_SETS(BB) ((bb_value_sets_t) ((BB)->aux))->new_sets
#define EXPR_DIES(BB) ((bb_value_sets_t) ((BB)->aux))->expr_dies
#define BB_VISITED(BB) ((bb_value_sets_t) ((BB)->aux))->visited
#define BB_MAY_NOTRETURN(BB) \
  ((bb_value_sets_t) ((BB)->aux))->contains_may_not_return_call
#define BB_LIVE_VOP_ON_EXIT(BB) ((bb_value_sets_t) ((BB)->aux))->vop_on_exit


/* This structure is used to keep track of statistics on what
   optimization PRE was able to perform.  */
static struct
{
  /* The number of new expressions/temporaries generated by PRE.  */
  int insertions;

  /* The number of inserts found due to partial anticipation.  */
  int pa_insert;

  /* The number of inserts made for code hoisting.  */
  int hoist_insert;

  /* The number of new PHI nodes added by PRE.  */
  int phis;
} pre_stats;

static bool do_partial_partial;
static pre_expr bitmap_find_leader (bitmap_set_t, unsigned int);
static void bitmap_value_insert_into_set (bitmap_set_t, pre_expr);
static void bitmap_value_replace_in_set (bitmap_set_t, pre_expr);
static void bitmap_set_copy (bitmap_set_t, bitmap_set_t);
static bool bitmap_set_contains_value (bitmap_set_t, unsigned int);
static void bitmap_insert_into_set (bitmap_set_t, pre_expr);
static bitmap_set_t bitmap_set_new (void);
static tree create_expression_by_pieces (basic_block, pre_expr, gimple_seq *,
					 tree);
static tree find_or_generate_expression (basic_block, tree, gimple_seq *);
static unsigned int get_expr_value_id (pre_expr);

/* We can add and remove elements and entries to and from sets
   and hash tables, so we use alloc pools for them.  */

static object_allocator<bitmap_set> bitmap_set_pool ("Bitmap sets");
static bitmap_obstack grand_bitmap_obstack;

/* A three-tuple {e, pred, v} used to cache phi translations in the
   phi_translate_table.  */

typedef struct expr_pred_trans_d : free_ptr_hash<expr_pred_trans_d>
{
  /* The expression.  */
  pre_expr e;

  /* The predecessor block along which we translated the expression.  */
  basic_block pred;

  /* The value that resulted from the translation.  */
  pre_expr v;

  /* The hashcode for the expression, pred pair.  This is cached for
     speed reasons.  */
  hashval_t hashcode;

  /* hash_table support.  */
  static inline hashval_t hash (const expr_pred_trans_d *);
  static inline int equal (const expr_pred_trans_d *,
			   const expr_pred_trans_d *);
} *expr_pred_trans_t;
typedef const struct expr_pred_trans_d *const_expr_pred_trans_t;

inline hashval_t
expr_pred_trans_d::hash (const expr_pred_trans_d *e)
{
  return e->hashcode;
}

inline int
expr_pred_trans_d::equal (const expr_pred_trans_d *ve1,
			  const expr_pred_trans_d *ve2)
{
  basic_block b1 = ve1->pred;
  basic_block b2 = ve2->pred;

  /* If they are not translations for the same basic block, they can't
     be equal.  */
  if (b1 != b2)
    return false;
  return pre_expr_d::equal (ve1->e, ve2->e);
}

/* The phi_translate_table caches phi translations for a given
   expression and predecessor.  */
static hash_table<expr_pred_trans_d> *phi_translate_table;

/* Add the tuple mapping from {expression E, basic block PRED} to
   the phi translation table and return whether it pre-existed.  */

static inline bool
phi_trans_add (expr_pred_trans_t *entry, pre_expr e, basic_block pred)
{
  expr_pred_trans_t *slot;
  expr_pred_trans_d tem;
  hashval_t hash = iterative_hash_hashval_t (pre_expr_d::hash (e),
					     pred->index);
  tem.e = e;
  tem.pred = pred;
  tem.hashcode = hash;
  slot = phi_translate_table->find_slot_with_hash (&tem, hash, INSERT);
  if (*slot)
    {
      *entry = *slot;
      return true;
    }

  *entry = *slot = XNEW (struct expr_pred_trans_d);
  (*entry)->e = e;
  (*entry)->pred = pred;
  (*entry)->hashcode = hash;
  return false;
}


/* Add expression E to the expression set of value id V.  */

static void
add_to_value (unsigned int v, pre_expr e)
{
  bitmap set;

  gcc_checking_assert (get_expr_value_id (e) == v);

  if (v >= value_expressions.length ())
    value_expressions.safe_grow_cleared (v + 1);

  set = value_expressions[v];
  if (!set)
    {
      set = BITMAP_ALLOC (&grand_bitmap_obstack);
      value_expressions[v] = set;
    }

  bitmap_set_bit (set, get_or_alloc_expression_id (e));
}

/* Create a new bitmap set and return it.  */

static bitmap_set_t
bitmap_set_new (void)
{
  bitmap_set_t ret = bitmap_set_pool.allocate ();
  bitmap_initialize (&ret->expressions, &grand_bitmap_obstack);
  bitmap_initialize (&ret->values, &grand_bitmap_obstack);
  return ret;
}

/* Return the value id for a PRE expression EXPR.  */

static unsigned int
get_expr_value_id (pre_expr expr)
{
  unsigned int id;
  switch (expr->kind)
    {
    case CONSTANT:
      id = get_constant_value_id (PRE_EXPR_CONSTANT (expr));
      break;
    case NAME:
      id = VN_INFO (PRE_EXPR_NAME (expr))->value_id;
      break;
    case NARY:
      gcc_assert (!PRE_EXPR_NARY (expr)->predicated_values);
      id = PRE_EXPR_NARY (expr)->value_id;
      break;
    case REFERENCE:
      id = PRE_EXPR_REFERENCE (expr)->value_id;
      break;
    default:
      gcc_unreachable ();
    }
  /* ??? We cannot assert that EXPR has a value-id (it can be 0), because
     we assign value-ids only to expressions that have a result
     in set_hashtable_value_ids.  */
  return id;
}

/* Return a VN valnum (SSA name or constant) for the PRE value-id VAL.  */

static tree
vn_valnum_from_value_id (unsigned int val)
{
  bitmap_iterator bi;
  unsigned int i;
  bitmap exprset = value_expressions[val];
  EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
    {
      pre_expr vexpr = expression_for_id (i);
      if (vexpr->kind == NAME)
	return VN_INFO (PRE_EXPR_NAME (vexpr))->valnum;
      else if (vexpr->kind == CONSTANT)
	return PRE_EXPR_CONSTANT (vexpr);
    }
  return NULL_TREE;
}

/* Insert an expression EXPR into a bitmapped set.  */

static void
bitmap_insert_into_set (bitmap_set_t set, pre_expr expr)
{
  unsigned int val = get_expr_value_id (expr);
  if (! value_id_constant_p (val))
    {
      /* Note this is the only function causing multiple expressions
	 for the same value to appear in a set.  This is needed for
	 TMP_GEN, PHI_GEN and NEW_SETs.  */
      bitmap_set_bit (&set->values, val);
      bitmap_set_bit (&set->expressions, get_or_alloc_expression_id (expr));
    }
}

/* Copy the bitmapped set ORIG into the bitmapped set DEST.  */

static void
bitmap_set_copy (bitmap_set_t dest, bitmap_set_t orig)
{
  bitmap_copy (&dest->expressions, &orig->expressions);
  bitmap_copy (&dest->values, &orig->values);
}


/* Free memory used up by SET.  */
static void
bitmap_set_free (bitmap_set_t set)
{
  bitmap_clear (&set->expressions);
  bitmap_clear (&set->values);
}


/* Generate a topologically-ordered array of bitmap set SET.  */

static vec<pre_expr>
sorted_array_from_bitmap_set (bitmap_set_t set)
{
  unsigned int i, j;
  bitmap_iterator bi, bj;
  vec<pre_expr> result;

  /* Pre-allocate enough space for the array.  */
  result.create (bitmap_count_bits (&set->expressions));

  FOR_EACH_VALUE_ID_IN_SET (set, i, bi)
    {
      /* The number of expressions having a given value is usually
	 relatively small.  Thus, rather than making a vector of all
	 the expressions and sorting it by value-id, we walk the values
	 and check in the reverse mapping that tells us what expressions
	 have a given value, to filter those in our set.  As a result,
	 the expressions are inserted in value-id order, which means
	 topological order.

	 If this is somehow a significant loss for some cases, we can
	 choose which set to walk based on the set size.  */
      bitmap exprset = value_expressions[i];
      EXECUTE_IF_SET_IN_BITMAP (exprset, 0, j, bj)
	{
	  if (bitmap_bit_p (&set->expressions, j))
	    result.quick_push (expression_for_id (j));
	}
    }

  return result;
}

/* Subtract all expressions contained in ORIG from DEST.  */

static bitmap_set_t
bitmap_set_subtract_expressions (bitmap_set_t dest, bitmap_set_t orig)
{
  bitmap_set_t result = bitmap_set_new ();
  bitmap_iterator bi;
  unsigned int i;

  bitmap_and_compl (&result->expressions, &dest->expressions,
		    &orig->expressions);

  FOR_EACH_EXPR_ID_IN_SET (result, i, bi)
    {
      pre_expr expr = expression_for_id (i);
      unsigned int value_id = get_expr_value_id (expr);
      bitmap_set_bit (&result->values, value_id);
    }

  return result;
}

/* Subtract all values in bitmap set B from bitmap set A.  */

static void
bitmap_set_subtract_values (bitmap_set_t a, bitmap_set_t b)
{
  unsigned int i;
  bitmap_iterator bi;
  unsigned to_remove = -1U;
  bitmap_and_compl_into (&a->values, &b->values);
  FOR_EACH_EXPR_ID_IN_SET (a, i, bi)
    {
      if (to_remove != -1U)
	{
	  bitmap_clear_bit (&a->expressions, to_remove);
	  to_remove = -1U;
	}
      pre_expr expr = expression_for_id (i);
      if (! bitmap_bit_p (&a->values, get_expr_value_id (expr)))
	to_remove = i;
    }
  if (to_remove != -1U)
    bitmap_clear_bit (&a->expressions, to_remove);
}


/* Return true if bitmapped set SET contains the value VALUE_ID.  */

static bool
bitmap_set_contains_value (bitmap_set_t set, unsigned int value_id)
{
  if (value_id_constant_p (value_id))
    return true;

  return bitmap_bit_p (&set->values, value_id);
}

/* Return true if two bitmap sets are equal.  */

static bool
bitmap_set_equal (bitmap_set_t a, bitmap_set_t b)
{
  return bitmap_equal_p (&a->values, &b->values);
}

/* Replace an instance of EXPR's VALUE with EXPR in SET if it exists,
   and add it otherwise.  */

static void
bitmap_value_replace_in_set (bitmap_set_t set, pre_expr expr)
{
  unsigned int val = get_expr_value_id (expr);
  if (value_id_constant_p (val))
    return;

  if (bitmap_set_contains_value (set, val))
    {
      /* The number of expressions having a given value is usually
	 significantly less than the total number of expressions in SET.
	 Thus, rather than check, for each expression in SET, whether it
	 has the value LOOKFOR, we walk the reverse mapping that tells us
	 what expressions have a given value, and see if any of those
	 expressions are in our set.  For large testcases, this is about
	 5-10x faster than walking the bitmap.  If this is somehow a
	 significant loss for some cases, we can choose which set to walk
	 based on the set size.  */
      unsigned int i;
      bitmap_iterator bi;
      bitmap exprset = value_expressions[val];
      EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
	{
	  if (bitmap_clear_bit (&set->expressions, i))
	    {
	      bitmap_set_bit (&set->expressions, get_expression_id (expr));
	      return;
	    }
	}
      gcc_unreachable ();
    }
  else
    bitmap_insert_into_set (set, expr);
}

/* Insert EXPR into SET if EXPR's value is not already present in
   SET.  */

static void
bitmap_value_insert_into_set (bitmap_set_t set, pre_expr expr)
{
  unsigned int val = get_expr_value_id (expr);

  gcc_checking_assert (expr->id == get_or_alloc_expression_id (expr));

  /* Constant values are always considered to be part of the set.  */
  if (value_id_constant_p (val))
    return;

  /* If the value membership changed, add the expression.  */
  if (bitmap_set_bit (&set->values, val))
    bitmap_set_bit (&set->expressions, expr->id);
}

/* Print out EXPR to OUTFILE.  */

static void
print_pre_expr (FILE *outfile, const pre_expr expr)
{
  if (!expr)
    {
      fprintf (outfile, "NULL");
      return;
    }
  switch (expr->kind)
    {
    case CONSTANT:
      print_generic_expr (outfile, PRE_EXPR_CONSTANT (expr));
      break;
    case NAME:
      print_generic_expr (outfile, PRE_EXPR_NAME (expr));
      break;
    case NARY:
      {
	unsigned int i;
	vn_nary_op_t nary = PRE_EXPR_NARY (expr);
	fprintf (outfile, "{%s,", get_tree_code_name (nary->opcode));
	for (i = 0; i < nary->length; i++)
	  {
	    print_generic_expr (outfile, nary->op[i]);
	    if (i != (unsigned) nary->length - 1)
	      fprintf (outfile, ",");
	  }
	fprintf (outfile, "}");
      }
      break;

    case REFERENCE:
      {
	vn_reference_op_t vro;
	unsigned int i;
	vn_reference_t ref = PRE_EXPR_REFERENCE (expr);
	fprintf (outfile, "{");
	for (i = 0; ref->operands.iterate (i, &vro); i++)
	  {
	    bool closebrace = false;
	    if (vro->opcode != SSA_NAME
		&& TREE_CODE_CLASS (vro->opcode) != tcc_declaration)
	      {
		fprintf (outfile, "%s", get_tree_code_name (vro->opcode));
		if (vro->op0)
		  {
		    fprintf (outfile, "<");
		    closebrace = true;
		  }
	      }
	    if (vro->op0)
	      {
		print_generic_expr (outfile, vro->op0);
		if (vro->op1)
		  {
		    fprintf (outfile, ",");
		    print_generic_expr (outfile, vro->op1);
		  }
		if (vro->op2)
		  {
		    fprintf (outfile, ",");
		    print_generic_expr (outfile, vro->op2);
		  }
	      }
	    if (closebrace)
	      fprintf (outfile, ">");
	    if (i != ref->operands.length () - 1)
	      fprintf (outfile, ",");
	  }
	fprintf (outfile, "}");
	if (ref->vuse)
	  {
	    fprintf (outfile, "@");
	    print_generic_expr (outfile, ref->vuse);
	  }
      }
      break;
    }
}
void debug_pre_expr (pre_expr);

/* Like print_pre_expr but always prints to stderr.  */
DEBUG_FUNCTION void
debug_pre_expr (pre_expr e)
{
  print_pre_expr (stderr, e);
  fprintf (stderr, "\n");
}

/* Print out SET to OUTFILE.  */

static void
print_bitmap_set (FILE *outfile, bitmap_set_t set,
		  const char *setname, int blockindex)
{
  fprintf (outfile, "%s[%d] := { ", setname, blockindex);
  if (set)
    {
      bool first = true;
      unsigned i;
      bitmap_iterator bi;

      FOR_EACH_EXPR_ID_IN_SET (set, i, bi)
	{
	  const pre_expr expr = expression_for_id (i);

	  if (!first)
	    fprintf (outfile, ", ");
	  first = false;
	  print_pre_expr (outfile, expr);

	  fprintf (outfile, " (%04d)", get_expr_value_id (expr));
	}
    }
  fprintf (outfile, " }\n");
}

void debug_bitmap_set (bitmap_set_t);

DEBUG_FUNCTION void
debug_bitmap_set (bitmap_set_t set)
{
  print_bitmap_set (stderr, set, "debug", 0);
}

void debug_bitmap_sets_for (basic_block);

DEBUG_FUNCTION void
debug_bitmap_sets_for (basic_block bb)
{
  print_bitmap_set (stderr, AVAIL_OUT (bb), "avail_out", bb->index);
  print_bitmap_set (stderr, EXP_GEN (bb), "exp_gen", bb->index);
  print_bitmap_set (stderr, PHI_GEN (bb), "phi_gen", bb->index);
  print_bitmap_set (stderr, TMP_GEN (bb), "tmp_gen", bb->index);
  print_bitmap_set (stderr, ANTIC_IN (bb), "antic_in", bb->index);
  if (do_partial_partial)
    print_bitmap_set (stderr, PA_IN (bb), "pa_in", bb->index);
  print_bitmap_set (stderr, NEW_SETS (bb), "new_sets", bb->index);
}

/* Print out the expressions that have VAL to OUTFILE.  */

static void
print_value_expressions (FILE *outfile, unsigned int val)
{
  bitmap set = value_expressions[val];
  if (set)
    {
      bitmap_set x;
      char s[10];
      sprintf (s, "%04d", val);
      x.expressions = *set;
      print_bitmap_set (outfile, &x, s, 0);
    }
}


DEBUG_FUNCTION void
debug_value_expressions (unsigned int val)
{
  print_value_expressions (stderr, val);
}

/* Given a CONSTANT, allocate a new CONSTANT type PRE_EXPR to
   represent it.  */

static pre_expr
get_or_alloc_expr_for_constant (tree constant)
{
  unsigned int result_id;
  unsigned int value_id;
  struct pre_expr_d expr;
  pre_expr newexpr;

  expr.kind = CONSTANT;
  PRE_EXPR_CONSTANT (&expr) = constant;
  result_id = lookup_expression_id (&expr);
  if (result_id != 0)
    return expression_for_id (result_id);

  newexpr = pre_expr_pool.allocate ();
  newexpr->kind = CONSTANT;
  newexpr->loc = UNKNOWN_LOCATION;
  PRE_EXPR_CONSTANT (newexpr) = constant;
  alloc_expression_id (newexpr);
  value_id = get_or_alloc_constant_value_id (constant);
  add_to_value (value_id, newexpr);
  return newexpr;
}

/* Get or allocate a pre_expr for a piece of GIMPLE, and return it.
   Currently only supports constants and SSA_NAMEs.  */
static pre_expr
get_or_alloc_expr_for (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return get_or_alloc_expr_for_name (t);
  else if (is_gimple_min_invariant (t))
    return get_or_alloc_expr_for_constant (t);
  gcc_unreachable ();
}

/* Return the folded version of T if T, when folded, is a gimple
   min_invariant or an SSA name.  Otherwise, return T.  */

static pre_expr
fully_constant_expression (pre_expr e)
{
  switch (e->kind)
    {
    case CONSTANT:
      return e;
    case NARY:
      {
	vn_nary_op_t nary = PRE_EXPR_NARY (e);
	tree res = vn_nary_simplify (nary);
	if (!res)
	  return e;
	if (is_gimple_min_invariant (res))
	  return get_or_alloc_expr_for_constant (res);
	if (TREE_CODE (res) == SSA_NAME)
	  return get_or_alloc_expr_for_name (res);
	return e;
      }
    case REFERENCE:
      {
	vn_reference_t ref = PRE_EXPR_REFERENCE (e);
	tree folded;
	if ((folded = fully_constant_vn_reference_p (ref)))
	  return get_or_alloc_expr_for_constant (folded);
	return e;
      }
    default:
      return e;
    }
}

/* Translate the VUSE backwards through phi nodes in PHIBLOCK, so that
   it has the value it would have in BLOCK.  Set *SAME_VALID to true
   in case the new vuse doesn't change the value id of the OPERANDS.  */
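
/* An assumed illustration (not from the original commentary): for a
   load of *p_2 with VUSE .MEM_5, where PHIBLOCK contains
   .MEM_5 = PHI <.MEM_4 (pred), ...>, the vuse translates to .MEM_4 on
   the edge from BLOCK; when only non-clobbering statements separate the
   load from a dominating VUSE, the alias-oracle walk skips over them and
   the value id can stay the same.  */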

static tree
translate_vuse_through_block (vec<vn_reference_op_s> operands,
			      alias_set_type set, alias_set_type base_set,
			      tree type, tree vuse,
			      basic_block phiblock,
			      basic_block block, bool *same_valid)
{
  gimple *phi = SSA_NAME_DEF_STMT (vuse);
  ao_ref ref;
  edge e = NULL;
  bool use_oracle;

  if (same_valid)
    *same_valid = true;

  if (gimple_bb (phi) != phiblock)
    return vuse;

  unsigned int cnt = param_sccvn_max_alias_queries_per_access;
  use_oracle = ao_ref_init_from_vn_reference (&ref, set, base_set,
					      type, operands);

  /* Use the alias-oracle to find either the PHI node in this block,
     the first VUSE used in this block that is equivalent to vuse or
     the first VUSE which definition in this block kills the value.  */
  if (gimple_code (phi) == GIMPLE_PHI)
    e = find_edge (block, phiblock);
  else if (use_oracle)
    while (cnt > 0
	   && !stmt_may_clobber_ref_p_1 (phi, &ref))
      {
	--cnt;
	vuse = gimple_vuse (phi);
	phi = SSA_NAME_DEF_STMT (vuse);
	if (gimple_bb (phi) != phiblock)
	  return vuse;
	if (gimple_code (phi) == GIMPLE_PHI)
	  {
	    e = find_edge (block, phiblock);
	    break;
	  }
      }
  else
    return NULL_TREE;

  if (e)
    {
      if (use_oracle && same_valid)
	{
	  bitmap visited = NULL;
	  /* Try to find a vuse that dominates this phi node by skipping
	     non-clobbering statements.  */
	  vuse = get_continuation_for_phi (phi, &ref, true,
					   cnt, &visited, false, NULL, NULL);
	  if (visited)
	    BITMAP_FREE (visited);
	}
      else
	vuse = NULL_TREE;
      /* If we didn't find any, the value ID can't stay the same.  */
      if (!vuse && same_valid)
	*same_valid = false;
      /* ??? We would like to return vuse here as this is the canonical
	 upmost vdef that this reference is associated with.  But during
	 insertion of the references into the hash tables we only ever
	 directly insert with their direct gimple_vuse, hence returning
	 something else would make us not find the other expression.  */
      return PHI_ARG_DEF (phi, e->dest_idx);
    }

  return NULL_TREE;
}

/* Like bitmap_find_leader, but checks for the value existing in SET1 *or*
   SET2 *or* SET3.  This is used to avoid making a set consisting of the
   union of PA_IN and ANTIC_IN during insert and phi-translation.  */

static inline pre_expr
find_leader_in_sets (unsigned int val, bitmap_set_t set1, bitmap_set_t set2,
		     bitmap_set_t set3 = NULL)
{
  pre_expr result = NULL;

  if (set1)
    result = bitmap_find_leader (set1, val);
  if (!result && set2)
    result = bitmap_find_leader (set2, val);
  if (!result && set3)
    result = bitmap_find_leader (set3, val);
  return result;
}

/* Get the tree type for our PRE expression E.  */

static tree
get_expr_type (const pre_expr e)
{
  switch (e->kind)
    {
    case NAME:
      return TREE_TYPE (PRE_EXPR_NAME (e));
    case CONSTANT:
      return TREE_TYPE (PRE_EXPR_CONSTANT (e));
    case REFERENCE:
      return PRE_EXPR_REFERENCE (e)->type;
    case NARY:
      return PRE_EXPR_NARY (e)->type;
    }
  gcc_unreachable ();
}

/* Get a representative SSA_NAME for a given expression that is available
   in B.  Since all of our sub-expressions are treated as values, we require
   them to be SSA_NAME's for simplicity.
   Prior versions of GVNPRE used to use "value handles" here, so that
   an expression would be VH.11 + VH.10 instead of d_3 + e_6.  In
   either case, the operands are really values (IE we do not expect
   them to be usable without finding leaders).  */

static tree
get_representative_for (const pre_expr e, basic_block b = NULL)
{
  tree name, valnum = NULL_TREE;
  unsigned int value_id = get_expr_value_id (e);

  switch (e->kind)
    {
    case NAME:
      return PRE_EXPR_NAME (e);
    case CONSTANT:
      return PRE_EXPR_CONSTANT (e);
    case NARY:
    case REFERENCE:
      {
	/* Go through all of the expressions representing this value
	   and pick out an SSA_NAME.  */
	unsigned int i;
	bitmap_iterator bi;
	bitmap exprs = value_expressions[value_id];
	EXECUTE_IF_SET_IN_BITMAP (exprs, 0, i, bi)
	  {
	    pre_expr rep = expression_for_id (i);
	    if (rep->kind == NAME)
	      {
		tree name = PRE_EXPR_NAME (rep);
		valnum = VN_INFO (name)->valnum;
		gimple *def = SSA_NAME_DEF_STMT (name);
		/* We have to return either a new representative or one
		   that can be used for expression simplification and thus
		   is available in B.  */
		if (! b
		    || gimple_nop_p (def)
		    || dominated_by_p (CDI_DOMINATORS, b, gimple_bb (def)))
		  return name;
	      }
	    else if (rep->kind == CONSTANT)
	      return PRE_EXPR_CONSTANT (rep);
	  }
      }
      break;
    }

  /* If we reached here we couldn't find an SSA_NAME.  This can
     happen when we've discovered a value that has never appeared in
     the program as set to an SSA_NAME, as the result of phi translation.
     Create one here.
     ??? We should be able to re-use this when we insert the statement
     to compute it.  */
  name = make_temp_ssa_name (get_expr_type (e), gimple_build_nop (), "pretmp");
  VN_INFO (name)->value_id = value_id;
  VN_INFO (name)->valnum = valnum ? valnum : name;
  /* ??? For now mark this SSA name for release by VN.  */
  VN_INFO (name)->needs_insertion = true;
  add_to_value (value_id, get_or_alloc_expr_for_name (name));
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Created SSA_NAME representative ");
      print_generic_expr (dump_file, name);
      fprintf (dump_file, " for expression:");
      print_pre_expr (dump_file, e);
      fprintf (dump_file, " (%04d)\n", value_id);
    }

  return name;
}


static pre_expr
phi_translate (bitmap_set_t, pre_expr, bitmap_set_t, bitmap_set_t, edge);

/* Translate EXPR using phis in PHIBLOCK, so that it has the values of
   the phis in PRED.  Return NULL if we can't find a leader for each part
   of the translated expression.  */
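
/* An illustrative sketch (not from the original commentary): if PHIBLOCK
   contains x_1 = PHI <x_2 (pred), x_3 (other)>, translating the NARY
   x_1 + y_4 through the edge from PRED replaces x_1 by x_2, yielding
   x_2 + y_4 as the expression valued in PRED, provided leaders for all
   operands can be found there.  */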

static pre_expr
phi_translate_1 (bitmap_set_t dest,
		 pre_expr expr, bitmap_set_t set1, bitmap_set_t set2, edge e)
{
  basic_block pred = e->src;
  basic_block phiblock = e->dest;
  location_t expr_loc = expr->loc;
  switch (expr->kind)
    {
    case NARY:
      {
	unsigned int i;
	bool changed = false;
	vn_nary_op_t nary = PRE_EXPR_NARY (expr);
	vn_nary_op_t newnary = XALLOCAVAR (struct vn_nary_op_s,
					   sizeof_vn_nary_op (nary->length));
	memcpy (newnary, nary, sizeof_vn_nary_op (nary->length));

	for (i = 0; i < newnary->length; i++)
	  {
	    if (TREE_CODE (newnary->op[i]) != SSA_NAME)
	      continue;
	    else
	      {
		pre_expr leader, result;
		unsigned int op_val_id = VN_INFO (newnary->op[i])->value_id;
		leader = find_leader_in_sets (op_val_id, set1, set2);
		result = phi_translate (dest, leader, set1, set2, e);
		if (result && result != leader)
		  /* If op has a leader in the sets we translate make
		     sure to use the value of the translated expression.
		     We might need a new representative for that.  */
		  newnary->op[i] = get_representative_for (result, pred);
		else if (!result)
		  return NULL;

		changed |= newnary->op[i] != nary->op[i];
	      }
	  }
	if (changed)
	  {
	    pre_expr constant;
	    unsigned int new_val_id;

	    PRE_EXPR_NARY (expr) = newnary;
	    constant = fully_constant_expression (expr);
	    PRE_EXPR_NARY (expr) = nary;
	    if (constant != expr)
	      {
		/* For non-CONSTANTs we have to make sure we can eventually
		   insert the expression.  Which means we need to have a
		   leader for it.  */
		if (constant->kind != CONSTANT)
		  {
		    /* Do not allow simplifications to non-constants over
		       backedges as this will likely result in a loop PHI node
		       to be inserted and increased register pressure.
		       See PR77498 - this avoids doing predcoms work in
		       a less efficient way.  */
		    if (e->flags & EDGE_DFS_BACK)
		      ;
		    else
		      {
			unsigned value_id = get_expr_value_id (constant);
			/* We want a leader in ANTIC_OUT or AVAIL_OUT here.
			   dest has what we computed into ANTIC_OUT so far,
			   so pick from that - since topological sorting
			   by sorted_array_from_bitmap_set isn't perfect
			   we may lose some cases here.  */
			constant = find_leader_in_sets (value_id, dest,
							AVAIL_OUT (pred));
			if (constant)
			  {
			    if (dump_file && (dump_flags & TDF_DETAILS))
			      {
				fprintf (dump_file, "simplifying ");
				print_pre_expr (dump_file, expr);
				fprintf (dump_file, " translated %d -> %d to ",
					 phiblock->index, pred->index);
				PRE_EXPR_NARY (expr) = newnary;
				print_pre_expr (dump_file, expr);
				PRE_EXPR_NARY (expr) = nary;
				fprintf (dump_file, " to ");
				print_pre_expr (dump_file, constant);
				fprintf (dump_file, "\n");
			      }
			    return constant;
			  }
		      }
		  }
		else
		  return constant;
	      }

	    /* vn_nary_* do not valueize operands.  */
	    for (i = 0; i < newnary->length; ++i)
	      if (TREE_CODE (newnary->op[i]) == SSA_NAME)
		newnary->op[i] = VN_INFO (newnary->op[i])->valnum;
	    tree result = vn_nary_op_lookup_pieces (newnary->length,
						    newnary->opcode,
						    newnary->type,
						    &newnary->op[0],
						    &nary);
	    if (result && is_gimple_min_invariant (result))
	      return get_or_alloc_expr_for_constant (result);

	    expr = pre_expr_pool.allocate ();
	    expr->kind = NARY;
	    expr->id = 0;
	    expr->loc = expr_loc;
	    if (nary && !nary->predicated_values)
	      {
		PRE_EXPR_NARY (expr) = nary;
		new_val_id = nary->value_id;
		get_or_alloc_expression_id (expr);
	      }
	    else
	      {
		new_val_id = get_next_value_id ();
		value_expressions.safe_grow_cleared (get_max_value_id () + 1);
		nary = vn_nary_op_insert_pieces (newnary->length,
						 newnary->opcode,
						 newnary->type,
						 &newnary->op[0],
						 result, new_val_id);
		PRE_EXPR_NARY (expr) = nary;
		get_or_alloc_expression_id (expr);
	      }
	    add_to_value (new_val_id, expr);
	  }
	return expr;
      }
      break;

    case REFERENCE:
      {
	vn_reference_t ref = PRE_EXPR_REFERENCE (expr);
	vec<vn_reference_op_s> operands = ref->operands;
	tree vuse = ref->vuse;
	tree newvuse = vuse;
	vec<vn_reference_op_s> newoperands = vNULL;
	bool changed = false, same_valid = true;
	unsigned int i, n;
	vn_reference_op_t operand;
	vn_reference_t newref;

	for (i = 0; operands.iterate (i, &operand); i++)
	  {
	    pre_expr opresult;
	    pre_expr leader;
	    tree op[3];
	    tree type = operand->type;
	    vn_reference_op_s newop = *operand;
	    op[0] = operand->op0;
	    op[1] = operand->op1;
	    op[2] = operand->op2;
	    for (n = 0; n < 3; ++n)
	      {
		unsigned int op_val_id;
		if (!op[n])
		  continue;
		if (TREE_CODE (op[n]) != SSA_NAME)
		  {
		    /* We can't possibly insert these.  */
		    if (n != 0
			&& !is_gimple_min_invariant (op[n]))
		      break;
		    continue;
		  }
		op_val_id = VN_INFO (op[n])->value_id;
		leader = find_leader_in_sets (op_val_id, set1, set2);
		opresult = phi_translate (dest, leader, set1, set2, e);
		if (opresult && opresult != leader)
		  {
		    tree name = get_representative_for (opresult);
		    changed |= name != op[n];
		    op[n] = name;
		  }
		else if (!opresult)
		  break;
	      }
	    if (n != 3)
	      {
		newoperands.release ();
		return NULL;
	      }
	    if (!changed)
	      continue;
	    if (!newoperands.exists ())
	      newoperands = operands.copy ();
	    /* We may have changed from an SSA_NAME to a constant.  */
	    if (newop.opcode == SSA_NAME && TREE_CODE (op[0]) != SSA_NAME)
	      newop.opcode = TREE_CODE (op[0]);
	    newop.type = type;
	    newop.op0 = op[0];
	    newop.op1 = op[1];
	    newop.op2 = op[2];
	    newoperands[i] = newop;
	  }
	gcc_checking_assert (i == operands.length ());

	if (vuse)
	  {
	    newvuse = translate_vuse_through_block (newoperands.exists ()
						    ? newoperands : operands,
						    ref->set, ref->base_set,
						    ref->type,
						    vuse, phiblock, pred,
						    changed
						    ? NULL : &same_valid);
	    if (newvuse == NULL_TREE)
	      {
		newoperands.release ();
		return NULL;
	      }
	  }

	if (changed || newvuse != vuse)
	  {
	    unsigned int new_val_id;

	    tree result = vn_reference_lookup_pieces (newvuse, ref->set,
						      ref->base_set,
						      ref->type,
						      newoperands.exists ()
						      ? newoperands : operands,
						      &newref, VN_WALK);
	    if (result)
	      newoperands.release ();

	    /* We can always insert constants, so if we have a partially
	       redundant constant load of another type try to translate it
	       to a constant of appropriate type.  */
	    if (result && is_gimple_min_invariant (result))
	      {
		tree tem = result;
		if (!useless_type_conversion_p (ref->type, TREE_TYPE (result)))
		  {
		    tem = fold_unary (VIEW_CONVERT_EXPR, ref->type, result);
		    if (tem && !is_gimple_min_invariant (tem))
		      tem = NULL_TREE;
		  }
		if (tem)
		  return get_or_alloc_expr_for_constant (tem);
	      }

	    /* If we'd have to convert things we would need to validate
	       if we can insert the translated expression.  So fail
	       here for now - we cannot insert an alias with a different
	       type in the VN tables either, as that would assert.  */
	    if (result
		&& !useless_type_conversion_p (ref->type, TREE_TYPE (result)))
	      return NULL;
	    else if (!result && newref
		     && !useless_type_conversion_p (ref->type, newref->type))
	      {
		newoperands.release ();
		return NULL;
	      }

	    expr = pre_expr_pool.allocate ();
	    expr->kind = REFERENCE;
	    expr->id = 0;
	    expr->loc = expr_loc;

	    if (newref)
	      new_val_id = newref->value_id;
	    else
	      {
		if (changed || !same_valid)
		  {
		    new_val_id = get_next_value_id ();
		    value_expressions.safe_grow_cleared
		      (get_max_value_id () + 1);
		  }
		else
		  new_val_id = ref->value_id;
		if (!newoperands.exists ())
		  newoperands = operands.copy ();
		newref = vn_reference_insert_pieces (newvuse, ref->set,
						     ref->base_set, ref->type,
						     newoperands,
						     result, new_val_id);
		newoperands = vNULL;
	      }
	    PRE_EXPR_REFERENCE (expr) = newref;
	    get_or_alloc_expression_id (expr);
	    add_to_value (new_val_id, expr);
	  }
	newoperands.release ();
	return expr;
      }
      break;

    case NAME:
      {
	tree name = PRE_EXPR_NAME (expr);
	gimple *def_stmt = SSA_NAME_DEF_STMT (name);
	/* If the SSA name is defined by a PHI node in this block,
	   translate it.  */
	if (gimple_code (def_stmt) == GIMPLE_PHI
	    && gimple_bb (def_stmt) == phiblock)
	  {
	    tree def = PHI_ARG_DEF (def_stmt, e->dest_idx);

	    /* Handle constant.  */
	    if (is_gimple_min_invariant (def))
	      return get_or_alloc_expr_for_constant (def);

	    return get_or_alloc_expr_for_name (def);
	  }
	/* Otherwise return it unchanged - it will get removed if its
	   value is not available in PRED's AVAIL_OUT set of expressions
	   by the subtraction of TMP_GEN.  */
	return expr;
      }

    default:
      gcc_unreachable ();
    }
}

/* Wrapper around phi_translate_1 providing caching functionality.  */

static pre_expr
phi_translate (bitmap_set_t dest, pre_expr expr,
	       bitmap_set_t set1, bitmap_set_t set2, edge e)
{
  expr_pred_trans_t slot = NULL;
  pre_expr phitrans;

  if (!expr)
    return NULL;

  /* Constants contain no values that need translation.  */
  if (expr->kind == CONSTANT)
    return expr;

  if (value_id_constant_p (get_expr_value_id (expr)))
    return expr;

  /* Don't add translations of NAMEs as those are cheap to translate.  */
  if (expr->kind != NAME)
    {
      if (phi_trans_add (&slot, expr, e->src))
	return slot->v;
      /* Store NULL for the value we want to return in the case of
	 recursing.  */
      slot->v = NULL;
    }

  /* Translate.  */
  basic_block saved_valueize_bb = vn_context_bb;
  vn_context_bb = e->src;
  phitrans = phi_translate_1 (dest, expr, set1, set2, e);
  vn_context_bb = saved_valueize_bb;

  if (slot)
    {
      if (phitrans)
	slot->v = phitrans;
      else
	/* Remove failed translations again, they cause insert
	   iteration to not pick up new opportunities reliably.  */
	phi_translate_table->remove_elt_with_hash (slot, slot->hashcode);
    }

  return phitrans;
}


/* For each expression in SET, translate the values through phi nodes
   in PHIBLOCK using edge PHIBLOCK->PRED, and store the resulting
   expressions in DEST.  */

static void
phi_translate_set (bitmap_set_t dest, bitmap_set_t set, edge e)
{
  vec<pre_expr> exprs;
  pre_expr expr;
  int i;

  if (gimple_seq_empty_p (phi_nodes (e->dest)))
    {
      bitmap_set_copy (dest, set);
      return;
    }

  exprs = sorted_array_from_bitmap_set (set);
  FOR_EACH_VEC_ELT (exprs, i, expr)
    {
      pre_expr translated;
      translated = phi_translate (dest, expr, set, NULL, e);
      if (!translated)
	continue;

      bitmap_insert_into_set (dest, translated);
    }
  exprs.release ();
}

/* Find the leader for a value (i.e., the name representing that
   value) in a given set, and return it.  Return NULL if no leader
   is found.  */

static pre_expr
bitmap_find_leader (bitmap_set_t set, unsigned int val)
{
  if (value_id_constant_p (val))
    {
      unsigned int i;
      bitmap_iterator bi;
      bitmap exprset = value_expressions[val];

      EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
	{
	  pre_expr expr = expression_for_id (i);
	  if (expr->kind == CONSTANT)
	    return expr;
	}
    }
  if (bitmap_set_contains_value (set, val))
    {
      /* Rather than walk the entire bitmap of expressions, and see
	 whether any of them has the value we are looking for, we look
	 at the reverse mapping, which tells us the set of expressions
	 that have a given value (IE value->expressions with that
	 value) and see if any of those expressions are in our set.
	 The number of expressions per value is usually significantly
	 less than the number of expressions in the set.  In fact, for
	 large testcases, doing it this way is roughly 5-10x faster
	 than walking the bitmap.
	 If this is somehow a significant loss for some cases, we can
	 choose which set to walk based on which set is smaller.  */
      unsigned int i;
      bitmap_iterator bi;
      bitmap exprset = value_expressions[val];

      EXECUTE_IF_AND_IN_BITMAP (exprset, &set->expressions, 0, i, bi)
	return expression_for_id (i);
    }
  return NULL;
}

/* Determine if EXPR, a memory expression, is ANTIC_IN at the top of
   BLOCK by seeing if it is not killed in the block.  Note that we are
   only determining whether there is a store that kills it.  Because
   of the order in which clean iterates over values, we are guaranteed
   that altered operands will have caused us to be eliminated from the
   ANTIC_IN set already.  */
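
/* For illustration (an assumed example, not from the original
   commentary): given a load tmp_1 = *p_2 with VUSE .MEM_3, a store
   *p_2 = x_4 appearing earlier in BLOCK may clobber the reference, so
   the value {*p_2, .MEM_3} "dies" in BLOCK and must not be considered
   ANTIC_IN at its top.  */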

static bool
value_dies_in_block_x (pre_expr expr, basic_block block)
{
  tree vuse = PRE_EXPR_REFERENCE (expr)->vuse;
  vn_reference_t refx = PRE_EXPR_REFERENCE (expr);
  gimple *def;
  gimple_stmt_iterator gsi;
  unsigned id = get_expression_id (expr);
  bool res = false;
  ao_ref ref;

  if (!vuse)
    return false;

  /* Lookup a previously calculated result.  */
  if (EXPR_DIES (block)
      && bitmap_bit_p (EXPR_DIES (block), id * 2))
    return bitmap_bit_p (EXPR_DIES (block), id * 2 + 1);

  /* A memory expression {e, VUSE} dies in the block if there is a
     statement that may clobber e.  If, starting statement walk from the
     top of the basic block, a statement uses VUSE there can be no kill
     in between that use and the original statement that loaded {e, VUSE},
     so we can stop walking.  */
  ref.base = NULL_TREE;
  for (gsi = gsi_start_bb (block); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree def_vuse, def_vdef;
      def = gsi_stmt (gsi);
      def_vuse = gimple_vuse (def);
      def_vdef = gimple_vdef (def);

      /* Not a memory statement.  */
      if (!def_vuse)
	continue;

      /* Not a may-def.  */
      if (!def_vdef)
	{
	  /* A load with the same VUSE, we're done.  */
	  if (def_vuse == vuse)
	    break;

	  continue;
	}

      /* Init ref only if we really need it.  */
      if (ref.base == NULL_TREE
	  && !ao_ref_init_from_vn_reference (&ref, refx->set, refx->base_set,
					     refx->type, refx->operands))
	{
	  res = true;
	  break;
	}
      /* If the statement may clobber expr, it dies.  */
      if (stmt_may_clobber_ref_p_1 (def, &ref))
	{
	  res = true;
	  break;
	}
    }

  /* Remember the result.  */
  if (!EXPR_DIES (block))
    EXPR_DIES (block) = BITMAP_ALLOC (&grand_bitmap_obstack);
  bitmap_set_bit (EXPR_DIES (block), id * 2);
  if (res)
    bitmap_set_bit (EXPR_DIES (block), id * 2 + 1);

  return res;
}


/* Determine if OP is valid in SET1 U SET2, which it is when the union
   contains its value-id.  */

static bool
op_valid_in_sets (bitmap_set_t set1, bitmap_set_t set2, tree op)
{
  if (op && TREE_CODE (op) == SSA_NAME)
    {
      unsigned int value_id = VN_INFO (op)->value_id;
      if (!(bitmap_set_contains_value (set1, value_id)
	    || (set2 && bitmap_set_contains_value (set2, value_id))))
	return false;
    }
  return true;
}

/* Determine if the expression EXPR is valid in SET1 U SET2.
   ONLY SET2 CAN BE NULL.
   This means that we have a leader for each part of the expression
   (if it consists of values), or the expression is an SSA_NAME.
   For loads/calls, we also see if the vuse is killed in this block.  */

static bool
valid_in_sets (bitmap_set_t set1, bitmap_set_t set2, pre_expr expr)
{
  switch (expr->kind)
    {
    case NAME:
      /* By construction all NAMEs are available.  Non-available
	 NAMEs are removed by subtracting TMP_GEN from the sets.  */
*/ 1889 return true; 1890 case NARY: 1891 { 1892 unsigned int i; 1893 vn_nary_op_t nary = PRE_EXPR_NARY (expr); 1894 for (i = 0; i < nary->length; i++) 1895 if (!op_valid_in_sets (set1, set2, nary->op[i])) 1896 return false; 1897 return true; 1898 } 1899 break; 1900 case REFERENCE: 1901 { 1902 vn_reference_t ref = PRE_EXPR_REFERENCE (expr); 1903 vn_reference_op_t vro; 1904 unsigned int i; 1905 1906 FOR_EACH_VEC_ELT (ref->operands, i, vro) 1907 { 1908 if (!op_valid_in_sets (set1, set2, vro->op0) 1909 || !op_valid_in_sets (set1, set2, vro->op1) 1910 || !op_valid_in_sets (set1, set2, vro->op2)) 1911 return false; 1912 } 1913 return true; 1914 } 1915 default: 1916 gcc_unreachable (); 1917 } 1918 } 1919 1920 /* Clean the set of expressions SET1 that are no longer valid in SET1 or SET2. 1921 This means expressions that are made up of values we have no leaders for 1922 in SET1 or SET2. */ 1923 1924 static void 1925 clean (bitmap_set_t set1, bitmap_set_t set2 = NULL) 1926 { 1927 vec<pre_expr> exprs = sorted_array_from_bitmap_set (set1); 1928 pre_expr expr; 1929 int i; 1930 1931 FOR_EACH_VEC_ELT (exprs, i, expr) 1932 { 1933 if (!valid_in_sets (set1, set2, expr)) 1934 { 1935 unsigned int val = get_expr_value_id (expr); 1936 bitmap_clear_bit (&set1->expressions, get_expression_id (expr)); 1937 /* We are entered with possibly multiple expressions for a value 1938 so before removing a value from the set see if there's an 1939 expression for it left. */ 1940 if (! bitmap_find_leader (set1, val)) 1941 bitmap_clear_bit (&set1->values, val); 1942 } 1943 } 1944 exprs.release (); 1945 } 1946 1947 /* Clean the set of expressions that are no longer valid in SET because 1948 they are clobbered in BLOCK or because they trap and may not be executed. */ 1949 1950 static void 1951 prune_clobbered_mems (bitmap_set_t set, basic_block block) 1952 { 1953 bitmap_iterator bi; 1954 unsigned i; 1955 unsigned to_remove = -1U; 1956 bool any_removed = false; 1957 1958 FOR_EACH_EXPR_ID_IN_SET (set, i, bi) 1959 { 1960 /* Remove queued expr. */ 1961 if (to_remove != -1U) 1962 { 1963 bitmap_clear_bit (&set->expressions, to_remove); 1964 any_removed = true; 1965 to_remove = -1U; 1966 } 1967 1968 pre_expr expr = expression_for_id (i); 1969 if (expr->kind == REFERENCE) 1970 { 1971 vn_reference_t ref = PRE_EXPR_REFERENCE (expr); 1972 if (ref->vuse) 1973 { 1974 gimple *def_stmt = SSA_NAME_DEF_STMT (ref->vuse); 1975 if (!gimple_nop_p (def_stmt) 1976 && ((gimple_bb (def_stmt) != block 1977 && !dominated_by_p (CDI_DOMINATORS, 1978 block, gimple_bb (def_stmt))) 1979 || (gimple_bb (def_stmt) == block 1980 && value_dies_in_block_x (expr, block)))) 1981 to_remove = i; 1982 } 1983 /* If the REFERENCE may trap make sure the block does not contain 1984 a possible exit point. 1985 ??? This is overly conservative if we translate AVAIL_OUT 1986 as the available expression might be after the exit point. */ 1987 if (BB_MAY_NOTRETURN (block) 1988 && vn_reference_may_trap (ref)) 1989 to_remove = i; 1990 } 1991 else if (expr->kind == NARY) 1992 { 1993 vn_nary_op_t nary = PRE_EXPR_NARY (expr); 1994 /* If the NARY may trap make sure the block does not contain 1995 a possible exit point. 1996 ??? This is overly conservative if we translate AVAIL_OUT 1997 as the available expression might be after the exit point. */ 1998 if (BB_MAY_NOTRETURN (block) 1999 && vn_nary_may_trap (nary)) 2000 to_remove = i; 2001 } 2002 } 2003 2004 /* Remove queued expr. 
 */
  if (to_remove != -1U)
    {
      bitmap_clear_bit (&set->expressions, to_remove);
      any_removed = true;
    }

  /* Above we only removed expressions, now clean the set of values
     which no longer have any corresponding expression.  We cannot
     clear the value at the time we remove an expression since there
     may be multiple expressions per value.
     If we queued the possibly-to-be-removed values we could use
     bitmap_find_leader to see whether an expression is still left for
     each of them.  For some ratio of removed values to the number of
     values/expressions in the set this might be faster than
     rebuilding the value set.  */
  if (any_removed)
    {
      bitmap_clear (&set->values);
      FOR_EACH_EXPR_ID_IN_SET (set, i, bi)
	{
	  pre_expr expr = expression_for_id (i);
	  unsigned int value_id = get_expr_value_id (expr);
	  bitmap_set_bit (&set->values, value_id);
	}
    }
}

/* Compute the ANTIC set for BLOCK.

   If succs(BLOCK) > 1 then
     ANTIC_OUT[BLOCK] = intersection of ANTIC_IN[b] for all succ(BLOCK)
   else if succs(BLOCK) == 1 then
     ANTIC_OUT[BLOCK] = phi_translate (ANTIC_IN[succ(BLOCK)])

   ANTIC_IN[BLOCK] = clean(ANTIC_OUT[BLOCK] U EXP_GEN[BLOCK] - TMP_GEN[BLOCK])

   Note that clean() is deferred until after the iteration.  */

static bool
compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge)
{
  bitmap_set_t S, old, ANTIC_OUT;
  edge e;
  edge_iterator ei;

  bool was_visited = BB_VISITED (block);
  bool changed = ! BB_VISITED (block);
  BB_VISITED (block) = 1;
  old = ANTIC_OUT = S = NULL;

  /* If any edges from predecessors are abnormal, antic_in is empty,
     so do nothing.  */
  if (block_has_abnormal_pred_edge)
    goto maybe_dump_sets;

  old = ANTIC_IN (block);
  ANTIC_OUT = bitmap_set_new ();

  /* If the block has no successors, ANTIC_OUT is empty.  */
  if (EDGE_COUNT (block->succs) == 0)
    ;
  /* If we have one successor, we could have some phi nodes to
     translate through.  */
  else if (single_succ_p (block))
    {
      e = single_succ_edge (block);
      gcc_assert (BB_VISITED (e->dest));
      phi_translate_set (ANTIC_OUT, ANTIC_IN (e->dest), e);
    }
  /* If we have multiple successors, we take the intersection of all of
     them.  Note that in the case of loop exit phi nodes, we may have
     phis to translate through.  */
  else
    {
      size_t i;
      edge first = NULL;

      auto_vec<edge> worklist (EDGE_COUNT (block->succs));
      FOR_EACH_EDGE (e, ei, block->succs)
	{
	  if (!first
	      && BB_VISITED (e->dest))
	    first = e;
	  else if (BB_VISITED (e->dest))
	    worklist.quick_push (e);
	  else
	    {
	      /* Unvisited successors get their ANTIC_IN replaced by the
		 maximal set to arrive at a maximum ANTIC_IN solution.
		 We can ignore them in the intersection operation and
		 thus need not explicitly represent that maximum
		 solution.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "ANTIC_IN is MAX on %d->%d\n",
			 e->src->index, e->dest->index);
	    }
	}

      /* With multiple successors we must have visited at least one of
	 them already, which is guaranteed by the iteration order.  */
      gcc_assert (first != NULL);

      phi_translate_set (ANTIC_OUT, ANTIC_IN (first->dest), first);

      /* If we have multiple successors we need to intersect the ANTIC_OUT
	 sets.
	 For values that is a simple intersection, but for expressions
	 it is a union.  Given we want to have a single expression per
	 value in our sets we have to canonicalize.  Avoid randomness
	 and running into cycles like for PR82129 and canonicalize the
	 expression we choose to the one with the lowest id.  This
	 requires we actually compute the union first.  */
      FOR_EACH_VEC_ELT (worklist, i, e)
	{
	  if (!gimple_seq_empty_p (phi_nodes (e->dest)))
	    {
	      bitmap_set_t tmp = bitmap_set_new ();
	      phi_translate_set (tmp, ANTIC_IN (e->dest), e);
	      bitmap_and_into (&ANTIC_OUT->values, &tmp->values);
	      bitmap_ior_into (&ANTIC_OUT->expressions, &tmp->expressions);
	      bitmap_set_free (tmp);
	    }
	  else
	    {
	      bitmap_and_into (&ANTIC_OUT->values, &ANTIC_IN (e->dest)->values);
	      bitmap_ior_into (&ANTIC_OUT->expressions,
			       &ANTIC_IN (e->dest)->expressions);
	    }
	}
      if (! worklist.is_empty ())
	{
	  /* Prune expressions not in the value set.  */
	  bitmap_iterator bi;
	  unsigned int i;
	  unsigned int to_clear = -1U;
	  FOR_EACH_EXPR_ID_IN_SET (ANTIC_OUT, i, bi)
	    {
	      if (to_clear != -1U)
		{
		  bitmap_clear_bit (&ANTIC_OUT->expressions, to_clear);
		  to_clear = -1U;
		}
	      pre_expr expr = expression_for_id (i);
	      unsigned int value_id = get_expr_value_id (expr);
	      if (!bitmap_bit_p (&ANTIC_OUT->values, value_id))
		to_clear = i;
	    }
	  if (to_clear != -1U)
	    bitmap_clear_bit (&ANTIC_OUT->expressions, to_clear);
	}
    }

  /* Prune expressions that are clobbered in block and thus become
     invalid if translated from ANTIC_OUT to ANTIC_IN.  */
  prune_clobbered_mems (ANTIC_OUT, block);

  /* Generate ANTIC_OUT - TMP_GEN.  */
  S = bitmap_set_subtract_expressions (ANTIC_OUT, TMP_GEN (block));

  /* Start ANTIC_IN with EXP_GEN - TMP_GEN.  */
  ANTIC_IN (block) = bitmap_set_subtract_expressions (EXP_GEN (block),
						      TMP_GEN (block));

  /* Then union in the ANTIC_OUT - TMP_GEN values,
     to get ANTIC_OUT U EXP_GEN - TMP_GEN.  */
  bitmap_ior_into (&ANTIC_IN (block)->values, &S->values);
  bitmap_ior_into (&ANTIC_IN (block)->expressions, &S->expressions);

  /* clean (ANTIC_IN (block)) is deferred until after the iteration
     has converged because it can cause non-convergence, see for
     example PR81181.  */

  /* Intersect ANTIC_IN with the old ANTIC_IN.  This is required until
     we properly represent the maximum expression set, thus not prune
     values without expressions during the iteration.  */
  if (was_visited
      && bitmap_and_into (&ANTIC_IN (block)->values, &old->values))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "warning: intersecting with old ANTIC_IN "
		 "shrinks the set\n");
      /* Prune expressions not in the value set.
*/ 2184 bitmap_iterator bi; 2185 unsigned int i; 2186 unsigned int to_clear = -1U; 2187 FOR_EACH_EXPR_ID_IN_SET (ANTIC_IN (block), i, bi) 2188 { 2189 if (to_clear != -1U) 2190 { 2191 bitmap_clear_bit (&ANTIC_IN (block)->expressions, to_clear); 2192 to_clear = -1U; 2193 } 2194 pre_expr expr = expression_for_id (i); 2195 unsigned int value_id = get_expr_value_id (expr); 2196 if (!bitmap_bit_p (&ANTIC_IN (block)->values, value_id)) 2197 to_clear = i; 2198 } 2199 if (to_clear != -1U) 2200 bitmap_clear_bit (&ANTIC_IN (block)->expressions, to_clear); 2201 } 2202 2203 if (!bitmap_set_equal (old, ANTIC_IN (block))) 2204 changed = true; 2205 2206 maybe_dump_sets: 2207 if (dump_file && (dump_flags & TDF_DETAILS)) 2208 { 2209 if (ANTIC_OUT) 2210 print_bitmap_set (dump_file, ANTIC_OUT, "ANTIC_OUT", block->index); 2211 2212 if (changed) 2213 fprintf (dump_file, "[changed] "); 2214 print_bitmap_set (dump_file, ANTIC_IN (block), "ANTIC_IN", 2215 block->index); 2216 2217 if (S) 2218 print_bitmap_set (dump_file, S, "S", block->index); 2219 } 2220 if (old) 2221 bitmap_set_free (old); 2222 if (S) 2223 bitmap_set_free (S); 2224 if (ANTIC_OUT) 2225 bitmap_set_free (ANTIC_OUT); 2226 return changed; 2227 } 2228 2229 /* Compute PARTIAL_ANTIC for BLOCK. 2230 2231 If succs(BLOCK) > 1 then 2232 PA_OUT[BLOCK] = value wise union of PA_IN[b] + all ANTIC_IN not 2233 in ANTIC_OUT for all succ(BLOCK) 2234 else if succs(BLOCK) == 1 then 2235 PA_OUT[BLOCK] = phi_translate (PA_IN[succ(BLOCK)]) 2236 2237 PA_IN[BLOCK] = clean(PA_OUT[BLOCK] - TMP_GEN[BLOCK] - ANTIC_IN[BLOCK]) 2238 2239 */ 2240 static void 2241 compute_partial_antic_aux (basic_block block, 2242 bool block_has_abnormal_pred_edge) 2243 { 2244 bitmap_set_t old_PA_IN; 2245 bitmap_set_t PA_OUT; 2246 edge e; 2247 edge_iterator ei; 2248 unsigned long max_pa = param_max_partial_antic_length; 2249 2250 old_PA_IN = PA_OUT = NULL; 2251 2252 /* If any edges from predecessors are abnormal, antic_in is empty, 2253 so do nothing. */ 2254 if (block_has_abnormal_pred_edge) 2255 goto maybe_dump_sets; 2256 2257 /* If there are too many partially anticipatable values in the 2258 block, phi_translate_set can take an exponential time: stop 2259 before the translation starts. */ 2260 if (max_pa 2261 && single_succ_p (block) 2262 && bitmap_count_bits (&PA_IN (single_succ (block))->values) > max_pa) 2263 goto maybe_dump_sets; 2264 2265 old_PA_IN = PA_IN (block); 2266 PA_OUT = bitmap_set_new (); 2267 2268 /* If the block has no successors, ANTIC_OUT is empty. */ 2269 if (EDGE_COUNT (block->succs) == 0) 2270 ; 2271 /* If we have one successor, we could have some phi nodes to 2272 translate through. Note that we can't phi translate across DFS 2273 back edges in partial antic, because it uses a union operation on 2274 the successors. For recurrences like IV's, we will end up 2275 generating a new value in the set on each go around (i + 3 (VH.1) 2276 VH.1 + 1 (VH.2), VH.2 + 1 (VH.3), etc), forever. */ 2277 else if (single_succ_p (block)) 2278 { 2279 e = single_succ_edge (block); 2280 if (!(e->flags & EDGE_DFS_BACK)) 2281 phi_translate_set (PA_OUT, PA_IN (e->dest), e); 2282 } 2283 /* If we have multiple successors, we take the union of all of 2284 them. 
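     Unlike the ANTIC_OUT intersection in compute_antic_aux, which
     keeps a value only if it is anticipated on every successor path,
     this union keeps values anticipated on just some path; that is
     what makes them merely "partially" anticipatable.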
*/ 2285 else 2286 { 2287 size_t i; 2288 2289 auto_vec<edge> worklist (EDGE_COUNT (block->succs)); 2290 FOR_EACH_EDGE (e, ei, block->succs) 2291 { 2292 if (e->flags & EDGE_DFS_BACK) 2293 continue; 2294 worklist.quick_push (e); 2295 } 2296 if (worklist.length () > 0) 2297 { 2298 FOR_EACH_VEC_ELT (worklist, i, e) 2299 { 2300 unsigned int i; 2301 bitmap_iterator bi; 2302 2303 FOR_EACH_EXPR_ID_IN_SET (ANTIC_IN (e->dest), i, bi) 2304 bitmap_value_insert_into_set (PA_OUT, 2305 expression_for_id (i)); 2306 if (!gimple_seq_empty_p (phi_nodes (e->dest))) 2307 { 2308 bitmap_set_t pa_in = bitmap_set_new (); 2309 phi_translate_set (pa_in, PA_IN (e->dest), e); 2310 FOR_EACH_EXPR_ID_IN_SET (pa_in, i, bi) 2311 bitmap_value_insert_into_set (PA_OUT, 2312 expression_for_id (i)); 2313 bitmap_set_free (pa_in); 2314 } 2315 else 2316 FOR_EACH_EXPR_ID_IN_SET (PA_IN (e->dest), i, bi) 2317 bitmap_value_insert_into_set (PA_OUT, 2318 expression_for_id (i)); 2319 } 2320 } 2321 } 2322 2323 /* Prune expressions that are clobbered in block and thus become 2324 invalid if translated from PA_OUT to PA_IN. */ 2325 prune_clobbered_mems (PA_OUT, block); 2326 2327 /* PA_IN starts with PA_OUT - TMP_GEN. 2328 Then we subtract things from ANTIC_IN. */ 2329 PA_IN (block) = bitmap_set_subtract_expressions (PA_OUT, TMP_GEN (block)); 2330 2331 /* For partial antic, we want to put back in the phi results, since 2332 we will properly avoid making them partially antic over backedges. */ 2333 bitmap_ior_into (&PA_IN (block)->values, &PHI_GEN (block)->values); 2334 bitmap_ior_into (&PA_IN (block)->expressions, &PHI_GEN (block)->expressions); 2335 2336 /* PA_IN[block] = PA_IN[block] - ANTIC_IN[block] */ 2337 bitmap_set_subtract_values (PA_IN (block), ANTIC_IN (block)); 2338 2339 clean (PA_IN (block), ANTIC_IN (block)); 2340 2341 maybe_dump_sets: 2342 if (dump_file && (dump_flags & TDF_DETAILS)) 2343 { 2344 if (PA_OUT) 2345 print_bitmap_set (dump_file, PA_OUT, "PA_OUT", block->index); 2346 2347 print_bitmap_set (dump_file, PA_IN (block), "PA_IN", block->index); 2348 } 2349 if (old_PA_IN) 2350 bitmap_set_free (old_PA_IN); 2351 if (PA_OUT) 2352 bitmap_set_free (PA_OUT); 2353 } 2354 2355 /* Compute ANTIC and partial ANTIC sets. */ 2356 2357 static void 2358 compute_antic (void) 2359 { 2360 bool changed = true; 2361 int num_iterations = 0; 2362 basic_block block; 2363 int i; 2364 edge_iterator ei; 2365 edge e; 2366 2367 /* If any predecessor edges are abnormal, we punt, so antic_in is empty. 2368 We pre-build the map of blocks with incoming abnormal edges here. */ 2369 auto_sbitmap has_abnormal_preds (last_basic_block_for_fn (cfun)); 2370 bitmap_clear (has_abnormal_preds); 2371 2372 FOR_ALL_BB_FN (block, cfun) 2373 { 2374 BB_VISITED (block) = 0; 2375 2376 FOR_EACH_EDGE (e, ei, block->preds) 2377 if (e->flags & EDGE_ABNORMAL) 2378 { 2379 bitmap_set_bit (has_abnormal_preds, block->index); 2380 break; 2381 } 2382 2383 /* While we are here, give empty ANTIC_IN sets to each block. */ 2384 ANTIC_IN (block) = bitmap_set_new (); 2385 if (do_partial_partial) 2386 PA_IN (block) = bitmap_set_new (); 2387 } 2388 2389 /* At the exit block we anticipate nothing. */ 2390 BB_VISITED (EXIT_BLOCK_PTR_FOR_FN (cfun)) = 1; 2391 2392 /* For ANTIC computation we need a postorder that also guarantees that 2393 a block with a single successor is visited after its successor. 2394 RPO on the inverted CFG has this property. 
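     The single-successor requirement stems from compute_antic_aux,
     which asserts BB_VISITED (e->dest) before phi-translating through
     a sole successor: visiting the successor first guarantees its
     ANTIC_IN has been computed at least once.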
*/ 2395 auto_vec<int, 20> postorder; 2396 inverted_post_order_compute (&postorder); 2397 2398 auto_sbitmap worklist (last_basic_block_for_fn (cfun) + 1); 2399 bitmap_clear (worklist); 2400 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) 2401 bitmap_set_bit (worklist, e->src->index); 2402 while (changed) 2403 { 2404 if (dump_file && (dump_flags & TDF_DETAILS)) 2405 fprintf (dump_file, "Starting iteration %d\n", num_iterations); 2406 /* ??? We need to clear our PHI translation cache here as the 2407 ANTIC sets shrink and we restrict valid translations to 2408 those having operands with leaders in ANTIC. Same below 2409 for PA ANTIC computation. */ 2410 num_iterations++; 2411 changed = false; 2412 for (i = postorder.length () - 1; i >= 0; i--) 2413 { 2414 if (bitmap_bit_p (worklist, postorder[i])) 2415 { 2416 basic_block block = BASIC_BLOCK_FOR_FN (cfun, postorder[i]); 2417 bitmap_clear_bit (worklist, block->index); 2418 if (compute_antic_aux (block, 2419 bitmap_bit_p (has_abnormal_preds, 2420 block->index))) 2421 { 2422 FOR_EACH_EDGE (e, ei, block->preds) 2423 bitmap_set_bit (worklist, e->src->index); 2424 changed = true; 2425 } 2426 } 2427 } 2428 /* Theoretically possible, but *highly* unlikely. */ 2429 gcc_checking_assert (num_iterations < 500); 2430 } 2431 2432 /* We have to clean after the dataflow problem converged as cleaning 2433 can cause non-convergence because it is based on expressions 2434 rather than values. */ 2435 FOR_EACH_BB_FN (block, cfun) 2436 clean (ANTIC_IN (block)); 2437 2438 statistics_histogram_event (cfun, "compute_antic iterations", 2439 num_iterations); 2440 2441 if (do_partial_partial) 2442 { 2443 /* For partial antic we ignore backedges and thus we do not need 2444 to perform any iteration when we process blocks in postorder. */ 2445 for (i = postorder.length () - 1; i >= 0; i--) 2446 { 2447 basic_block block = BASIC_BLOCK_FOR_FN (cfun, postorder[i]); 2448 compute_partial_antic_aux (block, 2449 bitmap_bit_p (has_abnormal_preds, 2450 block->index)); 2451 } 2452 } 2453 } 2454 2455 2456 /* Inserted expressions are placed onto this worklist, which is used 2457 for performing quick dead code elimination of insertions we made 2458 that didn't turn out to be necessary. */ 2459 static bitmap inserted_exprs; 2460 2461 /* The actual worker for create_component_ref_by_pieces. 
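   BLOCK is the block to eventually generate leader statements in, REF
   is the reference whose operand vector is walked, *OPERAND indexes
   the next operand to consume and is advanced as we recurse, and
   STMTS collects statements that need inserting.  Returns the built
   tree, or NULL_TREE if a required leader has to be inserted first.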
*/ 2462 2463 static tree 2464 create_component_ref_by_pieces_1 (basic_block block, vn_reference_t ref, 2465 unsigned int *operand, gimple_seq *stmts) 2466 { 2467 vn_reference_op_t currop = &ref->operands[*operand]; 2468 tree genop; 2469 ++*operand; 2470 switch (currop->opcode) 2471 { 2472 case CALL_EXPR: 2473 gcc_unreachable (); 2474 2475 case MEM_REF: 2476 { 2477 tree baseop = create_component_ref_by_pieces_1 (block, ref, operand, 2478 stmts); 2479 if (!baseop) 2480 return NULL_TREE; 2481 tree offset = currop->op0; 2482 if (TREE_CODE (baseop) == ADDR_EXPR 2483 && handled_component_p (TREE_OPERAND (baseop, 0))) 2484 { 2485 poly_int64 off; 2486 tree base; 2487 base = get_addr_base_and_unit_offset (TREE_OPERAND (baseop, 0), 2488 &off); 2489 gcc_assert (base); 2490 offset = int_const_binop (PLUS_EXPR, offset, 2491 build_int_cst (TREE_TYPE (offset), 2492 off)); 2493 baseop = build_fold_addr_expr (base); 2494 } 2495 genop = build2 (MEM_REF, currop->type, baseop, offset); 2496 MR_DEPENDENCE_CLIQUE (genop) = currop->clique; 2497 MR_DEPENDENCE_BASE (genop) = currop->base; 2498 REF_REVERSE_STORAGE_ORDER (genop) = currop->reverse; 2499 return genop; 2500 } 2501 2502 case TARGET_MEM_REF: 2503 { 2504 tree genop0 = NULL_TREE, genop1 = NULL_TREE; 2505 vn_reference_op_t nextop = &ref->operands[(*operand)++]; 2506 tree baseop = create_component_ref_by_pieces_1 (block, ref, operand, 2507 stmts); 2508 if (!baseop) 2509 return NULL_TREE; 2510 if (currop->op0) 2511 { 2512 genop0 = find_or_generate_expression (block, currop->op0, stmts); 2513 if (!genop0) 2514 return NULL_TREE; 2515 } 2516 if (nextop->op0) 2517 { 2518 genop1 = find_or_generate_expression (block, nextop->op0, stmts); 2519 if (!genop1) 2520 return NULL_TREE; 2521 } 2522 genop = build5 (TARGET_MEM_REF, currop->type, 2523 baseop, currop->op2, genop0, currop->op1, genop1); 2524 2525 MR_DEPENDENCE_CLIQUE (genop) = currop->clique; 2526 MR_DEPENDENCE_BASE (genop) = currop->base; 2527 return genop; 2528 } 2529 2530 case ADDR_EXPR: 2531 if (currop->op0) 2532 { 2533 gcc_assert (is_gimple_min_invariant (currop->op0)); 2534 return currop->op0; 2535 } 2536 /* Fallthrough. */ 2537 case REALPART_EXPR: 2538 case IMAGPART_EXPR: 2539 case VIEW_CONVERT_EXPR: 2540 { 2541 tree genop0 = create_component_ref_by_pieces_1 (block, ref, operand, 2542 stmts); 2543 if (!genop0) 2544 return NULL_TREE; 2545 return fold_build1 (currop->opcode, currop->type, genop0); 2546 } 2547 2548 case WITH_SIZE_EXPR: 2549 { 2550 tree genop0 = create_component_ref_by_pieces_1 (block, ref, operand, 2551 stmts); 2552 if (!genop0) 2553 return NULL_TREE; 2554 tree genop1 = find_or_generate_expression (block, currop->op0, stmts); 2555 if (!genop1) 2556 return NULL_TREE; 2557 return fold_build2 (currop->opcode, currop->type, genop0, genop1); 2558 } 2559 2560 case BIT_FIELD_REF: 2561 { 2562 tree genop0 = create_component_ref_by_pieces_1 (block, ref, operand, 2563 stmts); 2564 if (!genop0) 2565 return NULL_TREE; 2566 tree op1 = currop->op0; 2567 tree op2 = currop->op1; 2568 tree t = build3 (BIT_FIELD_REF, currop->type, genop0, op1, op2); 2569 REF_REVERSE_STORAGE_ORDER (t) = currop->reverse; 2570 return fold (t); 2571 } 2572 2573 /* For array ref vn_reference_op's, operand 1 of the array ref 2574 is op0 of the reference op and operand 3 of the array ref is 2575 op1. 
 */
    case ARRAY_RANGE_REF:
    case ARRAY_REF:
      {
	tree genop0;
	tree genop1 = currop->op0;
	tree genop2 = currop->op1;
	tree genop3 = currop->op2;
	genop0 = create_component_ref_by_pieces_1 (block, ref, operand,
						   stmts);
	if (!genop0)
	  return NULL_TREE;
	genop1 = find_or_generate_expression (block, genop1, stmts);
	if (!genop1)
	  return NULL_TREE;
	if (genop2)
	  {
	    tree domain_type = TYPE_DOMAIN (TREE_TYPE (genop0));
	    /* Drop zero minimum index if redundant.  */
	    if (integer_zerop (genop2)
		&& (!domain_type
		    || integer_zerop (TYPE_MIN_VALUE (domain_type))))
	      genop2 = NULL_TREE;
	    else
	      {
		genop2 = find_or_generate_expression (block, genop2, stmts);
		if (!genop2)
		  return NULL_TREE;
	      }
	  }
	if (genop3)
	  {
	    tree elmt_type = TREE_TYPE (TREE_TYPE (genop0));
	    /* We can't always put a size in units of the element
	       alignment here as the element alignment may be not
	       visible.  See PR43783.  Simply drop the element size
	       for constant sizes.  */
	    if (TREE_CODE (genop3) == INTEGER_CST
		&& TREE_CODE (TYPE_SIZE_UNIT (elmt_type)) == INTEGER_CST
		&& wi::eq_p (wi::to_offset (TYPE_SIZE_UNIT (elmt_type)),
			     (wi::to_offset (genop3)
			      * vn_ref_op_align_unit (currop))))
	      genop3 = NULL_TREE;
	    else
	      {
		genop3 = find_or_generate_expression (block, genop3, stmts);
		if (!genop3)
		  return NULL_TREE;
	      }
	  }
	return build4 (currop->opcode, currop->type, genop0, genop1,
		       genop2, genop3);
      }
    case COMPONENT_REF:
      {
	tree op0;
	tree op1;
	tree genop2 = currop->op1;
	op0 = create_component_ref_by_pieces_1 (block, ref, operand, stmts);
	if (!op0)
	  return NULL_TREE;
	/* op1 should be a FIELD_DECL; those are represented by
	   themselves.  */
	op1 = currop->op0;
	if (genop2)
	  {
	    genop2 = find_or_generate_expression (block, genop2, stmts);
	    if (!genop2)
	      return NULL_TREE;
	  }
	return fold_build3 (COMPONENT_REF, TREE_TYPE (op1), op0, op1, genop2);
      }

    case SSA_NAME:
      {
	genop = find_or_generate_expression (block, currop->op0, stmts);
	return genop;
      }
    case STRING_CST:
    case INTEGER_CST:
    case POLY_INT_CST:
    case COMPLEX_CST:
    case VECTOR_CST:
    case REAL_CST:
    case CONSTRUCTOR:
    case VAR_DECL:
    case PARM_DECL:
    case CONST_DECL:
    case RESULT_DECL:
    case FUNCTION_DECL:
      return currop->op0;

    default:
      gcc_unreachable ();
    }
}

/* For COMPONENT_REFs and ARRAY_REFs, we can't have any intermediates
   for the COMPONENT_REF or MEM_REF or ARRAY_REF portion, because we'd
   end up trying to rename aggregates into ssa form directly, which is
   a no-no.

   Thus, this routine doesn't create temporaries, it just builds a
   single access expression for the array, calling
   find_or_generate_expression to build the innermost pieces.

   This function is a subroutine of create_expression_by_pieces, and
   should not be called on its own unless you really know what you
   are doing.
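
   For instance (a made-up sketch), a REFERENCE for x.a[i_1] is built
   by recursing to the base x first, wrapping it in the COMPONENT_REF
   for .a and then in the ARRAY_REF, while find_or_generate_expression
   provides an SSA leader for the index i_1.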
 */

static tree
create_component_ref_by_pieces (basic_block block, vn_reference_t ref,
				gimple_seq *stmts)
{
  unsigned int op = 0;
  return create_component_ref_by_pieces_1 (block, ref, &op, stmts);
}

/* Find a simple leader for an expression, or generate one using
   create_expression_by_pieces from a NARY expression for the value.
   BLOCK is the basic_block we are looking for leaders in.
   OP is the tree expression to find a leader for or generate.
   Returns the leader or NULL_TREE on failure.  */

static tree
find_or_generate_expression (basic_block block, tree op, gimple_seq *stmts)
{
  pre_expr expr = get_or_alloc_expr_for (op);
  unsigned int lookfor = get_expr_value_id (expr);
  pre_expr leader = bitmap_find_leader (AVAIL_OUT (block), lookfor);
  if (leader)
    {
      if (leader->kind == NAME)
	return PRE_EXPR_NAME (leader);
      else if (leader->kind == CONSTANT)
	return PRE_EXPR_CONSTANT (leader);

      /* Defer.  */
      return NULL_TREE;
    }

  /* It must be a complex expression, so generate it recursively.  Note
     that this is only necessary to handle gcc.dg/tree-ssa/ssa-pre28.c
     where the insert algorithm fails to insert a required expression.  */
  bitmap exprset = value_expressions[lookfor];
  bitmap_iterator bi;
  unsigned int i;
  EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
    {
      pre_expr temp = expression_for_id (i);
      /* We cannot insert random REFERENCE expressions at arbitrary
	 places.  We can insert NARYs which eventually re-materialize
	 their operand values.  */
      if (temp->kind == NARY)
	return create_expression_by_pieces (block, temp, stmts,
					    get_expr_type (expr));
    }

  /* Defer.  */
  return NULL_TREE;
}

/* Create an expression in pieces, so that we can handle very complex
   expressions that may be ANTIC, but not necessarily GIMPLE.
   BLOCK is the basic block the expression will be inserted into,
   EXPR is the expression to insert (in value form),
   STMTS is a statement list to append the necessary insertions into.

   This function will die if we hit some value that shouldn't be
   ANTIC but is (IE there is no leader for it, or its components).
   The function returns NULL_TREE in case a different antic expression
   has to be inserted first.
   This function may also generate expressions that are themselves
   partially or fully redundant.  Those that are will be either made
   fully redundant during the next iteration of insert (for partially
   redundant ones), or eliminated by eliminate (for fully redundant
   ones).  */

static tree
create_expression_by_pieces (basic_block block, pre_expr expr,
			     gimple_seq *stmts, tree type)
{
  tree name;
  tree folded;
  gimple_seq forced_stmts = NULL;
  unsigned int value_id;
  gimple_stmt_iterator gsi;
  tree exprtype = type ? type : get_expr_type (expr);
  pre_expr nameexpr;
  gassign *newstmt;

  switch (expr->kind)
    {
    /* We may hit the NAME/CONSTANT case if we have to convert types
       that value numbering saw through.
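       E.g. (a made-up instance): if value numbering saw through
       l_1 = (unsigned int) i_2 and gave l_1 the same value as i_2,
       re-materializing that value in type unsigned int from the NAME
       i_2 requires emitting the conversion again.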
*/ 2768 case NAME: 2769 folded = PRE_EXPR_NAME (expr); 2770 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (folded)) 2771 return NULL_TREE; 2772 if (useless_type_conversion_p (exprtype, TREE_TYPE (folded))) 2773 return folded; 2774 break; 2775 case CONSTANT: 2776 { 2777 folded = PRE_EXPR_CONSTANT (expr); 2778 tree tem = fold_convert (exprtype, folded); 2779 if (is_gimple_min_invariant (tem)) 2780 return tem; 2781 break; 2782 } 2783 case REFERENCE: 2784 if (PRE_EXPR_REFERENCE (expr)->operands[0].opcode == CALL_EXPR) 2785 { 2786 vn_reference_t ref = PRE_EXPR_REFERENCE (expr); 2787 unsigned int operand = 1; 2788 vn_reference_op_t currop = &ref->operands[0]; 2789 tree sc = NULL_TREE; 2790 tree fn = find_or_generate_expression (block, currop->op0, stmts); 2791 if (!fn) 2792 return NULL_TREE; 2793 if (currop->op1) 2794 { 2795 sc = find_or_generate_expression (block, currop->op1, stmts); 2796 if (!sc) 2797 return NULL_TREE; 2798 } 2799 auto_vec<tree> args (ref->operands.length () - 1); 2800 while (operand < ref->operands.length ()) 2801 { 2802 tree arg = create_component_ref_by_pieces_1 (block, ref, 2803 &operand, stmts); 2804 if (!arg) 2805 return NULL_TREE; 2806 args.quick_push (arg); 2807 } 2808 gcall *call = gimple_build_call_vec (fn, args); 2809 gimple_set_location (call, expr->loc); 2810 gimple_call_set_fntype (call, currop->type); 2811 if (sc) 2812 gimple_call_set_chain (call, sc); 2813 tree forcedname = make_ssa_name (TREE_TYPE (currop->type)); 2814 gimple_call_set_lhs (call, forcedname); 2815 /* There's no CCP pass after PRE which would re-compute alignment 2816 information so make sure we re-materialize this here. */ 2817 if (gimple_call_builtin_p (call, BUILT_IN_ASSUME_ALIGNED) 2818 && args.length () - 2 <= 1 2819 && tree_fits_uhwi_p (args[1]) 2820 && (args.length () != 3 || tree_fits_uhwi_p (args[2]))) 2821 { 2822 unsigned HOST_WIDE_INT halign = tree_to_uhwi (args[1]); 2823 unsigned HOST_WIDE_INT hmisalign 2824 = args.length () == 3 ? tree_to_uhwi (args[2]) : 0; 2825 if ((halign & (halign - 1)) == 0 2826 && (hmisalign & ~(halign - 1)) == 0 2827 && (unsigned int)halign != 0) 2828 set_ptr_info_alignment (get_ptr_info (forcedname), 2829 halign, hmisalign); 2830 } 2831 gimple_set_vuse (call, BB_LIVE_VOP_ON_EXIT (block)); 2832 gimple_seq_add_stmt_without_update (&forced_stmts, call); 2833 folded = forcedname; 2834 } 2835 else 2836 { 2837 folded = create_component_ref_by_pieces (block, 2838 PRE_EXPR_REFERENCE (expr), 2839 stmts); 2840 if (!folded) 2841 return NULL_TREE; 2842 name = make_temp_ssa_name (exprtype, NULL, "pretmp"); 2843 newstmt = gimple_build_assign (name, folded); 2844 gimple_set_location (newstmt, expr->loc); 2845 gimple_seq_add_stmt_without_update (&forced_stmts, newstmt); 2846 gimple_set_vuse (newstmt, BB_LIVE_VOP_ON_EXIT (block)); 2847 folded = name; 2848 } 2849 break; 2850 case NARY: 2851 { 2852 vn_nary_op_t nary = PRE_EXPR_NARY (expr); 2853 tree *genop = XALLOCAVEC (tree, nary->length); 2854 unsigned i; 2855 for (i = 0; i < nary->length; ++i) 2856 { 2857 genop[i] = find_or_generate_expression (block, nary->op[i], stmts); 2858 if (!genop[i]) 2859 return NULL_TREE; 2860 /* Ensure genop[] is properly typed for POINTER_PLUS_EXPR. It 2861 may have conversions stripped. 
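	       In GIMPLE a POINTER_PLUS_EXPR takes a pointer as its
	       first operand and a sizetype offset as its second; the
	       conversions below re-establish exactly that shape.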
 */
	    if (nary->opcode == POINTER_PLUS_EXPR)
	      {
		if (i == 0)
		  genop[i] = gimple_convert (&forced_stmts,
					     nary->type, genop[i]);
		else if (i == 1)
		  genop[i] = gimple_convert (&forced_stmts,
					     sizetype, genop[i]);
	      }
	    else
	      genop[i] = gimple_convert (&forced_stmts,
					 TREE_TYPE (nary->op[i]), genop[i]);
	  }
	if (nary->opcode == CONSTRUCTOR)
	  {
	    vec<constructor_elt, va_gc> *elts = NULL;
	    for (i = 0; i < nary->length; ++i)
	      CONSTRUCTOR_APPEND_ELT (elts, NULL_TREE, genop[i]);
	    folded = build_constructor (nary->type, elts);
	    name = make_temp_ssa_name (exprtype, NULL, "pretmp");
	    newstmt = gimple_build_assign (name, folded);
	    gimple_set_location (newstmt, expr->loc);
	    gimple_seq_add_stmt_without_update (&forced_stmts, newstmt);
	    folded = name;
	  }
	else
	  {
	    switch (nary->length)
	      {
	      case 1:
		folded = gimple_build (&forced_stmts, expr->loc,
				       nary->opcode, nary->type, genop[0]);
		break;
	      case 2:
		folded = gimple_build (&forced_stmts, expr->loc, nary->opcode,
				       nary->type, genop[0], genop[1]);
		break;
	      case 3:
		folded = gimple_build (&forced_stmts, expr->loc, nary->opcode,
				       nary->type, genop[0], genop[1],
				       genop[2]);
		break;
	      default:
		gcc_unreachable ();
	      }
	  }
      }
      break;
    default:
      gcc_unreachable ();
    }

  folded = gimple_convert (&forced_stmts, exprtype, folded);

  /* If there is nothing to insert, return the simplified result.  */
  if (gimple_seq_empty_p (forced_stmts))
    return folded;
  /* If we simplified to a constant then return it and discard any
     stmts we may have built.  */
  if (is_gimple_min_invariant (folded))
    {
      gimple_seq_discard (forced_stmts);
      return folded;
    }
  /* Likewise if we simplified to something not queued for insertion.  */
  bool found = false;
  gsi = gsi_last (forced_stmts);
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      tree forcedname = gimple_get_lhs (stmt);
      if (forcedname == folded)
	{
	  found = true;
	  break;
	}
    }
  if (! found)
    {
      gimple_seq_discard (forced_stmts);
      return folded;
    }
  gcc_assert (TREE_CODE (folded) == SSA_NAME);

  /* If we have any intermediate expressions, add them to the value
     sets and chain them in the instruction stream.  */
  if (forced_stmts)
    {
      gsi = gsi_start (forced_stmts);
      for (; !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  tree forcedname = gimple_get_lhs (stmt);
	  pre_expr nameexpr;

	  if (forcedname != folded)
	    {
	      VN_INFO (forcedname)->valnum = forcedname;
	      VN_INFO (forcedname)->value_id = get_next_value_id ();
	      nameexpr = get_or_alloc_expr_for_name (forcedname);
	      add_to_value (VN_INFO (forcedname)->value_id, nameexpr);
	      if (NEW_SETS (block))
		bitmap_value_replace_in_set (NEW_SETS (block), nameexpr);
	      bitmap_value_replace_in_set (AVAIL_OUT (block), nameexpr);
	    }

	  bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (forcedname));
	}
      gimple_seq_add_seq (stmts, forced_stmts);
    }

  name = folded;

  /* Fold the last statement.  */
  gsi = gsi_last (*stmts);
  if (fold_stmt_inplace (&gsi))
    update_stmt (gsi_stmt (gsi));

  /* Add a value number to the temporary.
2981 The value may already exist in either NEW_SETS, or AVAIL_OUT, because 2982 we are creating the expression by pieces, and this particular piece of 2983 the expression may have been represented. There is no harm in replacing 2984 here. */ 2985 value_id = get_expr_value_id (expr); 2986 VN_INFO (name)->value_id = value_id; 2987 VN_INFO (name)->valnum = vn_valnum_from_value_id (value_id); 2988 if (VN_INFO (name)->valnum == NULL_TREE) 2989 VN_INFO (name)->valnum = name; 2990 gcc_assert (VN_INFO (name)->valnum != NULL_TREE); 2991 nameexpr = get_or_alloc_expr_for_name (name); 2992 add_to_value (value_id, nameexpr); 2993 if (NEW_SETS (block)) 2994 bitmap_value_replace_in_set (NEW_SETS (block), nameexpr); 2995 bitmap_value_replace_in_set (AVAIL_OUT (block), nameexpr); 2996 2997 pre_stats.insertions++; 2998 if (dump_file && (dump_flags & TDF_DETAILS)) 2999 { 3000 fprintf (dump_file, "Inserted "); 3001 print_gimple_stmt (dump_file, gsi_stmt (gsi_last (*stmts)), 0); 3002 fprintf (dump_file, " in predecessor %d (%04d)\n", 3003 block->index, value_id); 3004 } 3005 3006 return name; 3007 } 3008 3009 3010 /* Insert the to-be-made-available values of expression EXPRNUM for each 3011 predecessor, stored in AVAIL, into the predecessors of BLOCK, and 3012 merge the result with a phi node, given the same value number as 3013 NODE. Return true if we have inserted new stuff. */ 3014 3015 static bool 3016 insert_into_preds_of_block (basic_block block, unsigned int exprnum, 3017 vec<pre_expr> avail) 3018 { 3019 pre_expr expr = expression_for_id (exprnum); 3020 pre_expr newphi; 3021 unsigned int val = get_expr_value_id (expr); 3022 edge pred; 3023 bool insertions = false; 3024 bool nophi = false; 3025 basic_block bprime; 3026 pre_expr eprime; 3027 edge_iterator ei; 3028 tree type = get_expr_type (expr); 3029 tree temp; 3030 gphi *phi; 3031 3032 /* Make sure we aren't creating an induction variable. */ 3033 if (bb_loop_depth (block) > 0 && EDGE_COUNT (block->preds) == 2) 3034 { 3035 bool firstinsideloop = false; 3036 bool secondinsideloop = false; 3037 firstinsideloop = flow_bb_inside_loop_p (block->loop_father, 3038 EDGE_PRED (block, 0)->src); 3039 secondinsideloop = flow_bb_inside_loop_p (block->loop_father, 3040 EDGE_PRED (block, 1)->src); 3041 /* Induction variables only have one edge inside the loop. */ 3042 if ((firstinsideloop ^ secondinsideloop) 3043 && expr->kind != REFERENCE) 3044 { 3045 if (dump_file && (dump_flags & TDF_DETAILS)) 3046 fprintf (dump_file, "Skipping insertion of phi for partial redundancy: Looks like an induction variable\n"); 3047 nophi = true; 3048 } 3049 } 3050 3051 /* Make the necessary insertions. */ 3052 FOR_EACH_EDGE (pred, ei, block->preds) 3053 { 3054 gimple_seq stmts = NULL; 3055 tree builtexpr; 3056 bprime = pred->src; 3057 eprime = avail[pred->dest_idx]; 3058 builtexpr = create_expression_by_pieces (bprime, eprime, 3059 &stmts, type); 3060 gcc_assert (!(pred->flags & EDGE_ABNORMAL)); 3061 if (!gimple_seq_empty_p (stmts)) 3062 { 3063 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pred, stmts); 3064 gcc_assert (! new_bb); 3065 insertions = true; 3066 } 3067 if (!builtexpr) 3068 { 3069 /* We cannot insert a PHI node if we failed to insert 3070 on one edge. 
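	     Record that and continue with the remaining edges: any
	     insertions already made on other edges still count, see
	     the nophi/insertions handling right after this loop.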
 */
	  nophi = true;
	  continue;
	}
      if (is_gimple_min_invariant (builtexpr))
	avail[pred->dest_idx] = get_or_alloc_expr_for_constant (builtexpr);
      else
	avail[pred->dest_idx] = get_or_alloc_expr_for_name (builtexpr);
    }
  /* If we didn't want a phi node, and we made insertions, we still have
     inserted new stuff, and thus return true.  If we didn't want a phi
     node, and didn't make insertions, we haven't added anything new, so
     return false.  */
  if (nophi && insertions)
    return true;
  else if (nophi && !insertions)
    return false;

  /* Now build a phi for the new variable.  */
  temp = make_temp_ssa_name (type, NULL, "prephitmp");
  phi = create_phi_node (temp, block);

  VN_INFO (temp)->value_id = val;
  VN_INFO (temp)->valnum = vn_valnum_from_value_id (val);
  if (VN_INFO (temp)->valnum == NULL_TREE)
    VN_INFO (temp)->valnum = temp;
  bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (temp));
  FOR_EACH_EDGE (pred, ei, block->preds)
    {
      pre_expr ae = avail[pred->dest_idx];
      gcc_assert (get_expr_type (ae) == type
		  || useless_type_conversion_p (type, get_expr_type (ae)));
      if (ae->kind == CONSTANT)
	add_phi_arg (phi, unshare_expr (PRE_EXPR_CONSTANT (ae)),
		     pred, UNKNOWN_LOCATION);
      else
	add_phi_arg (phi, PRE_EXPR_NAME (ae), pred, UNKNOWN_LOCATION);
    }

  newphi = get_or_alloc_expr_for_name (temp);
  add_to_value (val, newphi);

  /* The value should *not* exist in PHI_GEN, or else we wouldn't be doing
     this insertion, since we test for the existence of this value in
     PHI_GEN before proceeding with the partial redundancy checks in
     insert_aux.

     The value may exist in AVAIL_OUT, in particular, it could be
     represented by the expression we are trying to eliminate, in which
     case we want the replacement to occur.  If it is not in AVAIL_OUT,
     we want it inserted there.

     Similarly to the PHI_GEN case, the value should not exist in
     NEW_SETS of this block, because if it did, it would have existed in
     our dominator's AVAIL_OUT, and would have been skipped due to the
     full redundancy check.  */

  bitmap_insert_into_set (PHI_GEN (block), newphi);
  bitmap_value_replace_in_set (AVAIL_OUT (block), newphi);
  if (NEW_SETS (block))
    bitmap_insert_into_set (NEW_SETS (block), newphi);

  /* If we insert a PHI node for a conversion of another PHI node
     in the same basic-block try to preserve range information.
     This is important so that followup loop passes receive optimal
     number-of-iterations analysis results.  See PR61743.  */
  if (expr->kind == NARY
      && CONVERT_EXPR_CODE_P (expr->u.nary->opcode)
      && TREE_CODE (expr->u.nary->op[0]) == SSA_NAME
      && gimple_bb (SSA_NAME_DEF_STMT (expr->u.nary->op[0])) == block
      && INTEGRAL_TYPE_P (type)
      && INTEGRAL_TYPE_P (TREE_TYPE (expr->u.nary->op[0]))
      && (TYPE_PRECISION (type)
	  >= TYPE_PRECISION (TREE_TYPE (expr->u.nary->op[0])))
      && SSA_NAME_RANGE_INFO (expr->u.nary->op[0]))
    {
      wide_int min, max;
      if (get_range_info (expr->u.nary->op[0], &min, &max) == VR_RANGE
	  && !wi::neg_p (min, SIGNED)
	  && !wi::neg_p (max, SIGNED))
	/* Just handle extension and sign-changes of all-positive ranges.
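	   For such ranges the bounds are the same whether min and max
	   are read signed or unsigned, so extending them to the wider
	   type below is safe.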
*/ 3151 set_range_info (temp, 3152 SSA_NAME_RANGE_TYPE (expr->u.nary->op[0]), 3153 wide_int_storage::from (min, TYPE_PRECISION (type), 3154 TYPE_SIGN (type)), 3155 wide_int_storage::from (max, TYPE_PRECISION (type), 3156 TYPE_SIGN (type))); 3157 } 3158 3159 if (dump_file && (dump_flags & TDF_DETAILS)) 3160 { 3161 fprintf (dump_file, "Created phi "); 3162 print_gimple_stmt (dump_file, phi, 0); 3163 fprintf (dump_file, " in block %d (%04d)\n", block->index, val); 3164 } 3165 pre_stats.phis++; 3166 return true; 3167 } 3168 3169 3170 3171 /* Perform insertion of partially redundant or hoistable values. 3172 For BLOCK, do the following: 3173 1. Propagate the NEW_SETS of the dominator into the current block. 3174 If the block has multiple predecessors, 3175 2a. Iterate over the ANTIC expressions for the block to see if 3176 any of them are partially redundant. 3177 2b. If so, insert them into the necessary predecessors to make 3178 the expression fully redundant. 3179 2c. Insert a new PHI merging the values of the predecessors. 3180 2d. Insert the new PHI, and the new expressions, into the 3181 NEW_SETS set. 3182 If the block has multiple successors, 3183 3a. Iterate over the ANTIC values for the block to see if 3184 any of them are good candidates for hoisting. 3185 3b. If so, insert expressions computing the values in BLOCK, 3186 and add the new expressions into the NEW_SETS set. 3187 4. Recursively call ourselves on the dominator children of BLOCK. 3188 3189 Steps 1, 2a, and 4 are done by insert_aux. 2b, 2c and 2d are done by 3190 do_pre_regular_insertion and do_partial_insertion. 3a and 3b are 3191 done in do_hoist_insertion. 3192 */ 3193 3194 static bool 3195 do_pre_regular_insertion (basic_block block, basic_block dom) 3196 { 3197 bool new_stuff = false; 3198 vec<pre_expr> exprs; 3199 pre_expr expr; 3200 auto_vec<pre_expr> avail; 3201 int i; 3202 3203 exprs = sorted_array_from_bitmap_set (ANTIC_IN (block)); 3204 avail.safe_grow (EDGE_COUNT (block->preds)); 3205 3206 FOR_EACH_VEC_ELT (exprs, i, expr) 3207 { 3208 if (expr->kind == NARY 3209 || expr->kind == REFERENCE) 3210 { 3211 unsigned int val; 3212 bool by_some = false; 3213 bool cant_insert = false; 3214 bool all_same = true; 3215 pre_expr first_s = NULL; 3216 edge pred; 3217 basic_block bprime; 3218 pre_expr eprime = NULL; 3219 edge_iterator ei; 3220 pre_expr edoubleprime = NULL; 3221 bool do_insertion = false; 3222 3223 val = get_expr_value_id (expr); 3224 if (bitmap_set_contains_value (PHI_GEN (block), val)) 3225 continue; 3226 if (bitmap_set_contains_value (AVAIL_OUT (dom), val)) 3227 { 3228 if (dump_file && (dump_flags & TDF_DETAILS)) 3229 { 3230 fprintf (dump_file, "Found fully redundant value: "); 3231 print_pre_expr (dump_file, expr); 3232 fprintf (dump_file, "\n"); 3233 } 3234 continue; 3235 } 3236 3237 FOR_EACH_EDGE (pred, ei, block->preds) 3238 { 3239 unsigned int vprime; 3240 3241 /* We should never run insertion for the exit block 3242 and so not come across fake pred edges. */ 3243 gcc_assert (!(pred->flags & EDGE_FAKE)); 3244 bprime = pred->src; 3245 /* We are looking at ANTIC_OUT of bprime. */ 3246 eprime = phi_translate (NULL, expr, ANTIC_IN (block), NULL, pred); 3247 3248 /* eprime will generally only be NULL if the 3249 value of the expression, translated 3250 through the PHI for this predecessor, is 3251 undefined. If that is the case, we can't 3252 make the expression fully redundant, 3253 because its value is undefined along a 3254 predecessor path. 
We can thus break out 3255 early because it doesn't matter what the 3256 rest of the results are. */ 3257 if (eprime == NULL) 3258 { 3259 avail[pred->dest_idx] = NULL; 3260 cant_insert = true; 3261 break; 3262 } 3263 3264 vprime = get_expr_value_id (eprime); 3265 edoubleprime = bitmap_find_leader (AVAIL_OUT (bprime), 3266 vprime); 3267 if (edoubleprime == NULL) 3268 { 3269 avail[pred->dest_idx] = eprime; 3270 all_same = false; 3271 } 3272 else 3273 { 3274 avail[pred->dest_idx] = edoubleprime; 3275 by_some = true; 3276 /* We want to perform insertions to remove a redundancy on 3277 a path in the CFG we want to optimize for speed. */ 3278 if (optimize_edge_for_speed_p (pred)) 3279 do_insertion = true; 3280 if (first_s == NULL) 3281 first_s = edoubleprime; 3282 else if (!pre_expr_d::equal (first_s, edoubleprime)) 3283 all_same = false; 3284 } 3285 } 3286 /* If we can insert it, it's not the same value 3287 already existing along every predecessor, and 3288 it's defined by some predecessor, it is 3289 partially redundant. */ 3290 if (!cant_insert && !all_same && by_some) 3291 { 3292 if (!do_insertion) 3293 { 3294 if (dump_file && (dump_flags & TDF_DETAILS)) 3295 { 3296 fprintf (dump_file, "Skipping partial redundancy for " 3297 "expression "); 3298 print_pre_expr (dump_file, expr); 3299 fprintf (dump_file, " (%04d), no redundancy on to be " 3300 "optimized for speed edge\n", val); 3301 } 3302 } 3303 else if (dbg_cnt (treepre_insert)) 3304 { 3305 if (dump_file && (dump_flags & TDF_DETAILS)) 3306 { 3307 fprintf (dump_file, "Found partial redundancy for " 3308 "expression "); 3309 print_pre_expr (dump_file, expr); 3310 fprintf (dump_file, " (%04d)\n", 3311 get_expr_value_id (expr)); 3312 } 3313 if (insert_into_preds_of_block (block, 3314 get_expression_id (expr), 3315 avail)) 3316 new_stuff = true; 3317 } 3318 } 3319 /* If all edges produce the same value and that value is 3320 an invariant, then the PHI has the same value on all 3321 edges. Note this. */ 3322 else if (!cant_insert 3323 && all_same 3324 && (edoubleprime->kind != NAME 3325 || !SSA_NAME_OCCURS_IN_ABNORMAL_PHI 3326 (PRE_EXPR_NAME (edoubleprime)))) 3327 { 3328 gcc_assert (edoubleprime->kind == CONSTANT 3329 || edoubleprime->kind == NAME); 3330 3331 tree temp = make_temp_ssa_name (get_expr_type (expr), 3332 NULL, "pretmp"); 3333 gassign *assign 3334 = gimple_build_assign (temp, 3335 edoubleprime->kind == CONSTANT ? 3336 PRE_EXPR_CONSTANT (edoubleprime) : 3337 PRE_EXPR_NAME (edoubleprime)); 3338 gimple_stmt_iterator gsi = gsi_after_labels (block); 3339 gsi_insert_before (&gsi, assign, GSI_NEW_STMT); 3340 3341 VN_INFO (temp)->value_id = val; 3342 VN_INFO (temp)->valnum = vn_valnum_from_value_id (val); 3343 if (VN_INFO (temp)->valnum == NULL_TREE) 3344 VN_INFO (temp)->valnum = temp; 3345 bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (temp)); 3346 pre_expr newe = get_or_alloc_expr_for_name (temp); 3347 add_to_value (val, newe); 3348 bitmap_value_replace_in_set (AVAIL_OUT (block), newe); 3349 bitmap_insert_into_set (NEW_SETS (block), newe); 3350 } 3351 } 3352 } 3353 3354 exprs.release (); 3355 return new_stuff; 3356 } 3357 3358 3359 /* Perform insertion for partially anticipatable expressions. There 3360 is only one case we will perform insertion for these. This case is 3361 if the expression is partially anticipatable, and fully available. 3362 In this case, we know that putting it earlier will enable us to 3363 remove the later computation. 
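
   A sketch in made-up GIMPLE:

     pred1:  t1_1 = a_5 + b_6;
     pred2:  t2_2 = a_5 + b_6;
     join:   a_5 + b_6 is fully available (from both predecessors) but
	     only partially anticipated, because just one successor
	     path recomputes it:
     then:   t3_3 = a_5 + b_6;

   Inserting prephitmp_7 = PHI <t1_1, t2_2> at the join makes the
   computation of t3_3 fully redundant.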
*/ 3364 3365 static bool 3366 do_pre_partial_partial_insertion (basic_block block, basic_block dom) 3367 { 3368 bool new_stuff = false; 3369 vec<pre_expr> exprs; 3370 pre_expr expr; 3371 auto_vec<pre_expr> avail; 3372 int i; 3373 3374 exprs = sorted_array_from_bitmap_set (PA_IN (block)); 3375 avail.safe_grow (EDGE_COUNT (block->preds)); 3376 3377 FOR_EACH_VEC_ELT (exprs, i, expr) 3378 { 3379 if (expr->kind == NARY 3380 || expr->kind == REFERENCE) 3381 { 3382 unsigned int val; 3383 bool by_all = true; 3384 bool cant_insert = false; 3385 edge pred; 3386 basic_block bprime; 3387 pre_expr eprime = NULL; 3388 edge_iterator ei; 3389 3390 val = get_expr_value_id (expr); 3391 if (bitmap_set_contains_value (PHI_GEN (block), val)) 3392 continue; 3393 if (bitmap_set_contains_value (AVAIL_OUT (dom), val)) 3394 continue; 3395 3396 FOR_EACH_EDGE (pred, ei, block->preds) 3397 { 3398 unsigned int vprime; 3399 pre_expr edoubleprime; 3400 3401 /* We should never run insertion for the exit block 3402 and so not come across fake pred edges. */ 3403 gcc_assert (!(pred->flags & EDGE_FAKE)); 3404 bprime = pred->src; 3405 eprime = phi_translate (NULL, expr, ANTIC_IN (block), 3406 PA_IN (block), pred); 3407 3408 /* eprime will generally only be NULL if the 3409 value of the expression, translated 3410 through the PHI for this predecessor, is 3411 undefined. If that is the case, we can't 3412 make the expression fully redundant, 3413 because its value is undefined along a 3414 predecessor path. We can thus break out 3415 early because it doesn't matter what the 3416 rest of the results are. */ 3417 if (eprime == NULL) 3418 { 3419 avail[pred->dest_idx] = NULL; 3420 cant_insert = true; 3421 break; 3422 } 3423 3424 vprime = get_expr_value_id (eprime); 3425 edoubleprime = bitmap_find_leader (AVAIL_OUT (bprime), vprime); 3426 avail[pred->dest_idx] = edoubleprime; 3427 if (edoubleprime == NULL) 3428 { 3429 by_all = false; 3430 break; 3431 } 3432 } 3433 3434 /* If we can insert it, it's not the same value 3435 already existing along every predecessor, and 3436 it's defined by some predecessor, it is 3437 partially redundant. */ 3438 if (!cant_insert && by_all) 3439 { 3440 edge succ; 3441 bool do_insertion = false; 3442 3443 /* Insert only if we can remove a later expression on a path 3444 that we want to optimize for speed. 3445 The phi node that we will be inserting in BLOCK is not free, 3446 and inserting it for the sake of !optimize_for_speed successor 3447 may cause regressions on the speed path. 
 */
	  FOR_EACH_EDGE (succ, ei, block->succs)
	    {
	      if (bitmap_set_contains_value (PA_IN (succ->dest), val)
		  || bitmap_set_contains_value (ANTIC_IN (succ->dest), val))
		{
		  if (optimize_edge_for_speed_p (succ))
		    do_insertion = true;
		}
	    }

	  if (!do_insertion)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "Skipping partial partial redundancy "
			   "for expression ");
		  print_pre_expr (dump_file, expr);
		  fprintf (dump_file, " (%04d), not (partially) anticipated "
			   "on any to be optimized for speed edges\n", val);
		}
	    }
	  else if (dbg_cnt (treepre_insert))
	    {
	      pre_stats.pa_insert++;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "Found partial partial redundancy "
			   "for expression ");
		  print_pre_expr (dump_file, expr);
		  fprintf (dump_file, " (%04d)\n",
			   get_expr_value_id (expr));
		}
	      if (insert_into_preds_of_block (block,
					      get_expression_id (expr),
					      avail))
		new_stuff = true;
	    }
	}
    }

  exprs.release ();
  return new_stuff;
}

/* Insert expressions in BLOCK to compute hoistable values up.
   Return TRUE if something was inserted, otherwise return FALSE.
   The caller has to make sure that BLOCK has at least two successors.  */

static bool
do_hoist_insertion (basic_block block)
{
  edge e;
  edge_iterator ei;
  bool new_stuff = false;
  unsigned i;
  gimple_stmt_iterator last;

  /* At least two successors, or else...  */
  gcc_assert (EDGE_COUNT (block->succs) >= 2);

  /* Check that all successors of BLOCK are dominated by BLOCK.
     We could use dominated_by_p () for this, but actually there is a
     much quicker check: any successor that is dominated by BLOCK can't
     have more than one predecessor edge.  */
  FOR_EACH_EDGE (e, ei, block->succs)
    if (! single_pred_p (e->dest))
      return false;

  /* Determine the insertion point.  If we cannot safely insert before
     the last stmt, should we need to, bail out.  */
  last = gsi_last_bb (block);
  if (!gsi_end_p (last)
      && !is_ctrl_stmt (gsi_stmt (last))
      && stmt_ends_bb_p (gsi_stmt (last)))
    return false;

  /* Compute the set of hoistable expressions from ANTIC_IN.  First
     compute hoistable values.  */
  bitmap_set hoistable_set;

  /* A hoistable value must be in ANTIC_IN (block)
     but not in AVAIL_OUT (block).  */
  bitmap_initialize (&hoistable_set.values, &grand_bitmap_obstack);
  bitmap_and_compl (&hoistable_set.values,
		    &ANTIC_IN (block)->values, &AVAIL_OUT (block)->values);

  /* Short-cut for a common case: hoistable_set is empty.  */
  if (bitmap_empty_p (&hoistable_set.values))
    return false;

  /* Compute which of the hoistable values is in AVAIL_OUT of
     at least one of the successors of BLOCK.  */
  bitmap_head availout_in_some;
  bitmap_initialize (&availout_in_some, &grand_bitmap_obstack);
  FOR_EACH_EDGE (e, ei, block->succs)
    /* Do not consider expressions solely because of their availability
       on loop exits.  They'd be ANTIC-IN throughout the whole loop
       and thus effectively hoisted across loops by combination of
       PRE and hoisting.  */
    if (! loop_exit_edge_p (block->loop_father, e))
      bitmap_ior_and_into (&availout_in_some, &hoistable_set.values,
			   &AVAIL_OUT (e->dest)->values);
  bitmap_clear (&hoistable_set.values);

  /* Short-cut for a common case: availout_in_some is empty.  */
  if (bitmap_empty_p (&availout_in_some))
    return false;

  /* Hack hoistable_set in-place so we can use
     sorted_array_from_bitmap_set.  */
  bitmap_move (&hoistable_set.values, &availout_in_some);
  hoistable_set.expressions = ANTIC_IN (block)->expressions;

  /* Now finally construct the topological-ordered expression set.  */
  vec<pre_expr> exprs = sorted_array_from_bitmap_set (&hoistable_set);

  bitmap_clear (&hoistable_set.values);

  /* If there are candidate values for hoisting, insert expressions
     strategically to make the hoistable expressions fully redundant.  */
  pre_expr expr;
  FOR_EACH_VEC_ELT (exprs, i, expr)
    {
      /* While we try to sort expressions topologically above, the
	 sorting doesn't work out perfectly.  Catch expressions we
	 already inserted.  */
      unsigned int value_id = get_expr_value_id (expr);
      if (bitmap_set_contains_value (AVAIL_OUT (block), value_id))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file,
		       "Already inserted expression for ");
	      print_pre_expr (dump_file, expr);
	      fprintf (dump_file, " (%04d)\n", value_id);
	    }
	  continue;
	}

      /* If we end up with a punned expression representation and this
	 happens to be a float typed one give up - we can't know for
	 sure whether all paths perform the floating-point load we are
	 about to insert and on some targets this can cause correctness
	 issues.  See PR88240.  */
      if (expr->kind == REFERENCE
	  && PRE_EXPR_REFERENCE (expr)->punned
	  && FLOAT_TYPE_P (get_expr_type (expr)))
	continue;

      /* OK, we should hoist this value.  Perform the transformation.  */
      pre_stats.hoist_insert++;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file,
		   "Inserting expression in block %d for code hoisting: ",
		   block->index);
	  print_pre_expr (dump_file, expr);
	  fprintf (dump_file, " (%04d)\n", value_id);
	}

      gimple_seq stmts = NULL;
      tree res = create_expression_by_pieces (block, expr, &stmts,
					      get_expr_type (expr));

      /* Do not return true if expression creation ultimately
	 did not insert any statements.  */
      if (gimple_seq_empty_p (stmts))
	res = NULL_TREE;
      else
	{
	  if (gsi_end_p (last) || is_ctrl_stmt (gsi_stmt (last)))
	    gsi_insert_seq_before (&last, stmts, GSI_SAME_STMT);
	  else
	    gsi_insert_seq_after (&last, stmts, GSI_NEW_STMT);
	}

      /* Make sure to not return true if expression creation ultimately
	 failed but also make sure to insert any stmts produced as they
	 are tracked in inserted_exprs.  */
      if (! res)
	continue;

      new_stuff = true;
    }

  exprs.release ();

  return new_stuff;
}

/* Perform insertion of partially redundant and hoistable values.
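   PRE insertion below is iterated to a fixpoint, propagating NEW_SETS
   down the dominator tree between iterations; hoist insertion is done
   afterwards in a single non-iterated backward walk (see the comments
   in the function body for why).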
/* Perform insertion of partially redundant and hoistable values.  */

static void
insert (void)
{
  basic_block bb;

  FOR_ALL_BB_FN (bb, cfun)
    NEW_SETS (bb) = bitmap_set_new ();

  int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  int rpo_num = pre_and_rev_post_order_compute (NULL, rpo, false);

  int num_iterations = 0;
  bool changed;
  do
    {
      num_iterations++;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Starting insert iteration %d\n", num_iterations);

      changed = false;
      for (int idx = 0; idx < rpo_num; ++idx)
	{
	  basic_block block = BASIC_BLOCK_FOR_FN (cfun, rpo[idx]);
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, block);
	  if (dom)
	    {
	      unsigned i;
	      bitmap_iterator bi;
	      bitmap_set_t newset;

	      /* First, update the AVAIL_OUT set with anything we may have
		 inserted higher up in the dominator tree.  */
	      newset = NEW_SETS (dom);
	      if (newset)
		{
		  /* Note that we need to value_replace both NEW_SETS and
		     AVAIL_OUT.  In both sets the value may currently be
		     represented by some non-simple expression that we
		     want to replace with the new leader.  */
		  FOR_EACH_EXPR_ID_IN_SET (newset, i, bi)
		    {
		      pre_expr expr = expression_for_id (i);
		      bitmap_value_replace_in_set (NEW_SETS (block), expr);
		      bitmap_value_replace_in_set (AVAIL_OUT (block), expr);
		    }
		}

	      /* Insert expressions for partial redundancies.  */
	      if (flag_tree_pre && !single_pred_p (block))
		{
		  changed |= do_pre_regular_insertion (block, dom);
		  if (do_partial_partial)
		    changed |= do_pre_partial_partial_insertion (block, dom);
		}
	    }
	}

      /* Clear the NEW sets before the next iteration.  We have already
	 fully propagated their contents.  */
      if (changed)
	FOR_ALL_BB_FN (bb, cfun)
	  bitmap_set_free (NEW_SETS (bb));
    }
  while (changed);

  statistics_histogram_event (cfun, "insert iterations", num_iterations);

  /* AVAIL_OUT is not needed after insertion so we don't have to
     propagate NEW_SETS from hoist insertion.  */
  FOR_ALL_BB_FN (bb, cfun)
    {
      bitmap_set_pool.remove (NEW_SETS (bb));
      NEW_SETS (bb) = NULL;
    }

  /* Insert expressions for hoisting.  Do a backward walk here since
     inserting into BLOCK exposes new opportunities in its predecessors.
     Since PRE and hoist insertions can cause back-to-back iteration
     and we are interested in PRE insertion exposed hoisting opportunities
     but not in hoisting exposed PRE ones, do hoist insertion only after
     the PRE insertion iteration has finished, and do not iterate it.  */
  if (flag_code_hoisting)
    for (int idx = rpo_num - 1; idx >= 0; --idx)
      {
	basic_block block = BASIC_BLOCK_FOR_FN (cfun, rpo[idx]);
	if (EDGE_COUNT (block->succs) >= 2)
	  changed |= do_hoist_insertion (block);
      }

  free (rpo);
}


/* Compute the AVAIL set for all basic blocks.

   This function performs value numbering of the statements in each basic
   block.  The AVAIL sets are built from information we glean while doing
   this value numbering, since the AVAIL sets contain only one entry per
   value.

   AVAIL_IN[BLOCK] = AVAIL_OUT[dom(BLOCK)].
   AVAIL_OUT[BLOCK] = AVAIL_IN[BLOCK] U PHI_GEN[BLOCK] U TMP_GEN[BLOCK].
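
   For illustration, with an assumed pair of blocks where dom(B2) = B1
   and B1 contains x_1 = a_1 + b_1, the equations give

     AVAIL_OUT[B1] = AVAIL_IN[B1] U {x_1}
     AVAIL_IN[B2]  = AVAIL_OUT[B1]

   so x_1 is the available leader for the value of a_1 + b_1 in every
   block dominated by B1.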
*/

static void
compute_avail (void)
{
  basic_block block, son;
  basic_block *worklist;
  size_t sp = 0;
  unsigned i;
  tree name;

  /* We pretend that default definitions are defined in the entry block.
     This includes function arguments and the static chain decl.  */
  FOR_EACH_SSA_NAME (i, name, cfun)
    {
      pre_expr e;
      if (!SSA_NAME_IS_DEFAULT_DEF (name)
	  || has_zero_uses (name)
	  || virtual_operand_p (name))
	continue;

      e = get_or_alloc_expr_for_name (name);
      add_to_value (get_expr_value_id (e), e);
      bitmap_insert_into_set (TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (cfun)), e);
      bitmap_value_insert_into_set (AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
				    e);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_bitmap_set (dump_file, TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			"tmp_gen", ENTRY_BLOCK);
      print_bitmap_set (dump_file, AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			"avail_out", ENTRY_BLOCK);
    }

  /* Allocate the worklist.  */
  worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));

  /* Seed the algorithm by putting the dominator children of the entry
     block on the worklist.  */
  for (son = first_dom_son (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    worklist[sp++] = son;

  BB_LIVE_VOP_ON_EXIT (ENTRY_BLOCK_PTR_FOR_FN (cfun))
    = ssa_default_def (cfun, gimple_vop (cfun));

  /* Loop until the worklist is empty.  */
  while (sp)
    {
      gimple *stmt;
      basic_block dom;

      /* Pick a block from the worklist.  */
      block = worklist[--sp];
      vn_context_bb = block;

      /* Initially, the set of available values in BLOCK is that of
	 its immediate dominator.  */
      dom = get_immediate_dominator (CDI_DOMINATORS, block);
      if (dom)
	{
	  bitmap_set_copy (AVAIL_OUT (block), AVAIL_OUT (dom));
	  BB_LIVE_VOP_ON_EXIT (block) = BB_LIVE_VOP_ON_EXIT (dom);
	}

      /* Generate values for PHI nodes.  */
      for (gphi_iterator gsi = gsi_start_phis (block); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  tree result = gimple_phi_result (gsi.phi ());

	  /* We have no need for virtual phis, as they don't represent
	     actual computations.  */
	  if (virtual_operand_p (result))
	    {
	      BB_LIVE_VOP_ON_EXIT (block) = result;
	      continue;
	    }

	  pre_expr e = get_or_alloc_expr_for_name (result);
	  add_to_value (get_expr_value_id (e), e);
	  bitmap_value_insert_into_set (AVAIL_OUT (block), e);
	  bitmap_insert_into_set (PHI_GEN (block), e);
	}

      BB_MAY_NOTRETURN (block) = 0;

      /* Now compute value numbers and populate value sets with all
	 the expressions computed in BLOCK.  */
      for (gimple_stmt_iterator gsi = gsi_start_bb (block); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  ssa_op_iter iter;
	  tree op;

	  stmt = gsi_stmt (gsi);

	  /* Cache whether the basic-block has any non-visible side-effect
	     or control flow.
	     If this isn't a call or it is the last stmt in the
	     basic-block then the CFG represents things correctly.  */
	  if (is_gimple_call (stmt) && !stmt_ends_bb_p (stmt))
	    {
	      /* Non-looping const functions always return normally.
		 Otherwise the call might not return, or might have
		 side-effects that forbid hoisting possibly trapping
		 expressions before it.  */
	      int flags = gimple_call_flags (stmt);
	      if (!(flags & ECF_CONST)
		  || (flags & ECF_LOOPING_CONST_OR_PURE))
		BB_MAY_NOTRETURN (block) = 1;
	    }
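	  /* Illustrative example (hand-written, assuming foo is an
	     unknown extern function):

	       foo ();            <- may not return, sets BB_MAY_NOTRETURN
	       t_1 = a_2 / b_3;   <- may trap

	     The division must not become anticipatable across the call,
	     which the BB_MAY_NOTRETURN checks below enforce for
	     potentially trapping NARYs and REFERENCEs.  */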
	  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF)
	    {
	      pre_expr e = get_or_alloc_expr_for_name (op);

	      add_to_value (get_expr_value_id (e), e);
	      bitmap_insert_into_set (TMP_GEN (block), e);
	      bitmap_value_insert_into_set (AVAIL_OUT (block), e);
	    }

	  if (gimple_vdef (stmt))
	    BB_LIVE_VOP_ON_EXIT (block) = gimple_vdef (stmt);

	  if (gimple_has_side_effects (stmt)
	      || stmt_could_throw_p (cfun, stmt)
	      || is_gimple_debug (stmt))
	    continue;

	  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	    {
	      if (ssa_undefined_value_p (op))
		continue;
	      pre_expr e = get_or_alloc_expr_for_name (op);
	      bitmap_value_insert_into_set (EXP_GEN (block), e);
	    }

	  switch (gimple_code (stmt))
	    {
	    case GIMPLE_RETURN:
	      continue;

	    case GIMPLE_CALL:
	      {
		vn_reference_t ref;
		vn_reference_s ref1;
		pre_expr result = NULL;

		/* We can value number only calls to real functions.  */
		if (gimple_call_internal_p (stmt))
		  continue;

		vn_reference_lookup_call (as_a <gcall *> (stmt), &ref, &ref1);
		if (!ref)
		  continue;

		/* If the value of the call is not invalidated in
		   this block until it is computed, add the expression
		   to EXP_GEN.  */
		if (!gimple_vuse (stmt)
		    || gimple_code
			 (SSA_NAME_DEF_STMT (gimple_vuse (stmt))) == GIMPLE_PHI
		    || gimple_bb (SSA_NAME_DEF_STMT
				    (gimple_vuse (stmt))) != block)
		  {
		    result = pre_expr_pool.allocate ();
		    result->kind = REFERENCE;
		    result->id = 0;
		    result->loc = gimple_location (stmt);
		    PRE_EXPR_REFERENCE (result) = ref;

		    get_or_alloc_expression_id (result);
		    add_to_value (get_expr_value_id (result), result);
		    bitmap_value_insert_into_set (EXP_GEN (block), result);
		  }
		continue;
	      }
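	      /* An assumed example for the VUSE check above:

		   *p_1 = 1;             <- the VDEF in this block
		   x_2 = pure_fn (p_1);  <- its VUSE is defined by the store

		 The call's value only becomes valid after the store, so
		 the expression is not put into EXP_GEN; it could not be
		 re-computed at the start of the block.  */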
	    case GIMPLE_ASSIGN:
	      {
		pre_expr result = NULL;
		switch (vn_get_stmt_kind (stmt))
		  {
		  case VN_NARY:
		    {
		      enum tree_code code = gimple_assign_rhs_code (stmt);
		      vn_nary_op_t nary;

		      /* COND_EXPR and VEC_COND_EXPR are awkward in
			 that they contain an embedded complex expression.
			 Don't even try to shove those through PRE.  */
		      if (code == COND_EXPR
			  || code == VEC_COND_EXPR)
			continue;

		      vn_nary_op_lookup_stmt (stmt, &nary);
		      if (!nary || nary->predicated_values)
			continue;

		      /* If the NARY traps and there was a preceding
			 point in the block that might not return, avoid
			 adding the nary to EXP_GEN.  */
		      if (BB_MAY_NOTRETURN (block)
			  && vn_nary_may_trap (nary))
			continue;

		      result = pre_expr_pool.allocate ();
		      result->kind = NARY;
		      result->id = 0;
		      result->loc = gimple_location (stmt);
		      PRE_EXPR_NARY (result) = nary;
		      break;
		    }

		  case VN_REFERENCE:
		    {
		      tree rhs1 = gimple_assign_rhs1 (stmt);
		      ao_ref rhs1_ref;
		      ao_ref_init (&rhs1_ref, rhs1);
		      alias_set_type set = ao_ref_alias_set (&rhs1_ref);
		      alias_set_type base_set
			= ao_ref_base_alias_set (&rhs1_ref);
		      vec<vn_reference_op_s> operands
			= vn_reference_operands_for_lookup (rhs1);
		      vn_reference_t ref;
		      vn_reference_lookup_pieces (gimple_vuse (stmt), set,
						  base_set, TREE_TYPE (rhs1),
						  operands, &ref, VN_WALK);
		      if (!ref)
			{
			  operands.release ();
			  continue;
			}

		      /* If the REFERENCE traps and there was a preceding
			 point in the block that might not return, avoid
			 adding the reference to EXP_GEN.  */
		      if (BB_MAY_NOTRETURN (block)
			  && vn_reference_may_trap (ref))
			continue;

		      /* If the value of the reference is not invalidated in
			 this block until it is computed, add the expression
			 to EXP_GEN.  */
		      if (gimple_vuse (stmt))
			{
			  gimple *def_stmt;
			  bool ok = true;
			  def_stmt = SSA_NAME_DEF_STMT (gimple_vuse (stmt));
			  while (!gimple_nop_p (def_stmt)
				 && gimple_code (def_stmt) != GIMPLE_PHI
				 && gimple_bb (def_stmt) == block)
			    {
			      if (stmt_may_clobber_ref_p
				    (def_stmt, gimple_assign_rhs1 (stmt)))
				{
				  ok = false;
				  break;
				}
			      def_stmt
				= SSA_NAME_DEF_STMT (gimple_vuse (def_stmt));
			    }
			  if (!ok)
			    {
			      operands.release ();
			      continue;
			    }
			}

		      /* If the load was value-numbered to another
			 load make sure we do not use its expression
			 for insertion if it wouldn't be a valid
			 replacement.  */
		      /* At the moment we have a testcase
			 for hoist insertion of aligned vs. misaligned
			 variants in gcc.dg/torture/pr65270-1.c, thus
			 with just alignment to be considered we can
			 simply replace the expression in the hashtable
			 with the most conservative one.  */
		      vn_reference_op_t ref1 = &ref->operands.last ();
		      while (ref1->opcode != TARGET_MEM_REF
			     && ref1->opcode != MEM_REF
			     && ref1 != &ref->operands[0])
			--ref1;
		      vn_reference_op_t ref2 = &operands.last ();
		      while (ref2->opcode != TARGET_MEM_REF
			     && ref2->opcode != MEM_REF
			     && ref2 != &operands[0])
			--ref2;
		      if ((ref1->opcode == TARGET_MEM_REF
			   || ref1->opcode == MEM_REF)
			  && (TYPE_ALIGN (ref1->type)
			      > TYPE_ALIGN (ref2->type)))
			ref1->type
			  = build_aligned_type (ref1->type,
						TYPE_ALIGN (ref2->type));
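		      /* Illustration of the adjustment above (assumed
			 types): if the hashtable entry was recorded with
			 a naturally aligned int type while the current
			 statement loads through an under-aligned variant,
			 the entry's type is demoted to the lower
			 alignment, which remains correct for every place
			 the expression might later be inserted.  */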
		      /* TBAA behavior is an obvious part of the value, so
			 make sure the hashtable entry covers it as well,
			 by adjusting the ref's alias set and its base.  */
		      if (ref->set == set
			  || alias_set_subset_of (set, ref->set))
			;
		      else if (ref1->opcode != ref2->opcode
			       || (ref1->opcode != MEM_REF
				   && ref1->opcode != TARGET_MEM_REF))
			{
			  /* With mismatching base opcodes or bases
			     other than MEM_REF or TARGET_MEM_REF we
			     can't do any easy TBAA adjustment.  */
			  operands.release ();
			  continue;
			}
		      else if (alias_set_subset_of (ref->set, set))
			{
			  ref->set = set;
			  if (ref1->opcode == MEM_REF)
			    ref1->op0
			      = wide_int_to_tree (TREE_TYPE (ref2->op0),
						  wi::to_wide (ref1->op0));
			  else
			    ref1->op2
			      = wide_int_to_tree (TREE_TYPE (ref2->op2),
						  wi::to_wide (ref1->op2));
			}
		      else
			{
			  ref->set = 0;
			  if (ref1->opcode == MEM_REF)
			    ref1->op0
			      = wide_int_to_tree (ptr_type_node,
						  wi::to_wide (ref1->op0));
			  else
			    ref1->op2
			      = wide_int_to_tree (ptr_type_node,
						  wi::to_wide (ref1->op2));
			}
		      operands.release ();

		      result = pre_expr_pool.allocate ();
		      result->kind = REFERENCE;
		      result->id = 0;
		      result->loc = gimple_location (stmt);
		      PRE_EXPR_REFERENCE (result) = ref;
		      break;
		    }

		  default:
		    continue;
		  }

		get_or_alloc_expression_id (result);
		add_to_value (get_expr_value_id (result), result);
		bitmap_value_insert_into_set (EXP_GEN (block), result);
		continue;
	      }
	    default:
	      break;
	    }
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  print_bitmap_set (dump_file, EXP_GEN (block),
			    "exp_gen", block->index);
	  print_bitmap_set (dump_file, PHI_GEN (block),
			    "phi_gen", block->index);
	  print_bitmap_set (dump_file, TMP_GEN (block),
			    "tmp_gen", block->index);
	  print_bitmap_set (dump_file, AVAIL_OUT (block),
			    "avail_out", block->index);
	}

      /* Put the dominator children of BLOCK on the worklist of blocks
	 to compute available sets for.  */
      for (son = first_dom_son (CDI_DOMINATORS, block);
	   son;
	   son = next_dom_son (CDI_DOMINATORS, son))
	worklist[sp++] = son;
    }
  vn_context_bb = NULL;

  free (worklist);
}

/* Initialize data structures used by PRE.  */

static void
init_pre (void)
{
  basic_block bb;

  next_expression_id = 1;
  expressions.create (0);
  expressions.safe_push (NULL);
  value_expressions.create (get_max_value_id () + 1);
  value_expressions.safe_grow_cleared (get_max_value_id () + 1);
  name_to_id.create (0);

  inserted_exprs = BITMAP_ALLOC (NULL);

  connect_infinite_loops_to_exit ();
  memset (&pre_stats, 0, sizeof (pre_stats));

  alloc_aux_for_blocks (sizeof (struct bb_bitmap_sets));

  calculate_dominance_info (CDI_DOMINATORS);

  bitmap_obstack_initialize (&grand_bitmap_obstack);
  phi_translate_table = new hash_table<expr_pred_trans_d> (5110);
  expression_to_id = new hash_table<pre_expr_d> (num_ssa_names * 3);
  FOR_ALL_BB_FN (bb, cfun)
    {
      EXP_GEN (bb) = bitmap_set_new ();
      PHI_GEN (bb) = bitmap_set_new ();
      TMP_GEN (bb) = bitmap_set_new ();
      AVAIL_OUT (bb) = bitmap_set_new ();
    }
}


/* Deallocate data structures used by PRE.  */

static void
fini_pre ()
{
  value_expressions.release ();
  expressions.release ();
  BITMAP_FREE (inserted_exprs);
  bitmap_obstack_release (&grand_bitmap_obstack);
  bitmap_set_pool.release ();
  pre_expr_pool.release ();
  delete phi_translate_table;
  phi_translate_table = NULL;
  delete expression_to_id;
  expression_to_id = NULL;
  name_to_id.release ();

  free_aux_for_blocks ();
}

namespace {

const pass_data pass_data_pre =
{
  GIMPLE_PASS, /* type */
  "pre", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_PRE, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  TODO_rebuild_alias, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_pre : public gimple_opt_pass
{
public:
  pass_pre (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_pre, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    { return flag_tree_pre != 0 || flag_code_hoisting != 0; }
  virtual unsigned int execute (function *);

}; // class pass_pre

/* Valueization hook for RPO VN when we are calling back to it
   at ANTIC compute time.  */

static tree
pre_valueize (tree name)
{
  if (TREE_CODE (name) == SSA_NAME)
    {
      tree tem = VN_INFO (name)->valnum;
      if (tem != VN_TOP && tem != name)
	{
	  if (TREE_CODE (tem) != SSA_NAME
	      || SSA_NAME_IS_DEFAULT_DEF (tem))
	    return tem;
	  /* We create temporary SSA names for representatives that
	     do not have a definition (yet) but are not default defs
	     either; assume those are fine to use.  */
	  basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (tem));
	  if (! def_bb
	      || dominated_by_p (CDI_DOMINATORS, vn_context_bb, def_bb))
	    return tem;
	  /* ??? Now we could look for a leader.  Ideally we'd somehow
	     expose RPO VN leaders and get rid of AVAIL_OUT as well...  */
	}
    }
  return name;
}

unsigned int
pass_pre::execute (function *fun)
{
  unsigned int todo = 0;

  do_partial_partial =
    flag_tree_partial_pre && optimize_function_for_speed_p (fun);

  /* This has to happen before VN runs because
     loop_optimizer_init may create new phis, etc.  */
  loop_optimizer_init (LOOPS_NORMAL);
  split_edges_for_insertion ();
  scev_initialize ();
  calculate_dominance_info (CDI_DOMINATORS);

  run_rpo_vn (VN_WALK);

  init_pre ();

  vn_valueize = pre_valueize;
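  /* Illustrative note: with x_2 value-numbered to x_1, the hook above
     lets the ANTIC-time VN see x_1 wherever x_2 appears, provided the
     definition of x_1 dominates the block currently being processed
     (vn_context_bb).  */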
  /* Insert can get quite slow on an incredibly large number of basic
     blocks due to some quadratic behavior.  Until this behavior is
     fixed, don't run it when we have an incredibly large number of
     bb's.  If we aren't going to run insert, there is no point in
     computing ANTIC, either, even though it's plenty fast, nor do we
     require AVAIL.  */
  if (n_basic_blocks_for_fn (fun) < 4000)
    {
      compute_avail ();
      compute_antic ();
      insert ();
    }

  /* Make sure to remove fake edges before committing our inserts.
     This makes sure we don't end up with extra critical edges that
     we would need to split.  */
  remove_fake_exit_edges ();
  gsi_commit_edge_inserts ();

  /* Elimination folds statements, which might (though it should not)
     end up not keeping virtual operands up-to-date.  */
  gcc_assert (!need_ssa_update_p (fun));

  statistics_counter_event (fun, "Insertions", pre_stats.insertions);
  statistics_counter_event (fun, "PA inserted", pre_stats.pa_insert);
  statistics_counter_event (fun, "HOIST inserted", pre_stats.hoist_insert);
  statistics_counter_event (fun, "New PHIs", pre_stats.phis);

  todo |= eliminate_with_rpo_vn (inserted_exprs);

  vn_valueize = NULL;

  /* Because we don't follow exactly the standard PRE algorithm, and decide not
     to insert PHI nodes sometimes, and because value numbering of casts isn't
     perfect, we sometimes end up inserting dead code.  This simple DCE-like
     pass removes any insertions we made that weren't actually used.  */
  simple_dce_from_worklist (inserted_exprs);

  fini_pre ();

  scev_finalize ();
  loop_optimizer_finalize ();

  /* TODO: tail_merge_optimize may merge all predecessors of a block, in which
     case we can merge the block with the remaining predecessor of the block.
     It should either:
     - call merge_blocks after each tail merge iteration
     - call merge_blocks after all tail merge iterations
     - mark TODO_cleanup_cfg when necessary
     - share the cfg cleanup with fini_pre.  */
  todo |= tail_merge_optimize (todo);

  free_rpo_vn ();

  /* Tail merging invalidates the virtual SSA web, together with
     cfg-cleanup opportunities exposed by PRE this will wreck the
     SSA updating machinery.  So make sure to run update-ssa
     manually, before eventually scheduling cfg-cleanup as part of
     the todo.  */
  update_ssa (TODO_update_ssa_only_virtuals);

  return todo;
}

} // anon namespace

gimple_opt_pass *
make_pass_pre (gcc::context *ctxt)
{
  return new pass_pre (ctxt);
}