/* Full and partial redundancy elimination and code hoisting on SSA GIMPLE.
   Copyright (C) 2001-2019 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org> and Steven Bosscher
   <stevenb@suse.de>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-ssa-sccvn.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "dbgcnt.h"
#include "domwalk.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-dce.h"
#include "tree-cfgcleanup.h"
#include "alias.h"

/* Even though this file is called tree-ssa-pre.c, we actually
   implement a bit more than just PRE here.  All of them piggy-back
   on GVN which is implemented in tree-ssa-sccvn.c.

     1. Full Redundancy Elimination (FRE)
	This is the elimination phase of GVN.

     2. Partial Redundancy Elimination (PRE)
	This adds computation of AVAIL_OUT and ANTIC_IN and
	performs expression insertion to form GVN-PRE.

     3. Code hoisting
	This optimization uses the ANTIC_IN sets computed for PRE
	to move expressions further up than PRE would do, to make
	multiple computations of the same value fully redundant.
	This pass is explained below (after the explanation of the
	basic algorithm for PRE).
*/

/* TODO:

   1. Avail sets can be shared by making an avail_find_leader that
      walks up the dominator tree and looks in those avail sets.
      This might affect code optimality, it's unclear right now.
      Currently the AVAIL_OUT sets are the remaining source of
      quadratic memory use in GVN-PRE.
   2. Strength reduction can be performed by anticipating expressions
      we can repair later on.
   3. We can do back-substitution or smarter value numbering to catch
      commutative expressions split up over multiple statements.
*/

/* For ease of terminology, "expression node" in the below refers to
   every expression node but GIMPLE_ASSIGN, because GIMPLE_ASSIGNs
   represent the actual statement containing the expressions we care about,
   and we cache the value number by putting it in the expression.  */

/* Basic algorithm for Partial Redundancy Elimination:

   First we walk the statements to generate the AVAIL sets, the
   EXP_GEN sets, and the tmp_gen sets.  EXP_GEN sets represent the
   generation of values/expressions by a given block.
   We use them when computing the ANTIC sets.  The AVAIL sets consist
   of SSA_NAME's that represent values, so we know what values are
   available in what blocks.  AVAIL is a forward dataflow problem.  In
   SSA, values are never killed, so we don't need a kill set, or a
   fixpoint iteration, in order to calculate the AVAIL sets.  In
   traditional parlance, AVAIL sets tell us the downsafety of the
   expressions/values.

   Next, we generate the ANTIC sets.  These sets represent the
   anticipatable expressions.  ANTIC is a backwards dataflow
   problem.  An expression is anticipatable in a given block if it could
   be generated in that block.  This means that if we had to perform
   an insertion in that block, of the value of that expression, we
   could.  Calculating the ANTIC sets requires phi translation of
   expressions, because the flow goes backwards through phis.  We must
   iterate to a fixpoint of the ANTIC sets, because we have a kill
   set.  Even in SSA form, values are not live over the entire
   function, only from their definition point onwards.  So we have to
   remove values from the ANTIC set once we go past the definition
   point of the leaders that make them up.
   compute_antic/compute_antic_aux performs this computation.

   Third, we perform insertions to make partially redundant
   expressions fully redundant.

   An expression is partially redundant (excluding partial
   anticipation) if:

   1. It is AVAIL in some, but not all, of the predecessors of a
      given block.
   2. It is ANTIC in all the predecessors.

   In order to make it fully redundant, we insert the expression into
   the predecessors where it is not available, but is ANTIC.

   When optimizing for size, we only eliminate the partial redundancy
   if we need to insert in only one predecessor.  This avoids almost
   completely the code size increase that PRE usually causes.

   For the partial anticipation case, we only perform insertion if it
   is partially anticipated in some block, and fully available in all
   of the predecessors.

   do_pre_regular_insertion/do_pre_partial_partial_insertion
   perform these steps, driven by insert/insert_aux.

   Fourth, we eliminate fully redundant expressions.
   This is a simple statement walk that replaces redundant
   calculations with the now available values.  */
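/* As an illustration of the above (a hand-written sketch, not taken
   from any testcase; the SSA names are made up), consider:

	if (cond)
	  x_1 = a_2 + b_3;	<- a_2 + b_3 is AVAIL here
	else
	  c_4 = ...;		<- but not here, though it is ANTIC
	y_5 = a_2 + b_3;	<- partially redundant

   The expression is ANTIC in both predecessors but AVAIL in only one,
   so the insertion step adds it to the other predecessor and merges
   the two available values with a PHI:

	if (cond)
	  x_1 = a_2 + b_3;
	else
	  {
	    c_4 = ...;
	    pretmp_6 = a_2 + b_3;	<- inserted
	  }
	prephitmp_7 = PHI <x_1, pretmp_6>
	y_5 = prephitmp_7;	<- elimination replaced the now fully
				   redundant computation  */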
/* Basic algorithm for Code Hoisting:

   Code hoisting is moving value computations up in the control flow
   graph to make multiple copies redundant.  Typically this is a size
   optimization, but there are cases where it also is helpful for speed.

   A simple code hoisting algorithm is implemented that piggy-backs on
   the PRE infrastructure.  For code hoisting, we have to know ANTIC_OUT
   which is effectively ANTIC_IN - AVAIL_OUT.  The latter two have to be
   computed for PRE, and we can use them to perform a limited version of
   code hoisting, too.

   For the purpose of this implementation, a value is hoistable to a basic
   block B if the following properties are met:

   1. The value is in ANTIC_IN(B) -- the value will be computed on all
      paths from B to function exit and it can be computed in B;

   2. The value is not in AVAIL_OUT(B) -- there would be no need to
      compute the value again and make it available twice;

   3. All successors of B are dominated by B -- makes sure that inserting
      a computation of the value in B will make the remaining
      computations fully redundant;

   4. At least one successor has the value in AVAIL_OUT -- to avoid
      hoisting values up too far;

   5. There are at least two successors of B -- hoisting in straight
      line code is pointless.

   The third condition is not strictly necessary, but it would complicate
   the hoisting pass a lot.  In fact, I don't know of any code hoisting
   algorithm that does not have this requirement.  Fortunately, experiments
   have shown that most candidate hoistable values are in regions that meet
   this condition (e.g. diamond-shaped regions).

   The fourth condition is necessary to avoid hoisting things up too far
   away from the uses of the value.  Nothing else limits the algorithm
   from hoisting everything up as far as ANTIC_IN allows.  Experiments
   with SPEC and CSiBE have shown that hoisting up too far results in more
   spilling, less benefit for code size, and worse benchmark scores.
   Fortunately, in practice most of the interesting hoisting opportunities
   are caught despite this limitation.

   For hoistable values that meet all conditions, expressions are inserted
   to make the calculation of the hoistable value fully redundant.  We
   perform code hoisting insertions after each round of PRE insertions,
   because code hoisting never exposes new PRE opportunities, but PRE can
   create new code hoisting opportunities.

   The code hoisting algorithm is implemented in do_hoist_insert, driven
   by insert/insert_aux.  */
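/* Again for illustration (hand-written, SSA names made up), a diamond
   where both arms compute the same value:

	B0:  if (cond)		<- two successors, both dominated by B0
	B1:    x_1 = a_2 + b_3;
	     else
	B2:    y_4 = a_2 + b_3;

   a_2 + b_3 is in ANTIC_IN (B0) since it is computed on all paths to
   the exit, it is not in AVAIL_OUT (B0), and it is in AVAIL_OUT of
   both successors, so all five conditions hold and it is hoisted:

	B0:  pretmp_5 = a_2 + b_3;	<- inserted
	     if (cond)
	B1:    x_1 = pretmp_5;
	     else
	B2:    y_4 = pretmp_5;  */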
/* Representations of value numbers:

   Value numbers are represented by a representative SSA_NAME.  We
   will create fake SSA_NAME's in situations where we need a
   representative but do not have one (because it is a complex
   expression).  In order to facilitate storing the value numbers in
   bitmaps, and keep the number of wasted SSA_NAME's down, we also
   associate a value_id with each value number, and create full blown
   ssa_name's only where we actually need them (IE in operands of
   existing expressions).

   Theoretically you could replace all the value_id's with
   SSA_NAME_VERSION, but this would allocate a large number of
   SSA_NAME's (which are each > 30 bytes) just to get a 4 byte number.
   It would also require an additional indirection at each point we
   use the value id.  */

/* Representation of expressions on value numbers:

   Expressions consisting of value numbers are represented the same
   way as our VN internally represents them, with an additional
   "pre_expr" wrapping around them in order to facilitate storing all
   of the expressions in the same sets.  */

/* Representation of sets:

   The dataflow sets do not need to be sorted in any particular order
   for the majority of their lifetime, and are simply represented as
   two bitmaps, one that keeps track of values present in the set, and
   one that keeps track of expressions present in the set.

   When we need them in topological order, we produce that order on
   demand by transforming the bitmap into an array and sorting it into
   topo order.  */
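/* For example (an illustrative sketch with made-up ids): if value-id 5
   stands for both a_2 + b_3 and c_4, a set containing that value has
   bit 5 set in its values bitmap and the expression-ids of one or both
   expressions set in its expressions bitmap.  Keeping the two bitmaps
   separate makes value-based operations such as the ANTIC intersection
   and membership tests cheap, while the expressions bitmap still lets
   us recover a concrete leader expression for insertion.  */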
/* Type of expression, used to know which member of the PRE_EXPR union
   is valid.  */

enum pre_expr_kind
{
  NAME,
  NARY,
  REFERENCE,
  CONSTANT
};

union pre_expr_union
{
  tree name;
  tree constant;
  vn_nary_op_t nary;
  vn_reference_t reference;
};

typedef struct pre_expr_d : nofree_ptr_hash <pre_expr_d>
{
  enum pre_expr_kind kind;
  unsigned int id;
  pre_expr_union u;

  /* hash_table support.  */
  static inline hashval_t hash (const pre_expr_d *);
  static inline int equal (const pre_expr_d *, const pre_expr_d *);
} *pre_expr;

#define PRE_EXPR_NAME(e) (e)->u.name
#define PRE_EXPR_NARY(e) (e)->u.nary
#define PRE_EXPR_REFERENCE(e) (e)->u.reference
#define PRE_EXPR_CONSTANT(e) (e)->u.constant

/* Compare E1 and E2 for equality.  */

inline int
pre_expr_d::equal (const pre_expr_d *e1, const pre_expr_d *e2)
{
  if (e1->kind != e2->kind)
    return false;

  switch (e1->kind)
    {
    case CONSTANT:
      return vn_constant_eq_with_type (PRE_EXPR_CONSTANT (e1),
				       PRE_EXPR_CONSTANT (e2));
    case NAME:
      return PRE_EXPR_NAME (e1) == PRE_EXPR_NAME (e2);
    case NARY:
      return vn_nary_op_eq (PRE_EXPR_NARY (e1), PRE_EXPR_NARY (e2));
    case REFERENCE:
      return vn_reference_eq (PRE_EXPR_REFERENCE (e1),
			      PRE_EXPR_REFERENCE (e2));
    default:
      gcc_unreachable ();
    }
}

/* Hash E.  */

inline hashval_t
pre_expr_d::hash (const pre_expr_d *e)
{
  switch (e->kind)
    {
    case CONSTANT:
      return vn_hash_constant_with_type (PRE_EXPR_CONSTANT (e));
    case NAME:
      return SSA_NAME_VERSION (PRE_EXPR_NAME (e));
    case NARY:
      return PRE_EXPR_NARY (e)->hashcode;
    case REFERENCE:
      return PRE_EXPR_REFERENCE (e)->hashcode;
    default:
      gcc_unreachable ();
    }
}

/* Next global expression id number.  */
static unsigned int next_expression_id;

/* Mapping from expression to id number we can use in bitmap sets.  */
static vec<pre_expr> expressions;
static hash_table<pre_expr_d> *expression_to_id;
static vec<unsigned> name_to_id;

/* Allocate an expression id for EXPR.  */

static inline unsigned int
alloc_expression_id (pre_expr expr)
{
  struct pre_expr_d **slot;
  /* Make sure we won't overflow.  */
  gcc_assert (next_expression_id + 1 > next_expression_id);
  expr->id = next_expression_id++;
  expressions.safe_push (expr);
  if (expr->kind == NAME)
    {
      unsigned version = SSA_NAME_VERSION (PRE_EXPR_NAME (expr));
      /* vec::safe_grow_cleared allocates no headroom.  Avoid frequent
	 re-allocations by using vec::reserve upfront.  */
      unsigned old_len = name_to_id.length ();
      name_to_id.reserve (num_ssa_names - old_len);
      name_to_id.quick_grow_cleared (num_ssa_names);
      gcc_assert (name_to_id[version] == 0);
      name_to_id[version] = expr->id;
    }
  else
    {
      slot = expression_to_id->find_slot (expr, INSERT);
      gcc_assert (!*slot);
      *slot = expr;
    }
  return next_expression_id - 1;
}

/* Return the expression id for tree EXPR.  */

static inline unsigned int
get_expression_id (const pre_expr expr)
{
  return expr->id;
}

/* Return the assigned expression id of EXPR, or 0 if it has none.  */

static inline unsigned int
lookup_expression_id (const pre_expr expr)
{
  struct pre_expr_d **slot;

  if (expr->kind == NAME)
    {
      unsigned version = SSA_NAME_VERSION (PRE_EXPR_NAME (expr));
      if (name_to_id.length () <= version)
	return 0;
      return name_to_id[version];
    }
  else
    {
      slot = expression_to_id->find_slot (expr, NO_INSERT);
      if (!slot)
	return 0;
      return ((pre_expr)*slot)->id;
    }
}

/* Return the existing expression id for EXPR, or create one if one
   does not exist yet.  */

static inline unsigned int
get_or_alloc_expression_id (pre_expr expr)
{
  unsigned int id = lookup_expression_id (expr);
  if (id == 0)
    return alloc_expression_id (expr);
  return expr->id = id;
}

/* Return the expression that has expression id ID.  */

static inline pre_expr
expression_for_id (unsigned int id)
{
  return expressions[id];
}

static object_allocator<pre_expr_d> pre_expr_pool ("pre_expr nodes");

/* Given an SSA_NAME NAME, get or create a pre_expr to represent it.  */

static pre_expr
get_or_alloc_expr_for_name (tree name)
{
  struct pre_expr_d expr;
  pre_expr result;
  unsigned int result_id;

  expr.kind = NAME;
  expr.id = 0;
  PRE_EXPR_NAME (&expr) = name;
  result_id = lookup_expression_id (&expr);
  if (result_id != 0)
    return expression_for_id (result_id);

  result = pre_expr_pool.allocate ();
  result->kind = NAME;
  PRE_EXPR_NAME (result) = name;
  alloc_expression_id (result);
  return result;
}

/* An unordered bitmap set.  One bitmap tracks values, the other,
   expressions.  */
typedef struct bitmap_set
{
  bitmap_head expressions;
  bitmap_head values;
} *bitmap_set_t;

#define FOR_EACH_EXPR_ID_IN_SET(set, id, bi)				\
  EXECUTE_IF_SET_IN_BITMAP (&(set)->expressions, 0, (id), (bi))

#define FOR_EACH_VALUE_ID_IN_SET(set, id, bi)				\
  EXECUTE_IF_SET_IN_BITMAP (&(set)->values, 0, (id), (bi))

/* Mapping from value id to expressions with that value_id.  */
static vec<bitmap> value_expressions;
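/* As an illustrative sketch (the ids are made up): after
   add_to_value (5, e) below has been called for two expressions with
   ids 11 and 42, value_expressions[5] is a bitmap with bits 11 and 42
   set.  bitmap_find_leader and sorted_array_from_bitmap_set use this
   reverse mapping to get from a value-id back to the expressions that
   compute it.  */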
/* Sets that we need to keep track of.  */
typedef struct bb_bitmap_sets
{
  /* The EXP_GEN set, which represents expressions/values generated in
     a basic block.  */
  bitmap_set_t exp_gen;

  /* The PHI_GEN set, which represents PHI results generated in a
     basic block.  */
  bitmap_set_t phi_gen;

  /* The TMP_GEN set, which represents results/temporaries generated
     in a basic block.  IE the LHS of an expression.  */
  bitmap_set_t tmp_gen;

  /* The AVAIL_OUT set, which represents which values are available in
     a given basic block.  */
  bitmap_set_t avail_out;

  /* The ANTIC_IN set, which represents which values are anticipatable
     in a given basic block.  */
  bitmap_set_t antic_in;

  /* The PA_IN set, which represents which values are
     partially anticipatable in a given basic block.  */
  bitmap_set_t pa_in;

  /* The NEW_SETS set, which is used during insertion to augment the
     AVAIL_OUT set of blocks with the new insertions performed during
     the current iteration.  */
  bitmap_set_t new_sets;

  /* A cache for value_dies_in_block_x.  */
  bitmap expr_dies;

  /* The live virtual operand on successor edges.  */
  tree vop_on_exit;

  /* True if we have visited this block during ANTIC calculation.  */
  unsigned int visited : 1;

  /* True when the block contains a call that might not return.  */
  unsigned int contains_may_not_return_call : 1;
} *bb_value_sets_t;

#define EXP_GEN(BB)	((bb_value_sets_t) ((BB)->aux))->exp_gen
#define PHI_GEN(BB)	((bb_value_sets_t) ((BB)->aux))->phi_gen
#define TMP_GEN(BB)	((bb_value_sets_t) ((BB)->aux))->tmp_gen
#define AVAIL_OUT(BB)	((bb_value_sets_t) ((BB)->aux))->avail_out
#define ANTIC_IN(BB)	((bb_value_sets_t) ((BB)->aux))->antic_in
#define PA_IN(BB)	((bb_value_sets_t) ((BB)->aux))->pa_in
#define NEW_SETS(BB)	((bb_value_sets_t) ((BB)->aux))->new_sets
#define EXPR_DIES(BB)	((bb_value_sets_t) ((BB)->aux))->expr_dies
#define BB_VISITED(BB)	((bb_value_sets_t) ((BB)->aux))->visited
#define BB_MAY_NOTRETURN(BB) ((bb_value_sets_t) ((BB)->aux))->contains_may_not_return_call
#define BB_LIVE_VOP_ON_EXIT(BB) ((bb_value_sets_t) ((BB)->aux))->vop_on_exit


/* This structure is used to keep track of statistics on what
   optimization PRE was able to perform.  */
static struct
{
  /* The number of new expressions/temporaries generated by PRE.  */
  int insertions;

  /* The number of inserts found due to partial anticipation.  */
  int pa_insert;

  /* The number of inserts made for code hoisting.  */
  int hoist_insert;

  /* The number of new PHI nodes added by PRE.  */
  int phis;
} pre_stats;

static bool do_partial_partial;
static pre_expr bitmap_find_leader (bitmap_set_t, unsigned int);
static void bitmap_value_insert_into_set (bitmap_set_t, pre_expr);
static void bitmap_value_replace_in_set (bitmap_set_t, pre_expr);
static void bitmap_set_copy (bitmap_set_t, bitmap_set_t);
static bool bitmap_set_contains_value (bitmap_set_t, unsigned int);
static void bitmap_insert_into_set (bitmap_set_t, pre_expr);
static bitmap_set_t bitmap_set_new (void);
static tree create_expression_by_pieces (basic_block, pre_expr, gimple_seq *,
					 tree);
static tree find_or_generate_expression (basic_block, tree, gimple_seq *);
static unsigned int get_expr_value_id (pre_expr);

/* We can add and remove elements and entries to and from sets
   and hash tables, so we use alloc pools for them.  */

static object_allocator<bitmap_set> bitmap_set_pool ("Bitmap sets");
static bitmap_obstack grand_bitmap_obstack;

/* A three tuple {e, pred, v} used to cache phi translations in the
   phi_translate_table.  */

typedef struct expr_pred_trans_d : free_ptr_hash<expr_pred_trans_d>
{
  /* The expression.  */
  pre_expr e;

  /* The predecessor block along which we translated the expression.  */
  basic_block pred;

  /* The value that resulted from the translation.  */
  pre_expr v;

  /* The hashcode for the expression, pred pair.  This is cached for
     speed reasons.  */
  hashval_t hashcode;

  /* hash_table support.  */
  static inline hashval_t hash (const expr_pred_trans_d *);
  static inline int equal (const expr_pred_trans_d *, const expr_pred_trans_d *);
} *expr_pred_trans_t;
typedef const struct expr_pred_trans_d *const_expr_pred_trans_t;

inline hashval_t
expr_pred_trans_d::hash (const expr_pred_trans_d *e)
{
  return e->hashcode;
}

inline int
expr_pred_trans_d::equal (const expr_pred_trans_d *ve1,
			  const expr_pred_trans_d *ve2)
{
  basic_block b1 = ve1->pred;
  basic_block b2 = ve2->pred;

  /* If they are not translations for the same basic block, they can't
     be equal.  */
  if (b1 != b2)
    return false;
  return pre_expr_d::equal (ve1->e, ve2->e);
}

/* The phi_translate_table caches phi translations for a given
   expression and predecessor.  */
static hash_table<expr_pred_trans_d> *phi_translate_table;

/* Add the tuple mapping from {expression E, basic block PRED} to
   the phi translation table and return whether it pre-existed.  */

static inline bool
phi_trans_add (expr_pred_trans_t *entry, pre_expr e, basic_block pred)
{
  expr_pred_trans_t *slot;
  expr_pred_trans_d tem;
  hashval_t hash = iterative_hash_hashval_t (pre_expr_d::hash (e),
					     pred->index);
  tem.e = e;
  tem.pred = pred;
  tem.hashcode = hash;
  slot = phi_translate_table->find_slot_with_hash (&tem, hash, INSERT);
  if (*slot)
    {
      *entry = *slot;
      return true;
    }

  *entry = *slot = XNEW (struct expr_pred_trans_d);
  (*entry)->e = e;
  (*entry)->pred = pred;
  (*entry)->hashcode = hash;
  return false;
}
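/* A sketch of the intended usage pattern, mirroring what phi_translate
   below does (the variable names are illustrative):

	expr_pred_trans_t slot;
	if (phi_trans_add (&slot, expr, pred))
	  return slot->v;	<- cache hit, translation already known
	slot->v = NULL;		<- mark in-progress to terminate recursion
	... compute the translation and store it into slot->v ...  */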
/* Add expression E to the expression set of value id V.  */

static void
add_to_value (unsigned int v, pre_expr e)
{
  bitmap set;

  gcc_checking_assert (get_expr_value_id (e) == v);

  if (v >= value_expressions.length ())
    {
      value_expressions.safe_grow_cleared (v + 1);
    }

  set = value_expressions[v];
  if (!set)
    {
      set = BITMAP_ALLOC (&grand_bitmap_obstack);
      value_expressions[v] = set;
    }

  bitmap_set_bit (set, get_or_alloc_expression_id (e));
}

/* Create a new bitmap set and return it.  */

static bitmap_set_t
bitmap_set_new (void)
{
  bitmap_set_t ret = bitmap_set_pool.allocate ();
  bitmap_initialize (&ret->expressions, &grand_bitmap_obstack);
  bitmap_initialize (&ret->values, &grand_bitmap_obstack);
  return ret;
}

/* Return the value id for a PRE expression EXPR.  */

static unsigned int
get_expr_value_id (pre_expr expr)
{
  unsigned int id;
  switch (expr->kind)
    {
    case CONSTANT:
      id = get_constant_value_id (PRE_EXPR_CONSTANT (expr));
      break;
    case NAME:
      id = VN_INFO (PRE_EXPR_NAME (expr))->value_id;
      break;
    case NARY:
      gcc_assert (!PRE_EXPR_NARY (expr)->predicated_values);
      id = PRE_EXPR_NARY (expr)->value_id;
      break;
    case REFERENCE:
      id = PRE_EXPR_REFERENCE (expr)->value_id;
      break;
    default:
      gcc_unreachable ();
    }
  /* ??? We cannot assert that expr has a value-id (it can be 0), because
     we assign value-ids only to expressions that have a result
     in set_hashtable_value_ids.  */
  return id;
}

/* Return a VN valnum (SSA name or constant) for the PRE value-id VAL.  */

static tree
vn_valnum_from_value_id (unsigned int val)
{
  bitmap_iterator bi;
  unsigned int i;
  bitmap exprset = value_expressions[val];
  EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
    {
      pre_expr vexpr = expression_for_id (i);
      if (vexpr->kind == NAME)
	return VN_INFO (PRE_EXPR_NAME (vexpr))->valnum;
      else if (vexpr->kind == CONSTANT)
	return PRE_EXPR_CONSTANT (vexpr);
    }
  return NULL_TREE;
}

/* Insert an expression EXPR into a bitmapped set.  */

static void
bitmap_insert_into_set (bitmap_set_t set, pre_expr expr)
{
  unsigned int val = get_expr_value_id (expr);
  if (! value_id_constant_p (val))
    {
      /* Note this is the only function causing multiple expressions
	 for the same value to appear in a set.  This is needed for
	 TMP_GEN, PHI_GEN and NEW_SETs.  */
      bitmap_set_bit (&set->values, val);
      bitmap_set_bit (&set->expressions, get_or_alloc_expression_id (expr));
    }
}

/* Copy a bitmapped set ORIG, into bitmapped set DEST.  */

static void
bitmap_set_copy (bitmap_set_t dest, bitmap_set_t orig)
{
  bitmap_copy (&dest->expressions, &orig->expressions);
  bitmap_copy (&dest->values, &orig->values);
}


/* Free memory used up by SET.  */
static void
bitmap_set_free (bitmap_set_t set)
{
  bitmap_clear (&set->expressions);
  bitmap_clear (&set->values);
}


/* Generate a topologically ordered array of bitmap set SET.  */

static vec<pre_expr>
sorted_array_from_bitmap_set (bitmap_set_t set)
{
  unsigned int i, j;
  bitmap_iterator bi, bj;
  vec<pre_expr> result;

  /* Pre-allocate enough space for the array.  */
  result.create (bitmap_count_bits (&set->expressions));

  FOR_EACH_VALUE_ID_IN_SET (set, i, bi)
    {
      /* The number of expressions having a given value is usually
	 relatively small.  Thus, rather than making a vector of all
	 the expressions and sorting it by value-id, we walk the values
	 and check in the reverse mapping that tells us what expressions
	 have a given value, to filter those in our set.  As a result,
	 the expressions are inserted in value-id order, which means
	 topological order.

	 If this is somehow a significant loss for some cases, we can
	 choose which set to walk based on the set size.  */
      bitmap exprset = value_expressions[i];
      EXECUTE_IF_SET_IN_BITMAP (exprset, 0, j, bj)
	{
	  if (bitmap_bit_p (&set->expressions, j))
	    result.quick_push (expression_for_id (j));
	}
    }

  return result;
}

/* Subtract all expressions contained in ORIG from DEST.  */

static bitmap_set_t
bitmap_set_subtract_expressions (bitmap_set_t dest, bitmap_set_t orig)
{
  bitmap_set_t result = bitmap_set_new ();
  bitmap_iterator bi;
  unsigned int i;

  bitmap_and_compl (&result->expressions, &dest->expressions,
		    &orig->expressions);

  FOR_EACH_EXPR_ID_IN_SET (result, i, bi)
    {
      pre_expr expr = expression_for_id (i);
      unsigned int value_id = get_expr_value_id (expr);
      bitmap_set_bit (&result->values, value_id);
    }

  return result;
}

/* Subtract all values in bitmap set B from bitmap set A.  */
static void
bitmap_set_subtract_values (bitmap_set_t a, bitmap_set_t b)
{
  unsigned int i;
  bitmap_iterator bi;
  unsigned to_remove = -1U;
  bitmap_and_compl_into (&a->values, &b->values);
  /* Queue each removal and apply it one iteration later, so we never
     clear a bit of the expressions bitmap while iterating over it.  */
  FOR_EACH_EXPR_ID_IN_SET (a, i, bi)
    {
      if (to_remove != -1U)
	{
	  bitmap_clear_bit (&a->expressions, to_remove);
	  to_remove = -1U;
	}
      pre_expr expr = expression_for_id (i);
      if (! bitmap_bit_p (&a->values, get_expr_value_id (expr)))
	to_remove = i;
    }
  if (to_remove != -1U)
    bitmap_clear_bit (&a->expressions, to_remove);
}


/* Return true if bitmapped set SET contains the value VALUE_ID.  */

static bool
bitmap_set_contains_value (bitmap_set_t set, unsigned int value_id)
{
  if (value_id_constant_p (value_id))
    return true;

  return bitmap_bit_p (&set->values, value_id);
}

/* Return true if two bitmap sets are equal.  */

static bool
bitmap_set_equal (bitmap_set_t a, bitmap_set_t b)
{
  return bitmap_equal_p (&a->values, &b->values);
}

/* Replace an instance of EXPR's VALUE with EXPR in SET if it exists,
   and add it otherwise.  */

static void
bitmap_value_replace_in_set (bitmap_set_t set, pre_expr expr)
{
  unsigned int val = get_expr_value_id (expr);
  if (value_id_constant_p (val))
    return;

  if (bitmap_set_contains_value (set, val))
    {
      /* The number of expressions having a given value is usually
	 significantly less than the total number of expressions in SET.
	 Thus, rather than check, for each expression in SET, whether it
	 has the value LOOKFOR, we walk the reverse mapping that tells us
	 what expressions have a given value, and see if any of those
	 expressions are in our set.  For large testcases, this is about
	 5-10x faster than walking the bitmap.  If this is somehow a
	 significant loss for some cases, we can choose which set to walk
	 based on the set size.  */
      unsigned int i;
      bitmap_iterator bi;
      bitmap exprset = value_expressions[val];
      EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
	{
	  if (bitmap_clear_bit (&set->expressions, i))
	    {
	      bitmap_set_bit (&set->expressions, get_expression_id (expr));
	      return;
	    }
	}
      gcc_unreachable ();
    }
  else
    bitmap_insert_into_set (set, expr);
}

/* Insert EXPR into SET if EXPR's value is not already present in
   SET.  */

static void
bitmap_value_insert_into_set (bitmap_set_t set, pre_expr expr)
{
  unsigned int val = get_expr_value_id (expr);

  gcc_checking_assert (expr->id == get_or_alloc_expression_id (expr));

  /* Constant values are always considered to be part of the set.  */
  if (value_id_constant_p (val))
    return;

  /* If the value membership changed, add the expression.  */
  if (bitmap_set_bit (&set->values, val))
    bitmap_set_bit (&set->expressions, expr->id);
}
/* Print out EXPR to outfile.  */

static void
print_pre_expr (FILE *outfile, const pre_expr expr)
{
  if (!expr)
    {
      fprintf (outfile, "NULL");
      return;
    }
  switch (expr->kind)
    {
    case CONSTANT:
      print_generic_expr (outfile, PRE_EXPR_CONSTANT (expr));
      break;
    case NAME:
      print_generic_expr (outfile, PRE_EXPR_NAME (expr));
      break;
    case NARY:
      {
	unsigned int i;
	vn_nary_op_t nary = PRE_EXPR_NARY (expr);
	fprintf (outfile, "{%s,", get_tree_code_name (nary->opcode));
	for (i = 0; i < nary->length; i++)
	  {
	    print_generic_expr (outfile, nary->op[i]);
	    if (i != (unsigned) nary->length - 1)
	      fprintf (outfile, ",");
	  }
	fprintf (outfile, "}");
      }
      break;

    case REFERENCE:
      {
	vn_reference_op_t vro;
	unsigned int i;
	vn_reference_t ref = PRE_EXPR_REFERENCE (expr);
	fprintf (outfile, "{");
	for (i = 0; ref->operands.iterate (i, &vro); i++)
	  {
	    bool closebrace = false;
	    if (vro->opcode != SSA_NAME
		&& TREE_CODE_CLASS (vro->opcode) != tcc_declaration)
	      {
		fprintf (outfile, "%s", get_tree_code_name (vro->opcode));
		if (vro->op0)
		  {
		    fprintf (outfile, "<");
		    closebrace = true;
		  }
	      }
	    if (vro->op0)
	      {
		print_generic_expr (outfile, vro->op0);
		if (vro->op1)
		  {
		    fprintf (outfile, ",");
		    print_generic_expr (outfile, vro->op1);
		  }
		if (vro->op2)
		  {
		    fprintf (outfile, ",");
		    print_generic_expr (outfile, vro->op2);
		  }
	      }
	    if (closebrace)
	      fprintf (outfile, ">");
	    if (i != ref->operands.length () - 1)
	      fprintf (outfile, ",");
	  }
	fprintf (outfile, "}");
	if (ref->vuse)
	  {
	    fprintf (outfile, "@");
	    print_generic_expr (outfile, ref->vuse);
	  }
      }
      break;
    }
}
void debug_pre_expr (pre_expr);

/* Like print_pre_expr but always prints to stderr.  */
DEBUG_FUNCTION void
debug_pre_expr (pre_expr e)
{
  print_pre_expr (stderr, e);
  fprintf (stderr, "\n");
}

/* Print out SET to OUTFILE.  */

static void
print_bitmap_set (FILE *outfile, bitmap_set_t set,
		  const char *setname, int blockindex)
{
  fprintf (outfile, "%s[%d] := { ", setname, blockindex);
  if (set)
    {
      bool first = true;
      unsigned i;
      bitmap_iterator bi;

      FOR_EACH_EXPR_ID_IN_SET (set, i, bi)
	{
	  const pre_expr expr = expression_for_id (i);

	  if (!first)
	    fprintf (outfile, ", ");
	  first = false;
	  print_pre_expr (outfile, expr);

	  fprintf (outfile, " (%04d)", get_expr_value_id (expr));
	}
    }
  fprintf (outfile, " }\n");
}

void debug_bitmap_set (bitmap_set_t);

DEBUG_FUNCTION void
debug_bitmap_set (bitmap_set_t set)
{
  print_bitmap_set (stderr, set, "debug", 0);
}

void debug_bitmap_sets_for (basic_block);

DEBUG_FUNCTION void
debug_bitmap_sets_for (basic_block bb)
{
  print_bitmap_set (stderr, AVAIL_OUT (bb), "avail_out", bb->index);
  print_bitmap_set (stderr, EXP_GEN (bb), "exp_gen", bb->index);
  print_bitmap_set (stderr, PHI_GEN (bb), "phi_gen", bb->index);
  print_bitmap_set (stderr, TMP_GEN (bb), "tmp_gen", bb->index);
  print_bitmap_set (stderr, ANTIC_IN (bb), "antic_in", bb->index);
  if (do_partial_partial)
    print_bitmap_set (stderr, PA_IN (bb), "pa_in", bb->index);
  print_bitmap_set (stderr, NEW_SETS (bb), "new_sets", bb->index);
}

/* Print out the expressions that have VAL to OUTFILE.  */
static void
print_value_expressions (FILE *outfile, unsigned int val)
{
  bitmap set = value_expressions[val];
  if (set)
    {
      bitmap_set x;
      char s[10];
      sprintf (s, "%04d", val);
      x.expressions = *set;
      print_bitmap_set (outfile, &x, s, 0);
    }
}


DEBUG_FUNCTION void
debug_value_expressions (unsigned int val)
{
  print_value_expressions (stderr, val);
}

/* Given a CONSTANT, allocate a new CONSTANT type PRE_EXPR to
   represent it.  */

static pre_expr
get_or_alloc_expr_for_constant (tree constant)
{
  unsigned int result_id;
  unsigned int value_id;
  struct pre_expr_d expr;
  pre_expr newexpr;

  expr.kind = CONSTANT;
  PRE_EXPR_CONSTANT (&expr) = constant;
  result_id = lookup_expression_id (&expr);
  if (result_id != 0)
    return expression_for_id (result_id);

  newexpr = pre_expr_pool.allocate ();
  newexpr->kind = CONSTANT;
  PRE_EXPR_CONSTANT (newexpr) = constant;
  alloc_expression_id (newexpr);
  value_id = get_or_alloc_constant_value_id (constant);
  add_to_value (value_id, newexpr);
  return newexpr;
}

/* Get or allocate a pre_expr for a piece of GIMPLE, and return it.
   Currently only supports constants and SSA_NAMES.  */
static pre_expr
get_or_alloc_expr_for (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return get_or_alloc_expr_for_name (t);
  else if (is_gimple_min_invariant (t))
    return get_or_alloc_expr_for_constant (t);
  gcc_unreachable ();
}

/* Return the folded version of E if, when folded, it is a gimple
   min_invariant or an SSA name.  Otherwise, return E.  */

static pre_expr
fully_constant_expression (pre_expr e)
{
  switch (e->kind)
    {
    case CONSTANT:
      return e;
    case NARY:
      {
	vn_nary_op_t nary = PRE_EXPR_NARY (e);
	tree res = vn_nary_simplify (nary);
	if (!res)
	  return e;
	if (is_gimple_min_invariant (res))
	  return get_or_alloc_expr_for_constant (res);
	if (TREE_CODE (res) == SSA_NAME)
	  return get_or_alloc_expr_for_name (res);
	return e;
      }
    case REFERENCE:
      {
	vn_reference_t ref = PRE_EXPR_REFERENCE (e);
	tree folded;
	if ((folded = fully_constant_vn_reference_p (ref)))
	  return get_or_alloc_expr_for_constant (folded);
	return e;
      }
    default:
      return e;
    }
  return e;
}

/* Translate the VUSE backwards through phi nodes in PHIBLOCK, so that
   it has the value it would have in BLOCK.  Set *SAME_VALID to true
   in case the new vuse doesn't change the value id of the OPERANDS.  */

static tree
translate_vuse_through_block (vec<vn_reference_op_s> operands,
			      alias_set_type set, tree type, tree vuse,
			      basic_block phiblock,
			      basic_block block, bool *same_valid)
{
  gimple *phi = SSA_NAME_DEF_STMT (vuse);
  ao_ref ref;
  edge e = NULL;
  bool use_oracle;

  *same_valid = true;

  if (gimple_bb (phi) != phiblock)
    return vuse;

  unsigned int cnt = PARAM_VALUE (PARAM_SCCVN_MAX_ALIAS_QUERIES_PER_ACCESS);
  use_oracle = ao_ref_init_from_vn_reference (&ref, set, type, operands);

  /* Use the alias-oracle to find either the PHI node in this block,
     the first VUSE used in this block that is equivalent to vuse or
     the first VUSE whose definition in this block kills the value.  */
  if (gimple_code (phi) == GIMPLE_PHI)
    e = find_edge (block, phiblock);
  else if (use_oracle)
    while (cnt > 0
	   && !stmt_may_clobber_ref_p_1 (phi, &ref))
      {
	--cnt;
	vuse = gimple_vuse (phi);
	phi = SSA_NAME_DEF_STMT (vuse);
	if (gimple_bb (phi) != phiblock)
	  return vuse;
	if (gimple_code (phi) == GIMPLE_PHI)
	  {
	    e = find_edge (block, phiblock);
	    break;
	  }
      }
  else
    return NULL_TREE;

  if (e)
    {
      if (use_oracle)
	{
	  bitmap visited = NULL;
	  /* Try to find a vuse that dominates this phi node by skipping
	     non-clobbering statements.  */
	  vuse = get_continuation_for_phi (phi, &ref, true,
					   cnt, &visited, false, NULL, NULL);
	  if (visited)
	    BITMAP_FREE (visited);
	}
      else
	vuse = NULL_TREE;
      if (!vuse)
	{
	  /* If we didn't find any, the value ID can't stay the same,
	     but return the translated vuse.  */
	  *same_valid = false;
	  vuse = PHI_ARG_DEF (phi, e->dest_idx);
	}
      /* ??? We would like to return vuse here as this is the canonical
	 upmost vdef that this reference is associated with.  But during
	 insertion of the references into the hash tables we only ever
	 directly insert with their direct gimple_vuse, hence returning
	 something else would make us not find the other expression.  */
      return PHI_ARG_DEF (phi, e->dest_idx);
    }

  return NULL_TREE;
}

/* Like bitmap_find_leader, but checks for the value existing in SET1 *or*
   SET2 *or* SET3.  This is used to avoid making a set consisting of the union
   of PA_IN and ANTIC_IN during insert and phi-translation.  */

static inline pre_expr
find_leader_in_sets (unsigned int val, bitmap_set_t set1, bitmap_set_t set2,
		     bitmap_set_t set3 = NULL)
{
  pre_expr result = NULL;

  if (set1)
    result = bitmap_find_leader (set1, val);
  if (!result && set2)
    result = bitmap_find_leader (set2, val);
  if (!result && set3)
    result = bitmap_find_leader (set3, val);
  return result;
}

/* Get the tree type for our PRE expression E.  */

static tree
get_expr_type (const pre_expr e)
{
  switch (e->kind)
    {
    case NAME:
      return TREE_TYPE (PRE_EXPR_NAME (e));
    case CONSTANT:
      return TREE_TYPE (PRE_EXPR_CONSTANT (e));
    case REFERENCE:
      return PRE_EXPR_REFERENCE (e)->type;
    case NARY:
      return PRE_EXPR_NARY (e)->type;
    }
  gcc_unreachable ();
}

/* Get a representative SSA_NAME for a given expression that is available in B.
   Since all of our sub-expressions are treated as values, we require
   them to be SSA_NAME's for simplicity.
   Prior versions of GVNPRE used to use "value handles" here, so that
   an expression would be VH.11 + VH.10 instead of d_3 + e_6.  In
   either case, the operands are really values (IE we do not expect
   them to be usable without finding leaders).  */

static tree
get_representative_for (const pre_expr e, basic_block b = NULL)
{
  tree name, valnum = NULL_TREE;
  unsigned int value_id = get_expr_value_id (e);

  switch (e->kind)
    {
    case NAME:
      return PRE_EXPR_NAME (e);
    case CONSTANT:
      return PRE_EXPR_CONSTANT (e);
    case NARY:
    case REFERENCE:
      {
	/* Go through all of the expressions representing this value
	   and pick out an SSA_NAME.  */
	unsigned int i;
	bitmap_iterator bi;
	bitmap exprs = value_expressions[value_id];
	EXECUTE_IF_SET_IN_BITMAP (exprs, 0, i, bi)
	  {
	    pre_expr rep = expression_for_id (i);
	    if (rep->kind == NAME)
	      {
		tree name = PRE_EXPR_NAME (rep);
		valnum = VN_INFO (name)->valnum;
		gimple *def = SSA_NAME_DEF_STMT (name);
		/* We have to return either a new representative or one
		   that can be used for expression simplification and thus
		   is available in B.  */
		if (! b
		    || gimple_nop_p (def)
		    || dominated_by_p (CDI_DOMINATORS, b, gimple_bb (def)))
		  return name;
	      }
	    else if (rep->kind == CONSTANT)
	      return PRE_EXPR_CONSTANT (rep);
	  }
      }
      break;
    }

  /* If we reached here we couldn't find an SSA_NAME.  This can
     happen when we've discovered a value that has never appeared in
     the program as set to an SSA_NAME, as the result of phi translation.
     Create one here.
     ??? We should be able to re-use this when we insert the statement
     to compute it.  */
  name = make_temp_ssa_name (get_expr_type (e), gimple_build_nop (), "pretmp");
  VN_INFO (name)->value_id = value_id;
  VN_INFO (name)->valnum = valnum ? valnum : name;
  /* ??? For now mark this SSA name for release by VN.  */
  VN_INFO (name)->needs_insertion = true;
  add_to_value (value_id, get_or_alloc_expr_for_name (name));
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Created SSA_NAME representative ");
      print_generic_expr (dump_file, name);
      fprintf (dump_file, " for expression:");
      print_pre_expr (dump_file, e);
      fprintf (dump_file, " (%04d)\n", value_id);
    }

  return name;
}
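/* To make phi translation concrete, a hand-written example (the SSA
   names are made up): assume block B3 has the PHI

	a_3 = PHI <a_1 (B1), a_2 (B2)>

   and that ANTIC_IN (B3) contains the expression a_3 + 1.  Translating
   that expression through the edge B1->B3 substitutes the PHI argument
   for the PHI result, yielding a_1 + 1 - the computation that would
   have to be inserted in B1 to make a_3 + 1 fully redundant.  If a
   translated operand's value has no SSA_NAME representative yet,
   get_representative_for above creates one.  */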
static pre_expr
phi_translate (bitmap_set_t, pre_expr, bitmap_set_t, bitmap_set_t, edge);

/* Translate EXPR using phis in PHIBLOCK, so that it has the values of
   the phis in PRED.  Return NULL if we can't find a leader for each part
   of the translated expression.  */

static pre_expr
phi_translate_1 (bitmap_set_t dest,
		 pre_expr expr, bitmap_set_t set1, bitmap_set_t set2, edge e)
{
  basic_block pred = e->src;
  basic_block phiblock = e->dest;
  switch (expr->kind)
    {
    case NARY:
      {
	unsigned int i;
	bool changed = false;
	vn_nary_op_t nary = PRE_EXPR_NARY (expr);
	vn_nary_op_t newnary = XALLOCAVAR (struct vn_nary_op_s,
					   sizeof_vn_nary_op (nary->length));
	memcpy (newnary, nary, sizeof_vn_nary_op (nary->length));

	for (i = 0; i < newnary->length; i++)
	  {
	    if (TREE_CODE (newnary->op[i]) != SSA_NAME)
	      continue;
	    else
	      {
		pre_expr leader, result;
		unsigned int op_val_id = VN_INFO (newnary->op[i])->value_id;
		leader = find_leader_in_sets (op_val_id, set1, set2);
		result = phi_translate (dest, leader, set1, set2, e);
		if (result && result != leader)
		  /* If op has a leader in the sets we translate make
		     sure to use the value of the translated expression.
		     We might need a new representative for that.  */
		  newnary->op[i] = get_representative_for (result, pred);
		else if (!result)
		  return NULL;

		changed |= newnary->op[i] != nary->op[i];
	      }
	  }
	if (changed)
	  {
	    pre_expr constant;
	    unsigned int new_val_id;

	    PRE_EXPR_NARY (expr) = newnary;
	    constant = fully_constant_expression (expr);
	    PRE_EXPR_NARY (expr) = nary;
	    if (constant != expr)
	      {
		/* For non-CONSTANTs we have to make sure we can eventually
		   insert the expression.  Which means we need to have a
		   leader for it.  */
		if (constant->kind != CONSTANT)
		  {
		    /* Do not allow simplifications to non-constants over
		       backedges as this will likely result in a loop PHI node
		       to be inserted and increased register pressure.
		       See PR77498 - this avoids doing predcoms work in
		       a less efficient way.  */
		    if (e->flags & EDGE_DFS_BACK)
		      ;
		    else
		      {
			unsigned value_id = get_expr_value_id (constant);
			/* We want a leader in ANTIC_OUT or AVAIL_OUT here.
			   dest has what we computed into ANTIC_OUT so far
			   so pick from that - since topological sorting
			   by sorted_array_from_bitmap_set isn't perfect
			   we may lose some cases here.  */
			constant = find_leader_in_sets (value_id, dest,
							AVAIL_OUT (pred));
			if (constant)
			  {
			    if (dump_file && (dump_flags & TDF_DETAILS))
			      {
				fprintf (dump_file, "simplifying ");
				print_pre_expr (dump_file, expr);
				fprintf (dump_file, " translated %d -> %d to ",
					 phiblock->index, pred->index);
				PRE_EXPR_NARY (expr) = newnary;
				print_pre_expr (dump_file, expr);
				PRE_EXPR_NARY (expr) = nary;
				fprintf (dump_file, " to ");
				print_pre_expr (dump_file, constant);
				fprintf (dump_file, "\n");
			      }
			    return constant;
			  }
		      }
		  }
		else
		  return constant;
	      }

	    /* vn_nary_* do not valueize operands.  */
	    for (i = 0; i < newnary->length; ++i)
	      if (TREE_CODE (newnary->op[i]) == SSA_NAME)
		newnary->op[i] = VN_INFO (newnary->op[i])->valnum;
	    tree result = vn_nary_op_lookup_pieces (newnary->length,
						    newnary->opcode,
						    newnary->type,
						    &newnary->op[0],
						    &nary);
	    if (result && is_gimple_min_invariant (result))
	      return get_or_alloc_expr_for_constant (result);

	    expr = pre_expr_pool.allocate ();
	    expr->kind = NARY;
	    expr->id = 0;
	    if (nary && !nary->predicated_values)
	      {
		PRE_EXPR_NARY (expr) = nary;
		new_val_id = nary->value_id;
		get_or_alloc_expression_id (expr);
	      }
	    else
	      {
		new_val_id = get_next_value_id ();
		value_expressions.safe_grow_cleared (get_max_value_id () + 1);
		nary = vn_nary_op_insert_pieces (newnary->length,
						 newnary->opcode,
						 newnary->type,
						 &newnary->op[0],
						 result, new_val_id);
		PRE_EXPR_NARY (expr) = nary;
		get_or_alloc_expression_id (expr);
	      }
	    add_to_value (new_val_id, expr);
	  }
	return expr;
      }
      break;

    case REFERENCE:
      {
	vn_reference_t ref = PRE_EXPR_REFERENCE (expr);
	vec<vn_reference_op_s> operands = ref->operands;
	tree vuse = ref->vuse;
	tree newvuse = vuse;
	vec<vn_reference_op_s> newoperands = vNULL;
	bool changed = false, same_valid = true;
	unsigned int i, n;
	vn_reference_op_t operand;
	vn_reference_t newref;

	for (i = 0; operands.iterate (i, &operand); i++)
	  {
	    pre_expr opresult;
	    pre_expr leader;
	    tree op[3];
	    tree type = operand->type;
	    vn_reference_op_s newop = *operand;
	    op[0] = operand->op0;
	    op[1] = operand->op1;
	    op[2] = operand->op2;
	    for (n = 0; n < 3; ++n)
	      {
		unsigned int op_val_id;
		if (!op[n])
		  continue;
		if (TREE_CODE (op[n]) != SSA_NAME)
		  {
		    /* We can't possibly insert these.  */
		    if (n != 0
			&& !is_gimple_min_invariant (op[n]))
		      break;
		    continue;
		  }
		op_val_id = VN_INFO (op[n])->value_id;
		leader = find_leader_in_sets (op_val_id, set1, set2);
		opresult = phi_translate (dest, leader, set1, set2, e);
		if (opresult && opresult != leader)
		  {
		    tree name = get_representative_for (opresult);
		    changed |= name != op[n];
		    op[n] = name;
		  }
		else if (!opresult)
		  break;
	      }
	    if (n != 3)
	      {
		newoperands.release ();
		return NULL;
	      }
	    if (!changed)
	      continue;
	    if (!newoperands.exists ())
	      newoperands = operands.copy ();
	    /* We may have changed from an SSA_NAME to a constant.  */
	    if (newop.opcode == SSA_NAME && TREE_CODE (op[0]) != SSA_NAME)
	      newop.opcode = TREE_CODE (op[0]);
	    newop.type = type;
	    newop.op0 = op[0];
	    newop.op1 = op[1];
	    newop.op2 = op[2];
	    newoperands[i] = newop;
	  }
	gcc_checking_assert (i == operands.length ());

	if (vuse)
	  {
	    newvuse = translate_vuse_through_block (newoperands.exists ()
						    ? newoperands : operands,
						    ref->set, ref->type,
						    vuse, phiblock, pred,
						    &same_valid);
	    if (newvuse == NULL_TREE)
	      {
		newoperands.release ();
		return NULL;
	      }
	  }

	if (changed || newvuse != vuse)
	  {
	    unsigned int new_val_id;

	    tree result = vn_reference_lookup_pieces (newvuse, ref->set,
						      ref->type,
						      newoperands.exists ()
						      ? newoperands : operands,
						      &newref, VN_WALK);
	    if (result)
	      newoperands.release ();

	    /* We can always insert constants, so if we have a partial
	       redundant constant load of another type try to translate it
	       to a constant of appropriate type.  */
	    if (result && is_gimple_min_invariant (result))
	      {
		tree tem = result;
		if (!useless_type_conversion_p (ref->type, TREE_TYPE (result)))
		  {
		    tem = fold_unary (VIEW_CONVERT_EXPR, ref->type, result);
		    if (tem && !is_gimple_min_invariant (tem))
		      tem = NULL_TREE;
		  }
		if (tem)
		  return get_or_alloc_expr_for_constant (tem);
	      }

	    /* If we'd have to convert things we would need to validate
	       if we can insert the translated expression.  So fail
	       here for now - we cannot insert an alias with a different
	       type in the VN tables either, as that would assert.  */
	    if (result
		&& !useless_type_conversion_p (ref->type, TREE_TYPE (result)))
	      return NULL;
	    else if (!result && newref
		     && !useless_type_conversion_p (ref->type, newref->type))
	      {
		newoperands.release ();
		return NULL;
	      }

	    expr = pre_expr_pool.allocate ();
	    expr->kind = REFERENCE;
	    expr->id = 0;

	    if (newref)
	      new_val_id = newref->value_id;
	    else
	      {
		if (changed || !same_valid)
		  {
		    new_val_id = get_next_value_id ();
		    value_expressions.safe_grow_cleared
		      (get_max_value_id () + 1);
		  }
		else
		  new_val_id = ref->value_id;
		if (!newoperands.exists ())
		  newoperands = operands.copy ();
		newref = vn_reference_insert_pieces (newvuse, ref->set,
						     ref->type,
						     newoperands,
						     result, new_val_id);
		newoperands = vNULL;
	      }
	    PRE_EXPR_REFERENCE (expr) = newref;
	    get_or_alloc_expression_id (expr);
	    add_to_value (new_val_id, expr);
	  }
	newoperands.release ();
	return expr;
      }
      break;

    case NAME:
      {
	tree name = PRE_EXPR_NAME (expr);
	gimple *def_stmt = SSA_NAME_DEF_STMT (name);
	/* If the SSA name is defined by a PHI node in this block,
	   translate it.  */
	if (gimple_code (def_stmt) == GIMPLE_PHI
	    && gimple_bb (def_stmt) == phiblock)
	  {
	    tree def = PHI_ARG_DEF (def_stmt, e->dest_idx);

	    /* Handle constant.  */
	    if (is_gimple_min_invariant (def))
	      return get_or_alloc_expr_for_constant (def);

	    return get_or_alloc_expr_for_name (def);
	  }
	/* Otherwise return it unchanged - it will get removed if its
	   value is not available in PRED's AVAIL_OUT set of expressions
	   by the subtraction of TMP_GEN.  */
	return expr;
      }

    default:
      gcc_unreachable ();
    }
}

/* Wrapper around phi_translate_1 providing caching functionality.  */

static pre_expr
phi_translate (bitmap_set_t dest, pre_expr expr,
	       bitmap_set_t set1, bitmap_set_t set2, edge e)
{
  expr_pred_trans_t slot = NULL;
  pre_expr phitrans;

  if (!expr)
    return NULL;

  /* Constants contain no values that need translation.  */
  if (expr->kind == CONSTANT)
    return expr;

  if (value_id_constant_p (get_expr_value_id (expr)))
    return expr;

  /* Don't add translations of NAMEs as those are cheap to translate.  */
  if (expr->kind != NAME)
    {
      if (phi_trans_add (&slot, expr, e->src))
	return slot->v;
      /* Store NULL for the value we want to return in the case of
	 recursing.  */
      slot->v = NULL;
    }

  /* Translate.  */
  basic_block saved_valueize_bb = vn_context_bb;
  vn_context_bb = e->src;
  phitrans = phi_translate_1 (dest, expr, set1, set2, e);
  vn_context_bb = saved_valueize_bb;

  if (slot)
    {
      if (phitrans)
	slot->v = phitrans;
      else
	/* Remove failed translations again, they cause insert
	   iteration to not pick up new opportunities reliably.  */
	phi_translate_table->remove_elt_with_hash (slot, slot->hashcode);
    }

  return phitrans;
}


/* For each expression in SET, translate the values through phi nodes
   in PHIBLOCK using edge PHIBLOCK->PRED, and store the resulting
   expressions in DEST.  */

static void
phi_translate_set (bitmap_set_t dest, bitmap_set_t set, edge e)
{
  vec<pre_expr> exprs;
  pre_expr expr;
  int i;

  if (gimple_seq_empty_p (phi_nodes (e->dest)))
    {
      bitmap_set_copy (dest, set);
      return;
    }

  exprs = sorted_array_from_bitmap_set (set);
  FOR_EACH_VEC_ELT (exprs, i, expr)
    {
      pre_expr translated;
      translated = phi_translate (dest, expr, set, NULL, e);
      if (!translated)
	continue;

      bitmap_insert_into_set (dest, translated);
    }
  exprs.release ();
}

/* Find the leader for a value (i.e., the name representing that
   value) in a given set, and return it.  Return NULL if no leader
   is found.  */

static pre_expr
bitmap_find_leader (bitmap_set_t set, unsigned int val)
{
  if (value_id_constant_p (val))
    {
      unsigned int i;
      bitmap_iterator bi;
      bitmap exprset = value_expressions[val];

      EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
	{
	  pre_expr expr = expression_for_id (i);
	  if (expr->kind == CONSTANT)
	    return expr;
	}
    }
  if (bitmap_set_contains_value (set, val))
    {
      /* Rather than walk the entire bitmap of expressions, and see
	 whether any of them has the value we are looking for, we look
	 at the reverse mapping, which tells us the set of expressions
	 that have a given value (IE value->expressions with that
	 value) and see if any of those expressions are in our set.
	 The number of expressions per value is usually significantly
	 less than the number of expressions in the set.  In fact, for
	 large testcases, doing it this way is roughly 5-10x faster
	 than walking the bitmap.
	 If this is somehow a significant loss for some cases, we can
	 choose which set to walk based on which set is smaller.  */
      unsigned int i;
      bitmap_iterator bi;
      bitmap exprset = value_expressions[val];

      EXECUTE_IF_AND_IN_BITMAP (exprset, &set->expressions, 0, i, bi)
	return expression_for_id (i);
    }
  return NULL;
}

/* Determine if EXPR, a memory expression, is ANTIC_IN at the top of
   BLOCK by seeing if it is not killed in the block.  Note that we are
   only determining whether there is a store that kills it.  Because
   of the order in which clean iterates over values, we are guaranteed
   that altered operands will have caused us to be eliminated from the
   ANTIC_IN set already.  */
static bool
value_dies_in_block_x (pre_expr expr, basic_block block)
{
  tree vuse = PRE_EXPR_REFERENCE (expr)->vuse;
  vn_reference_t refx = PRE_EXPR_REFERENCE (expr);
  gimple *def;
  gimple_stmt_iterator gsi;
  unsigned id = get_expression_id (expr);
  bool res = false;
  ao_ref ref;

  if (!vuse)
    return false;

  /* Lookup a previously calculated result.  The EXPR_DIES cache uses
     two bits per expression: bit ID * 2 says whether a result for ID
     has been cached for this block, bit ID * 2 + 1 is that result.  */
  if (EXPR_DIES (block)
      && bitmap_bit_p (EXPR_DIES (block), id * 2))
    return bitmap_bit_p (EXPR_DIES (block), id * 2 + 1);

  /* A memory expression {e, VUSE} dies in the block if there is a
     statement that may clobber e.  If, starting statement walk from the
     top of the basic block, a statement uses VUSE there can be no kill
     in between that use and the original statement that loaded {e, VUSE},
     so we can stop walking.  */
  ref.base = NULL_TREE;
  for (gsi = gsi_start_bb (block); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree def_vuse, def_vdef;
      def = gsi_stmt (gsi);
      def_vuse = gimple_vuse (def);
      def_vdef = gimple_vdef (def);

      /* Not a memory statement.  */
      if (!def_vuse)
	continue;

      /* Not a may-def.  */
      if (!def_vdef)
	{
	  /* A load with the same VUSE, we're done.  */
	  if (def_vuse == vuse)
	    break;

	  continue;
	}

      /* Init ref only if we really need it.  */
      if (ref.base == NULL_TREE
	  && !ao_ref_init_from_vn_reference (&ref, refx->set, refx->type,
					     refx->operands))
	{
	  res = true;
	  break;
	}
      /* If the statement may clobber expr, it dies.  */
      if (stmt_may_clobber_ref_p_1 (def, &ref))
	{
	  res = true;
	  break;
	}
    }

  /* Remember the result.  */
  if (!EXPR_DIES (block))
    EXPR_DIES (block) = BITMAP_ALLOC (&grand_bitmap_obstack);
  bitmap_set_bit (EXPR_DIES (block), id * 2);
  if (res)
    bitmap_set_bit (EXPR_DIES (block), id * 2 + 1);

  return res;
}


/* Determine if OP is valid in SET1 U SET2, which it is when the union
   contains its value-id.  */

static bool
op_valid_in_sets (bitmap_set_t set1, bitmap_set_t set2, tree op)
{
  if (op && TREE_CODE (op) == SSA_NAME)
    {
      unsigned int value_id = VN_INFO (op)->value_id;
      if (!(bitmap_set_contains_value (set1, value_id)
	    || (set2 && bitmap_set_contains_value (set2, value_id))))
	return false;
    }
  return true;
}

/* Determine if the expression EXPR is valid in SET1 U SET2.
   ONLY SET2 CAN BE NULL.
   This means that we have a leader for each part of the expression
   (if it consists of values), or the expression is an SSA_NAME.
   For loads/calls, we also see if the vuse is killed in this block.  */

static bool
valid_in_sets (bitmap_set_t set1, bitmap_set_t set2, pre_expr expr)
{
  switch (expr->kind)
    {
    case NAME:
      /* By construction all NAMEs are available.  Non-available
	 NAMEs are removed by subtracting TMP_GEN from the sets.  */
*/ 1881 return true; 1882 case NARY: 1883 { 1884 unsigned int i; 1885 vn_nary_op_t nary = PRE_EXPR_NARY (expr); 1886 for (i = 0; i < nary->length; i++) 1887 if (!op_valid_in_sets (set1, set2, nary->op[i])) 1888 return false; 1889 return true; 1890 } 1891 break; 1892 case REFERENCE: 1893 { 1894 vn_reference_t ref = PRE_EXPR_REFERENCE (expr); 1895 vn_reference_op_t vro; 1896 unsigned int i; 1897 1898 FOR_EACH_VEC_ELT (ref->operands, i, vro) 1899 { 1900 if (!op_valid_in_sets (set1, set2, vro->op0) 1901 || !op_valid_in_sets (set1, set2, vro->op1) 1902 || !op_valid_in_sets (set1, set2, vro->op2)) 1903 return false; 1904 } 1905 return true; 1906 } 1907 default: 1908 gcc_unreachable (); 1909 } 1910 } 1911 1912 /* Clean the set of expressions SET1 that are no longer valid in SET1 or SET2. 1913 This means expressions that are made up of values we have no leaders for 1914 in SET1 or SET2. */ 1915 1916 static void 1917 clean (bitmap_set_t set1, bitmap_set_t set2 = NULL) 1918 { 1919 vec<pre_expr> exprs = sorted_array_from_bitmap_set (set1); 1920 pre_expr expr; 1921 int i; 1922 1923 FOR_EACH_VEC_ELT (exprs, i, expr) 1924 { 1925 if (!valid_in_sets (set1, set2, expr)) 1926 { 1927 unsigned int val = get_expr_value_id (expr); 1928 bitmap_clear_bit (&set1->expressions, get_expression_id (expr)); 1929 /* We are entered with possibly multiple expressions for a value 1930 so before removing a value from the set see if there's an 1931 expression for it left. */ 1932 if (! bitmap_find_leader (set1, val)) 1933 bitmap_clear_bit (&set1->values, val); 1934 } 1935 } 1936 exprs.release (); 1937 } 1938 1939 /* Clean the set of expressions that are no longer valid in SET because 1940 they are clobbered in BLOCK or because they trap and may not be executed. */ 1941 1942 static void 1943 prune_clobbered_mems (bitmap_set_t set, basic_block block) 1944 { 1945 bitmap_iterator bi; 1946 unsigned i; 1947 unsigned to_remove = -1U; 1948 bool any_removed = false; 1949 1950 FOR_EACH_EXPR_ID_IN_SET (set, i, bi) 1951 { 1952 /* Remove queued expr. */ 1953 if (to_remove != -1U) 1954 { 1955 bitmap_clear_bit (&set->expressions, to_remove); 1956 any_removed = true; 1957 to_remove = -1U; 1958 } 1959 1960 pre_expr expr = expression_for_id (i); 1961 if (expr->kind == REFERENCE) 1962 { 1963 vn_reference_t ref = PRE_EXPR_REFERENCE (expr); 1964 if (ref->vuse) 1965 { 1966 gimple *def_stmt = SSA_NAME_DEF_STMT (ref->vuse); 1967 if (!gimple_nop_p (def_stmt) 1968 && ((gimple_bb (def_stmt) != block 1969 && !dominated_by_p (CDI_DOMINATORS, 1970 block, gimple_bb (def_stmt))) 1971 || (gimple_bb (def_stmt) == block 1972 && value_dies_in_block_x (expr, block)))) 1973 to_remove = i; 1974 } 1975 } 1976 else if (expr->kind == NARY) 1977 { 1978 vn_nary_op_t nary = PRE_EXPR_NARY (expr); 1979 /* If the NARY may trap make sure the block does not contain 1980 a possible exit point. 1981 ??? This is overly conservative if we translate AVAIL_OUT 1982 as the available expression might be after the exit point. */ 1983 if (BB_MAY_NOTRETURN (block) 1984 && vn_nary_may_trap (nary)) 1985 to_remove = i; 1986 } 1987 } 1988 1989 /* Remove queued expr. */ 1990 if (to_remove != -1U) 1991 { 1992 bitmap_clear_bit (&set->expressions, to_remove); 1993 any_removed = true; 1994 } 1995 1996 /* Above we only removed expressions, now clean the set of values 1997 which no longer have any corresponding expression. We cannot 1998 clear the value at the time we remove an expression since there 1999 may be multiple expressions per value. 
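   For example (a hypothetical situation), if a_1 + b_2 and c_3 + b_2
   were both recorded for value-id 7 and only the first is removed,
   value 7 must stay in the set because the second expression still
   represents it.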
2000    If we'd queue the values that are possibly to be removed we could
2001    use the bitmap_find_leader way to see if there's still an expression
2002    for them.  For some ratio of to-be-removed values to the number of
2003    values/expressions in the set this might be faster than rebuilding
2004    the value-set.  */
2005   if (any_removed)
2006     {
2007       bitmap_clear (&set->values);
2008       FOR_EACH_EXPR_ID_IN_SET (set, i, bi)
2009         {
2010           pre_expr expr = expression_for_id (i);
2011           unsigned int value_id = get_expr_value_id (expr);
2012           bitmap_set_bit (&set->values, value_id);
2013         }
2014     }
2015 }
2016 
2017 static sbitmap has_abnormal_preds;
2018 
2019 /* Compute the ANTIC set for BLOCK.
2020 
2021    If succs(BLOCK) > 1 then
2022      ANTIC_OUT[BLOCK] = intersection of ANTIC_IN[b] for all succ(BLOCK)
2023    else if succs(BLOCK) == 1 then
2024      ANTIC_OUT[BLOCK] = phi_translate (ANTIC_IN[succ(BLOCK)])
2025 
2026    ANTIC_IN[BLOCK] = clean(ANTIC_OUT[BLOCK] U EXP_GEN[BLOCK] - TMP_GEN[BLOCK])
2027 
2028    Note that clean() is deferred until after the iteration.  */
2029 
2030 static bool
2031 compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge)
2032 {
2033   bitmap_set_t S, old, ANTIC_OUT;
2034   edge e;
2035   edge_iterator ei;
2036 
2037   bool was_visited = BB_VISITED (block);
2038   bool changed = ! BB_VISITED (block);
2039   BB_VISITED (block) = 1;
2040   old = ANTIC_OUT = S = NULL;
2041 
2042   /* If any edges from predecessors are abnormal, antic_in is empty,
2043      so do nothing.  */
2044   if (block_has_abnormal_pred_edge)
2045     goto maybe_dump_sets;
2046 
2047   old = ANTIC_IN (block);
2048   ANTIC_OUT = bitmap_set_new ();
2049 
2050   /* If the block has no successors, ANTIC_OUT is empty.  */
2051   if (EDGE_COUNT (block->succs) == 0)
2052     ;
2053   /* If we have one successor, we could have some phi nodes to
2054      translate through.  */
2055   else if (single_succ_p (block))
2056     {
2057       e = single_succ_edge (block);
2058       gcc_assert (BB_VISITED (e->dest));
2059       phi_translate_set (ANTIC_OUT, ANTIC_IN (e->dest), e);
2060     }
2061   /* If we have multiple successors, we take the intersection of all of
2062      them.  Note that in the case of loop exit phi nodes, we may have
2063      phis to translate through.  */
2064   else
2065     {
2066       size_t i;
2067       edge first = NULL;
2068 
2069       auto_vec<edge> worklist (EDGE_COUNT (block->succs));
2070       FOR_EACH_EDGE (e, ei, block->succs)
2071         {
2072           if (!first
2073               && BB_VISITED (e->dest))
2074             first = e;
2075           else if (BB_VISITED (e->dest))
2076             worklist.quick_push (e);
2077           else
2078             {
2079               /* Unvisited successors get their ANTIC_IN replaced by the
2080                  maximal set to arrive at a maximum ANTIC_IN solution.
2081                  We can ignore them in the intersection operation and thus
2082                  need not explicitly represent that maximum solution.  */
2083               if (dump_file && (dump_flags & TDF_DETAILS))
2084                 fprintf (dump_file, "ANTIC_IN is MAX on %d->%d\n",
2085                          e->src->index, e->dest->index);
2086             }
2087         }
2088 
2089       /* With multiple successors we must have visited one already,
2090          which is guaranteed by the iteration order.  */
2091       gcc_assert (first != NULL);
2092 
2093       phi_translate_set (ANTIC_OUT, ANTIC_IN (first->dest), first);
2094 
2095       /* If we have multiple successors we need to intersect the ANTIC_OUT
2096          sets.  For values that's a simple intersection but for
2097          expressions it is a union.  Given we want to have a single
2098          expression per value in our sets we have to canonicalize.
2099          Avoid randomness and running into cycles like for PR82129 and
2100          canonicalize the expression we choose to the one with the
2101          lowest id.
         This requires we actually compute the union first.  */
2102       FOR_EACH_VEC_ELT (worklist, i, e)
2103         {
2104           if (!gimple_seq_empty_p (phi_nodes (e->dest)))
2105             {
2106               bitmap_set_t tmp = bitmap_set_new ();
2107               phi_translate_set (tmp, ANTIC_IN (e->dest), e);
2108               bitmap_and_into (&ANTIC_OUT->values, &tmp->values);
2109               bitmap_ior_into (&ANTIC_OUT->expressions, &tmp->expressions);
2110               bitmap_set_free (tmp);
2111             }
2112           else
2113             {
2114               bitmap_and_into (&ANTIC_OUT->values, &ANTIC_IN (e->dest)->values);
2115               bitmap_ior_into (&ANTIC_OUT->expressions,
2116                                &ANTIC_IN (e->dest)->expressions);
2117             }
2118         }
2119       if (! worklist.is_empty ())
2120         {
2121           /* Prune expressions not in the value set.  */
2122           bitmap_iterator bi;
2123           unsigned int i;
2124           unsigned int to_clear = -1U;
2125           FOR_EACH_EXPR_ID_IN_SET (ANTIC_OUT, i, bi)
2126             {
2127               if (to_clear != -1U)
2128                 {
2129                   bitmap_clear_bit (&ANTIC_OUT->expressions, to_clear);
2130                   to_clear = -1U;
2131                 }
2132               pre_expr expr = expression_for_id (i);
2133               unsigned int value_id = get_expr_value_id (expr);
2134               if (!bitmap_bit_p (&ANTIC_OUT->values, value_id))
2135                 to_clear = i;
2136             }
2137           if (to_clear != -1U)
2138             bitmap_clear_bit (&ANTIC_OUT->expressions, to_clear);
2139         }
2140     }
2141 
2142   /* Prune expressions that are clobbered in block and thus become
2143      invalid if translated from ANTIC_OUT to ANTIC_IN.  */
2144   prune_clobbered_mems (ANTIC_OUT, block);
2145 
2146   /* Generate ANTIC_OUT - TMP_GEN.  */
2147   S = bitmap_set_subtract_expressions (ANTIC_OUT, TMP_GEN (block));
2148 
2149   /* Start ANTIC_IN with EXP_GEN - TMP_GEN.  */
2150   ANTIC_IN (block) = bitmap_set_subtract_expressions (EXP_GEN (block),
2151                                                       TMP_GEN (block));
2152 
2153   /* Then union in the ANTIC_OUT - TMP_GEN values,
2154      to get ANTIC_OUT U EXP_GEN - TMP_GEN.  */
2155   bitmap_ior_into (&ANTIC_IN (block)->values, &S->values);
2156   bitmap_ior_into (&ANTIC_IN (block)->expressions, &S->expressions);
2157 
2158   /* clean (ANTIC_IN (block)) is deferred until after the iteration converged
2159      because it can cause non-convergence, see for example PR81181.  */
2160 
2161   /* Intersect ANTIC_IN with the old ANTIC_IN.  This is required until
2162      we properly represent the maximum expression set, thus not prune
2163      values without expressions during the iteration.  */
2164   if (was_visited
2165       && bitmap_and_into (&ANTIC_IN (block)->values, &old->values))
2166     {
2167       if (dump_file && (dump_flags & TDF_DETAILS))
2168         fprintf (dump_file, "warning: intersecting with old ANTIC_IN "
2169                  "shrinks the set\n");
2170       /* Prune expressions not in the value set.
*/ 2171 bitmap_iterator bi; 2172 unsigned int i; 2173 unsigned int to_clear = -1U; 2174 FOR_EACH_EXPR_ID_IN_SET (ANTIC_IN (block), i, bi) 2175 { 2176 if (to_clear != -1U) 2177 { 2178 bitmap_clear_bit (&ANTIC_IN (block)->expressions, to_clear); 2179 to_clear = -1U; 2180 } 2181 pre_expr expr = expression_for_id (i); 2182 unsigned int value_id = get_expr_value_id (expr); 2183 if (!bitmap_bit_p (&ANTIC_IN (block)->values, value_id)) 2184 to_clear = i; 2185 } 2186 if (to_clear != -1U) 2187 bitmap_clear_bit (&ANTIC_IN (block)->expressions, to_clear); 2188 } 2189 2190 if (!bitmap_set_equal (old, ANTIC_IN (block))) 2191 changed = true; 2192 2193 maybe_dump_sets: 2194 if (dump_file && (dump_flags & TDF_DETAILS)) 2195 { 2196 if (ANTIC_OUT) 2197 print_bitmap_set (dump_file, ANTIC_OUT, "ANTIC_OUT", block->index); 2198 2199 if (changed) 2200 fprintf (dump_file, "[changed] "); 2201 print_bitmap_set (dump_file, ANTIC_IN (block), "ANTIC_IN", 2202 block->index); 2203 2204 if (S) 2205 print_bitmap_set (dump_file, S, "S", block->index); 2206 } 2207 if (old) 2208 bitmap_set_free (old); 2209 if (S) 2210 bitmap_set_free (S); 2211 if (ANTIC_OUT) 2212 bitmap_set_free (ANTIC_OUT); 2213 return changed; 2214 } 2215 2216 /* Compute PARTIAL_ANTIC for BLOCK. 2217 2218 If succs(BLOCK) > 1 then 2219 PA_OUT[BLOCK] = value wise union of PA_IN[b] + all ANTIC_IN not 2220 in ANTIC_OUT for all succ(BLOCK) 2221 else if succs(BLOCK) == 1 then 2222 PA_OUT[BLOCK] = phi_translate (PA_IN[succ(BLOCK)]) 2223 2224 PA_IN[BLOCK] = clean(PA_OUT[BLOCK] - TMP_GEN[BLOCK] - ANTIC_IN[BLOCK]) 2225 2226 */ 2227 static void 2228 compute_partial_antic_aux (basic_block block, 2229 bool block_has_abnormal_pred_edge) 2230 { 2231 bitmap_set_t old_PA_IN; 2232 bitmap_set_t PA_OUT; 2233 edge e; 2234 edge_iterator ei; 2235 unsigned long max_pa = PARAM_VALUE (PARAM_MAX_PARTIAL_ANTIC_LENGTH); 2236 2237 old_PA_IN = PA_OUT = NULL; 2238 2239 /* If any edges from predecessors are abnormal, antic_in is empty, 2240 so do nothing. */ 2241 if (block_has_abnormal_pred_edge) 2242 goto maybe_dump_sets; 2243 2244 /* If there are too many partially anticipatable values in the 2245 block, phi_translate_set can take an exponential time: stop 2246 before the translation starts. */ 2247 if (max_pa 2248 && single_succ_p (block) 2249 && bitmap_count_bits (&PA_IN (single_succ (block))->values) > max_pa) 2250 goto maybe_dump_sets; 2251 2252 old_PA_IN = PA_IN (block); 2253 PA_OUT = bitmap_set_new (); 2254 2255 /* If the block has no successors, ANTIC_OUT is empty. */ 2256 if (EDGE_COUNT (block->succs) == 0) 2257 ; 2258 /* If we have one successor, we could have some phi nodes to 2259 translate through. Note that we can't phi translate across DFS 2260 back edges in partial antic, because it uses a union operation on 2261 the successors. For recurrences like IV's, we will end up 2262 generating a new value in the set on each go around (i + 3 (VH.1) 2263 VH.1 + 1 (VH.2), VH.2 + 1 (VH.3), etc), forever. */ 2264 else if (single_succ_p (block)) 2265 { 2266 e = single_succ_edge (block); 2267 if (!(e->flags & EDGE_DFS_BACK)) 2268 phi_translate_set (PA_OUT, PA_IN (e->dest), e); 2269 } 2270 /* If we have multiple successors, we take the union of all of 2271 them. 
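   (A value is partially anticipatable if it is anticipated on at least
   one, but not necessarily every, successor path; hence the union
   here, in contrast to the intersection used for ANTIC_OUT above.)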
*/ 2272 else 2273 { 2274 size_t i; 2275 2276 auto_vec<edge> worklist (EDGE_COUNT (block->succs)); 2277 FOR_EACH_EDGE (e, ei, block->succs) 2278 { 2279 if (e->flags & EDGE_DFS_BACK) 2280 continue; 2281 worklist.quick_push (e); 2282 } 2283 if (worklist.length () > 0) 2284 { 2285 FOR_EACH_VEC_ELT (worklist, i, e) 2286 { 2287 unsigned int i; 2288 bitmap_iterator bi; 2289 2290 FOR_EACH_EXPR_ID_IN_SET (ANTIC_IN (e->dest), i, bi) 2291 bitmap_value_insert_into_set (PA_OUT, 2292 expression_for_id (i)); 2293 if (!gimple_seq_empty_p (phi_nodes (e->dest))) 2294 { 2295 bitmap_set_t pa_in = bitmap_set_new (); 2296 phi_translate_set (pa_in, PA_IN (e->dest), e); 2297 FOR_EACH_EXPR_ID_IN_SET (pa_in, i, bi) 2298 bitmap_value_insert_into_set (PA_OUT, 2299 expression_for_id (i)); 2300 bitmap_set_free (pa_in); 2301 } 2302 else 2303 FOR_EACH_EXPR_ID_IN_SET (PA_IN (e->dest), i, bi) 2304 bitmap_value_insert_into_set (PA_OUT, 2305 expression_for_id (i)); 2306 } 2307 } 2308 } 2309 2310 /* Prune expressions that are clobbered in block and thus become 2311 invalid if translated from PA_OUT to PA_IN. */ 2312 prune_clobbered_mems (PA_OUT, block); 2313 2314 /* PA_IN starts with PA_OUT - TMP_GEN. 2315 Then we subtract things from ANTIC_IN. */ 2316 PA_IN (block) = bitmap_set_subtract_expressions (PA_OUT, TMP_GEN (block)); 2317 2318 /* For partial antic, we want to put back in the phi results, since 2319 we will properly avoid making them partially antic over backedges. */ 2320 bitmap_ior_into (&PA_IN (block)->values, &PHI_GEN (block)->values); 2321 bitmap_ior_into (&PA_IN (block)->expressions, &PHI_GEN (block)->expressions); 2322 2323 /* PA_IN[block] = PA_IN[block] - ANTIC_IN[block] */ 2324 bitmap_set_subtract_values (PA_IN (block), ANTIC_IN (block)); 2325 2326 clean (PA_IN (block), ANTIC_IN (block)); 2327 2328 maybe_dump_sets: 2329 if (dump_file && (dump_flags & TDF_DETAILS)) 2330 { 2331 if (PA_OUT) 2332 print_bitmap_set (dump_file, PA_OUT, "PA_OUT", block->index); 2333 2334 print_bitmap_set (dump_file, PA_IN (block), "PA_IN", block->index); 2335 } 2336 if (old_PA_IN) 2337 bitmap_set_free (old_PA_IN); 2338 if (PA_OUT) 2339 bitmap_set_free (PA_OUT); 2340 } 2341 2342 /* Compute ANTIC and partial ANTIC sets. */ 2343 2344 static void 2345 compute_antic (void) 2346 { 2347 bool changed = true; 2348 int num_iterations = 0; 2349 basic_block block; 2350 int i; 2351 edge_iterator ei; 2352 edge e; 2353 2354 /* If any predecessor edges are abnormal, we punt, so antic_in is empty. 2355 We pre-build the map of blocks with incoming abnormal edges here. */ 2356 has_abnormal_preds = sbitmap_alloc (last_basic_block_for_fn (cfun)); 2357 bitmap_clear (has_abnormal_preds); 2358 2359 FOR_ALL_BB_FN (block, cfun) 2360 { 2361 BB_VISITED (block) = 0; 2362 2363 FOR_EACH_EDGE (e, ei, block->preds) 2364 if (e->flags & EDGE_ABNORMAL) 2365 { 2366 bitmap_set_bit (has_abnormal_preds, block->index); 2367 break; 2368 } 2369 2370 /* While we are here, give empty ANTIC_IN sets to each block. */ 2371 ANTIC_IN (block) = bitmap_set_new (); 2372 if (do_partial_partial) 2373 PA_IN (block) = bitmap_set_new (); 2374 } 2375 2376 /* At the exit block we anticipate nothing. */ 2377 BB_VISITED (EXIT_BLOCK_PTR_FOR_FN (cfun)) = 1; 2378 2379 /* For ANTIC computation we need a postorder that also guarantees that 2380 a block with a single successor is visited after its successor. 2381 RPO on the inverted CFG has this property. 
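   For instance, in a hypothetical diamond CFG

     A -> B, A -> C, B -> D, C -> D

   an RPO of the inverted CFG visits D before B and C, and both of
   those before A, so each single-successor block sees its successor's
   ANTIC_IN computed first.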
*/ 2382 auto_vec<int, 20> postorder; 2383 inverted_post_order_compute (&postorder); 2384 2385 auto_sbitmap worklist (last_basic_block_for_fn (cfun) + 1); 2386 bitmap_clear (worklist); 2387 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) 2388 bitmap_set_bit (worklist, e->src->index); 2389 while (changed) 2390 { 2391 if (dump_file && (dump_flags & TDF_DETAILS)) 2392 fprintf (dump_file, "Starting iteration %d\n", num_iterations); 2393 /* ??? We need to clear our PHI translation cache here as the 2394 ANTIC sets shrink and we restrict valid translations to 2395 those having operands with leaders in ANTIC. Same below 2396 for PA ANTIC computation. */ 2397 num_iterations++; 2398 changed = false; 2399 for (i = postorder.length () - 1; i >= 0; i--) 2400 { 2401 if (bitmap_bit_p (worklist, postorder[i])) 2402 { 2403 basic_block block = BASIC_BLOCK_FOR_FN (cfun, postorder[i]); 2404 bitmap_clear_bit (worklist, block->index); 2405 if (compute_antic_aux (block, 2406 bitmap_bit_p (has_abnormal_preds, 2407 block->index))) 2408 { 2409 FOR_EACH_EDGE (e, ei, block->preds) 2410 bitmap_set_bit (worklist, e->src->index); 2411 changed = true; 2412 } 2413 } 2414 } 2415 /* Theoretically possible, but *highly* unlikely. */ 2416 gcc_checking_assert (num_iterations < 500); 2417 } 2418 2419 /* We have to clean after the dataflow problem converged as cleaning 2420 can cause non-convergence because it is based on expressions 2421 rather than values. */ 2422 FOR_EACH_BB_FN (block, cfun) 2423 clean (ANTIC_IN (block)); 2424 2425 statistics_histogram_event (cfun, "compute_antic iterations", 2426 num_iterations); 2427 2428 if (do_partial_partial) 2429 { 2430 /* For partial antic we ignore backedges and thus we do not need 2431 to perform any iteration when we process blocks in postorder. */ 2432 for (i = postorder.length () - 1; i >= 0; i--) 2433 { 2434 basic_block block = BASIC_BLOCK_FOR_FN (cfun, postorder[i]); 2435 compute_partial_antic_aux (block, 2436 bitmap_bit_p (has_abnormal_preds, 2437 block->index)); 2438 } 2439 } 2440 2441 sbitmap_free (has_abnormal_preds); 2442 } 2443 2444 2445 /* Inserted expressions are placed onto this worklist, which is used 2446 for performing quick dead code elimination of insertions we made 2447 that didn't turn out to be necessary. */ 2448 static bitmap inserted_exprs; 2449 2450 /* The actual worker for create_component_ref_by_pieces. 
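   It rebuilds the reference REF from its vn_reference_op pieces,
   starting at index *OPERAND and advancing that cursor as operands
   are consumed recursively; any leader computations that are needed
   are appended to STMTS via find_or_generate_expression.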
   */
2451 
2452 static tree
2453 create_component_ref_by_pieces_1 (basic_block block, vn_reference_t ref,
2454                                   unsigned int *operand, gimple_seq *stmts)
2455 {
2456   vn_reference_op_t currop = &ref->operands[*operand];
2457   tree genop;
2458   ++*operand;
2459   switch (currop->opcode)
2460     {
2461     case CALL_EXPR:
2462       gcc_unreachable ();
2463 
2464     case MEM_REF:
2465       {
2466         tree baseop = create_component_ref_by_pieces_1 (block, ref, operand,
2467                                                         stmts);
2468         if (!baseop)
2469           return NULL_TREE;
2470         tree offset = currop->op0;
2471         if (TREE_CODE (baseop) == ADDR_EXPR
2472             && handled_component_p (TREE_OPERAND (baseop, 0)))
2473           {
2474             poly_int64 off;
2475             tree base;
2476             base = get_addr_base_and_unit_offset (TREE_OPERAND (baseop, 0),
2477                                                   &off);
2478             gcc_assert (base);
2479             offset = int_const_binop (PLUS_EXPR, offset,
2480                                       build_int_cst (TREE_TYPE (offset),
2481                                                      off));
2482             baseop = build_fold_addr_expr (base);
2483           }
2484         genop = build2 (MEM_REF, currop->type, baseop, offset);
2485         MR_DEPENDENCE_CLIQUE (genop) = currop->clique;
2486         MR_DEPENDENCE_BASE (genop) = currop->base;
2487         REF_REVERSE_STORAGE_ORDER (genop) = currop->reverse;
2488         return genop;
2489       }
2490 
2491     case TARGET_MEM_REF:
2492       {
2493         tree genop0 = NULL_TREE, genop1 = NULL_TREE;
2494         vn_reference_op_t nextop = &ref->operands[++*operand];
2495         tree baseop = create_component_ref_by_pieces_1 (block, ref, operand,
2496                                                         stmts);
2497         if (!baseop)
2498           return NULL_TREE;
2499         if (currop->op0)
2500           {
2501             genop0 = find_or_generate_expression (block, currop->op0, stmts);
2502             if (!genop0)
2503               return NULL_TREE;
2504           }
2505         if (nextop->op0)
2506           {
2507             genop1 = find_or_generate_expression (block, nextop->op0, stmts);
2508             if (!genop1)
2509               return NULL_TREE;
2510           }
2511         genop = build5 (TARGET_MEM_REF, currop->type,
2512                         baseop, currop->op2, genop0, currop->op1, genop1);
2513 
2514         MR_DEPENDENCE_CLIQUE (genop) = currop->clique;
2515         MR_DEPENDENCE_BASE (genop) = currop->base;
2516         return genop;
2517       }
2518 
2519     case ADDR_EXPR:
2520       if (currop->op0)
2521         {
2522           gcc_assert (is_gimple_min_invariant (currop->op0));
2523           return currop->op0;
2524         }
2525       /* Fallthrough.  */
2526     case REALPART_EXPR:
2527     case IMAGPART_EXPR:
2528     case VIEW_CONVERT_EXPR:
2529       {
2530         tree genop0 = create_component_ref_by_pieces_1 (block, ref, operand,
2531                                                         stmts);
2532         if (!genop0)
2533           return NULL_TREE;
2534         return fold_build1 (currop->opcode, currop->type, genop0);
2535       }
2536 
2537     case WITH_SIZE_EXPR:
2538       {
2539         tree genop0 = create_component_ref_by_pieces_1 (block, ref, operand,
2540                                                         stmts);
2541         if (!genop0)
2542           return NULL_TREE;
2543         tree genop1 = find_or_generate_expression (block, currop->op0, stmts);
2544         if (!genop1)
2545           return NULL_TREE;
2546         return fold_build2 (currop->opcode, currop->type, genop0, genop1);
2547       }
2548 
2549     case BIT_FIELD_REF:
2550       {
2551         tree genop0 = create_component_ref_by_pieces_1 (block, ref, operand,
2552                                                         stmts);
2553         if (!genop0)
2554           return NULL_TREE;
2555         tree op1 = currop->op0;
2556         tree op2 = currop->op1;
2557         tree t = build3 (BIT_FIELD_REF, currop->type, genop0, op1, op2);
2558         REF_REVERSE_STORAGE_ORDER (t) = currop->reverse;
2559         return fold (t);
2560       }
2561 
2562     /* For array ref vn_reference_op's, operand 1 of the array ref
2563        (the index) is op0 of the reference op, operand 2 (the lower
2564        bound) is op1, and operand 3 (the element size) is op2.
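       E.g., for a hypothetical access a[i_1] the tree is
       ARRAY_REF <a, i_1, low-bound, element-size>, and the reference
       op carries i_1 in op0, the low bound in op1 and the element
       size in op2, matching the genop1/genop2/genop3 uses below.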
       */
2565     case ARRAY_RANGE_REF:
2566     case ARRAY_REF:
2567       {
2568         tree genop0;
2569         tree genop1 = currop->op0;
2570         tree genop2 = currop->op1;
2571         tree genop3 = currop->op2;
2572         genop0 = create_component_ref_by_pieces_1 (block, ref, operand,
2573                                                    stmts);
2574         if (!genop0)
2575           return NULL_TREE;
2576         genop1 = find_or_generate_expression (block, genop1, stmts);
2577         if (!genop1)
2578           return NULL_TREE;
2579         if (genop2)
2580           {
2581             tree domain_type = TYPE_DOMAIN (TREE_TYPE (genop0));
2582             /* Drop zero minimum index if redundant.  */
2583             if (integer_zerop (genop2)
2584                 && (!domain_type
2585                     || integer_zerop (TYPE_MIN_VALUE (domain_type))))
2586               genop2 = NULL_TREE;
2587             else
2588               {
2589                 genop2 = find_or_generate_expression (block, genop2, stmts);
2590                 if (!genop2)
2591                   return NULL_TREE;
2592               }
2593           }
2594         if (genop3)
2595           {
2596             tree elmt_type = TREE_TYPE (TREE_TYPE (genop0));
2597             /* We can't always put a size in units of the element alignment
2598                here as the element alignment may be not visible.  See
2599                PR43783.  Simply drop the element size for constant
2600                sizes.  */
2601             if (TREE_CODE (genop3) == INTEGER_CST
2602                 && TREE_CODE (TYPE_SIZE_UNIT (elmt_type)) == INTEGER_CST
2603                 && wi::eq_p (wi::to_offset (TYPE_SIZE_UNIT (elmt_type)),
2604                              (wi::to_offset (genop3)
2605                               * vn_ref_op_align_unit (currop))))
2606               genop3 = NULL_TREE;
2607             else
2608               {
2609                 genop3 = find_or_generate_expression (block, genop3, stmts);
2610                 if (!genop3)
2611                   return NULL_TREE;
2612               }
2613           }
2614         return build4 (currop->opcode, currop->type, genop0, genop1,
2615                        genop2, genop3);
2616       }
2617     case COMPONENT_REF:
2618       {
2619         tree op0;
2620         tree op1;
2621         tree genop2 = currop->op1;
2622         op0 = create_component_ref_by_pieces_1 (block, ref, operand, stmts);
2623         if (!op0)
2624           return NULL_TREE;
2625         /* op1 should be a FIELD_DECL, which are represented by themselves.  */
2626         op1 = currop->op0;
2627         if (genop2)
2628           {
2629             genop2 = find_or_generate_expression (block, genop2, stmts);
2630             if (!genop2)
2631               return NULL_TREE;
2632           }
2633         return fold_build3 (COMPONENT_REF, TREE_TYPE (op1), op0, op1, genop2);
2634       }
2635 
2636     case SSA_NAME:
2637       {
2638         genop = find_or_generate_expression (block, currop->op0, stmts);
2639         return genop;
2640       }
2641     case STRING_CST:
2642     case INTEGER_CST:
2643     case COMPLEX_CST:
2644     case VECTOR_CST:
2645     case REAL_CST:
2646     case CONSTRUCTOR:
2647     case VAR_DECL:
2648     case PARM_DECL:
2649     case CONST_DECL:
2650     case RESULT_DECL:
2651     case FUNCTION_DECL:
2652       return currop->op0;
2653 
2654     default:
2655       gcc_unreachable ();
2656     }
2657 }
2658 
2659 /* For COMPONENT_REFs and ARRAY_REFs, we can't have any intermediates for the
2660    COMPONENT_REF or MEM_REF or ARRAY_REF portion, because we'd end up
2661    trying to rename aggregates into ssa form directly, which is a no-no.
2662 
2663    Thus, this routine doesn't create temporaries, it just builds a
2664    single access expression for the array, calling
2665    find_or_generate_expression to build the innermost pieces.
2666 
2667    This function is a subroutine of create_expression_by_pieces, and
2668    should not be called on its own unless you really know what you
2669    are doing.  */
2670 
2671 static tree
2672 create_component_ref_by_pieces (basic_block block, vn_reference_t ref,
2673                                 gimple_seq *stmts)
2674 {
2675   unsigned int op = 0;
2676   return create_component_ref_by_pieces_1 (block, ref, &op, stmts);
2677 }
2678 
2679 /* Find a simple leader for an expression, or generate one using
2680    create_expression_by_pieces from a NARY expression for the value.
2681    BLOCK is the basic_block we are looking for leaders in.
2682    OP is the tree expression to find a leader for or generate.
2683    Returns the leader or NULL_TREE on failure.  */
2684 
2685 static tree
2686 find_or_generate_expression (basic_block block, tree op, gimple_seq *stmts)
2687 {
2688   pre_expr expr = get_or_alloc_expr_for (op);
2689   unsigned int lookfor = get_expr_value_id (expr);
2690   pre_expr leader = bitmap_find_leader (AVAIL_OUT (block), lookfor);
2691   if (leader)
2692     {
2693       if (leader->kind == NAME)
2694         return PRE_EXPR_NAME (leader);
2695       else if (leader->kind == CONSTANT)
2696         return PRE_EXPR_CONSTANT (leader);
2697 
2698       /* Defer.  */
2699       return NULL_TREE;
2700     }
2701 
2702   /* It must be a complex expression, so generate it recursively.  Note
2703      that this is only necessary to handle gcc.dg/tree-ssa/ssa-pre28.c
2704      where the insert algorithm fails to insert a required expression.  */
2705   bitmap exprset = value_expressions[lookfor];
2706   bitmap_iterator bi;
2707   unsigned int i;
2708   EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
2709     {
2710       pre_expr temp = expression_for_id (i);
2711       /* We cannot insert random REFERENCE expressions at arbitrary
2712          places.  We can insert NARYs, which eventually re-materialize
2713          their operand values.  */
2714       if (temp->kind == NARY)
2715         return create_expression_by_pieces (block, temp, stmts,
2716                                             get_expr_type (expr));
2717     }
2718 
2719   /* Defer.  */
2720   return NULL_TREE;
2721 }
2722 
2723 /* Create an expression in pieces, so that we can handle very complex
2724    expressions that may be ANTIC, but not necessarily GIMPLE.
2725    BLOCK is the basic block the expression will be inserted into,
2726    EXPR is the expression to insert (in value form),
2727    STMTS is a statement list to append the necessary insertions into.
2728 
2729    This function will die if we hit some value that shouldn't be
2730    ANTIC but is (i.e. there is no leader for it, or its components).
2731    The function returns NULL_TREE in case a different antic expression
2732    has to be inserted first.
2733    This function may also generate expressions that are themselves
2734    partially or fully redundant.  Those that are will be either made
2735    fully redundant during the next iteration of insert (for partially
2736    redundant ones), or eliminated by eliminate (for fully redundant
2737    ones).  */
2738 
2739 static tree
2740 create_expression_by_pieces (basic_block block, pre_expr expr,
2741                              gimple_seq *stmts, tree type)
2742 {
2743   tree name;
2744   tree folded;
2745   gimple_seq forced_stmts = NULL;
2746   unsigned int value_id;
2747   gimple_stmt_iterator gsi;
2748   tree exprtype = type ? type : get_expr_type (expr);
2749   pre_expr nameexpr;
2750   gassign *newstmt;
2751 
2752   switch (expr->kind)
2753     {
2754     /* We may hit the NAME/CONSTANT case if we have to convert types
2755        that value numbering saw through.
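       For example (hypothetically), when value numbering treated
       i_2 = (int) s_1 as a no-op conversion, the leader recorded for
       i_2's value can be s_1, and a conversion to EXPRTYPE then has
       to be materialized here.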
*/ 2756 case NAME: 2757 folded = PRE_EXPR_NAME (expr); 2758 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (folded)) 2759 return NULL_TREE; 2760 if (useless_type_conversion_p (exprtype, TREE_TYPE (folded))) 2761 return folded; 2762 break; 2763 case CONSTANT: 2764 { 2765 folded = PRE_EXPR_CONSTANT (expr); 2766 tree tem = fold_convert (exprtype, folded); 2767 if (is_gimple_min_invariant (tem)) 2768 return tem; 2769 break; 2770 } 2771 case REFERENCE: 2772 if (PRE_EXPR_REFERENCE (expr)->operands[0].opcode == CALL_EXPR) 2773 { 2774 vn_reference_t ref = PRE_EXPR_REFERENCE (expr); 2775 unsigned int operand = 1; 2776 vn_reference_op_t currop = &ref->operands[0]; 2777 tree sc = NULL_TREE; 2778 tree fn = find_or_generate_expression (block, currop->op0, stmts); 2779 if (!fn) 2780 return NULL_TREE; 2781 if (currop->op1) 2782 { 2783 sc = find_or_generate_expression (block, currop->op1, stmts); 2784 if (!sc) 2785 return NULL_TREE; 2786 } 2787 auto_vec<tree> args (ref->operands.length () - 1); 2788 while (operand < ref->operands.length ()) 2789 { 2790 tree arg = create_component_ref_by_pieces_1 (block, ref, 2791 &operand, stmts); 2792 if (!arg) 2793 return NULL_TREE; 2794 args.quick_push (arg); 2795 } 2796 gcall *call = gimple_build_call_vec (fn, args); 2797 gimple_call_set_fntype (call, currop->type); 2798 if (sc) 2799 gimple_call_set_chain (call, sc); 2800 tree forcedname = make_ssa_name (TREE_TYPE (currop->type)); 2801 gimple_call_set_lhs (call, forcedname); 2802 /* There's no CCP pass after PRE which would re-compute alignment 2803 information so make sure we re-materialize this here. */ 2804 if (gimple_call_builtin_p (call, BUILT_IN_ASSUME_ALIGNED) 2805 && args.length () - 2 <= 1 2806 && tree_fits_uhwi_p (args[1]) 2807 && (args.length () != 3 || tree_fits_uhwi_p (args[2]))) 2808 { 2809 unsigned HOST_WIDE_INT halign = tree_to_uhwi (args[1]); 2810 unsigned HOST_WIDE_INT hmisalign 2811 = args.length () == 3 ? tree_to_uhwi (args[2]) : 0; 2812 if ((halign & (halign - 1)) == 0 2813 && (hmisalign & ~(halign - 1)) == 0) 2814 set_ptr_info_alignment (get_ptr_info (forcedname), 2815 halign, hmisalign); 2816 } 2817 gimple_set_vuse (call, BB_LIVE_VOP_ON_EXIT (block)); 2818 gimple_seq_add_stmt_without_update (&forced_stmts, call); 2819 folded = forcedname; 2820 } 2821 else 2822 { 2823 folded = create_component_ref_by_pieces (block, 2824 PRE_EXPR_REFERENCE (expr), 2825 stmts); 2826 if (!folded) 2827 return NULL_TREE; 2828 name = make_temp_ssa_name (exprtype, NULL, "pretmp"); 2829 newstmt = gimple_build_assign (name, folded); 2830 gimple_seq_add_stmt_without_update (&forced_stmts, newstmt); 2831 gimple_set_vuse (newstmt, BB_LIVE_VOP_ON_EXIT (block)); 2832 folded = name; 2833 } 2834 break; 2835 case NARY: 2836 { 2837 vn_nary_op_t nary = PRE_EXPR_NARY (expr); 2838 tree *genop = XALLOCAVEC (tree, nary->length); 2839 unsigned i; 2840 for (i = 0; i < nary->length; ++i) 2841 { 2842 genop[i] = find_or_generate_expression (block, nary->op[i], stmts); 2843 if (!genop[i]) 2844 return NULL_TREE; 2845 /* Ensure genop[] is properly typed for POINTER_PLUS_EXPR. It 2846 may have conversions stripped. 
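             (POINTER_PLUS_EXPR requires a pointer-typed first operand
             and a sizetype offset, as in a hypothetical
             p_1 p+ (sizetype) i_2, so the stripped conversions are
             re-added just below.)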
             */
2847           if (nary->opcode == POINTER_PLUS_EXPR)
2848             {
2849               if (i == 0)
2850                 genop[i] = gimple_convert (&forced_stmts,
2851                                            nary->type, genop[i]);
2852               else if (i == 1)
2853                 genop[i] = gimple_convert (&forced_stmts,
2854                                            sizetype, genop[i]);
2855             }
2856           else
2857             genop[i] = gimple_convert (&forced_stmts,
2858                                        TREE_TYPE (nary->op[i]), genop[i]);
2859         }
2860       if (nary->opcode == CONSTRUCTOR)
2861         {
2862           vec<constructor_elt, va_gc> *elts = NULL;
2863           for (i = 0; i < nary->length; ++i)
2864             CONSTRUCTOR_APPEND_ELT (elts, NULL_TREE, genop[i]);
2865           folded = build_constructor (nary->type, elts);
2866           name = make_temp_ssa_name (exprtype, NULL, "pretmp");
2867           newstmt = gimple_build_assign (name, folded);
2868           gimple_seq_add_stmt_without_update (&forced_stmts, newstmt);
2869           folded = name;
2870         }
2871       else
2872         {
2873           switch (nary->length)
2874             {
2875             case 1:
2876               folded = gimple_build (&forced_stmts, nary->opcode, nary->type,
2877                                      genop[0]);
2878               break;
2879             case 2:
2880               folded = gimple_build (&forced_stmts, nary->opcode, nary->type,
2881                                      genop[0], genop[1]);
2882               break;
2883             case 3:
2884               folded = gimple_build (&forced_stmts, nary->opcode, nary->type,
2885                                      genop[0], genop[1], genop[2]);
2886               break;
2887             default:
2888               gcc_unreachable ();
2889             }
2890         }
2891       }
2892       break;
2893     default:
2894       gcc_unreachable ();
2895     }
2896 
2897   folded = gimple_convert (&forced_stmts, exprtype, folded);
2898 
2899   /* If there is nothing to insert, return the simplified result.  */
2900   if (gimple_seq_empty_p (forced_stmts))
2901     return folded;
2902   /* If we simplified to a constant, return it and discard any stmts
2903      we may have built.  */
2904   if (is_gimple_min_invariant (folded))
2905     {
2906       gimple_seq_discard (forced_stmts);
2907       return folded;
2908     }
2909   /* Likewise if we simplified to something not queued for insertion.  */
2910   bool found = false;
2911   gsi = gsi_last (forced_stmts);
2912   for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2913     {
2914       gimple *stmt = gsi_stmt (gsi);
2915       tree forcedname = gimple_get_lhs (stmt);
2916       if (forcedname == folded)
2917         {
2918           found = true;
2919           break;
2920         }
2921     }
2922   if (! found)
2923     {
2924       gimple_seq_discard (forced_stmts);
2925       return folded;
2926     }
2927   gcc_assert (TREE_CODE (folded) == SSA_NAME);
2928 
2929   /* If we have any intermediate expressions to the value sets, add them
2930      to the value sets and chain them in the instruction stream.  */
2931   if (forced_stmts)
2932     {
2933       gsi = gsi_start (forced_stmts);
2934       for (; !gsi_end_p (gsi); gsi_next (&gsi))
2935         {
2936           gimple *stmt = gsi_stmt (gsi);
2937           tree forcedname = gimple_get_lhs (stmt);
2938           pre_expr nameexpr;
2939 
2940           if (forcedname != folded)
2941             {
2942               VN_INFO (forcedname)->valnum = forcedname;
2943               VN_INFO (forcedname)->value_id = get_next_value_id ();
2944               nameexpr = get_or_alloc_expr_for_name (forcedname);
2945               add_to_value (VN_INFO (forcedname)->value_id, nameexpr);
2946               bitmap_value_replace_in_set (NEW_SETS (block), nameexpr);
2947               bitmap_value_replace_in_set (AVAIL_OUT (block), nameexpr);
2948             }
2949 
2950           bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (forcedname));
2951         }
2952       gimple_seq_add_seq (stmts, forced_stmts);
2953     }
2954 
2955   name = folded;
2956 
2957   /* Fold the last statement.  */
2958   gsi = gsi_last (*stmts);
2959   if (fold_stmt_inplace (&gsi))
2960     update_stmt (gsi_stmt (gsi));
2961 
2962   /* Add a value number to the temporary.
2963 The value may already exist in either NEW_SETS, or AVAIL_OUT, because 2964 we are creating the expression by pieces, and this particular piece of 2965 the expression may have been represented. There is no harm in replacing 2966 here. */ 2967 value_id = get_expr_value_id (expr); 2968 VN_INFO (name)->value_id = value_id; 2969 VN_INFO (name)->valnum = vn_valnum_from_value_id (value_id); 2970 if (VN_INFO (name)->valnum == NULL_TREE) 2971 VN_INFO (name)->valnum = name; 2972 gcc_assert (VN_INFO (name)->valnum != NULL_TREE); 2973 nameexpr = get_or_alloc_expr_for_name (name); 2974 add_to_value (value_id, nameexpr); 2975 if (NEW_SETS (block)) 2976 bitmap_value_replace_in_set (NEW_SETS (block), nameexpr); 2977 bitmap_value_replace_in_set (AVAIL_OUT (block), nameexpr); 2978 2979 pre_stats.insertions++; 2980 if (dump_file && (dump_flags & TDF_DETAILS)) 2981 { 2982 fprintf (dump_file, "Inserted "); 2983 print_gimple_stmt (dump_file, gsi_stmt (gsi_last (*stmts)), 0); 2984 fprintf (dump_file, " in predecessor %d (%04d)\n", 2985 block->index, value_id); 2986 } 2987 2988 return name; 2989 } 2990 2991 2992 /* Insert the to-be-made-available values of expression EXPRNUM for each 2993 predecessor, stored in AVAIL, into the predecessors of BLOCK, and 2994 merge the result with a phi node, given the same value number as 2995 NODE. Return true if we have inserted new stuff. */ 2996 2997 static bool 2998 insert_into_preds_of_block (basic_block block, unsigned int exprnum, 2999 vec<pre_expr> avail) 3000 { 3001 pre_expr expr = expression_for_id (exprnum); 3002 pre_expr newphi; 3003 unsigned int val = get_expr_value_id (expr); 3004 edge pred; 3005 bool insertions = false; 3006 bool nophi = false; 3007 basic_block bprime; 3008 pre_expr eprime; 3009 edge_iterator ei; 3010 tree type = get_expr_type (expr); 3011 tree temp; 3012 gphi *phi; 3013 3014 /* Make sure we aren't creating an induction variable. */ 3015 if (bb_loop_depth (block) > 0 && EDGE_COUNT (block->preds) == 2) 3016 { 3017 bool firstinsideloop = false; 3018 bool secondinsideloop = false; 3019 firstinsideloop = flow_bb_inside_loop_p (block->loop_father, 3020 EDGE_PRED (block, 0)->src); 3021 secondinsideloop = flow_bb_inside_loop_p (block->loop_father, 3022 EDGE_PRED (block, 1)->src); 3023 /* Induction variables only have one edge inside the loop. */ 3024 if ((firstinsideloop ^ secondinsideloop) 3025 && expr->kind != REFERENCE) 3026 { 3027 if (dump_file && (dump_flags & TDF_DETAILS)) 3028 fprintf (dump_file, "Skipping insertion of phi for partial redundancy: Looks like an induction variable\n"); 3029 nophi = true; 3030 } 3031 } 3032 3033 /* Make the necessary insertions. */ 3034 FOR_EACH_EDGE (pred, ei, block->preds) 3035 { 3036 gimple_seq stmts = NULL; 3037 tree builtexpr; 3038 bprime = pred->src; 3039 eprime = avail[pred->dest_idx]; 3040 builtexpr = create_expression_by_pieces (bprime, eprime, 3041 &stmts, type); 3042 gcc_assert (!(pred->flags & EDGE_ABNORMAL)); 3043 if (!gimple_seq_empty_p (stmts)) 3044 { 3045 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pred, stmts); 3046 gcc_assert (! new_bb); 3047 insertions = true; 3048 } 3049 if (!builtexpr) 3050 { 3051 /* We cannot insert a PHI node if we failed to insert 3052 on one edge. 
             */
3053           nophi = true;
3054           continue;
3055         }
3056       if (is_gimple_min_invariant (builtexpr))
3057         avail[pred->dest_idx] = get_or_alloc_expr_for_constant (builtexpr);
3058       else
3059         avail[pred->dest_idx] = get_or_alloc_expr_for_name (builtexpr);
3060     }
3061   /* If we didn't want a phi node, and we made insertions, we still have
3062      inserted new stuff, and thus return true.  If we didn't want a phi node,
3063      and didn't make insertions, we haven't added anything new, so return
3064      false.  */
3065   if (nophi && insertions)
3066     return true;
3067   else if (nophi && !insertions)
3068     return false;
3069 
3070   /* Now build a phi for the new variable.  */
3071   temp = make_temp_ssa_name (type, NULL, "prephitmp");
3072   phi = create_phi_node (temp, block);
3073 
3074   VN_INFO (temp)->value_id = val;
3075   VN_INFO (temp)->valnum = vn_valnum_from_value_id (val);
3076   if (VN_INFO (temp)->valnum == NULL_TREE)
3077     VN_INFO (temp)->valnum = temp;
3078   bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (temp));
3079   FOR_EACH_EDGE (pred, ei, block->preds)
3080     {
3081       pre_expr ae = avail[pred->dest_idx];
3082       gcc_assert (get_expr_type (ae) == type
3083                   || useless_type_conversion_p (type, get_expr_type (ae)));
3084       if (ae->kind == CONSTANT)
3085         add_phi_arg (phi, unshare_expr (PRE_EXPR_CONSTANT (ae)),
3086                      pred, UNKNOWN_LOCATION);
3087       else
3088         add_phi_arg (phi, PRE_EXPR_NAME (ae), pred, UNKNOWN_LOCATION);
3089     }
3090 
3091   newphi = get_or_alloc_expr_for_name (temp);
3092   add_to_value (val, newphi);
3093 
3094   /* The value should *not* exist in PHI_GEN, or else we wouldn't be doing
3095      this insertion, since we test for the existence of this value in PHI_GEN
3096      before proceeding with the partial redundancy checks in insert_aux.
3097 
3098      The value may exist in AVAIL_OUT, in particular, it could be represented
3099      by the expression we are trying to eliminate, in which case we want the
3100      replacement to occur.  If it is not already in AVAIL_OUT, we want it
3101      inserted there.
3102 
3103      Similarly to the PHI_GEN case, the value should not exist in NEW_SETS of
3104      this block, because if it did, it would have existed in our dominator's
3105      AVAIL_OUT, and would have been skipped due to the full redundancy check.
3106   */
3107 
3108   bitmap_insert_into_set (PHI_GEN (block), newphi);
3109   bitmap_value_replace_in_set (AVAIL_OUT (block),
3110                                newphi);
3111   bitmap_insert_into_set (NEW_SETS (block),
3112                           newphi);
3113 
3114   /* If we insert a PHI node for a conversion of another PHI node
3115      in the same basic-block try to preserve range information.
3116      This is important so that follow-up loop passes receive an
3117      optimal number-of-iterations analysis.  See PR61743.  */
3118   if (expr->kind == NARY
3119       && CONVERT_EXPR_CODE_P (expr->u.nary->opcode)
3120       && TREE_CODE (expr->u.nary->op[0]) == SSA_NAME
3121       && gimple_bb (SSA_NAME_DEF_STMT (expr->u.nary->op[0])) == block
3122       && INTEGRAL_TYPE_P (type)
3123       && INTEGRAL_TYPE_P (TREE_TYPE (expr->u.nary->op[0]))
3124       && (TYPE_PRECISION (type)
3125           >= TYPE_PRECISION (TREE_TYPE (expr->u.nary->op[0])))
3126       && SSA_NAME_RANGE_INFO (expr->u.nary->op[0]))
3127     {
3128       wide_int min, max;
3129       if (get_range_info (expr->u.nary->op[0], &min, &max) == VR_RANGE
3130           && !wi::neg_p (min, SIGNED)
3131           && !wi::neg_p (max, SIGNED))
3132         /* Just handle extension and sign-changes of all-positive ranges.
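           For example (hypothetically), an unsigned char PHI result
           known to be in [0, 200] that is zero-extended to int keeps
           the range [0, 200] in the wider type.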
           */
3133         set_range_info (temp,
3134                         SSA_NAME_RANGE_TYPE (expr->u.nary->op[0]),
3135                         wide_int_storage::from (min, TYPE_PRECISION (type),
3136                                                 TYPE_SIGN (type)),
3137                         wide_int_storage::from (max, TYPE_PRECISION (type),
3138                                                 TYPE_SIGN (type)));
3139     }
3140 
3141   if (dump_file && (dump_flags & TDF_DETAILS))
3142     {
3143       fprintf (dump_file, "Created phi ");
3144       print_gimple_stmt (dump_file, phi, 0);
3145       fprintf (dump_file, " in block %d (%04d)\n", block->index, val);
3146     }
3147   pre_stats.phis++;
3148   return true;
3149 }
3150 
3151 
3152 
3153 /* Perform insertion of partially redundant or hoistable values.
3154    For BLOCK, do the following:
3155    1.  Propagate the NEW_SETS of the dominator into the current block.
3156    If the block has multiple predecessors,
3157      2a. Iterate over the ANTIC expressions for the block to see if
3158          any of them are partially redundant.
3159      2b. If so, insert them into the necessary predecessors to make
3160          the expression fully redundant.
3161      2c. Insert a new PHI merging the values of the predecessors.
3162      2d. Insert the new PHI, and the new expressions, into the
3163          NEW_SETS set.
3164    If the block has multiple successors,
3165      3a. Iterate over the ANTIC values for the block to see if
3166          any of them are good candidates for hoisting.
3167      3b. If so, insert expressions computing the values in BLOCK,
3168          and add the new expressions into the NEW_SETS set.
3169    4. Recursively call ourselves on the dominator children of BLOCK.
3170 
3171    Steps 1, 2a, and 4 are done by insert_aux.  2b, 2c and 2d are done by
3172    do_pre_regular_insertion and do_pre_partial_partial_insertion.
3173    3a and 3b are done in do_hoist_insertion.
3174 */
3175 
3176 static bool
3177 do_pre_regular_insertion (basic_block block, basic_block dom)
3178 {
3179   bool new_stuff = false;
3180   vec<pre_expr> exprs;
3181   pre_expr expr;
3182   auto_vec<pre_expr> avail;
3183   int i;
3184 
3185   exprs = sorted_array_from_bitmap_set (ANTIC_IN (block));
3186   avail.safe_grow (EDGE_COUNT (block->preds));
3187 
3188   FOR_EACH_VEC_ELT (exprs, i, expr)
3189     {
3190       if (expr->kind == NARY
3191           || expr->kind == REFERENCE)
3192         {
3193           unsigned int val;
3194           bool by_some = false;
3195           bool cant_insert = false;
3196           bool all_same = true;
3197           pre_expr first_s = NULL;
3198           edge pred;
3199           basic_block bprime;
3200           pre_expr eprime = NULL;
3201           edge_iterator ei;
3202           pre_expr edoubleprime = NULL;
3203           bool do_insertion = false;
3204 
3205           val = get_expr_value_id (expr);
3206           if (bitmap_set_contains_value (PHI_GEN (block), val))
3207             continue;
3208           if (bitmap_set_contains_value (AVAIL_OUT (dom), val))
3209             {
3210               if (dump_file && (dump_flags & TDF_DETAILS))
3211                 {
3212                   fprintf (dump_file, "Found fully redundant value: ");
3213                   print_pre_expr (dump_file, expr);
3214                   fprintf (dump_file, "\n");
3215                 }
3216               continue;
3217             }
3218 
3219           FOR_EACH_EDGE (pred, ei, block->preds)
3220             {
3221               unsigned int vprime;
3222 
3223               /* We should never run insertion for the exit block
3224                  and so not come across fake pred edges.  */
3225               gcc_assert (!(pred->flags & EDGE_FAKE));
3226               bprime = pred->src;
3227               /* We are looking at ANTIC_OUT of bprime.  */
3228               eprime = phi_translate (NULL, expr, ANTIC_IN (block), NULL, pred);
3229 
3230               /* eprime will generally only be NULL if the
3231                  value of the expression, translated
3232                  through the PHI for this predecessor, is
3233                  undefined.  If that is the case, we can't
3234                  make the expression fully redundant,
3235                  because its value is undefined along a
3236                  predecessor path.
We can thus break out 3237 early because it doesn't matter what the 3238 rest of the results are. */ 3239 if (eprime == NULL) 3240 { 3241 avail[pred->dest_idx] = NULL; 3242 cant_insert = true; 3243 break; 3244 } 3245 3246 vprime = get_expr_value_id (eprime); 3247 edoubleprime = bitmap_find_leader (AVAIL_OUT (bprime), 3248 vprime); 3249 if (edoubleprime == NULL) 3250 { 3251 avail[pred->dest_idx] = eprime; 3252 all_same = false; 3253 } 3254 else 3255 { 3256 avail[pred->dest_idx] = edoubleprime; 3257 by_some = true; 3258 /* We want to perform insertions to remove a redundancy on 3259 a path in the CFG we want to optimize for speed. */ 3260 if (optimize_edge_for_speed_p (pred)) 3261 do_insertion = true; 3262 if (first_s == NULL) 3263 first_s = edoubleprime; 3264 else if (!pre_expr_d::equal (first_s, edoubleprime)) 3265 all_same = false; 3266 } 3267 } 3268 /* If we can insert it, it's not the same value 3269 already existing along every predecessor, and 3270 it's defined by some predecessor, it is 3271 partially redundant. */ 3272 if (!cant_insert && !all_same && by_some) 3273 { 3274 if (!do_insertion) 3275 { 3276 if (dump_file && (dump_flags & TDF_DETAILS)) 3277 { 3278 fprintf (dump_file, "Skipping partial redundancy for " 3279 "expression "); 3280 print_pre_expr (dump_file, expr); 3281 fprintf (dump_file, " (%04d), no redundancy on to be " 3282 "optimized for speed edge\n", val); 3283 } 3284 } 3285 else if (dbg_cnt (treepre_insert)) 3286 { 3287 if (dump_file && (dump_flags & TDF_DETAILS)) 3288 { 3289 fprintf (dump_file, "Found partial redundancy for " 3290 "expression "); 3291 print_pre_expr (dump_file, expr); 3292 fprintf (dump_file, " (%04d)\n", 3293 get_expr_value_id (expr)); 3294 } 3295 if (insert_into_preds_of_block (block, 3296 get_expression_id (expr), 3297 avail)) 3298 new_stuff = true; 3299 } 3300 } 3301 /* If all edges produce the same value and that value is 3302 an invariant, then the PHI has the same value on all 3303 edges. Note this. */ 3304 else if (!cant_insert && all_same) 3305 { 3306 gcc_assert (edoubleprime->kind == CONSTANT 3307 || edoubleprime->kind == NAME); 3308 3309 tree temp = make_temp_ssa_name (get_expr_type (expr), 3310 NULL, "pretmp"); 3311 gassign *assign 3312 = gimple_build_assign (temp, 3313 edoubleprime->kind == CONSTANT ? 3314 PRE_EXPR_CONSTANT (edoubleprime) : 3315 PRE_EXPR_NAME (edoubleprime)); 3316 gimple_stmt_iterator gsi = gsi_after_labels (block); 3317 gsi_insert_before (&gsi, assign, GSI_NEW_STMT); 3318 3319 VN_INFO (temp)->value_id = val; 3320 VN_INFO (temp)->valnum = vn_valnum_from_value_id (val); 3321 if (VN_INFO (temp)->valnum == NULL_TREE) 3322 VN_INFO (temp)->valnum = temp; 3323 bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (temp)); 3324 pre_expr newe = get_or_alloc_expr_for_name (temp); 3325 add_to_value (val, newe); 3326 bitmap_value_replace_in_set (AVAIL_OUT (block), newe); 3327 bitmap_insert_into_set (NEW_SETS (block), newe); 3328 } 3329 } 3330 } 3331 3332 exprs.release (); 3333 return new_stuff; 3334 } 3335 3336 3337 /* Perform insertion for partially anticipatable expressions. There 3338 is only one case we will perform insertion for these. This case is 3339 if the expression is partially anticipatable, and fully available. 3340 In this case, we know that putting it earlier will enable us to 3341 remove the later computation. 
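   A hypothetical sketch:

     if (c_1)
       x_2 = a_5 + b_6;
     else
       x_3 = a_5 + b_6;
     <join>                     <- a_5 + b_6 available in all preds
     ...
     if (rare_7)
       y_4 = a_5 + b_6;         <- only partially anticipated at the join

   Full PRE will not insert at the join because the expression is not
   anticipated on every path, but a PHI merging x_2/x_3 there makes the
   later computation fully redundant.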
*/ 3342 3343 static bool 3344 do_pre_partial_partial_insertion (basic_block block, basic_block dom) 3345 { 3346 bool new_stuff = false; 3347 vec<pre_expr> exprs; 3348 pre_expr expr; 3349 auto_vec<pre_expr> avail; 3350 int i; 3351 3352 exprs = sorted_array_from_bitmap_set (PA_IN (block)); 3353 avail.safe_grow (EDGE_COUNT (block->preds)); 3354 3355 FOR_EACH_VEC_ELT (exprs, i, expr) 3356 { 3357 if (expr->kind == NARY 3358 || expr->kind == REFERENCE) 3359 { 3360 unsigned int val; 3361 bool by_all = true; 3362 bool cant_insert = false; 3363 edge pred; 3364 basic_block bprime; 3365 pre_expr eprime = NULL; 3366 edge_iterator ei; 3367 3368 val = get_expr_value_id (expr); 3369 if (bitmap_set_contains_value (PHI_GEN (block), val)) 3370 continue; 3371 if (bitmap_set_contains_value (AVAIL_OUT (dom), val)) 3372 continue; 3373 3374 FOR_EACH_EDGE (pred, ei, block->preds) 3375 { 3376 unsigned int vprime; 3377 pre_expr edoubleprime; 3378 3379 /* We should never run insertion for the exit block 3380 and so not come across fake pred edges. */ 3381 gcc_assert (!(pred->flags & EDGE_FAKE)); 3382 bprime = pred->src; 3383 eprime = phi_translate (NULL, expr, ANTIC_IN (block), 3384 PA_IN (block), pred); 3385 3386 /* eprime will generally only be NULL if the 3387 value of the expression, translated 3388 through the PHI for this predecessor, is 3389 undefined. If that is the case, we can't 3390 make the expression fully redundant, 3391 because its value is undefined along a 3392 predecessor path. We can thus break out 3393 early because it doesn't matter what the 3394 rest of the results are. */ 3395 if (eprime == NULL) 3396 { 3397 avail[pred->dest_idx] = NULL; 3398 cant_insert = true; 3399 break; 3400 } 3401 3402 vprime = get_expr_value_id (eprime); 3403 edoubleprime = bitmap_find_leader (AVAIL_OUT (bprime), vprime); 3404 avail[pred->dest_idx] = edoubleprime; 3405 if (edoubleprime == NULL) 3406 { 3407 by_all = false; 3408 break; 3409 } 3410 } 3411 3412 /* If we can insert it, it's not the same value 3413 already existing along every predecessor, and 3414 it's defined by some predecessor, it is 3415 partially redundant. */ 3416 if (!cant_insert && by_all) 3417 { 3418 edge succ; 3419 bool do_insertion = false; 3420 3421 /* Insert only if we can remove a later expression on a path 3422 that we want to optimize for speed. 3423 The phi node that we will be inserting in BLOCK is not free, 3424 and inserting it for the sake of !optimize_for_speed successor 3425 may cause regressions on the speed path. 
     */
3426           FOR_EACH_EDGE (succ, ei, block->succs)
3427             {
3428               if (bitmap_set_contains_value (PA_IN (succ->dest), val)
3429                   || bitmap_set_contains_value (ANTIC_IN (succ->dest), val))
3430                 {
3431                   if (optimize_edge_for_speed_p (succ))
3432                     do_insertion = true;
3433                 }
3434             }
3435 
3436           if (!do_insertion)
3437             {
3438               if (dump_file && (dump_flags & TDF_DETAILS))
3439                 {
3440                   fprintf (dump_file, "Skipping partial partial redundancy "
3441                            "for expression ");
3442                   print_pre_expr (dump_file, expr);
3443                   fprintf (dump_file, " (%04d), not (partially) anticipated "
3444                            "on any to be optimized for speed edges\n", val);
3445                 }
3446             }
3447           else if (dbg_cnt (treepre_insert))
3448             {
3449               pre_stats.pa_insert++;
3450               if (dump_file && (dump_flags & TDF_DETAILS))
3451                 {
3452                   fprintf (dump_file, "Found partial partial redundancy "
3453                            "for expression ");
3454                   print_pre_expr (dump_file, expr);
3455                   fprintf (dump_file, " (%04d)\n",
3456                            get_expr_value_id (expr));
3457                 }
3458               if (insert_into_preds_of_block (block,
3459                                               get_expression_id (expr),
3460                                               avail))
3461                 new_stuff = true;
3462             }
3463         }
3464     }
3465   }
3466 
3467   exprs.release ();
3468   return new_stuff;
3469 }
3470 
3471 /* Insert expressions in BLOCK to compute hoistable values.
3472    Return TRUE if something was inserted, otherwise return FALSE.
3473    The caller has to make sure that BLOCK has at least two successors.  */
3474 
3475 static bool
3476 do_hoist_insertion (basic_block block)
3477 {
3478   edge e;
3479   edge_iterator ei;
3480   bool new_stuff = false;
3481   unsigned i;
3482   gimple_stmt_iterator last;
3483 
3484   /* At least two successors, or else...  */
3485   gcc_assert (EDGE_COUNT (block->succs) >= 2);
3486 
3487   /* Check that all successors of BLOCK are dominated by block.
3488      We could use dominated_by_p () for this, but actually there is a much
3489      quicker check: any successor that is dominated by BLOCK can't have
3490      more than one predecessor edge.  */
3491   FOR_EACH_EDGE (e, ei, block->succs)
3492     if (! single_pred_p (e->dest))
3493       return false;
3494 
3495   /* Determine the insertion point.  If we would have to insert before
3496      the last stmt and cannot do that safely, bail out.  */
3497   last = gsi_last_bb (block);
3498   if (!gsi_end_p (last)
3499       && !is_ctrl_stmt (gsi_stmt (last))
3500       && stmt_ends_bb_p (gsi_stmt (last)))
3501     return false;
3502 
3503   /* Compute the set of hoistable expressions from ANTIC_IN.  First compute
3504      hoistable values.  */
3505   bitmap_set hoistable_set;
3506 
3507   /* A hoistable value must be in ANTIC_IN(block)
3508      but not in AVAIL_OUT(BLOCK).  */
3509   bitmap_initialize (&hoistable_set.values, &grand_bitmap_obstack);
3510   bitmap_and_compl (&hoistable_set.values,
3511                     &ANTIC_IN (block)->values, &AVAIL_OUT (block)->values);
3512 
3513   /* Short-cut for a common case: hoistable_set is empty.  */
3514   if (bitmap_empty_p (&hoistable_set.values))
3515     return false;
3516 
3517   /* Compute which of the hoistable values is in AVAIL_OUT of
3518      at least one of the successors of BLOCK.  */
3519   bitmap_head availout_in_some;
3520   bitmap_initialize (&availout_in_some, &grand_bitmap_obstack);
3521   FOR_EACH_EDGE (e, ei, block->succs)
3522     /* Do not consider expressions solely because of their availability
3523        on loop exits.  They'd be ANTIC-IN throughout the whole loop
3524        and thus effectively hoisted across loops by combination of
3525        PRE and hoisting.  */
3526     if (! loop_exit_edge_p (block->loop_father, e))
3527       bitmap_ior_and_into (&availout_in_some, &hoistable_set.values,
3528                            &AVAIL_OUT (e->dest)->values);
3529   bitmap_clear (&hoistable_set.values);
3530 
3531   /* Short-cut for a common case: availout_in_some is empty.  */
3532   if (bitmap_empty_p (&availout_in_some))
3533     return false;
3534 
3535   /* Hack hoistable_set in-place so we can use sorted_array_from_bitmap_set.  */
3536   hoistable_set.values = availout_in_some;
3537   hoistable_set.expressions = ANTIC_IN (block)->expressions;
3538 
3539   /* Now finally construct the topological-ordered expression set.  */
3540   vec<pre_expr> exprs = sorted_array_from_bitmap_set (&hoistable_set);
3541 
3542   bitmap_clear (&hoistable_set.values);
3543 
3544   /* If there are candidate values for hoisting, insert expressions
3545      strategically to make the hoistable expressions fully redundant.  */
3546   pre_expr expr;
3547   FOR_EACH_VEC_ELT (exprs, i, expr)
3548     {
3549       /* While we try to sort expressions topologically above, the
3550          sorting doesn't work out perfectly.  Catch expressions we
3551          already inserted.  */
3552       unsigned int value_id = get_expr_value_id (expr);
3553       if (bitmap_set_contains_value (AVAIL_OUT (block), value_id))
3554         {
3555           if (dump_file && (dump_flags & TDF_DETAILS))
3556             {
3557               fprintf (dump_file,
3558                        "Already inserted expression for ");
3559               print_pre_expr (dump_file, expr);
3560               fprintf (dump_file, " (%04d)\n", value_id);
3561             }
3562           continue;
3563         }
3564 
3565       /* OK, we should hoist this value.  Perform the transformation.  */
3566       pre_stats.hoist_insert++;
3567       if (dump_file && (dump_flags & TDF_DETAILS))
3568         {
3569           fprintf (dump_file,
3570                    "Inserting expression in block %d for code hoisting: ",
3571                    block->index);
3572           print_pre_expr (dump_file, expr);
3573           fprintf (dump_file, " (%04d)\n", value_id);
3574         }
3575 
3576       gimple_seq stmts = NULL;
3577       tree res = create_expression_by_pieces (block, expr, &stmts,
3578                                               get_expr_type (expr));
3579 
3580       /* Do not return true if expression creation ultimately
3581          did not insert any statements.  */
3582       if (gimple_seq_empty_p (stmts))
3583         res = NULL_TREE;
3584       else
3585         {
3586           if (gsi_end_p (last) || is_ctrl_stmt (gsi_stmt (last)))
3587             gsi_insert_seq_before (&last, stmts, GSI_SAME_STMT);
3588           else
3589             gsi_insert_seq_after (&last, stmts, GSI_NEW_STMT);
3590         }
3591 
3592       /* Make sure to not return true if expression creation ultimately
3593          failed but also make sure to insert any stmts produced as they
3594          are tracked in inserted_exprs.  */
3595       if (! res)
3596         continue;
3597 
3598       new_stuff = true;
3599     }
3600 
3601   exprs.release ();
3602 
3603   return new_stuff;
3604 }
3605 
3606 /* Do a dominator walk on the control flow graph, and insert computations
3607    of values as necessary for PRE and hoisting.  */
3608 
3609 static bool
3610 insert_aux (basic_block block, bool do_pre, bool do_hoist)
3611 {
3612   basic_block son;
3613   bool new_stuff = false;
3614 
3615   if (block)
3616     {
3617       basic_block dom;
3618       dom = get_immediate_dominator (CDI_DOMINATORS, block);
3619       if (dom)
3620         {
3621           unsigned i;
3622           bitmap_iterator bi;
3623           bitmap_set_t newset;
3624 
3625           /* First, update the AVAIL_OUT set with anything we may have
3626              inserted higher up in the dominator tree.  */
3627           newset = NEW_SETS (dom);
3628           if (newset)
3629             {
3630               /* Note that we need to value_replace both NEW_SETS and
3631                  AVAIL_OUT.  For both sets the value may be represented
3632                  here by some non-simple expression that we want to
3633                  replace with the new leader.
/* Do a dominator walk on the control flow graph, and insert computations
   of values as necessary for PRE and hoisting.  */

static bool
insert_aux (basic_block block, bool do_pre, bool do_hoist)
{
  basic_block son;
  bool new_stuff = false;

  if (block)
    {
      basic_block dom;
      dom = get_immediate_dominator (CDI_DOMINATORS, block);
      if (dom)
	{
	  unsigned i;
	  bitmap_iterator bi;
	  bitmap_set_t newset;

	  /* First, update the AVAIL_OUT set with anything we may have
	     inserted higher up in the dominator tree.  */
	  newset = NEW_SETS (dom);
	  if (newset)
	    {
	      /* Note that we need to value_replace both NEW_SETS and
		 AVAIL_OUT.  For the case of NEW_SETS, the value may be
		 represented here by some non-simple expression that we
		 want to replace it with.  */
	      FOR_EACH_EXPR_ID_IN_SET (newset, i, bi)
		{
		  pre_expr expr = expression_for_id (i);
		  bitmap_value_replace_in_set (NEW_SETS (block), expr);
		  bitmap_value_replace_in_set (AVAIL_OUT (block), expr);
		}
	    }

	  /* Insert expressions for partial redundancies.  */
	  if (do_pre && !single_pred_p (block))
	    {
	      new_stuff |= do_pre_regular_insertion (block, dom);
	      if (do_partial_partial)
		new_stuff |= do_pre_partial_partial_insertion (block, dom);
	    }

	  /* Insert expressions for hoisting.  */
	  if (do_hoist && EDGE_COUNT (block->succs) >= 2)
	    new_stuff |= do_hoist_insertion (block);
	}
    }
  for (son = first_dom_son (CDI_DOMINATORS, block);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    {
      new_stuff |= insert_aux (son, do_pre, do_hoist);
    }

  return new_stuff;
}

/* Perform insertion of partially redundant and hoistable values.  */

static void
insert (void)
{
  bool new_stuff = true;
  basic_block bb;
  int num_iterations = 0;

  FOR_ALL_BB_FN (bb, cfun)
    NEW_SETS (bb) = bitmap_set_new ();

  while (new_stuff)
    {
      num_iterations++;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Starting insert iteration %d\n", num_iterations);
      new_stuff = insert_aux (ENTRY_BLOCK_PTR_FOR_FN (cfun), flag_tree_pre,
			      flag_code_hoisting);

      /* Clear the NEW sets before the next iteration.  We have already
	 fully propagated their contents.  */
      if (new_stuff)
	FOR_ALL_BB_FN (bb, cfun)
	  bitmap_set_free (NEW_SETS (bb));
    }
  statistics_histogram_event (cfun, "insert iterations", num_iterations);
}
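/* A sketch of why insert () iterates (a reading of the code above, not a
   quote from the algorithm literature): an expression inserted on one
   pass, together with the PHI node created for it, becomes a new leader
   recorded in NEW_SETS.  A dependent expression, say (a + b) + d after
   a + b has been inserted, may only become (partially) redundant once
   that leader exists, so another walk of the dominator tree can find
   further insertions.  The loop stops as soon as insert_aux reports that
   nothing new was inserted.  */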
/* Compute the AVAIL set for all basic blocks.

   This function performs value numbering of the statements in each basic
   block.  The AVAIL sets are built from information we glean while doing
   this value numbering, since the AVAIL sets contain only one entry per
   value.

   AVAIL_IN[BLOCK] = AVAIL_OUT[dom(BLOCK)].
   AVAIL_OUT[BLOCK] = AVAIL_IN[BLOCK] U PHI_GEN[BLOCK] U TMP_GEN[BLOCK].  */

static void
compute_avail (void)
{
  basic_block block, son;
  basic_block *worklist;
  size_t sp = 0;
  unsigned i;
  tree name;

  /* We pretend that default definitions are defined in the entry block.
     This includes function arguments and the static chain decl.  */
  FOR_EACH_SSA_NAME (i, name, cfun)
    {
      pre_expr e;
      if (!SSA_NAME_IS_DEFAULT_DEF (name)
	  || has_zero_uses (name)
	  || virtual_operand_p (name))
	continue;

      e = get_or_alloc_expr_for_name (name);
      add_to_value (get_expr_value_id (e), e);
      bitmap_insert_into_set (TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (cfun)), e);
      bitmap_value_insert_into_set (AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
				    e);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_bitmap_set (dump_file, TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			"tmp_gen", ENTRY_BLOCK);
      print_bitmap_set (dump_file, AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			"avail_out", ENTRY_BLOCK);
    }

  /* Allocate the worklist.  */
  worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));

  /* Seed the algorithm by putting the dominator children of the entry
     block on the worklist.  */
  for (son = first_dom_son (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    worklist[sp++] = son;

  BB_LIVE_VOP_ON_EXIT (ENTRY_BLOCK_PTR_FOR_FN (cfun))
    = ssa_default_def (cfun, gimple_vop (cfun));

  /* Loop until the worklist is empty.  */
  while (sp)
    {
      gimple *stmt;
      basic_block dom;

      /* Pick a block from the worklist.  */
      block = worklist[--sp];
      vn_context_bb = block;

      /* Initially, the set of available values in BLOCK is that of
	 its immediate dominator.  */
      dom = get_immediate_dominator (CDI_DOMINATORS, block);
      if (dom)
	{
	  bitmap_set_copy (AVAIL_OUT (block), AVAIL_OUT (dom));
	  BB_LIVE_VOP_ON_EXIT (block) = BB_LIVE_VOP_ON_EXIT (dom);
	}

      /* Generate values for PHI nodes.  */
      for (gphi_iterator gsi = gsi_start_phis (block); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  tree result = gimple_phi_result (gsi.phi ());

	  /* We have no need for virtual phis, as they don't represent
	     actual computations.  */
	  if (virtual_operand_p (result))
	    {
	      BB_LIVE_VOP_ON_EXIT (block) = result;
	      continue;
	    }

	  pre_expr e = get_or_alloc_expr_for_name (result);
	  add_to_value (get_expr_value_id (e), e);
	  bitmap_value_insert_into_set (AVAIL_OUT (block), e);
	  bitmap_insert_into_set (PHI_GEN (block), e);
	}

      BB_MAY_NOTRETURN (block) = 0;

      /* Now compute value numbers and populate value sets with all
	 the expressions computed in BLOCK.  */
      for (gimple_stmt_iterator gsi = gsi_start_bb (block); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  ssa_op_iter iter;
	  tree op;

	  stmt = gsi_stmt (gsi);

	  /* Cache whether the basic-block has any non-visible side-effect
	     or control flow.
	     If this isn't a call or it is the last stmt in the
	     basic-block then the CFG represents things correctly.  */
	  if (is_gimple_call (stmt) && !stmt_ends_bb_p (stmt))
	    {
	      /* Non-looping const functions always return normally.
		 Otherwise the call might not return, or might have
		 side-effects that forbid hoisting possibly trapping
		 expressions before it.  */
	      int flags = gimple_call_flags (stmt);
	      if (!(flags & ECF_CONST)
		  || (flags & ECF_LOOPING_CONST_OR_PURE))
		BB_MAY_NOTRETURN (block) = 1;
	    }

	  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF)
	    {
	      pre_expr e = get_or_alloc_expr_for_name (op);

	      add_to_value (get_expr_value_id (e), e);
	      bitmap_insert_into_set (TMP_GEN (block), e);
	      bitmap_value_insert_into_set (AVAIL_OUT (block), e);
	    }

	  if (gimple_vdef (stmt))
	    BB_LIVE_VOP_ON_EXIT (block) = gimple_vdef (stmt);

	  if (gimple_has_side_effects (stmt)
	      || stmt_could_throw_p (cfun, stmt)
	      || is_gimple_debug (stmt))
	    continue;

	  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	    {
	      if (ssa_undefined_value_p (op))
		continue;
	      pre_expr e = get_or_alloc_expr_for_name (op);
	      bitmap_value_insert_into_set (EXP_GEN (block), e);
	    }

	  switch (gimple_code (stmt))
	    {
	    case GIMPLE_RETURN:
	      continue;

	    case GIMPLE_CALL:
	      {
		vn_reference_t ref;
		vn_reference_s ref1;
		pre_expr result = NULL;

		/* We can value number only calls to real functions.  */
		if (gimple_call_internal_p (stmt))
		  continue;

		vn_reference_lookup_call (as_a <gcall *> (stmt), &ref, &ref1);
		if (!ref)
		  continue;

		/* If the value of the call is not invalidated in
		   this block until it is computed, add the expression
		   to EXP_GEN.  */
		if (!gimple_vuse (stmt)
		    || gimple_code
			 (SSA_NAME_DEF_STMT (gimple_vuse (stmt))) == GIMPLE_PHI
		    || gimple_bb (SSA_NAME_DEF_STMT
				    (gimple_vuse (stmt))) != block)
		  {
		    result = pre_expr_pool.allocate ();
		    result->kind = REFERENCE;
		    result->id = 0;
		    PRE_EXPR_REFERENCE (result) = ref;

		    get_or_alloc_expression_id (result);
		    add_to_value (get_expr_value_id (result), result);
		    bitmap_value_insert_into_set (EXP_GEN (block), result);
		  }
		continue;
	      }

	    case GIMPLE_ASSIGN:
	      {
		pre_expr result = NULL;
		switch (vn_get_stmt_kind (stmt))
		  {
		  case VN_NARY:
		    {
		      enum tree_code code = gimple_assign_rhs_code (stmt);
		      vn_nary_op_t nary;

		      /* COND_EXPR and VEC_COND_EXPR are awkward in
			 that they contain an embedded complex expression.
			 Don't even try to shove those through PRE.  */
		      if (code == COND_EXPR
			  || code == VEC_COND_EXPR)
			continue;

		      vn_nary_op_lookup_stmt (stmt, &nary);
		      if (!nary || nary->predicated_values)
			continue;

		      /* If the NARY traps and there was a preceding
			 point in the block that might not return, avoid
			 adding the nary to EXP_GEN.  */
		      if (BB_MAY_NOTRETURN (block)
			  && vn_nary_may_trap (nary))
			continue;

		      result = pre_expr_pool.allocate ();
		      result->kind = NARY;
		      result->id = 0;
		      PRE_EXPR_NARY (result) = nary;
		      break;
		    }

		  case VN_REFERENCE:
		    {
		      tree rhs1 = gimple_assign_rhs1 (stmt);
		      alias_set_type set = get_alias_set (rhs1);
		      vec<vn_reference_op_s> operands
			= vn_reference_operands_for_lookup (rhs1);
		      vn_reference_t ref;
		      vn_reference_lookup_pieces (gimple_vuse (stmt), set,
						  TREE_TYPE (rhs1),
						  operands, &ref, VN_WALK);
		      if (!ref)
			{
			  operands.release ();
			  continue;
			}

		      /* If the REFERENCE traps and there was a preceding
			 point in the block that might not return, avoid
			 adding the reference to EXP_GEN.  */
		      if (BB_MAY_NOTRETURN (block)
			  && vn_reference_may_trap (ref))
			continue;

		      /* If the value of the reference is not invalidated in
			 this block until it is computed, add the expression
			 to EXP_GEN.  */
		      if (gimple_vuse (stmt))
			{
			  gimple *def_stmt;
			  bool ok = true;
			  def_stmt = SSA_NAME_DEF_STMT (gimple_vuse (stmt));
			  while (!gimple_nop_p (def_stmt)
				 && gimple_code (def_stmt) != GIMPLE_PHI
				 && gimple_bb (def_stmt) == block)
			    {
			      if (stmt_may_clobber_ref_p
				    (def_stmt, gimple_assign_rhs1 (stmt)))
				{
				  ok = false;
				  break;
				}
			      def_stmt
				= SSA_NAME_DEF_STMT (gimple_vuse (def_stmt));
			    }
			  if (!ok)
			    {
			      operands.release ();
			      continue;
			    }
			}

		      /* If the load was value-numbered to another
			 load, make sure we do not use its expression
			 for insertion if it wouldn't be a valid
			 replacement.  */
		      /* At the moment we have a testcase
			 for hoist insertion of aligned vs. misaligned
			 variants in gcc.dg/torture/pr65270-1.c; thus,
			 with just alignment to be considered, we can
			 simply replace the expression in the hashtable
			 with the most conservative one.  */
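		      /* For instance (an illustrative sketch in the spirit
			 of that testcase, not a verbatim copy): with

			   typedef int v4si __attribute__((vector_size (16)));
			   typedef v4si v4si_u __attribute__((aligned (4)));

			 a load through a v4si * and a load of the same
			 memory through a v4si_u * can receive the same
			 value number, but an expression inserted for that
			 value must not assume the 16-byte alignment.  The
			 code below therefore rewrites the type recorded in
			 the hashtable entry to the smaller of the two
			 alignments.  */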
		      vn_reference_op_t ref1 = &ref->operands.last ();
		      while (ref1->opcode != TARGET_MEM_REF
			     && ref1->opcode != MEM_REF
			     && ref1 != &ref->operands[0])
			--ref1;
		      vn_reference_op_t ref2 = &operands.last ();
		      while (ref2->opcode != TARGET_MEM_REF
			     && ref2->opcode != MEM_REF
			     && ref2 != &operands[0])
			--ref2;
		      if ((ref1->opcode == TARGET_MEM_REF
			   || ref1->opcode == MEM_REF)
			  && (TYPE_ALIGN (ref1->type)
			      > TYPE_ALIGN (ref2->type)))
			ref1->type
			  = build_aligned_type (ref1->type,
						TYPE_ALIGN (ref2->type));
		      /* TBAA behavior is an obvious part so make sure
			 that the hashtable one covers this as well
			 by adjusting the ref alias set and its base.  */
		      if (ref->set == set
			  || alias_set_subset_of (set, ref->set))
			;
		      else if (alias_set_subset_of (ref->set, set))
			{
			  ref->set = set;
			  if (ref1->opcode == MEM_REF)
			    ref1->op0
			      = wide_int_to_tree (TREE_TYPE (ref2->op0),
						  wi::to_wide (ref1->op0));
			  else
			    ref1->op2
			      = wide_int_to_tree (TREE_TYPE (ref2->op2),
						  wi::to_wide (ref1->op2));
			}
		      else
			{
			  ref->set = 0;
			  if (ref1->opcode == MEM_REF)
			    ref1->op0
			      = wide_int_to_tree (ptr_type_node,
						  wi::to_wide (ref1->op0));
			  else
			    ref1->op2
			      = wide_int_to_tree (ptr_type_node,
						  wi::to_wide (ref1->op2));
			}
		      operands.release ();

		      result = pre_expr_pool.allocate ();
		      result->kind = REFERENCE;
		      result->id = 0;
		      PRE_EXPR_REFERENCE (result) = ref;
		      break;
		    }

		  default:
		    continue;
		  }

		get_or_alloc_expression_id (result);
		add_to_value (get_expr_value_id (result), result);
		bitmap_value_insert_into_set (EXP_GEN (block), result);
		continue;
	      }
	    default:
	      break;
	    }
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  print_bitmap_set (dump_file, EXP_GEN (block),
			    "exp_gen", block->index);
	  print_bitmap_set (dump_file, PHI_GEN (block),
			    "phi_gen", block->index);
	  print_bitmap_set (dump_file, TMP_GEN (block),
			    "tmp_gen", block->index);
	  print_bitmap_set (dump_file, AVAIL_OUT (block),
			    "avail_out", block->index);
	}

      /* Put the dominator children of BLOCK on the worklist of blocks
	 to compute available sets for.  */
      for (son = first_dom_son (CDI_DOMINATORS, block);
	   son;
	   son = next_dom_son (CDI_DOMINATORS, son))
	worklist[sp++] = son;
    }
  vn_context_bb = NULL;

  free (worklist);
}
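/* A tiny worked example (names and SSA versions invented) of the sets
   compute_avail produces.  For a block containing

     x_1 = a_2 + b_3;
     y_4 = x_1 * 2;

   the USE walk adds a_2, b_3 and x_1 to EXP_GEN, the GIMPLE_ASSIGN
   handling adds the expressions a_2 + b_3 and x_1 * 2 to EXP_GEN, the
   DEF walk adds x_1 and y_4 to TMP_GEN, and AVAIL_OUT is AVAIL_OUT of
   the immediate dominator extended by x_1 and y_4, keeping one leader
   per value.  */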
/* Initialize data structures used by PRE.  */

static void
init_pre (void)
{
  basic_block bb;

  next_expression_id = 1;
  expressions.create (0);
  expressions.safe_push (NULL);
  value_expressions.create (get_max_value_id () + 1);
  value_expressions.safe_grow_cleared (get_max_value_id () + 1);
  name_to_id.create (0);

  inserted_exprs = BITMAP_ALLOC (NULL);

  connect_infinite_loops_to_exit ();
  memset (&pre_stats, 0, sizeof (pre_stats));

  alloc_aux_for_blocks (sizeof (struct bb_bitmap_sets));

  calculate_dominance_info (CDI_DOMINATORS);

  bitmap_obstack_initialize (&grand_bitmap_obstack);
  phi_translate_table = new hash_table<expr_pred_trans_d> (5110);
  expression_to_id = new hash_table<pre_expr_d> (num_ssa_names * 3);
  FOR_ALL_BB_FN (bb, cfun)
    {
      EXP_GEN (bb) = bitmap_set_new ();
      PHI_GEN (bb) = bitmap_set_new ();
      TMP_GEN (bb) = bitmap_set_new ();
      AVAIL_OUT (bb) = bitmap_set_new ();
    }
}


/* Deallocate data structures used by PRE.  */

static void
fini_pre ()
{
  value_expressions.release ();
  expressions.release ();
  BITMAP_FREE (inserted_exprs);
  bitmap_obstack_release (&grand_bitmap_obstack);
  bitmap_set_pool.release ();
  pre_expr_pool.release ();
  delete phi_translate_table;
  phi_translate_table = NULL;
  delete expression_to_id;
  expression_to_id = NULL;
  name_to_id.release ();

  free_aux_for_blocks ();
}

namespace {

const pass_data pass_data_pre =
{
  GIMPLE_PASS, /* type */
  "pre", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_PRE, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  TODO_rebuild_alias, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_pre : public gimple_opt_pass
{
public:
  pass_pre (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_pre, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    { return flag_tree_pre != 0 || flag_code_hoisting != 0; }
  virtual unsigned int execute (function *);

}; // class pass_pre

/* Valueization hook for RPO VN when we are calling back to it
   at ANTIC compute time.  */

static tree
pre_valueize (tree name)
{
  if (TREE_CODE (name) == SSA_NAME)
    {
      tree tem = VN_INFO (name)->valnum;
      if (tem != VN_TOP && tem != name)
	{
	  if (TREE_CODE (tem) != SSA_NAME
	      || SSA_NAME_IS_DEFAULT_DEF (tem))
	    return tem;
	  /* We create temporary SSA names for representatives that
	     do not have a definition (yet) but are not default defs
	     either; assume they are fine to use.  */
	  basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (tem));
	  if (! def_bb
	      || dominated_by_p (CDI_DOMINATORS, vn_context_bb, def_bb))
	    return tem;
	  /* ??? Now we could look for a leader.  Ideally we'd somehow
	     expose RPO VN leaders and get rid of AVAIL_OUT as well...  */
	}
    }
  return name;
}
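/* For orientation, the overall shape of the pass as implemented by
   execute () below:

     run_rpo_vn		  value number the whole function (GVN)
     compute_avail	  build per-block AVAIL_OUT/EXP_GEN/TMP_GEN sets
     compute_antic	  backwards ANTIC_IN dataflow with phi translation
     insert		  PRE, partial-partial and hoisting insertions
     eliminate_with_rpo_vn  replace fully redundant computations

   followed by DCE of unused insertions and tail merging.  */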
unsigned int
pass_pre::execute (function *fun)
{
  unsigned int todo = 0;

  do_partial_partial =
    flag_tree_partial_pre && optimize_function_for_speed_p (fun);

  /* This has to happen before VN runs because
     loop_optimizer_init may create new phis, etc.  */
  loop_optimizer_init (LOOPS_NORMAL);
  split_critical_edges ();
  scev_initialize ();
  calculate_dominance_info (CDI_DOMINATORS);

  run_rpo_vn (VN_WALK);

  init_pre ();

  vn_valueize = pre_valueize;

  /* Insert can get quite slow on an incredibly large number of basic
     blocks due to some quadratic behavior.  Until this behavior is
     fixed, don't run it when we have an incredibly large number of
     bb's.  If we aren't going to run insert, there is no point in
     computing ANTIC either, even though it's plenty fast, nor do we
     require AVAIL.  */
  if (n_basic_blocks_for_fn (fun) < 4000)
    {
      compute_avail ();
      compute_antic ();
      insert ();
    }

  /* Make sure to remove fake edges before committing our inserts.
     This makes sure we don't end up with extra critical edges that
     we would need to split.  */
  remove_fake_exit_edges ();
  gsi_commit_edge_inserts ();

  /* The eliminate phase folds statements, which might (though it
     should not) end up not keeping virtual operands up-to-date.  */
  gcc_assert (!need_ssa_update_p (fun));

  statistics_counter_event (fun, "Insertions", pre_stats.insertions);
  statistics_counter_event (fun, "PA inserted", pre_stats.pa_insert);
  statistics_counter_event (fun, "HOIST inserted", pre_stats.hoist_insert);
  statistics_counter_event (fun, "New PHIs", pre_stats.phis);

  todo |= eliminate_with_rpo_vn (inserted_exprs);

  vn_valueize = NULL;

  /* Because we don't follow exactly the standard PRE algorithm, and decide
     not to insert PHI nodes sometimes, and because value numbering of casts
     isn't perfect, we sometimes end up inserting dead code.  This simple
     DCE-like pass removes any insertions we made that weren't actually
     used.  */
  simple_dce_from_worklist (inserted_exprs);

  fini_pre ();

  scev_finalize ();
  loop_optimizer_finalize ();

  /* TODO: tail_merge_optimize may merge all predecessors of a block, in which
     case we can merge the block with the remaining predecessor of the block.
     It should either:
     - call merge_blocks after each tail merge iteration
     - call merge_blocks after all tail merge iterations
     - mark TODO_cleanup_cfg when necessary
     - share the cfg cleanup with fini_pre.  */
  todo |= tail_merge_optimize (todo);

  free_rpo_vn ();

  /* Tail merging invalidates the virtual SSA web; together with
     cfg-cleanup opportunities exposed by PRE this will wreck the
     SSA updating machinery.  So make sure to run update-ssa
     manually, before eventually scheduling cfg-cleanup as part of
     the todo.  */
  update_ssa (TODO_update_ssa_only_virtuals);

  return todo;
}

} // anon namespace

gimple_opt_pass *
make_pass_pre (gcc::context *ctxt)
{
  return new pass_pre (ctxt);
}