/* Straight-line strength reduction.
   Copyright (C) 2012-2015 Free Software Foundation, Inc.
   Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* There are many algorithms for performing strength reduction on
   loops.  This is not one of them.  IVOPTS handles strength reduction
   of induction variables just fine.  This pass is intended to pick
   up the crumbs it leaves behind, by considering opportunities for
   strength reduction along dominator paths.

   Strength reduction addresses explicit multiplies, and certain
   multiplies implicit in addressing expressions.  It would also be
   possible to apply strength reduction to divisions and modulos,
   but such opportunities are relatively uncommon.

   Strength reduction is also currently restricted to integer operations.
   If desired, it could be extended to floating-point operations under
   control of something like -funsafe-math-optimizations.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "options.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "predict.h"
#include "tm.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "gimple-pretty-print.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "domwalk.h"
#include "params.h"
#include "tree-ssa-address.h"
#include "tree-affine.h"
#include "wide-int-print.h"
#include "builtins.h"

/* Information about a strength reduction candidate.  Each statement
   in the candidate table represents an expression of one of the
   following forms (the special case of CAND_REF will be described
   later):

   (CAND_MULT)  S1:  X = (B + i) * S
   (CAND_ADD)   S1:  X = B + (i * S)

   Here X and B are SSA names, i is an integer constant, and S is
   either an SSA name or a constant.
   We call B the "base," i the "index", and S the "stride."

   Any statement S0 that dominates S1 and is of the form:

   (CAND_MULT)  S0:  Y = (B + i') * S
   (CAND_ADD)   S0:  Y = B + (i' * S)

   is called a "basis" for S1.  In both cases, S1 may be replaced by

   S1':  X = Y + (i - i') * S,

   where (i - i') * S is folded to the extent possible.

   All gimple statements are visited in dominator order, and each
   statement that may contribute to one of the forms of S1 above is
   given at least one entry in the candidate table.  Such statements
   include addition, pointer addition, subtraction, multiplication,
   negation, copies, and nontrivial type casts.  If a statement may
   represent more than one expression of the forms of S1 above,
   multiple "interpretations" are stored in the table and chained
   together.  Examples:

   * An add of two SSA names may treat either operand as the base.
   * A multiply of two SSA names, likewise.
   * A copy or cast may be thought of as either a CAND_MULT with
     i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.

   Candidate records are allocated from an obstack.  They are addressed
   both from a hash table keyed on S1, and from a vector of candidate
   pointers arranged in predominator order.

   Opportunity note
   ----------------
   Currently we don't recognize:

     S0: Y = (S * i') - B
     S1: X = (S * i) - B

   as a strength reduction opportunity, even though this S1 would
   also be replaceable by the S1' above.  This can be added if it
   comes up in practice.

   Strength reduction in addressing
   --------------------------------
   There is another kind of candidate known as CAND_REF.  A CAND_REF
   describes a statement containing a memory reference having
   complex addressing that might benefit from strength reduction.
   Specifically, we are interested in references for which
   get_inner_reference returns a base address, offset, and bitpos as
   follows:

     base:    MEM_REF (T1, C1)
     offset:  MULT_EXPR (PLUS_EXPR (T2, C2), C3)
     bitpos:  C4 * BITS_PER_UNIT

   Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
   arbitrary integer constants.  Note that C2 may be zero, in which
   case the offset will be MULT_EXPR (T2, C3).

   When this pattern is recognized, the original memory reference
   can be replaced with:

     MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
              C1 + (C2 * C3) + C4)

   which distributes the multiply to allow constant folding.  When
   two or more addressing expressions can be represented by MEM_REFs
   of this form, differing only in the constants C1, C2, and C4,
   making this substitution produces more efficient addressing during
   the RTL phases.  When there are not at least two expressions with
   the same values of T1, T2, and C3, there is nothing to be gained
   by the replacement.

   Strength reduction of CAND_REFs uses the same infrastructure as
   that used by CAND_MULTs and CAND_ADDs.  We record T1 in the base (B)
   field, MULT_EXPR (T2, C3) in the stride (S) field, and
   C1 + (C2 * C3) + C4 in the index (i) field.  A basis for a CAND_REF
   is thus another CAND_REF with the same B and S values.  When at
   least two CAND_REFs are chained together using the basis relation,
   each of them is replaced as above, resulting in improved code
   generation for addressing.
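
   To summarize the basic (non-CAND_REF) case with a small sketch, in
   which the statement names, SSA names, and constants are invented
   purely for exposition, suppose the following two candidates are
   recorded, with S0 dominating S1:

     S0:  a_1 = (x_0 + 2) * 4;     MULT (B: x_0; i: 2; S: 4)
     S1:  b_1 = (x_0 + 5) * 4;     MULT (B: x_0; i: 5; S: 4)

   S0 has the same base and stride as S1 and dominates it, so S0 is a
   basis for S1, and S1 may be replaced by

     S1': b_1 = a_1 + 12;

   since (i - i') * S = (5 - 2) * 4 folds to the constant 12.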

   Conditional candidates
   ======================

   Conditional candidates are best illustrated with an example.
   Consider the code sequence:

   (1)  x_0 = ...;
   (2)  a_0 = x_0 * 5;          MULT (B: x_0; i: 0; S: 5)
        if (...)
   (3)    x_1 = x_0 + 1;        ADD  (B: x_0, i: 1; S: 1)
   (4)  x_2 = PHI <x_0, x_1>;   PHI  (B: x_0, i: 0, S: 1)
   (5)  x_3 = x_2 + 1;          ADD  (B: x_2, i: 1, S: 1)
   (6)  a_1 = x_3 * 5;          MULT (B: x_2, i: 1; S: 5)

   Here strength reduction is complicated by the uncertain value of x_2.
   A legitimate transformation is:

   (1)  x_0 = ...;
   (2)  a_0 = x_0 * 5;
        if (...)
          {
   (3)      [x_1 = x_0 + 1;]
   (3a)     t_1 = a_0 + 5;
          }
   (4)  [x_2 = PHI <x_0, x_1>;]
   (4a) t_2 = PHI <a_0, t_1>;
   (5)  [x_3 = x_2 + 1;]
   (6r) a_1 = t_2 + 5;

   where the bracketed instructions may go dead.

   To recognize this opportunity, we have to observe that statement (6)
   has a "hidden basis" (2).  The hidden basis is unlike a normal basis
   in that the statement and the hidden basis have different base SSA
   names (x_2 and x_0, respectively).  The relationship is established
   when a statement's base name (x_2) is defined by a phi statement (4),
   each argument of which (x_0, x_1) has an identical "derived base name."
   If the argument is defined by a candidate (as x_1 is by (3)) that is a
   CAND_ADD having a stride of 1, the derived base name of the argument is
   the base name of the candidate (x_0).  Otherwise, the argument itself
   is its derived base name (as is the case with argument x_0).

   The hidden basis for statement (6) is the nearest dominating candidate
   whose base name is the derived base name (x_0) of the feeding phi (4),
   and whose stride is identical to that of the statement.  We can then
   create the new "phi basis" (4a) and feeding adds along incoming arcs (3a),
   allowing the final replacement of (6) by the strength-reduced (6r).

   To facilitate this, a new kind of candidate (CAND_PHI) is introduced.
   A CAND_PHI is not a candidate for replacement, but is maintained in the
   candidate table to ease discovery of hidden bases.  Any phi statement
   whose arguments share a common derived base name is entered into the
   table with the derived base name, an (arbitrary) index of zero, and a
   stride of 1.  A statement with a hidden basis can then be detected by
   simply looking up its feeding phi definition in the candidate table,
   extracting the derived base name, and searching for a basis in the
   usual manner after substituting the derived base name.

   Note that the transformation is only valid when the original phi and
   the statements that define the phi's arguments are all at the same
   position in the loop hierarchy.  */


/* Index into the candidate vector, offset by 1.  VECs are zero-based,
   while cand_idx's are one-based, with zero indicating null.  */
typedef unsigned cand_idx;

/* The kind of candidate.  */
enum cand_kind
{
  CAND_MULT,
  CAND_ADD,
  CAND_REF,
  CAND_PHI
};

struct slsr_cand_d
{
  /* The candidate statement S1.  */
  gimple cand_stmt;

  /* The base expression B:  often an SSA name, but not always.  */
  tree base_expr;

  /* The stride S.  */
  tree stride;

  /* The index constant i.  */
  widest_int index;

  /* The type of the candidate.  This is normally the type of base_expr,
     but casts may have occurred when combining feeding instructions.
     A candidate can only be a basis for candidates of the same final type.
     (For CAND_REFs, this is the type to be used for operand 1 of the
     replacement MEM_REF.)  */
  tree cand_type;

  /* The kind of candidate (CAND_MULT, etc.).  */
  enum cand_kind kind;

  /* Index of this candidate in the candidate vector.  */
  cand_idx cand_num;

  /* Index of the next candidate record for the same statement.
     A statement may be useful in more than one way (e.g., due to
     commutativity).  So we can have multiple "interpretations"
     of a statement.  */
  cand_idx next_interp;

  /* Index of the basis statement S0, if any, in the candidate vector.  */
  cand_idx basis;

  /* First candidate for which this candidate is a basis, if one exists.  */
  cand_idx dependent;

  /* Next candidate having the same basis as this one.  */
  cand_idx sibling;

  /* If this is a conditional candidate, the CAND_PHI candidate
     that defines the base SSA name B.  */
  cand_idx def_phi;

  /* Savings that can be expected from eliminating dead code if this
     candidate is replaced.  */
  int dead_savings;
};

typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
typedef const struct slsr_cand_d *const_slsr_cand_t;

/* Pointers to candidates are chained together as part of a mapping
   from base expressions to the candidates that use them.  */

struct cand_chain_d
{
  /* Base expression for the chain of candidates:  often, but not
     always, an SSA name.  */
  tree base_expr;

  /* Pointer to a candidate.  */
  slsr_cand_t cand;

  /* Chain pointer.  */
  struct cand_chain_d *next;

};

typedef struct cand_chain_d cand_chain, *cand_chain_t;
typedef const struct cand_chain_d *const_cand_chain_t;

/* Information about a unique "increment" associated with candidates
   having an SSA name for a stride.  An increment is the difference
   between the index of the candidate and the index of its basis,
   i.e., (i - i') as discussed in the module commentary.

   When we are not going to generate address arithmetic we treat
   increments that differ only in sign as the same, allowing sharing
   of the cost of initializers.  The absolute value of the increment
   is stored in the incr_info.  */

struct incr_info_d
{
  /* The increment that relates a candidate to its basis.  */
  widest_int incr;

  /* How many times the increment occurs in the candidate tree.  */
  unsigned count;

  /* Cost of replacing candidates using this increment.  Negative and
     zero costs indicate replacement should be performed.  */
  int cost;

  /* If this increment is profitable but is not -1, 0, or 1, it requires
     an initializer T_0 = stride * incr to be found or introduced in the
     nearest common dominator of all candidates.  This field holds T_0
     for subsequent use.  */
  tree initializer;

  /* If the initializer was found to already exist, this is the block
     where it was found.  */
  basic_block init_bb;
};

typedef struct incr_info_d incr_info, *incr_info_t;

/* Candidates are maintained in a vector.  If candidate X dominates
   candidate Y, then X appears before Y in the vector; but the
   converse does not necessarily hold.  */
static vec<slsr_cand_t> cand_vec;

enum cost_consts
{
  COST_NEUTRAL = 0,
  COST_INFINITE = 1000
};

enum stride_status
{
  UNKNOWN_STRIDE = 0,
  KNOWN_STRIDE = 1
};

enum phi_adjust_status
{
  NOT_PHI_ADJUST = 0,
  PHI_ADJUST = 1
};

enum count_phis_status
{
  DONT_COUNT_PHIS = 0,
  COUNT_PHIS = 1
};

/* Pointer map embodying a mapping from statements to candidates.  */
static hash_map<gimple, slsr_cand_t> *stmt_cand_map;

/* Obstack for candidates.  */
static struct obstack cand_obstack;

/* Obstack for candidate chains.  */
static struct obstack chain_obstack;

/* An array INCR_VEC of incr_infos is used during analysis of related
   candidates having an SSA name for a stride.  INCR_VEC_LEN describes
   its current length.  MAX_INCR_VEC_LEN is used to avoid costly
   pathological cases.  */
static incr_info_t incr_vec;
static unsigned incr_vec_len;
const int MAX_INCR_VEC_LEN = 16;

/* For a chain of candidates with unknown stride, indicates whether or not
   we must generate pointer arithmetic when replacing statements.  */
static bool address_arithmetic_p;

/* Forward function declarations.  */
static slsr_cand_t base_cand_from_table (tree);
static tree introduce_cast_before_cand (slsr_cand_t, tree, tree);
static bool legal_cast_p_1 (tree, tree);

/* Produce a pointer to the IDX'th candidate in the candidate vector.  */

static slsr_cand_t
lookup_cand (cand_idx idx)
{
  return cand_vec[idx - 1];
}

/* Helper for hashing a candidate chain header.  */

struct cand_chain_hasher : typed_noop_remove <cand_chain>
{
  typedef cand_chain value_type;
  typedef cand_chain compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

inline hashval_t
cand_chain_hasher::hash (const value_type *p)
{
  tree base_expr = p->base_expr;
  return iterative_hash_expr (base_expr, 0);
}

inline bool
cand_chain_hasher::equal (const value_type *chain1, const compare_type *chain2)
{
  return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
}

/* Hash table embodying a mapping from base exprs to chains of candidates.  */
static hash_table<cand_chain_hasher> *base_cand_map;

/* Pointer map used by tree_to_aff_combination_expand.  */
static hash_map<tree, name_expansion *> *name_expansions;
/* Pointer map embodying a mapping from bases to alternative bases.  */
static hash_map<tree, tree> *alt_base_map;

/* Given BASE, use the tree affine combination facilities to
   find the underlying tree expression for BASE, with any
   immediate offset excluded.

   N.B. we should eliminate this backtracking with better forward
   analysis in a future release.  */

static tree
get_alternative_base (tree base)
{
  tree *result = alt_base_map->get (base);

  if (result == NULL)
    {
      tree expr;
      aff_tree aff;

      tree_to_aff_combination_expand (base, TREE_TYPE (base),
                                      &aff, &name_expansions);
      aff.offset = 0;
      expr = aff_combination_to_tree (&aff);

      gcc_assert (!alt_base_map->put (base, base == expr ? NULL : expr));

      return expr == base ? NULL : expr;
    }

  return *result;
}

/* Look in the candidate table for a CAND_PHI that defines BASE and
   return it if found; otherwise return NULL.  */

static cand_idx
find_phi_def (tree base)
{
  slsr_cand_t c;

  if (TREE_CODE (base) != SSA_NAME)
    return 0;

  c = base_cand_from_table (base);

  if (!c || c->kind != CAND_PHI
      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (c->cand_stmt)))
    return 0;

  return c->cand_num;
}

/* Helper routine for find_basis_for_candidate.  May be called twice:
   once for the candidate's base expr, and optionally again either for
   the candidate's phi definition or for a CAND_REF's alternative base
   expression.  */

static slsr_cand_t
find_basis_for_base_expr (slsr_cand_t c, tree base_expr)
{
  cand_chain mapping_key;
  cand_chain_t chain;
  slsr_cand_t basis = NULL;

  // Limit potential of N^2 behavior for long candidate chains.
  int iters = 0;
  int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN);

  mapping_key.base_expr = base_expr;
  chain = base_cand_map->find (&mapping_key);

  for (; chain && iters < max_iters; chain = chain->next, ++iters)
    {
      slsr_cand_t one_basis = chain->cand;

      if (one_basis->kind != c->kind
          || one_basis->cand_stmt == c->cand_stmt
          || !operand_equal_p (one_basis->stride, c->stride, 0)
          || !types_compatible_p (one_basis->cand_type, c->cand_type)
          || !dominated_by_p (CDI_DOMINATORS,
                              gimple_bb (c->cand_stmt),
                              gimple_bb (one_basis->cand_stmt)))
        continue;

      tree lhs = gimple_assign_lhs (one_basis->cand_stmt);
      if (lhs && TREE_CODE (lhs) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
        continue;

      if (!basis || basis->cand_num < one_basis->cand_num)
        basis = one_basis;
    }

  return basis;
}

/* Use the base expr from candidate C to look for possible candidates
   that can serve as a basis for C.  Each potential basis must also
   appear in a block that dominates the candidate statement and have
   the same stride and type.  If more than one possible basis exists,
   the one with highest index in the vector is chosen; this will be
   the most immediately dominating basis.  */

static int
find_basis_for_candidate (slsr_cand_t c)
{
  slsr_cand_t basis = find_basis_for_base_expr (c, c->base_expr);

  /* If a candidate doesn't have a basis using its base expression,
     it may have a basis hidden by one or more intervening phis.  */
  if (!basis && c->def_phi)
    {
      basic_block basis_bb, phi_bb;
      slsr_cand_t phi_cand = lookup_cand (c->def_phi);
      basis = find_basis_for_base_expr (c, phi_cand->base_expr);

      if (basis)
        {
          /* A hidden basis must dominate the phi-definition of the
             candidate's base name.  */
          phi_bb = gimple_bb (phi_cand->cand_stmt);
          basis_bb = gimple_bb (basis->cand_stmt);

          if (phi_bb == basis_bb
              || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
            {
              basis = NULL;
              c->basis = 0;
            }

          /* If we found a hidden basis, estimate additional dead-code
             savings if the phi and its feeding statements can be removed.  */
          if (basis && has_single_use (gimple_phi_result (phi_cand->cand_stmt)))
            c->dead_savings += phi_cand->dead_savings;
        }
    }

  if (flag_expensive_optimizations && !basis && c->kind == CAND_REF)
    {
      tree alt_base_expr = get_alternative_base (c->base_expr);
      if (alt_base_expr)
        basis = find_basis_for_base_expr (c, alt_base_expr);
    }

  if (basis)
    {
      c->sibling = basis->dependent;
      basis->dependent = c->cand_num;
      return basis->cand_num;
    }

  return 0;
}

/* Record a mapping from BASE to C, indicating that C may potentially serve
   as a basis using that base expression.  BASE may be the same as
   C->BASE_EXPR; alternatively BASE can be a different tree that shares the
   underlying expression of C->BASE_EXPR.  */

static void
record_potential_basis (slsr_cand_t c, tree base)
{
  cand_chain_t node;
  cand_chain **slot;

  gcc_assert (base);

  node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
  node->base_expr = base;
  node->cand = c;
  node->next = NULL;
  slot = base_cand_map->find_slot (node, INSERT);

  if (*slot)
    {
      cand_chain_t head = (cand_chain_t) (*slot);
      node->next = head->next;
      head->next = node;
    }
  else
    *slot = node;
}

/* Allocate storage for a new candidate and initialize its fields.
   Attempt to find a basis for the candidate.

   For CAND_REF, an alternative base may also be recorded and used
   to find a basis.  This helps cases where the expression hidden
   behind BASE (which is usually an SSA_NAME) has immediate offset,
   e.g.

     a2[i][j] = 1;
     a2[i + 20][j] = 2;  */

static slsr_cand_t
alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base,
                           const widest_int &index, tree stride, tree ctype,
                           unsigned savings)
{
  slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
                                               sizeof (slsr_cand));
  c->cand_stmt = gs;
  c->base_expr = base;
  c->stride = stride;
  c->index = index;
  c->cand_type = ctype;
  c->kind = kind;
  c->cand_num = cand_vec.length () + 1;
  c->next_interp = 0;
  c->dependent = 0;
  c->sibling = 0;
  c->def_phi = kind == CAND_MULT ? find_phi_def (base) : 0;
  c->dead_savings = savings;

  cand_vec.safe_push (c);

  if (kind == CAND_PHI)
    c->basis = 0;
  else
    c->basis = find_basis_for_candidate (c);

  record_potential_basis (c, base);
  if (flag_expensive_optimizations && kind == CAND_REF)
    {
      tree alt_base = get_alternative_base (base);
      if (alt_base)
        record_potential_basis (c, alt_base);
    }

  return c;
}

/* Determine the target cost of statement GS when compiling according
   to SPEED.  */

static int
stmt_cost (gimple gs, bool speed)
{
  tree lhs, rhs1, rhs2;
  machine_mode lhs_mode;

  gcc_assert (is_gimple_assign (gs));
  lhs = gimple_assign_lhs (gs);
  rhs1 = gimple_assign_rhs1 (gs);
  lhs_mode = TYPE_MODE (TREE_TYPE (lhs));

  switch (gimple_assign_rhs_code (gs))
    {
    case MULT_EXPR:
      rhs2 = gimple_assign_rhs2 (gs);

      if (tree_fits_shwi_p (rhs2))
        return mult_by_coeff_cost (tree_to_shwi (rhs2), lhs_mode, speed);

      gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
      return mul_cost (speed, lhs_mode);

    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
    case MINUS_EXPR:
      return add_cost (speed, lhs_mode);

    case NEGATE_EXPR:
      return neg_cost (speed, lhs_mode);

    CASE_CONVERT:
      return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);

    /* Note that we don't assign costs to copies that in most cases
       will go away.  */
    default:
      ;
    }

  gcc_unreachable ();
  return 0;
}

/* Look up the defining statement for BASE_IN and return a pointer
   to its candidate in the candidate table, if any; otherwise NULL.
   Only CAND_ADD and CAND_MULT candidates are returned.  */

static slsr_cand_t
base_cand_from_table (tree base_in)
{
  slsr_cand_t *result;

  gimple def = SSA_NAME_DEF_STMT (base_in);
  if (!def)
    return (slsr_cand_t) NULL;

  result = stmt_cand_map->get (def);

  if (result && (*result)->kind != CAND_REF)
    return *result;

  return (slsr_cand_t) NULL;
}

/* Add an entry to the statement-to-candidate mapping.  */

static void
add_cand_for_stmt (gimple gs, slsr_cand_t c)
{
  gcc_assert (!stmt_cand_map->put (gs, c));
}

/* Given PHI which contains a phi statement, determine whether it
   satisfies all the requirements of a phi candidate.  If so, create
   a candidate.  Note that a CAND_PHI never has a basis itself, but
   is used to help find a basis for subsequent candidates.  */

static void
slsr_process_phi (gphi *phi, bool speed)
{
  unsigned i;
  tree arg0_base = NULL_TREE, base_type;
  slsr_cand_t c;
  struct loop *cand_loop = gimple_bb (phi)->loop_father;
  unsigned savings = 0;

  /* A CAND_PHI requires each of its arguments to have the same
     derived base name.  (See the module header commentary for a
     definition of derived base names.)  Furthermore, all feeding
     definitions must be in the same position in the loop hierarchy
     as PHI.  */

  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      slsr_cand_t arg_cand;
      tree arg = gimple_phi_arg_def (phi, i);
      tree derived_base_name = NULL_TREE;
      gimple arg_stmt = NULL;
      basic_block arg_bb = NULL;

      if (TREE_CODE (arg) != SSA_NAME)
        return;

      arg_cand = base_cand_from_table (arg);

      if (arg_cand)
        {
          while (arg_cand->kind != CAND_ADD && arg_cand->kind != CAND_PHI)
            {
              if (!arg_cand->next_interp)
                return;

              arg_cand = lookup_cand (arg_cand->next_interp);
            }

          if (!integer_onep (arg_cand->stride))
            return;

          derived_base_name = arg_cand->base_expr;
          arg_stmt = arg_cand->cand_stmt;
          arg_bb = gimple_bb (arg_stmt);

          /* Gather potential dead code savings if the phi statement
             can be removed later on.  */
          if (has_single_use (arg))
            {
              if (gimple_code (arg_stmt) == GIMPLE_PHI)
                savings += arg_cand->dead_savings;
              else
                savings += stmt_cost (arg_stmt, speed);
            }
        }
      else
        {
          derived_base_name = arg;

          if (SSA_NAME_IS_DEFAULT_DEF (arg))
            arg_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
          else
            arg_bb = gimple_bb (SSA_NAME_DEF_STMT (arg));
        }

      if (!arg_bb || arg_bb->loop_father != cand_loop)
        return;

      if (i == 0)
        arg0_base = derived_base_name;
      else if (!operand_equal_p (derived_base_name, arg0_base, 0))
        return;
    }

  /* Create the candidate.  "alloc_cand_and_find_basis" is named
     misleadingly for this case, as no basis will be sought for a
     CAND_PHI.  */
  base_type = TREE_TYPE (arg0_base);

  c = alloc_cand_and_find_basis (CAND_PHI, phi, arg0_base,
                                 0, integer_one_node, base_type, savings);

  /* Add the candidate to the statement-candidate mapping.  */
  add_cand_for_stmt (phi, c);
}

/* Given PBASE which is a pointer to tree, look up the defining
   statement for it and check whether the candidate is in the
   form of:

     X = B + (1 * S), S is integer constant
     X = B + (i * S), S is integer one

   If so, set PBASE to the candidate's base_expr and return
   (i * S) as a widest_int.  Otherwise, just return a widest_int
   of zero.  */

static widest_int
backtrace_base_for_ref (tree *pbase)
{
  tree base_in = *pbase;
  slsr_cand_t base_cand;

  STRIP_NOPS (base_in);

  /* Strip off widening conversion(s) to handle cases where
     e.g. 'B' is widened from an 'int' in order to calculate
     a 64-bit address.  */
  if (CONVERT_EXPR_P (base_in)
      && legal_cast_p_1 (base_in, TREE_OPERAND (base_in, 0)))
    base_in = get_unwidened (base_in, NULL_TREE);

  if (TREE_CODE (base_in) != SSA_NAME)
    return 0;

  base_cand = base_cand_from_table (base_in);

  while (base_cand && base_cand->kind != CAND_PHI)
    {
      if (base_cand->kind == CAND_ADD
          && base_cand->index == 1
          && TREE_CODE (base_cand->stride) == INTEGER_CST)
        {
          /* X = B + (1 * S), S is integer constant.  */
          *pbase = base_cand->base_expr;
          return wi::to_widest (base_cand->stride);
        }
      else if (base_cand->kind == CAND_ADD
               && TREE_CODE (base_cand->stride) == INTEGER_CST
               && integer_onep (base_cand->stride))
        {
          /* X = B + (i * S), S is integer one.  */
          *pbase = base_cand->base_expr;
          return base_cand->index;
        }

      if (base_cand->next_interp)
        base_cand = lookup_cand (base_cand->next_interp);
      else
        base_cand = NULL;
    }

  return 0;
}

/* Look for the following pattern:

    *PBASE:    MEM_REF (T1, C1)

    *POFFSET:  MULT_EXPR (T2, C3)        [C2 is zero]
                     or
               MULT_EXPR (PLUS_EXPR (T2, C2), C3)
                     or
               MULT_EXPR (MINUS_EXPR (T2, -C2), C3)

    *PINDEX:   C4 * BITS_PER_UNIT

   If not present, leave the input values unchanged and return FALSE.
   Otherwise, modify the input values as follows and return TRUE:

    *PBASE:    T1
    *POFFSET:  MULT_EXPR (T2, C3)
    *PINDEX:   C1 + (C2 * C3) + C4

   When T2 is recorded by a CAND_ADD in the form of (T2' + C5), it
   will be further restructured to:

    *PBASE:    T1
    *POFFSET:  MULT_EXPR (T2', C3)
    *PINDEX:   C1 + (C2 * C3) + C4 + (C5 * C3)  */

static bool
restructure_reference (tree *pbase, tree *poffset, widest_int *pindex,
                       tree *ptype)
{
  tree base = *pbase, offset = *poffset;
  widest_int index = *pindex;
  tree mult_op0, t1, t2, type;
  widest_int c1, c2, c3, c4, c5;

  if (!base
      || !offset
      || TREE_CODE (base) != MEM_REF
      || TREE_CODE (offset) != MULT_EXPR
      || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
      || wi::umod_floor (index, BITS_PER_UNIT) != 0)
    return false;

  t1 = TREE_OPERAND (base, 0);
  c1 = widest_int::from (mem_ref_offset (base), SIGNED);
  type = TREE_TYPE (TREE_OPERAND (base, 1));

  mult_op0 = TREE_OPERAND (offset, 0);
  c3 = wi::to_widest (TREE_OPERAND (offset, 1));

  if (TREE_CODE (mult_op0) == PLUS_EXPR)

    if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
      {
        t2 = TREE_OPERAND (mult_op0, 0);
        c2 = wi::to_widest (TREE_OPERAND (mult_op0, 1));
      }
    else
      return false;

  else if (TREE_CODE (mult_op0) == MINUS_EXPR)

    if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
      {
        t2 = TREE_OPERAND (mult_op0, 0);
        c2 = -wi::to_widest (TREE_OPERAND (mult_op0, 1));
      }
    else
      return false;

  else
    {
      t2 = mult_op0;
      c2 = 0;
    }

  c4 = wi::lrshift (index, LOG2_BITS_PER_UNIT);
  c5 = backtrace_base_for_ref (&t2);

  *pbase = t1;
  *poffset = fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, t2),
                          wide_int_to_tree (sizetype, c3));
  *pindex = c1 + c2 * c3 + c4 + c5 * c3;
  *ptype = type;

  return true;
}

/* Given GS which contains a data reference, create a CAND_REF entry in
   the candidate table and attempt to find a basis.  */

static void
slsr_process_ref (gimple gs)
{
  tree ref_expr, base, offset, type;
  HOST_WIDE_INT bitsize, bitpos;
  machine_mode mode;
  int unsignedp, volatilep;
  slsr_cand_t c;

  if (gimple_vdef (gs))
    ref_expr = gimple_assign_lhs (gs);
  else
    ref_expr = gimple_assign_rhs1 (gs);

  if (!handled_component_p (ref_expr)
      || TREE_CODE (ref_expr) == BIT_FIELD_REF
      || (TREE_CODE (ref_expr) == COMPONENT_REF
          && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
    return;

  base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
                              &unsignedp, &volatilep, false);
  widest_int index = bitpos;

  if (!restructure_reference (&base, &offset, &index, &type))
    return;

  c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
                                 type, 0);

  /* Add the candidate to the statement-candidate mapping.  */
  add_cand_for_stmt (gs, c);
}

/* Create a candidate entry for a statement GS, where GS multiplies
   two SSA names BASE_IN and STRIDE_IN.  Propagate any known information
   about the two SSA names into the new candidate.  Return the new
   candidate.  */

static slsr_cand_t
create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in, bool speed)
{
  tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
  widest_int index;
  unsigned savings = 0;
  slsr_cand_t c;
  slsr_cand_t base_cand = base_cand_from_table (base_in);

  /* Look at all interpretations of the base candidate, if necessary,
     to find information to propagate into this candidate.  */
  while (base_cand && !base && base_cand->kind != CAND_PHI)
    {

      if (base_cand->kind == CAND_MULT && integer_onep (base_cand->stride))
        {
          /* Y = (B + i') * 1
             X = Y * Z
             ================
             X = (B + i') * Z  */
          base = base_cand->base_expr;
          index = base_cand->index;
          stride = stride_in;
          ctype = base_cand->cand_type;
          if (has_single_use (base_in))
            savings = (base_cand->dead_savings
                       + stmt_cost (base_cand->cand_stmt, speed));
        }
      else if (base_cand->kind == CAND_ADD
               && TREE_CODE (base_cand->stride) == INTEGER_CST)
        {
          /* Y = B + (i' * S), S constant
             X = Y * Z
             ============================
             X = B + ((i' * S) * Z)  */
          base = base_cand->base_expr;
          index = base_cand->index * wi::to_widest (base_cand->stride);
          stride = stride_in;
          ctype = base_cand->cand_type;
          if (has_single_use (base_in))
            savings = (base_cand->dead_savings
                       + stmt_cost (base_cand->cand_stmt, speed));
        }

      if (base_cand->next_interp)
        base_cand = lookup_cand (base_cand->next_interp);
      else
        base_cand = NULL;
    }

  if (!base)
    {
      /* No interpretations had anything useful to propagate, so
         produce X = (Y + 0) * Z.  */
      base = base_in;
      index = 0;
      stride = stride_in;
      ctype = TREE_TYPE (base_in);
    }

  c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
                                 ctype, savings);
  return c;
}

/* Create a candidate entry for a statement GS, where GS multiplies
   SSA name BASE_IN by constant STRIDE_IN.  Propagate any known
   information about BASE_IN into the new candidate.  Return the new
   candidate.  */

static slsr_cand_t
create_mul_imm_cand (gimple gs, tree base_in, tree stride_in, bool speed)
{
  tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
  widest_int index, temp;
  unsigned savings = 0;
  slsr_cand_t c;
  slsr_cand_t base_cand = base_cand_from_table (base_in);

  /* Look at all interpretations of the base candidate, if necessary,
     to find information to propagate into this candidate.  */
  while (base_cand && !base && base_cand->kind != CAND_PHI)
    {
      if (base_cand->kind == CAND_MULT
          && TREE_CODE (base_cand->stride) == INTEGER_CST)
        {
          /* Y = (B + i') * S, S constant
             X = Y * c
             ============================
             X = (B + i') * (S * c)  */
          temp = wi::to_widest (base_cand->stride) * wi::to_widest (stride_in);
          if (wi::fits_to_tree_p (temp, TREE_TYPE (stride_in)))
            {
              base = base_cand->base_expr;
              index = base_cand->index;
              stride = wide_int_to_tree (TREE_TYPE (stride_in), temp);
              ctype = base_cand->cand_type;
              if (has_single_use (base_in))
                savings = (base_cand->dead_savings
                           + stmt_cost (base_cand->cand_stmt, speed));
            }
        }
      else if (base_cand->kind == CAND_ADD && integer_onep (base_cand->stride))
        {
          /* Y = B + (i' * 1)
             X = Y * c
             ===========================
             X = (B + i') * c  */
          base = base_cand->base_expr;
          index = base_cand->index;
          stride = stride_in;
          ctype = base_cand->cand_type;
          if (has_single_use (base_in))
            savings = (base_cand->dead_savings
                       + stmt_cost (base_cand->cand_stmt, speed));
        }
      else if (base_cand->kind == CAND_ADD
               && base_cand->index == 1
               && TREE_CODE (base_cand->stride) == INTEGER_CST)
        {
          /* Y = B + (1 * S), S constant
             X = Y * c
             ===========================
             X = (B + S) * c  */
          base = base_cand->base_expr;
          index = wi::to_widest (base_cand->stride);
          stride = stride_in;
          ctype = base_cand->cand_type;
          if (has_single_use (base_in))
            savings = (base_cand->dead_savings
                       + stmt_cost (base_cand->cand_stmt, speed));
        }

      if (base_cand->next_interp)
        base_cand = lookup_cand (base_cand->next_interp);
      else
        base_cand = NULL;
    }

  if (!base)
    {
      /* No interpretations had anything useful to propagate, so
         produce X = (Y + 0) * c.  */
      base = base_in;
      index = 0;
      stride = stride_in;
      ctype = TREE_TYPE (base_in);
    }

  c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
                                 ctype, savings);
  return c;
}

/* Given GS which is a multiply of scalar integers, make an appropriate
   entry in the candidate table.  If this is a multiply of two SSA names,
   create two CAND_MULT interpretations and attempt to find a basis for
   each of them.  Otherwise, create a single CAND_MULT and attempt to
   find a basis.  */

static void
slsr_process_mul (gimple gs, tree rhs1, tree rhs2, bool speed)
{
  slsr_cand_t c, c2;

  /* If this is a multiply of an SSA name with itself, it is highly
     unlikely that we will get a strength reduction opportunity, so
     don't record it as a candidate.  This simplifies the logic for
     finding a basis, so if this is removed that must be considered.  */
  if (rhs1 == rhs2)
    return;

  if (TREE_CODE (rhs2) == SSA_NAME)
    {
      /* Record an interpretation of this statement in the candidate table
         assuming RHS1 is the base expression and RHS2 is the stride.  */
      c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);

      /* Add the first interpretation to the statement-candidate mapping.  */
      add_cand_for_stmt (gs, c);

      /* Record another interpretation of this statement assuming RHS1
         is the stride and RHS2 is the base expression.  */
      c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
      c->next_interp = c2->cand_num;
    }
  else
    {
      /* Record an interpretation for the multiply-immediate.  */
      c = create_mul_imm_cand (gs, rhs1, rhs2, speed);

      /* Add the interpretation to the statement-candidate mapping.  */
      add_cand_for_stmt (gs, c);
    }
}

/* Create a candidate entry for a statement GS, where GS adds two
   SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
   subtracts ADDEND_IN from BASE_IN otherwise.  Propagate any known
   information about the two SSA names into the new candidate.
   Return the new candidate.  */

static slsr_cand_t
create_add_ssa_cand (gimple gs, tree base_in, tree addend_in,
                     bool subtract_p, bool speed)
{
  tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL;
  widest_int index;
  unsigned savings = 0;
  slsr_cand_t c;
  slsr_cand_t base_cand = base_cand_from_table (base_in);
  slsr_cand_t addend_cand = base_cand_from_table (addend_in);

  /* The most useful transformation is a multiply-immediate feeding
     an add or subtract.  Look for that first.  */
  while (addend_cand && !base && addend_cand->kind != CAND_PHI)
    {
      if (addend_cand->kind == CAND_MULT
          && addend_cand->index == 0
          && TREE_CODE (addend_cand->stride) == INTEGER_CST)
        {
          /* Z = (B + 0) * S, S constant
             X = Y +/- Z
             ===========================
             X = Y + ((+/-1 * S) * B)  */
          base = base_in;
          index = wi::to_widest (addend_cand->stride);
          if (subtract_p)
            index = -index;
          stride = addend_cand->base_expr;
          ctype = TREE_TYPE (base_in);
          if (has_single_use (addend_in))
            savings = (addend_cand->dead_savings
                       + stmt_cost (addend_cand->cand_stmt, speed));
        }

      if (addend_cand->next_interp)
        addend_cand = lookup_cand (addend_cand->next_interp);
      else
        addend_cand = NULL;
    }

  while (base_cand && !base && base_cand->kind != CAND_PHI)
    {
      if (base_cand->kind == CAND_ADD
          && (base_cand->index == 0
              || operand_equal_p (base_cand->stride,
                                  integer_zero_node, 0)))
        {
          /* Y = B + (i' * S), i' * S = 0
             X = Y +/- Z
             ============================
             X = B + (+/-1 * Z)  */
          base = base_cand->base_expr;
          index = subtract_p ? -1 : 1;
          stride = addend_in;
          ctype = base_cand->cand_type;
          if (has_single_use (base_in))
            savings = (base_cand->dead_savings
                       + stmt_cost (base_cand->cand_stmt, speed));
        }
      else if (subtract_p)
        {
          slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);

          while (subtrahend_cand && !base && subtrahend_cand->kind != CAND_PHI)
            {
              if (subtrahend_cand->kind == CAND_MULT
                  && subtrahend_cand->index == 0
                  && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
                {
                  /* Z = (B + 0) * S, S constant
                     X = Y - Z
                     ===========================
                     Value:  X = Y + ((-1 * S) * B)  */
                  base = base_in;
                  index = wi::to_widest (subtrahend_cand->stride);
                  index = -index;
                  stride = subtrahend_cand->base_expr;
                  ctype = TREE_TYPE (base_in);
                  if (has_single_use (addend_in))
                    savings = (subtrahend_cand->dead_savings
                               + stmt_cost (subtrahend_cand->cand_stmt, speed));
                }

              if (subtrahend_cand->next_interp)
                subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
              else
                subtrahend_cand = NULL;
            }
        }

      if (base_cand->next_interp)
        base_cand = lookup_cand (base_cand->next_interp);
      else
        base_cand = NULL;
    }

  if (!base)
    {
      /* No interpretations had anything useful to propagate, so
         produce X = Y + (1 * Z).  */
      base = base_in;
      index = subtract_p ? -1 : 1;
      stride = addend_in;
      ctype = TREE_TYPE (base_in);
    }

  c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
                                 ctype, savings);
  return c;
}

/* Create a candidate entry for a statement GS, where GS adds SSA
   name BASE_IN to constant INDEX_IN.  Propagate any known information
   about BASE_IN into the new candidate.  Return the new candidate.  */

static slsr_cand_t
create_add_imm_cand (gimple gs, tree base_in, const widest_int &index_in,
                     bool speed)
{
  enum cand_kind kind = CAND_ADD;
  tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
  widest_int index, multiple;
  unsigned savings = 0;
  slsr_cand_t c;
  slsr_cand_t base_cand = base_cand_from_table (base_in);

  while (base_cand && !base && base_cand->kind != CAND_PHI)
    {
      signop sign = TYPE_SIGN (TREE_TYPE (base_cand->stride));

      if (TREE_CODE (base_cand->stride) == INTEGER_CST
          && wi::multiple_of_p (index_in, wi::to_widest (base_cand->stride),
                                sign, &multiple))
        {
          /* Y = (B + i') * S, S constant, c = kS for some integer k
             X = Y + c
             ============================
             X = (B + (i'+ k)) * S
          OR
             Y = B + (i' * S), S constant, c = kS for some integer k
             X = Y + c
             ============================
             X = (B + (i'+ k)) * S  */
          kind = base_cand->kind;
          base = base_cand->base_expr;
          index = base_cand->index + multiple;
          stride = base_cand->stride;
          ctype = base_cand->cand_type;
          if (has_single_use (base_in))
            savings = (base_cand->dead_savings
                       + stmt_cost (base_cand->cand_stmt, speed));
        }

      if (base_cand->next_interp)
        base_cand = lookup_cand (base_cand->next_interp);
      else
        base_cand = NULL;
    }

  if (!base)
    {
      /* No interpretations had anything useful to propagate, so
         produce X = Y + (c * 1).  */
      kind = CAND_ADD;
      base = base_in;
      index = index_in;
      stride = integer_one_node;
      ctype = TREE_TYPE (base_in);
    }

  c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
                                 ctype, savings);
  return c;
}

/* Given GS which is an add or subtract of scalar integers or pointers,
   make at least one appropriate entry in the candidate table.  */

static void
slsr_process_add (gimple gs, tree rhs1, tree rhs2, bool speed)
{
  bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
  slsr_cand_t c = NULL, c2;

  if (TREE_CODE (rhs2) == SSA_NAME)
    {
      /* First record an interpretation assuming RHS1 is the base expression
         and RHS2 is the stride.  But it doesn't make sense for the
         stride to be a pointer, so don't record a candidate in that case.  */
      if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
        {
          c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);

          /* Add the first interpretation to the statement-candidate
             mapping.  */
          add_cand_for_stmt (gs, c);
        }

      /* If the two RHS operands are identical, or this is a subtract,
         we're done.  */
      if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
        return;

      /* Otherwise, record another interpretation assuming RHS2 is the
         base expression and RHS1 is the stride, again provided that the
         stride is not a pointer.  */
      if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
        {
          c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
          if (c)
            c->next_interp = c2->cand_num;
          else
            add_cand_for_stmt (gs, c2);
        }
    }
  else
    {
      /* Record an interpretation for the add-immediate.  */
      widest_int index = wi::to_widest (rhs2);
      if (subtract_p)
        index = -index;

      c = create_add_imm_cand (gs, rhs1, index, speed);

      /* Add the interpretation to the statement-candidate mapping.  */
      add_cand_for_stmt (gs, c);
    }
}

/* Given GS which is a negate of a scalar integer, make an appropriate
   entry in the candidate table.  A negate is equivalent to a multiply
   by -1.  */

static void
slsr_process_neg (gimple gs, tree rhs1, bool speed)
{
  /* Record a CAND_MULT interpretation for the multiply by -1.  */
  slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);

  /* Add the interpretation to the statement-candidate mapping.  */
  add_cand_for_stmt (gs, c);
}

/* Helper function for legal_cast_p, operating on two trees.  Checks
   whether it's allowable to cast from RHS to LHS.  See legal_cast_p
   for more details.  */

static bool
legal_cast_p_1 (tree lhs, tree rhs)
{
  tree lhs_type, rhs_type;
  unsigned lhs_size, rhs_size;
  bool lhs_wraps, rhs_wraps;

  lhs_type = TREE_TYPE (lhs);
  rhs_type = TREE_TYPE (rhs);
  lhs_size = TYPE_PRECISION (lhs_type);
  rhs_size = TYPE_PRECISION (rhs_type);
  lhs_wraps = ANY_INTEGRAL_TYPE_P (lhs_type) && TYPE_OVERFLOW_WRAPS (lhs_type);
  rhs_wraps = ANY_INTEGRAL_TYPE_P (rhs_type) && TYPE_OVERFLOW_WRAPS (rhs_type);

  if (lhs_size < rhs_size
      || (rhs_wraps && !lhs_wraps)
      || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
    return false;

  return true;
}

/* Return TRUE if GS is a statement that defines an SSA name from
   a conversion and is legal for us to combine with an add and multiply
   in the candidate table.
   For example, suppose we have:

     A = B + i;
     C = (type) A;
     D = C * S;

   Without the type-cast, we would create a CAND_MULT for D with base B,
   index i, and stride S.  We want to record this candidate only if it
   is equivalent to applying the type cast following the multiply:

     A = B + i;
     E = A * S;
     D = (type) E;

   We will record the type with the candidate for D.  This allows us
   to use a similar previous candidate as a basis.  If we have earlier seen

     A' = B + i';
     C' = (type) A';
     D' = C' * S;

   we can replace D with

     D = D' + (i - i') * S;

   But if moving the type-cast would change semantics, we mustn't do this.

   This is legitimate for casts from a non-wrapping integral type to
   any integral type of the same or larger size.  It is not legitimate
   to convert a wrapping type to a non-wrapping type, or to a wrapping
   type of a different size.  I.e., with a wrapping type, we must
   assume that the addition B + i could wrap, in which case performing
   the multiply before or after one of the "illegal" type casts will
   have different semantics.  */

static bool
legal_cast_p (gimple gs, tree rhs)
{
  if (!is_gimple_assign (gs)
      || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
    return false;

  return legal_cast_p_1 (gimple_assign_lhs (gs), rhs);
}

/* Given GS which is a cast to a scalar integer type, determine whether
   the cast is legal for strength reduction.  If so, make at least one
   appropriate entry in the candidate table.  */

static void
slsr_process_cast (gimple gs, tree rhs1, bool speed)
{
  tree lhs, ctype;
  slsr_cand_t base_cand, c, c2;
  unsigned savings = 0;

  if (!legal_cast_p (gs, rhs1))
    return;

  lhs = gimple_assign_lhs (gs);
  base_cand = base_cand_from_table (rhs1);
  ctype = TREE_TYPE (lhs);

  if (base_cand && base_cand->kind != CAND_PHI)
    {
      while (base_cand)
        {
          /* Propagate all data from the base candidate except the type,
             which comes from the cast, and the base candidate's cast,
             which is no longer applicable.  */
          if (has_single_use (rhs1))
            savings = (base_cand->dead_savings
                       + stmt_cost (base_cand->cand_stmt, speed));

          c = alloc_cand_and_find_basis (base_cand->kind, gs,
                                         base_cand->base_expr,
                                         base_cand->index, base_cand->stride,
                                         ctype, savings);
          if (base_cand->next_interp)
            base_cand = lookup_cand (base_cand->next_interp);
          else
            base_cand = NULL;
        }
    }
  else
    {
      /* If nothing is known about the RHS, create fresh CAND_ADD and
         CAND_MULT interpretations:

         X = Y + (0 * 1)
         X = (Y + 0) * 1

         The first of these is somewhat arbitrary, but the choice of
         1 for the stride simplifies the logic for propagating casts
         into their uses.  */
      c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1,
                                     0, integer_one_node, ctype, 0);
      c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1,
                                      0, integer_one_node, ctype, 0);
      c->next_interp = c2->cand_num;
    }

  /* Add the first (or only) interpretation to the statement-candidate
     mapping.  */
  add_cand_for_stmt (gs, c);
}

/* Given GS which is a copy of a scalar integer type, make at least one
   appropriate entry in the candidate table.

   This interface is included for completeness, but is unnecessary
   if this pass immediately follows a pass that performs copy
   propagation, such as DOM.  */

static void
slsr_process_copy (gimple gs, tree rhs1, bool speed)
{
  slsr_cand_t base_cand, c, c2;
  unsigned savings = 0;

  base_cand = base_cand_from_table (rhs1);

  if (base_cand && base_cand->kind != CAND_PHI)
    {
      while (base_cand)
        {
          /* Propagate all data from the base candidate.  */
          if (has_single_use (rhs1))
            savings = (base_cand->dead_savings
                       + stmt_cost (base_cand->cand_stmt, speed));

          c = alloc_cand_and_find_basis (base_cand->kind, gs,
                                         base_cand->base_expr,
                                         base_cand->index, base_cand->stride,
                                         base_cand->cand_type, savings);
          if (base_cand->next_interp)
            base_cand = lookup_cand (base_cand->next_interp);
          else
            base_cand = NULL;
        }
    }
  else
    {
      /* If nothing is known about the RHS, create fresh CAND_ADD and
         CAND_MULT interpretations:

         X = Y + (0 * 1)
         X = (Y + 0) * 1

         The first of these is somewhat arbitrary, but the choice of
         1 for the stride simplifies the logic for propagating casts
         into their uses.  */
      c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1,
                                     0, integer_one_node, TREE_TYPE (rhs1), 0);
      c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1,
                                      0, integer_one_node, TREE_TYPE (rhs1), 0);
      c->next_interp = c2->cand_num;
    }

  /* Add the first (or only) interpretation to the statement-candidate
     mapping.  */
  add_cand_for_stmt (gs, c);
}

class find_candidates_dom_walker : public dom_walker
{
public:
  find_candidates_dom_walker (cdi_direction direction)
    : dom_walker (direction) {}
  virtual void before_dom_children (basic_block);
};

/* Find strength-reduction candidates in block BB.  */

void
find_candidates_dom_walker::before_dom_children (basic_block bb)
{
  bool speed = optimize_bb_for_speed_p (bb);

  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    slsr_process_phi (gsi.phi (), speed);

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gimple gs = gsi_stmt (gsi);

      if (gimple_vuse (gs) && gimple_assign_single_p (gs))
        slsr_process_ref (gs);

      else if (is_gimple_assign (gs)
               && SCALAR_INT_MODE_P
                    (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
        {
          tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;

          switch (gimple_assign_rhs_code (gs))
            {
            case MULT_EXPR:
            case PLUS_EXPR:
              rhs1 = gimple_assign_rhs1 (gs);
              rhs2 = gimple_assign_rhs2 (gs);
              /* Should never happen, but currently some buggy situations
                 in earlier phases put constants in rhs1.  */
              if (TREE_CODE (rhs1) != SSA_NAME)
                continue;
              break;

            /* Possible future opportunity: rhs1 of a ptr+ can be
               an ADDR_EXPR.  */
            case POINTER_PLUS_EXPR:
            case MINUS_EXPR:
              rhs2 = gimple_assign_rhs2 (gs);
              /* Fall-through.  */

            CASE_CONVERT:
            case MODIFY_EXPR:
            case NEGATE_EXPR:
              rhs1 = gimple_assign_rhs1 (gs);
              if (TREE_CODE (rhs1) != SSA_NAME)
                continue;
              break;

            default:
              ;
            }

          switch (gimple_assign_rhs_code (gs))
            {
            case MULT_EXPR:
              slsr_process_mul (gs, rhs1, rhs2, speed);
              break;

            case PLUS_EXPR:
            case POINTER_PLUS_EXPR:
            case MINUS_EXPR:
              slsr_process_add (gs, rhs1, rhs2, speed);
              break;

            case NEGATE_EXPR:
              slsr_process_neg (gs, rhs1, speed);
              break;

            CASE_CONVERT:
              slsr_process_cast (gs, rhs1, speed);
              break;

            case MODIFY_EXPR:
              slsr_process_copy (gs, rhs1, speed);
              break;

            default:
              ;
            }
        }
    }
}

/* Dump a candidate for debug.  */

static void
dump_candidate (slsr_cand_t c)
{
  fprintf (dump_file, "%3d  [%d] ", c->cand_num,
           gimple_bb (c->cand_stmt)->index);
  print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
  switch (c->kind)
    {
    case CAND_MULT:
      fputs ("     MULT : (", dump_file);
      print_generic_expr (dump_file, c->base_expr, 0);
      fputs (" + ", dump_file);
      print_decs (c->index, dump_file);
      fputs (") * ", dump_file);
      print_generic_expr (dump_file, c->stride, 0);
      fputs (" : ", dump_file);
      break;
    case CAND_ADD:
      fputs ("     ADD  : ", dump_file);
      print_generic_expr (dump_file, c->base_expr, 0);
      fputs (" + (", dump_file);
      print_decs (c->index, dump_file);
      fputs (" * ", dump_file);
      print_generic_expr (dump_file, c->stride, 0);
      fputs (") : ", dump_file);
      break;
    case CAND_REF:
      fputs ("     REF  : ", dump_file);
      print_generic_expr (dump_file, c->base_expr, 0);
      fputs (" + (", dump_file);
      print_generic_expr (dump_file, c->stride, 0);
      fputs (") + ", dump_file);
      print_decs (c->index, dump_file);
      fputs (" : ", dump_file);
      break;
    case CAND_PHI:
      fputs ("     PHI  : ", dump_file);
      print_generic_expr (dump_file, c->base_expr, 0);
      fputs (" + (unknown * ", dump_file);
      print_generic_expr (dump_file, c->stride, 0);
      fputs (") : ", dump_file);
      break;
    default:
      gcc_unreachable ();
    }
  print_generic_expr (dump_file, c->cand_type, 0);
  fprintf (dump_file, "\n     basis: %d  dependent: %d  sibling: %d\n",
           c->basis, c->dependent, c->sibling);
  fprintf (dump_file, "     next-interp: %d  dead-savings: %d\n",
           c->next_interp, c->dead_savings);
  if (c->def_phi)
    fprintf (dump_file, "     phi: %d\n", c->def_phi);
  fputs ("\n", dump_file);
}

/* Dump the candidate vector for debug.  */

static void
dump_cand_vec (void)
{
  unsigned i;
  slsr_cand_t c;

  fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");

  FOR_EACH_VEC_ELT (cand_vec, i, c)
    dump_candidate (c);
}

/* Callback used to dump the candidate chains hash table.  */

int
ssa_base_cand_dump_callback (cand_chain **slot, void *ignored ATTRIBUTE_UNUSED)
{
  const_cand_chain_t chain = *slot;
  cand_chain_t p;

  print_generic_expr (dump_file, chain->base_expr, 0);
  fprintf (dump_file, " -> %d", chain->cand->cand_num);

  for (p = chain->next; p; p = p->next)
    fprintf (dump_file, " -> %d", p->cand->cand_num);

  fputs ("\n", dump_file);
  return 1;
}

/* Dump the candidate chains.  */
*/ 1875 1876 static void 1877 dump_cand_chains (void) 1878 { 1879 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n"); 1880 base_cand_map->traverse_noresize <void *, ssa_base_cand_dump_callback> 1881 (NULL); 1882 fputs ("\n", dump_file); 1883 } 1884 1885 /* Dump the increment vector for debug. */ 1886 1887 static void 1888 dump_incr_vec (void) 1889 { 1890 if (dump_file && (dump_flags & TDF_DETAILS)) 1891 { 1892 unsigned i; 1893 1894 fprintf (dump_file, "\nIncrement vector:\n\n"); 1895 1896 for (i = 0; i < incr_vec_len; i++) 1897 { 1898 fprintf (dump_file, "%3d increment: ", i); 1899 print_decs (incr_vec[i].incr, dump_file); 1900 fprintf (dump_file, "\n count: %d", incr_vec[i].count); 1901 fprintf (dump_file, "\n cost: %d", incr_vec[i].cost); 1902 fputs ("\n initializer: ", dump_file); 1903 print_generic_expr (dump_file, incr_vec[i].initializer, 0); 1904 fputs ("\n\n", dump_file); 1905 } 1906 } 1907 } 1908 1909 /* Replace *EXPR in candidate C with an equivalent strength-reduced 1910 data reference. */ 1911 1912 static void 1913 replace_ref (tree *expr, slsr_cand_t c) 1914 { 1915 tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr); 1916 unsigned HOST_WIDE_INT misalign; 1917 unsigned align; 1918 1919 /* Ensure the memory reference carries the minimum alignment 1920 requirement for the data type. See PR58041. */ 1921 get_object_alignment_1 (*expr, &align, &misalign); 1922 if (misalign != 0) 1923 align = (misalign & -misalign); 1924 if (align < TYPE_ALIGN (acc_type)) 1925 acc_type = build_aligned_type (acc_type, align); 1926 1927 add_expr = fold_build2 (POINTER_PLUS_EXPR, c->cand_type, 1928 c->base_expr, c->stride); 1929 mem_ref = fold_build2 (MEM_REF, acc_type, add_expr, 1930 wide_int_to_tree (c->cand_type, c->index)); 1931 1932 /* Gimplify the base addressing expression for the new MEM_REF tree. */ 1933 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); 1934 TREE_OPERAND (mem_ref, 0) 1935 = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0), 1936 /*simple_p=*/true, NULL, 1937 /*before=*/true, GSI_SAME_STMT); 1938 copy_ref_info (mem_ref, *expr); 1939 *expr = mem_ref; 1940 update_stmt (c->cand_stmt); 1941 } 1942 1943 /* Replace CAND_REF candidate C, each sibling of candidate C, and each 1944 dependent of candidate C with an equivalent strength-reduced data 1945 reference. */ 1946 1947 static void 1948 replace_refs (slsr_cand_t c) 1949 { 1950 if (dump_file && (dump_flags & TDF_DETAILS)) 1951 { 1952 fputs ("Replacing reference: ", dump_file); 1953 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0); 1954 } 1955 1956 if (gimple_vdef (c->cand_stmt)) 1957 { 1958 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt); 1959 replace_ref (lhs, c); 1960 } 1961 else 1962 { 1963 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt); 1964 replace_ref (rhs, c); 1965 } 1966 1967 if (dump_file && (dump_flags & TDF_DETAILS)) 1968 { 1969 fputs ("With: ", dump_file); 1970 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0); 1971 fputs ("\n", dump_file); 1972 } 1973 1974 if (c->sibling) 1975 replace_refs (lookup_cand (c->sibling)); 1976 1977 if (c->dependent) 1978 replace_refs (lookup_cand (c->dependent)); 1979 } 1980 1981 /* Return TRUE if candidate C is dependent upon a PHI. */ 1982 1983 static bool 1984 phi_dependent_cand_p (slsr_cand_t c) 1985 { 1986 /* A candidate is not necessarily dependent upon a PHI just because 1987 it has a phi definition for its base name. It may have a basis 1988 that relies upon the same phi definition, in which case the PHI 1989 is irrelevant to this candidate. 
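Compensation code along the phi's incoming edges is thus needed only when the candidate has a basis whose definition does not involve that same phi.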
*/ 1990 return (c->def_phi 1991 && c->basis 1992 && lookup_cand (c->basis)->def_phi != c->def_phi); 1993 } 1994 1995 /* Calculate the increment required for candidate C relative to 1996 its basis. */ 1997 1998 static widest_int 1999 cand_increment (slsr_cand_t c) 2000 { 2001 slsr_cand_t basis; 2002 2003 /* If the candidate doesn't have a basis, just return its own 2004 index. This is useful in record_increments to help us find 2005 an existing initializer. Also, if the candidate's basis is 2006 hidden by a phi, then its own index will be the increment 2007 from the newly introduced phi basis. */ 2008 if (!c->basis || phi_dependent_cand_p (c)) 2009 return c->index; 2010 2011 basis = lookup_cand (c->basis); 2012 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0)); 2013 return c->index - basis->index; 2014 } 2015 2016 /* Calculate the increment required for candidate C relative to 2017 its basis. If we aren't going to generate pointer arithmetic 2018 for this candidate, return the absolute value of that increment 2019 instead. */ 2020 2021 static inline widest_int 2022 cand_abs_increment (slsr_cand_t c) 2023 { 2024 widest_int increment = cand_increment (c); 2025 2026 if (!address_arithmetic_p && wi::neg_p (increment)) 2027 increment = -increment; 2028 2029 return increment; 2030 } 2031 2032 /* Return TRUE iff candidate C has already been replaced under 2033 another interpretation. */ 2034 2035 static inline bool 2036 cand_already_replaced (slsr_cand_t c) 2037 { 2038 return (gimple_bb (c->cand_stmt) == 0); 2039 } 2040 2041 /* Common logic used by replace_unconditional_candidate and 2042 replace_conditional_candidate. */ 2043 2044 static void 2045 replace_mult_candidate (slsr_cand_t c, tree basis_name, widest_int bump) 2046 { 2047 tree target_type = TREE_TYPE (gimple_assign_lhs (c->cand_stmt)); 2048 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt); 2049 2050 /* It is not useful to replace casts, copies, negates, or adds of 2051 an SSA name and a constant. */ 2052 if (cand_code == MODIFY_EXPR 2053 || CONVERT_EXPR_CODE_P (cand_code) 2054 || cand_code == PLUS_EXPR 2055 || cand_code == POINTER_PLUS_EXPR 2056 || cand_code == MINUS_EXPR 2057 || cand_code == NEGATE_EXPR) 2058 return; 2059 2060 enum tree_code code = PLUS_EXPR; 2061 tree bump_tree; 2062 gimple stmt_to_print = NULL; 2063 2064 /* If the basis name and the candidate's LHS have incompatible 2065 types, introduce a cast. */ 2066 if (!useless_type_conversion_p (target_type, TREE_TYPE (basis_name))) 2067 basis_name = introduce_cast_before_cand (c, target_type, basis_name); 2068 if (wi::neg_p (bump)) 2069 { 2070 code = MINUS_EXPR; 2071 bump = -bump; 2072 } 2073 2074 /* It is possible that the resulting bump doesn't fit in target_type. 2075 Abandon the replacement in this case. This does not affect 2076 siblings or dependents of C. 
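(The callers walk C's siblings and dependents separately, so those are still considered for replacement.)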
*/ 2077 if (bump != wi::ext (bump, TYPE_PRECISION (target_type), 2078 TYPE_SIGN (target_type))) 2079 return; 2080 2081 bump_tree = wide_int_to_tree (target_type, bump); 2082 2083 if (dump_file && (dump_flags & TDF_DETAILS)) 2084 { 2085 fputs ("Replacing: ", dump_file); 2086 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0); 2087 } 2088 2089 if (bump == 0) 2090 { 2091 tree lhs = gimple_assign_lhs (c->cand_stmt); 2092 gassign *copy_stmt = gimple_build_assign (lhs, basis_name); 2093 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); 2094 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt)); 2095 gsi_replace (&gsi, copy_stmt, false); 2096 c->cand_stmt = copy_stmt; 2097 if (dump_file && (dump_flags & TDF_DETAILS)) 2098 stmt_to_print = copy_stmt; 2099 } 2100 else 2101 { 2102 tree rhs1, rhs2; 2103 if (cand_code != NEGATE_EXPR) { 2104 rhs1 = gimple_assign_rhs1 (c->cand_stmt); 2105 rhs2 = gimple_assign_rhs2 (c->cand_stmt); 2106 } 2107 if (cand_code != NEGATE_EXPR 2108 && ((operand_equal_p (rhs1, basis_name, 0) 2109 && operand_equal_p (rhs2, bump_tree, 0)) 2110 || (operand_equal_p (rhs1, bump_tree, 0) 2111 && operand_equal_p (rhs2, basis_name, 0)))) 2112 { 2113 if (dump_file && (dump_flags & TDF_DETAILS)) 2114 { 2115 fputs ("(duplicate, not actually replacing)", dump_file); 2116 stmt_to_print = c->cand_stmt; 2117 } 2118 } 2119 else 2120 { 2121 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); 2122 gimple_assign_set_rhs_with_ops (&gsi, code, 2123 basis_name, bump_tree); 2124 update_stmt (gsi_stmt (gsi)); 2125 c->cand_stmt = gsi_stmt (gsi); 2126 if (dump_file && (dump_flags & TDF_DETAILS)) 2127 stmt_to_print = gsi_stmt (gsi); 2128 } 2129 } 2130 2131 if (dump_file && (dump_flags & TDF_DETAILS)) 2132 { 2133 fputs ("With: ", dump_file); 2134 print_gimple_stmt (dump_file, stmt_to_print, 0, 0); 2135 fputs ("\n", dump_file); 2136 } 2137 } 2138 2139 /* Replace candidate C with an add or subtract. Note that we only 2140 operate on CAND_MULTs with known strides, so we will never generate 2141 a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is replaced by 2142 X = Y + ((i - i') * S), as described in the module commentary. The 2143 folded value ((i - i') * S) is referred to here as the "bump." */ 2144 2145 static void 2146 replace_unconditional_candidate (slsr_cand_t c) 2147 { 2148 slsr_cand_t basis; 2149 2150 if (cand_already_replaced (c)) 2151 return; 2152 2153 basis = lookup_cand (c->basis); 2154 widest_int bump = cand_increment (c) * wi::to_widest (c->stride); 2155 2156 replace_mult_candidate (c, gimple_assign_lhs (basis->cand_stmt), bump); 2157 } 2158 2159 /* Return the index in the increment vector of the given INCREMENT, 2160 or -1 if not found. The latter can occur if more than 2161 MAX_INCR_VEC_LEN increments have been found. */ 2162 2163 static inline int 2164 incr_vec_index (const widest_int &increment) 2165 { 2166 unsigned i; 2167 2168 for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++) 2169 ; 2170 2171 if (i < incr_vec_len) 2172 return i; 2173 else 2174 return -1; 2175 } 2176 2177 /* Create a new statement along edge E to add BASIS_NAME to the product 2178 of INCREMENT and the stride of candidate C. Create and return a new 2179 SSA name from *VAR to be used as the LHS of the new statement. 2180 KNOWN_STRIDE is true iff C's stride is a constant. 
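For example (illustrative), with a constant stride of 4 and an increment of 3, the inserted statement has the form slsr_N = basis_name + 12; with an unknown stride, the addend is the recorded initializer T_0 = stride * increment, or the stride itself when the increment is 1 or -1.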
*/ 2181 2182 static tree 2183 create_add_on_incoming_edge (slsr_cand_t c, tree basis_name, 2184 widest_int increment, edge e, location_t loc, 2185 bool known_stride) 2186 { 2187 tree lhs, basis_type; 2188 gassign *new_stmt; 2189 2190 /* If the add candidate along this incoming edge has the same 2191 index as C's hidden basis, the hidden basis represents this 2192 edge correctly. */ 2193 if (increment == 0) 2194 return basis_name; 2195 2196 basis_type = TREE_TYPE (basis_name); 2197 lhs = make_temp_ssa_name (basis_type, NULL, "slsr"); 2198 2199 /* Occasionally people convert integers to pointers without a 2200 cast, leading us into trouble if we aren't careful. */ 2201 enum tree_code plus_code 2202 = POINTER_TYPE_P (basis_type) ? POINTER_PLUS_EXPR : PLUS_EXPR; 2203 2204 if (known_stride) 2205 { 2206 tree bump_tree; 2207 enum tree_code code = plus_code; 2208 widest_int bump = increment * wi::to_widest (c->stride); 2209 if (wi::neg_p (bump) && !POINTER_TYPE_P (basis_type)) 2210 { 2211 code = MINUS_EXPR; 2212 bump = -bump; 2213 } 2214 2215 tree stride_type = POINTER_TYPE_P (basis_type) ? sizetype : basis_type; 2216 bump_tree = wide_int_to_tree (stride_type, bump); 2217 new_stmt = gimple_build_assign (lhs, code, basis_name, bump_tree); 2218 } 2219 else 2220 { 2221 int i; 2222 bool negate_incr = !POINTER_TYPE_P (basis_type) && wi::neg_p (increment); 2223 i = incr_vec_index (negate_incr ? -increment : increment); 2224 gcc_assert (i >= 0); 2225 2226 if (incr_vec[i].initializer) 2227 { 2228 enum tree_code code = negate_incr ? MINUS_EXPR : plus_code; 2229 new_stmt = gimple_build_assign (lhs, code, basis_name, 2230 incr_vec[i].initializer); 2231 } 2232 else if (increment == 1) 2233 new_stmt = gimple_build_assign (lhs, plus_code, basis_name, c->stride); 2234 else if (increment == -1) 2235 new_stmt = gimple_build_assign (lhs, MINUS_EXPR, basis_name, 2236 c->stride); 2237 else 2238 gcc_unreachable (); 2239 } 2240 2241 gimple_set_location (new_stmt, loc); 2242 gsi_insert_on_edge (e, new_stmt); 2243 2244 if (dump_file && (dump_flags & TDF_DETAILS)) 2245 { 2246 fprintf (dump_file, "Inserting on edge %d->%d: ", e->src->index, 2247 e->dest->index); 2248 print_gimple_stmt (dump_file, new_stmt, 0, 0); 2249 } 2250 2251 return lhs; 2252 } 2253 2254 /* Given a candidate C with BASIS_NAME being the LHS of C's basis which 2255 is hidden by the phi node FROM_PHI, create a new phi node in the same 2256 block as FROM_PHI. The new phi is suitable for use as a basis by C, 2257 with its phi arguments representing conditional adjustments to the 2258 hidden basis along conditional incoming paths. Those adjustments are 2259 made by creating add statements (and sometimes recursively creating 2260 phis) along those incoming paths. LOC is the location to attach to 2261 the introduced statements. KNOWN_STRIDE is true iff C's stride is a 2262 constant. */ 2263 2264 static tree 2265 create_phi_basis (slsr_cand_t c, gimple from_phi, tree basis_name, 2266 location_t loc, bool known_stride) 2267 { 2268 int i; 2269 tree name, phi_arg; 2270 gphi *phi; 2271 vec<tree> phi_args; 2272 slsr_cand_t basis = lookup_cand (c->basis); 2273 int nargs = gimple_phi_num_args (from_phi); 2274 basic_block phi_bb = gimple_bb (from_phi); 2275 slsr_cand_t phi_cand = *stmt_cand_map->get (from_phi); 2276 phi_args.create (nargs); 2277 2278 /* Process each argument of the existing phi that represents 2279 conditionally-executed add candidates. 
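An argument equal to the hidden basis's base name reuses the basis directly, or adjusts it along the incoming edge by the increment -basis->index when that index is nonzero; any other argument receives an add on its incoming edge, or a recursively created phi when the argument is itself defined by a phi.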
*/ 2280 for (i = 0; i < nargs; i++) 2281 { 2282 edge e = (*phi_bb->preds)[i]; 2283 tree arg = gimple_phi_arg_def (from_phi, i); 2284 tree feeding_def; 2285 2286 /* If the phi argument is the base name of the CAND_PHI, then 2287 this incoming arc should use the hidden basis. */ 2288 if (operand_equal_p (arg, phi_cand->base_expr, 0)) 2289 if (basis->index == 0) 2290 feeding_def = gimple_assign_lhs (basis->cand_stmt); 2291 else 2292 { 2293 widest_int incr = -basis->index; 2294 feeding_def = create_add_on_incoming_edge (c, basis_name, incr, 2295 e, loc, known_stride); 2296 } 2297 else 2298 { 2299 gimple arg_def = SSA_NAME_DEF_STMT (arg); 2300 2301 /* If there is another phi along this incoming edge, we must 2302 process it in the same fashion to ensure that all basis 2303 adjustments are made along its incoming edges. */ 2304 if (gimple_code (arg_def) == GIMPLE_PHI) 2305 feeding_def = create_phi_basis (c, arg_def, basis_name, 2306 loc, known_stride); 2307 else 2308 { 2309 slsr_cand_t arg_cand = base_cand_from_table (arg); 2310 widest_int diff = arg_cand->index - basis->index; 2311 feeding_def = create_add_on_incoming_edge (c, basis_name, diff, 2312 e, loc, known_stride); 2313 } 2314 } 2315 2316 /* Because of recursion, we need to save the arguments in a vector 2317 so we can create the PHI statement all at once. Otherwise the 2318 storage for the half-created PHI can be reclaimed. */ 2319 phi_args.safe_push (feeding_def); 2320 } 2321 2322 /* Create the new phi basis. */ 2323 name = make_temp_ssa_name (TREE_TYPE (basis_name), NULL, "slsr"); 2324 phi = create_phi_node (name, phi_bb); 2325 SSA_NAME_DEF_STMT (name) = phi; 2326 2327 FOR_EACH_VEC_ELT (phi_args, i, phi_arg) 2328 { 2329 edge e = (*phi_bb->preds)[i]; 2330 add_phi_arg (phi, phi_arg, e, loc); 2331 } 2332 2333 update_stmt (phi); 2334 2335 if (dump_file && (dump_flags & TDF_DETAILS)) 2336 { 2337 fputs ("Introducing new phi basis: ", dump_file); 2338 print_gimple_stmt (dump_file, phi, 0, 0); 2339 } 2340 2341 return name; 2342 } 2343 2344 /* Given a candidate C whose basis is hidden by at least one intervening 2345 phi, introduce a matching number of new phis to represent its basis 2346 adjusted by conditional increments along possible incoming paths. Then 2347 replace C as though it were an unconditional candidate, using the new 2348 basis. */ 2349 2350 static void 2351 replace_conditional_candidate (slsr_cand_t c) 2352 { 2353 tree basis_name, name; 2354 slsr_cand_t basis; 2355 location_t loc; 2356 2357 /* Look up the LHS SSA name from C's basis. This will be the 2358 RHS1 of the adds we will introduce to create new phi arguments. */ 2359 basis = lookup_cand (c->basis); 2360 basis_name = gimple_assign_lhs (basis->cand_stmt); 2361 2362 /* Create a new phi statement which will represent C's true basis 2363 after the transformation is complete. */ 2364 loc = gimple_location (c->cand_stmt); 2365 name = create_phi_basis (c, lookup_cand (c->def_phi)->cand_stmt, 2366 basis_name, loc, KNOWN_STRIDE); 2367 /* Replace C with an add of the new basis phi and a constant. */ 2368 widest_int bump = c->index * wi::to_widest (c->stride); 2369 2370 replace_mult_candidate (c, name, bump); 2371 } 2372 2373 /* Compute the expected costs of inserting basis adjustments for 2374 candidate C with phi-definition PHI. The cost of inserting 2375 one adjustment is given by ONE_ADD_COST. If PHI has arguments 2376 which are themselves phi results, recursively calculate costs 2377 for those phis as well. 
*/ 2378 2379 static int 2380 phi_add_costs (gimple phi, slsr_cand_t c, int one_add_cost) 2381 { 2382 unsigned i; 2383 int cost = 0; 2384 slsr_cand_t phi_cand = *stmt_cand_map->get (phi); 2385 2386 /* If we work our way back to a phi that isn't dominated by the hidden 2387 basis, this isn't a candidate for replacement. Indicate this by 2388 returning an unreasonably high cost. It's not easy to detect 2389 these situations when determining the basis, so we defer the 2390 decision until now. */ 2391 basic_block phi_bb = gimple_bb (phi); 2392 slsr_cand_t basis = lookup_cand (c->basis); 2393 basic_block basis_bb = gimple_bb (basis->cand_stmt); 2394 2395 if (phi_bb == basis_bb || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb)) 2396 return COST_INFINITE; 2397 2398 for (i = 0; i < gimple_phi_num_args (phi); i++) 2399 { 2400 tree arg = gimple_phi_arg_def (phi, i); 2401 2402 if (arg != phi_cand->base_expr) 2403 { 2404 gimple arg_def = SSA_NAME_DEF_STMT (arg); 2405 2406 if (gimple_code (arg_def) == GIMPLE_PHI) 2407 cost += phi_add_costs (arg_def, c, one_add_cost); 2408 else 2409 { 2410 slsr_cand_t arg_cand = base_cand_from_table (arg); 2411 2412 if (arg_cand->index != c->index) 2413 cost += one_add_cost; 2414 } 2415 } 2416 } 2417 2418 return cost; 2419 } 2420 2421 /* For candidate C, each sibling of candidate C, and each dependent of 2422 candidate C, determine whether the candidate is dependent upon a 2423 phi that hides its basis. If not, replace the candidate unconditionally. 2424 Otherwise, determine whether the cost of introducing compensation code 2425 for the candidate is offset by the gains from strength reduction. If 2426 so, replace the candidate and introduce the compensation code. */ 2427 2428 static void 2429 replace_uncond_cands_and_profitable_phis (slsr_cand_t c) 2430 { 2431 if (phi_dependent_cand_p (c)) 2432 { 2433 if (c->kind == CAND_MULT) 2434 { 2435 /* A candidate dependent upon a phi will replace a multiply by 2436 a constant with an add, and will insert at most one add for 2437 each phi argument. Add these costs with the potential dead-code 2438 savings to determine profitability. 
*/ 2439 bool speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt)); 2440 int mult_savings = stmt_cost (c->cand_stmt, speed); 2441 gimple phi = lookup_cand (c->def_phi)->cand_stmt; 2442 tree phi_result = gimple_phi_result (phi); 2443 int one_add_cost = add_cost (speed, 2444 TYPE_MODE (TREE_TYPE (phi_result))); 2445 int add_costs = one_add_cost + phi_add_costs (phi, c, one_add_cost); 2446 int cost = add_costs - mult_savings - c->dead_savings; 2447 2448 if (dump_file && (dump_flags & TDF_DETAILS)) 2449 { 2450 fprintf (dump_file, " Conditional candidate %d:\n", c->cand_num); 2451 fprintf (dump_file, " add_costs = %d\n", add_costs); 2452 fprintf (dump_file, " mult_savings = %d\n", mult_savings); 2453 fprintf (dump_file, " dead_savings = %d\n", c->dead_savings); 2454 fprintf (dump_file, " cost = %d\n", cost); 2455 if (cost <= COST_NEUTRAL) 2456 fputs (" Replacing...\n", dump_file); 2457 else 2458 fputs (" Not replaced.\n", dump_file); 2459 } 2460 2461 if (cost <= COST_NEUTRAL) 2462 replace_conditional_candidate (c); 2463 } 2464 } 2465 else 2466 replace_unconditional_candidate (c); 2467 2468 if (c->sibling) 2469 replace_uncond_cands_and_profitable_phis (lookup_cand (c->sibling)); 2470 2471 if (c->dependent) 2472 replace_uncond_cands_and_profitable_phis (lookup_cand (c->dependent)); 2473 } 2474 2475 /* Count the number of candidates in the tree rooted at C that have 2476 not already been replaced under other interpretations. */ 2477 2478 static int 2479 count_candidates (slsr_cand_t c) 2480 { 2481 unsigned count = cand_already_replaced (c) ? 0 : 1; 2482 2483 if (c->sibling) 2484 count += count_candidates (lookup_cand (c->sibling)); 2485 2486 if (c->dependent) 2487 count += count_candidates (lookup_cand (c->dependent)); 2488 2489 return count; 2490 } 2491 2492 /* Increase the count of INCREMENT by one in the increment vector. 2493 INCREMENT is associated with candidate C. If INCREMENT is to be 2494 conditionally executed as part of a conditional candidate replacement, 2495 IS_PHI_ADJUST is true, otherwise false. If an initializer 2496 T_0 = stride * I is provided by a candidate that dominates all 2497 candidates with the same increment, also record T_0 for subsequent use. */ 2498 2499 static void 2500 record_increment (slsr_cand_t c, widest_int increment, bool is_phi_adjust) 2501 { 2502 bool found = false; 2503 unsigned i; 2504 2505 /* Treat increments that differ only in sign as identical so as to 2506 share initializers, unless we are generating pointer arithmetic. */ 2507 if (!address_arithmetic_p && wi::neg_p (increment)) 2508 increment = -increment; 2509 2510 for (i = 0; i < incr_vec_len; i++) 2511 { 2512 if (incr_vec[i].incr == increment) 2513 { 2514 incr_vec[i].count++; 2515 found = true; 2516 2517 /* If we previously recorded an initializer that doesn't 2518 dominate this candidate, it's not going to be useful to 2519 us after all. */ 2520 if (incr_vec[i].initializer 2521 && !dominated_by_p (CDI_DOMINATORS, 2522 gimple_bb (c->cand_stmt), 2523 incr_vec[i].init_bb)) 2524 { 2525 incr_vec[i].initializer = NULL_TREE; 2526 incr_vec[i].init_bb = NULL; 2527 } 2528 2529 break; 2530 } 2531 } 2532 2533 if (!found && incr_vec_len < MAX_INCR_VEC_LEN - 1) 2534 { 2535 /* The first time we see an increment, create the entry for it. 2536 If this is the root candidate which doesn't have a basis, set 2537 the count to zero. We're only processing it so it can possibly 2538 provide an initializer for other candidates. 
*/ 2539 incr_vec[incr_vec_len].incr = increment; 2540 incr_vec[incr_vec_len].count = c->basis || is_phi_adjust ? 1 : 0; 2541 incr_vec[incr_vec_len].cost = COST_INFINITE; 2542 2543 /* Optimistically record the first occurrence of this increment 2544 as providing an initializer (if it does); we will revise this 2545 opinion later if it doesn't dominate all other occurrences. 2546 Exception: increments of 0, 1 never need initializers; 2547 and phi adjustments don't ever provide initializers. */ 2548 if (c->kind == CAND_ADD 2549 && !is_phi_adjust 2550 && c->index == increment 2551 && (wi::gts_p (increment, 1) 2552 || wi::lts_p (increment, 0)) 2553 && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR 2554 || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR)) 2555 { 2556 tree t0 = NULL_TREE; 2557 tree rhs1 = gimple_assign_rhs1 (c->cand_stmt); 2558 tree rhs2 = gimple_assign_rhs2 (c->cand_stmt); 2559 if (operand_equal_p (rhs1, c->base_expr, 0)) 2560 t0 = rhs2; 2561 else if (operand_equal_p (rhs2, c->base_expr, 0)) 2562 t0 = rhs1; 2563 if (t0 2564 && SSA_NAME_DEF_STMT (t0) 2565 && gimple_bb (SSA_NAME_DEF_STMT (t0))) 2566 { 2567 incr_vec[incr_vec_len].initializer = t0; 2568 incr_vec[incr_vec_len++].init_bb 2569 = gimple_bb (SSA_NAME_DEF_STMT (t0)); 2570 } 2571 else 2572 { 2573 incr_vec[incr_vec_len].initializer = NULL_TREE; 2574 incr_vec[incr_vec_len++].init_bb = NULL; 2575 } 2576 } 2577 else 2578 { 2579 incr_vec[incr_vec_len].initializer = NULL_TREE; 2580 incr_vec[incr_vec_len++].init_bb = NULL; 2581 } 2582 } 2583 } 2584 2585 /* Given phi statement PHI that hides a candidate from its BASIS, find 2586 the increments along each incoming arc (recursively handling additional 2587 phis that may be present) and record them. These increments are the 2588 difference in index between the index-adjusting statements and the 2589 index of the basis. */ 2590 2591 static void 2592 record_phi_increments (slsr_cand_t basis, gimple phi) 2593 { 2594 unsigned i; 2595 slsr_cand_t phi_cand = *stmt_cand_map->get (phi); 2596 2597 for (i = 0; i < gimple_phi_num_args (phi); i++) 2598 { 2599 tree arg = gimple_phi_arg_def (phi, i); 2600 2601 if (!operand_equal_p (arg, phi_cand->base_expr, 0)) 2602 { 2603 gimple arg_def = SSA_NAME_DEF_STMT (arg); 2604 2605 if (gimple_code (arg_def) == GIMPLE_PHI) 2606 record_phi_increments (basis, arg_def); 2607 else 2608 { 2609 slsr_cand_t arg_cand = base_cand_from_table (arg); 2610 widest_int diff = arg_cand->index - basis->index; 2611 record_increment (arg_cand, diff, PHI_ADJUST); 2612 } 2613 } 2614 } 2615 } 2616 2617 /* Determine how many times each unique increment occurs in the set 2618 of candidates rooted at C's parent, recording the data in the 2619 increment vector. For each unique increment I, if an initializer 2620 T_0 = stride * I is provided by a candidate that dominates all 2621 candidates with the same increment, also record T_0 for subsequent 2622 use. */ 2623 2624 static void 2625 record_increments (slsr_cand_t c) 2626 { 2627 if (!cand_already_replaced (c)) 2628 { 2629 if (!phi_dependent_cand_p (c)) 2630 record_increment (c, cand_increment (c), NOT_PHI_ADJUST); 2631 else 2632 { 2633 /* A candidate with a basis hidden by a phi will have one 2634 increment for its relationship to the index represented by 2635 the phi, and potentially additional increments along each 2636 incoming edge. For the root of the dependency tree (which 2637 has no basis), process just the initial index in case it has 2638 an initializer that can be used by subsequent candidates. 
*/ 2639 record_increment (c, c->index, NOT_PHI_ADJUST); 2640 2641 if (c->basis) 2642 record_phi_increments (lookup_cand (c->basis), 2643 lookup_cand (c->def_phi)->cand_stmt); 2644 } 2645 } 2646 2647 if (c->sibling) 2648 record_increments (lookup_cand (c->sibling)); 2649 2650 if (c->dependent) 2651 record_increments (lookup_cand (c->dependent)); 2652 } 2653 2654 /* Add up and return the costs of introducing add statements that 2655 require the increment INCR on behalf of candidate C and phi 2656 statement PHI. Accumulate into *SAVINGS the potential savings 2657 from removing existing statements that feed PHI and have no other 2658 uses. */ 2659 2660 static int 2661 phi_incr_cost (slsr_cand_t c, const widest_int &incr, gimple phi, int *savings) 2662 { 2663 unsigned i; 2664 int cost = 0; 2665 slsr_cand_t basis = lookup_cand (c->basis); 2666 slsr_cand_t phi_cand = *stmt_cand_map->get (phi); 2667 2668 for (i = 0; i < gimple_phi_num_args (phi); i++) 2669 { 2670 tree arg = gimple_phi_arg_def (phi, i); 2671 2672 if (!operand_equal_p (arg, phi_cand->base_expr, 0)) 2673 { 2674 gimple arg_def = SSA_NAME_DEF_STMT (arg); 2675 2676 if (gimple_code (arg_def) == GIMPLE_PHI) 2677 { 2678 int feeding_savings = 0; 2679 cost += phi_incr_cost (c, incr, arg_def, &feeding_savings); 2680 if (has_single_use (gimple_phi_result (arg_def))) 2681 *savings += feeding_savings; 2682 } 2683 else 2684 { 2685 slsr_cand_t arg_cand = base_cand_from_table (arg); 2686 widest_int diff = arg_cand->index - basis->index; 2687 2688 if (incr == diff) 2689 { 2690 tree basis_lhs = gimple_assign_lhs (basis->cand_stmt); 2691 tree lhs = gimple_assign_lhs (arg_cand->cand_stmt); 2692 cost += add_cost (true, TYPE_MODE (TREE_TYPE (basis_lhs))); 2693 if (has_single_use (lhs)) 2694 *savings += stmt_cost (arg_cand->cand_stmt, true); 2695 } 2696 } 2697 } 2698 } 2699 2700 return cost; 2701 } 2702 2703 /* Return the first candidate in the tree rooted at C that has not 2704 already been replaced, favoring siblings over dependents. */ 2705 2706 static slsr_cand_t 2707 unreplaced_cand_in_tree (slsr_cand_t c) 2708 { 2709 if (!cand_already_replaced (c)) 2710 return c; 2711 2712 if (c->sibling) 2713 { 2714 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling)); 2715 if (sib) 2716 return sib; 2717 } 2718 2719 if (c->dependent) 2720 { 2721 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent)); 2722 if (dep) 2723 return dep; 2724 } 2725 2726 return NULL; 2727 } 2728 2729 /* Return TRUE if the candidates in the tree rooted at C should be 2730 optimized for speed, else FALSE. We estimate this based on the block 2731 containing the most dominant candidate in the tree that has not yet 2732 been replaced. */ 2733 2734 static bool 2735 optimize_cands_for_speed_p (slsr_cand_t c) 2736 { 2737 slsr_cand_t c2 = unreplaced_cand_in_tree (c); 2738 gcc_assert (c2); 2739 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt)); 2740 } 2741 2742 /* Add COST_IN to the lowest cost of any dependent path starting at 2743 candidate C or any of its siblings, counting only candidates along 2744 such paths with increment INCR. Assume that replacing a candidate 2745 reduces cost by REPL_SAVINGS. Also account for savings from any 2746 statements that would go dead. If COUNT_PHIS is true, include 2747 costs of introducing feeding statements for conditional candidates. 
*/ 2748 2749 static int 2750 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c, 2751 const widest_int &incr, bool count_phis) 2752 { 2753 int local_cost, sib_cost, savings = 0; 2754 widest_int cand_incr = cand_abs_increment (c); 2755 2756 if (cand_already_replaced (c)) 2757 local_cost = cost_in; 2758 else if (incr == cand_incr) 2759 local_cost = cost_in - repl_savings - c->dead_savings; 2760 else 2761 local_cost = cost_in - c->dead_savings; 2762 2763 if (count_phis 2764 && phi_dependent_cand_p (c) 2765 && !cand_already_replaced (c)) 2766 { 2767 gimple phi = lookup_cand (c->def_phi)->cand_stmt; 2768 local_cost += phi_incr_cost (c, incr, phi, &savings); 2769 2770 if (has_single_use (gimple_phi_result (phi))) 2771 local_cost -= savings; 2772 } 2773 2774 if (c->dependent) 2775 local_cost = lowest_cost_path (local_cost, repl_savings, 2776 lookup_cand (c->dependent), incr, 2777 count_phis); 2778 2779 if (c->sibling) 2780 { 2781 sib_cost = lowest_cost_path (cost_in, repl_savings, 2782 lookup_cand (c->sibling), incr, 2783 count_phis); 2784 local_cost = MIN (local_cost, sib_cost); 2785 } 2786 2787 return local_cost; 2788 } 2789 2790 /* Compute the total savings that would accrue from all replacements 2791 in the candidate tree rooted at C, counting only candidates with 2792 increment INCR. Assume that replacing a candidate reduces cost 2793 by REPL_SAVINGS. Also account for savings from statements that 2794 would go dead. */ 2795 2796 static int 2797 total_savings (int repl_savings, slsr_cand_t c, const widest_int &incr, 2798 bool count_phis) 2799 { 2800 int savings = 0; 2801 widest_int cand_incr = cand_abs_increment (c); 2802 2803 if (incr == cand_incr && !cand_already_replaced (c)) 2804 savings += repl_savings + c->dead_savings; 2805 2806 if (count_phis 2807 && phi_dependent_cand_p (c) 2808 && !cand_already_replaced (c)) 2809 { 2810 int phi_savings = 0; 2811 gimple phi = lookup_cand (c->def_phi)->cand_stmt; 2812 savings -= phi_incr_cost (c, incr, phi, &phi_savings); 2813 2814 if (has_single_use (gimple_phi_result (phi))) 2815 savings += phi_savings; 2816 } 2817 2818 if (c->dependent) 2819 savings += total_savings (repl_savings, lookup_cand (c->dependent), incr, 2820 count_phis); 2821 2822 if (c->sibling) 2823 savings += total_savings (repl_savings, lookup_cand (c->sibling), incr, 2824 count_phis); 2825 2826 return savings; 2827 } 2828 2829 /* Use target-specific costs to determine and record which increments 2830 in the current candidate tree are profitable to replace, assuming 2831 MODE and SPEED. FIRST_DEP is the first dependent of the root of 2832 the candidate tree. 2833 2834 One slight limitation here is that we don't account for the possible 2835 introduction of casts in some cases. See replace_one_candidate for 2836 the cases where these are introduced. This should probably be cleaned 2837 up sometime. */ 2838 2839 static void 2840 analyze_increments (slsr_cand_t first_dep, machine_mode mode, bool speed) 2841 { 2842 unsigned i; 2843 2844 for (i = 0; i < incr_vec_len; i++) 2845 { 2846 HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi (); 2847 2848 /* If somehow this increment is bigger than a HWI, we won't 2849 be optimizing candidates that use it. And if the increment 2850 has a count of zero, nothing will be done with it. 
*/
      if (!wi::fits_shwi_p (incr_vec[i].incr) || !incr_vec[i].count)
        incr_vec[i].cost = COST_INFINITE;

      /* Increments of 0, 1, and -1 are always profitable to replace,
         because they always replace a multiply or add with an add or
         copy, and may cause one or more existing instructions to go
         dead.  Exception:  -1 can't be assumed to be profitable for
         pointer addition.  */
      else if (incr == 0
               || incr == 1
               || (incr == -1
                   && !POINTER_TYPE_P (first_dep->cand_type)))
        incr_vec[i].cost = COST_NEUTRAL;

      /* FORNOW: If we need to add an initializer, give up if a cast from
         the candidate's type to its stride's type can lose precision.
         This could eventually be handled better by expressly retaining the
         result of a cast to a wider type in the stride.  Example:

           short int _1;
           _2 = (int) _1;
           _3 = _2 * 10;
           _4 = x + _3;    ADD: x + (10 * _1) : int
           _5 = _2 * 15;
           _6 = x + _5;    ADD: x + (15 * _1) : int

         Right now replacing _6 would cause insertion of an initializer
         of the form "short int T = _1 * 5;" followed by a cast to
         int, which could overflow incorrectly.  Had we recorded _2 or
         (int)_1 as the stride, this wouldn't happen.  However, doing
         this breaks other opportunities, so this will require some
         care.  */
      else if (!incr_vec[i].initializer
               && TREE_CODE (first_dep->stride) != INTEGER_CST
               && !legal_cast_p_1 (first_dep->stride,
                                   gimple_assign_lhs (first_dep->cand_stmt)))

        incr_vec[i].cost = COST_INFINITE;

      /* If we need to add an initializer, make sure we don't introduce
         a multiply by a pointer type, which can happen in certain cast
         scenarios.  FIXME: When cleaning up these cast issues, we can
         afford to introduce the multiply provided we cast out to an
         unsigned int of appropriate size.  */
      else if (!incr_vec[i].initializer
               && TREE_CODE (first_dep->stride) != INTEGER_CST
               && POINTER_TYPE_P (TREE_TYPE (first_dep->stride)))

        incr_vec[i].cost = COST_INFINITE;

      /* For any other increment, if this is a multiply candidate, we
         must introduce a temporary T and initialize it with
         T_0 = stride * increment.  When optimizing for speed, walk the
         candidate tree to calculate the best cost reduction along any
         path; if it offsets the fixed cost of inserting the initializer,
         replacing the increment is profitable.  When optimizing for
         size, instead calculate the total cost reduction from replacing
         all candidates with this increment.  */
      else if (first_dep->kind == CAND_MULT)
        {
          int cost = mult_by_coeff_cost (incr, mode, speed);
          int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
          if (speed)
            cost = lowest_cost_path (cost, repl_savings, first_dep,
                                     incr_vec[i].incr, COUNT_PHIS);
          else
            cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr,
                                   COUNT_PHIS);

          incr_vec[i].cost = cost;
        }

      /* If this is an add candidate, the initializer may already
         exist, so only calculate the cost of the initializer if it
         doesn't.  We are replacing one add with another here, so the
         known replacement savings is zero.  We will account for removal
         of dead instructions in lowest_cost_path or total_savings.
*/ 2928 else 2929 { 2930 int cost = 0; 2931 if (!incr_vec[i].initializer) 2932 cost = mult_by_coeff_cost (incr, mode, speed); 2933 2934 if (speed) 2935 cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr, 2936 DONT_COUNT_PHIS); 2937 else 2938 cost -= total_savings (0, first_dep, incr_vec[i].incr, 2939 DONT_COUNT_PHIS); 2940 2941 incr_vec[i].cost = cost; 2942 } 2943 } 2944 } 2945 2946 /* Return the nearest common dominator of BB1 and BB2. If the blocks 2947 are identical, return the earlier of C1 and C2 in *WHERE. Otherwise, 2948 if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2, 2949 return C2 in *WHERE; and if the NCD matches neither, return NULL in 2950 *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */ 2951 2952 static basic_block 2953 ncd_for_two_cands (basic_block bb1, basic_block bb2, 2954 slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where) 2955 { 2956 basic_block ncd; 2957 2958 if (!bb1) 2959 { 2960 *where = c2; 2961 return bb2; 2962 } 2963 2964 if (!bb2) 2965 { 2966 *where = c1; 2967 return bb1; 2968 } 2969 2970 ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2); 2971 2972 /* If both candidates are in the same block, the earlier 2973 candidate wins. */ 2974 if (bb1 == ncd && bb2 == ncd) 2975 { 2976 if (!c1 || (c2 && c2->cand_num < c1->cand_num)) 2977 *where = c2; 2978 else 2979 *where = c1; 2980 } 2981 2982 /* Otherwise, if one of them produced a candidate in the 2983 dominator, that one wins. */ 2984 else if (bb1 == ncd) 2985 *where = c1; 2986 2987 else if (bb2 == ncd) 2988 *where = c2; 2989 2990 /* If neither matches the dominator, neither wins. */ 2991 else 2992 *where = NULL; 2993 2994 return ncd; 2995 } 2996 2997 /* Consider all candidates that feed PHI. Find the nearest common 2998 dominator of those candidates requiring the given increment INCR. 2999 Further find and return the nearest common dominator of this result 3000 with block NCD. If the returned block contains one or more of the 3001 candidates, return the earliest candidate in the block in *WHERE. */ 3002 3003 static basic_block 3004 ncd_with_phi (slsr_cand_t c, const widest_int &incr, gphi *phi, 3005 basic_block ncd, slsr_cand_t *where) 3006 { 3007 unsigned i; 3008 slsr_cand_t basis = lookup_cand (c->basis); 3009 slsr_cand_t phi_cand = *stmt_cand_map->get (phi); 3010 3011 for (i = 0; i < gimple_phi_num_args (phi); i++) 3012 { 3013 tree arg = gimple_phi_arg_def (phi, i); 3014 3015 if (!operand_equal_p (arg, phi_cand->base_expr, 0)) 3016 { 3017 gimple arg_def = SSA_NAME_DEF_STMT (arg); 3018 3019 if (gimple_code (arg_def) == GIMPLE_PHI) 3020 ncd = ncd_with_phi (c, incr, as_a <gphi *> (arg_def), ncd, 3021 where); 3022 else 3023 { 3024 slsr_cand_t arg_cand = base_cand_from_table (arg); 3025 widest_int diff = arg_cand->index - basis->index; 3026 basic_block pred = gimple_phi_arg_edge (phi, i)->src; 3027 3028 if ((incr == diff) || (!address_arithmetic_p && incr == -diff)) 3029 ncd = ncd_for_two_cands (ncd, pred, *where, NULL, where); 3030 } 3031 } 3032 } 3033 3034 return ncd; 3035 } 3036 3037 /* Consider the candidate C together with any candidates that feed 3038 C's phi dependence (if any). Find and return the nearest common 3039 dominator of those candidates requiring the given increment INCR. 3040 If the returned block contains one or more of the candidates, 3041 return the earliest candidate in the block in *WHERE. 
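If neither C nor any candidate feeding its phi requires INCR, NULL is returned.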
*/ 3042 3043 static basic_block 3044 ncd_of_cand_and_phis (slsr_cand_t c, const widest_int &incr, slsr_cand_t *where) 3045 { 3046 basic_block ncd = NULL; 3047 3048 if (cand_abs_increment (c) == incr) 3049 { 3050 ncd = gimple_bb (c->cand_stmt); 3051 *where = c; 3052 } 3053 3054 if (phi_dependent_cand_p (c)) 3055 ncd = ncd_with_phi (c, incr, 3056 as_a <gphi *> (lookup_cand (c->def_phi)->cand_stmt), 3057 ncd, where); 3058 3059 return ncd; 3060 } 3061 3062 /* Consider all candidates in the tree rooted at C for which INCR 3063 represents the required increment of C relative to its basis. 3064 Find and return the basic block that most nearly dominates all 3065 such candidates. If the returned block contains one or more of 3066 the candidates, return the earliest candidate in the block in 3067 *WHERE. */ 3068 3069 static basic_block 3070 nearest_common_dominator_for_cands (slsr_cand_t c, const widest_int &incr, 3071 slsr_cand_t *where) 3072 { 3073 basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd; 3074 slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where; 3075 3076 /* First find the NCD of all siblings and dependents. */ 3077 if (c->sibling) 3078 sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling), 3079 incr, &sib_where); 3080 if (c->dependent) 3081 dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent), 3082 incr, &dep_where); 3083 if (!sib_ncd && !dep_ncd) 3084 { 3085 new_where = NULL; 3086 ncd = NULL; 3087 } 3088 else if (sib_ncd && !dep_ncd) 3089 { 3090 new_where = sib_where; 3091 ncd = sib_ncd; 3092 } 3093 else if (dep_ncd && !sib_ncd) 3094 { 3095 new_where = dep_where; 3096 ncd = dep_ncd; 3097 } 3098 else 3099 ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where, 3100 dep_where, &new_where); 3101 3102 /* If the candidate's increment doesn't match the one we're interested 3103 in (and nor do any increments for feeding defs of a phi-dependence), 3104 then the result depends only on siblings and dependents. */ 3105 this_ncd = ncd_of_cand_and_phis (c, incr, &this_where); 3106 3107 if (!this_ncd || cand_already_replaced (c)) 3108 { 3109 *where = new_where; 3110 return ncd; 3111 } 3112 3113 /* Otherwise, compare this candidate with the result from all siblings 3114 and dependents. */ 3115 ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where); 3116 3117 return ncd; 3118 } 3119 3120 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */ 3121 3122 static inline bool 3123 profitable_increment_p (unsigned index) 3124 { 3125 return (incr_vec[index].cost <= COST_NEUTRAL); 3126 } 3127 3128 /* For each profitable increment in the increment vector not equal to 3129 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common 3130 dominator of all statements in the candidate chain rooted at C 3131 that require that increment, and insert an initializer 3132 T_0 = stride * increment at that location. Record T_0 with the 3133 increment record. 
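For example (illustrative), if several candidates share stride s_2 and require increment 5, a single statement like slsr_9 = s_2 * 5 is inserted at their nearest common dominator and reused by each replacement.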
*/ 3134 3135 static void 3136 insert_initializers (slsr_cand_t c) 3137 { 3138 unsigned i; 3139 3140 for (i = 0; i < incr_vec_len; i++) 3141 { 3142 basic_block bb; 3143 slsr_cand_t where = NULL; 3144 gassign *init_stmt; 3145 tree stride_type, new_name, incr_tree; 3146 widest_int incr = incr_vec[i].incr; 3147 3148 if (!profitable_increment_p (i) 3149 || incr == 1 3150 || (incr == -1 3151 && (!POINTER_TYPE_P (lookup_cand (c->basis)->cand_type))) 3152 || incr == 0) 3153 continue; 3154 3155 /* We may have already identified an existing initializer that 3156 will suffice. */ 3157 if (incr_vec[i].initializer) 3158 { 3159 if (dump_file && (dump_flags & TDF_DETAILS)) 3160 { 3161 fputs ("Using existing initializer: ", dump_file); 3162 print_gimple_stmt (dump_file, 3163 SSA_NAME_DEF_STMT (incr_vec[i].initializer), 3164 0, 0); 3165 } 3166 continue; 3167 } 3168 3169 /* Find the block that most closely dominates all candidates 3170 with this increment. If there is at least one candidate in 3171 that block, the earliest one will be returned in WHERE. */ 3172 bb = nearest_common_dominator_for_cands (c, incr, &where); 3173 3174 /* If the NCD is not dominated by the block containing the 3175 definition of the stride, we can't legally insert a 3176 single initializer. Mark the increment as unprofitable 3177 so we don't make any replacements. FIXME: Multiple 3178 initializers could be placed with more analysis. */ 3179 gimple stride_def = SSA_NAME_DEF_STMT (c->stride); 3180 basic_block stride_bb = gimple_bb (stride_def); 3181 3182 if (stride_bb && !dominated_by_p (CDI_DOMINATORS, bb, stride_bb)) 3183 { 3184 if (dump_file && (dump_flags & TDF_DETAILS)) 3185 fprintf (dump_file, 3186 "Initializer #%d cannot be legally placed\n", i); 3187 incr_vec[i].cost = COST_INFINITE; 3188 continue; 3189 } 3190 3191 /* Create a new SSA name to hold the initializer's value. */ 3192 stride_type = TREE_TYPE (c->stride); 3193 new_name = make_temp_ssa_name (stride_type, NULL, "slsr"); 3194 incr_vec[i].initializer = new_name; 3195 3196 /* Create the initializer and insert it in the latest possible 3197 dominating position. */ 3198 incr_tree = wide_int_to_tree (stride_type, incr); 3199 init_stmt = gimple_build_assign (new_name, MULT_EXPR, 3200 c->stride, incr_tree); 3201 if (where) 3202 { 3203 gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt); 3204 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT); 3205 gimple_set_location (init_stmt, gimple_location (where->cand_stmt)); 3206 } 3207 else 3208 { 3209 gimple_stmt_iterator gsi = gsi_last_bb (bb); 3210 gimple basis_stmt = lookup_cand (c->basis)->cand_stmt; 3211 3212 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi))) 3213 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT); 3214 else 3215 gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT); 3216 3217 gimple_set_location (init_stmt, gimple_location (basis_stmt)); 3218 } 3219 3220 if (dump_file && (dump_flags & TDF_DETAILS)) 3221 { 3222 fputs ("Inserting initializer: ", dump_file); 3223 print_gimple_stmt (dump_file, init_stmt, 0, 0); 3224 } 3225 } 3226 } 3227 3228 /* Return TRUE iff all required increments for candidates feeding PHI 3229 are profitable to replace on behalf of candidate C. 
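An increment that overflowed the increment vector also causes a FALSE return.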
*/ 3230 3231 static bool 3232 all_phi_incrs_profitable (slsr_cand_t c, gimple phi) 3233 { 3234 unsigned i; 3235 slsr_cand_t basis = lookup_cand (c->basis); 3236 slsr_cand_t phi_cand = *stmt_cand_map->get (phi); 3237 3238 for (i = 0; i < gimple_phi_num_args (phi); i++) 3239 { 3240 tree arg = gimple_phi_arg_def (phi, i); 3241 3242 if (!operand_equal_p (arg, phi_cand->base_expr, 0)) 3243 { 3244 gimple arg_def = SSA_NAME_DEF_STMT (arg); 3245 3246 if (gimple_code (arg_def) == GIMPLE_PHI) 3247 { 3248 if (!all_phi_incrs_profitable (c, arg_def)) 3249 return false; 3250 } 3251 else 3252 { 3253 int j; 3254 slsr_cand_t arg_cand = base_cand_from_table (arg); 3255 widest_int increment = arg_cand->index - basis->index; 3256 3257 if (!address_arithmetic_p && wi::neg_p (increment)) 3258 increment = -increment; 3259 3260 j = incr_vec_index (increment); 3261 3262 if (dump_file && (dump_flags & TDF_DETAILS)) 3263 { 3264 fprintf (dump_file, " Conditional candidate %d, phi: ", 3265 c->cand_num); 3266 print_gimple_stmt (dump_file, phi, 0, 0); 3267 fputs (" increment: ", dump_file); 3268 print_decs (increment, dump_file); 3269 if (j < 0) 3270 fprintf (dump_file, 3271 "\n Not replaced; incr_vec overflow.\n"); 3272 else { 3273 fprintf (dump_file, "\n cost: %d\n", incr_vec[j].cost); 3274 if (profitable_increment_p (j)) 3275 fputs (" Replacing...\n", dump_file); 3276 else 3277 fputs (" Not replaced.\n", dump_file); 3278 } 3279 } 3280 3281 if (j < 0 || !profitable_increment_p (j)) 3282 return false; 3283 } 3284 } 3285 } 3286 3287 return true; 3288 } 3289 3290 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of 3291 type TO_TYPE, and insert it in front of the statement represented 3292 by candidate C. Use *NEW_VAR to create the new SSA name. Return 3293 the new SSA name. */ 3294 3295 static tree 3296 introduce_cast_before_cand (slsr_cand_t c, tree to_type, tree from_expr) 3297 { 3298 tree cast_lhs; 3299 gassign *cast_stmt; 3300 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); 3301 3302 cast_lhs = make_temp_ssa_name (to_type, NULL, "slsr"); 3303 cast_stmt = gimple_build_assign (cast_lhs, NOP_EXPR, from_expr); 3304 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt)); 3305 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT); 3306 3307 if (dump_file && (dump_flags & TDF_DETAILS)) 3308 { 3309 fputs (" Inserting: ", dump_file); 3310 print_gimple_stmt (dump_file, cast_stmt, 0, 0); 3311 } 3312 3313 return cast_lhs; 3314 } 3315 3316 /* Replace the RHS of the statement represented by candidate C with 3317 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't 3318 leave C unchanged or just interchange its operands. The original 3319 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2. 3320 If the replacement was made and we are doing a details dump, 3321 return the revised statement, else NULL. 
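A NULL return therefore means either that no details dump was requested or that the statement was left untouched as a duplicate.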
*/ 3322 3323 static gimple 3324 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2, 3325 enum tree_code old_code, tree old_rhs1, tree old_rhs2, 3326 slsr_cand_t c) 3327 { 3328 if (new_code != old_code 3329 || ((!operand_equal_p (new_rhs1, old_rhs1, 0) 3330 || !operand_equal_p (new_rhs2, old_rhs2, 0)) 3331 && (!operand_equal_p (new_rhs1, old_rhs2, 0) 3332 || !operand_equal_p (new_rhs2, old_rhs1, 0)))) 3333 { 3334 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); 3335 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2); 3336 update_stmt (gsi_stmt (gsi)); 3337 c->cand_stmt = gsi_stmt (gsi); 3338 3339 if (dump_file && (dump_flags & TDF_DETAILS)) 3340 return gsi_stmt (gsi); 3341 } 3342 3343 else if (dump_file && (dump_flags & TDF_DETAILS)) 3344 fputs (" (duplicate, not actually replacing)\n", dump_file); 3345 3346 return NULL; 3347 } 3348 3349 /* Strength-reduce the statement represented by candidate C by replacing 3350 it with an equivalent addition or subtraction. I is the index into 3351 the increment vector identifying C's increment. NEW_VAR is used to 3352 create a new SSA name if a cast needs to be introduced. BASIS_NAME 3353 is the rhs1 to use in creating the add/subtract. */ 3354 3355 static void 3356 replace_one_candidate (slsr_cand_t c, unsigned i, tree basis_name) 3357 { 3358 gimple stmt_to_print = NULL; 3359 tree orig_rhs1, orig_rhs2; 3360 tree rhs2; 3361 enum tree_code orig_code, repl_code; 3362 widest_int cand_incr; 3363 3364 orig_code = gimple_assign_rhs_code (c->cand_stmt); 3365 orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt); 3366 orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt); 3367 cand_incr = cand_increment (c); 3368 3369 if (dump_file && (dump_flags & TDF_DETAILS)) 3370 { 3371 fputs ("Replacing: ", dump_file); 3372 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0); 3373 stmt_to_print = c->cand_stmt; 3374 } 3375 3376 if (address_arithmetic_p) 3377 repl_code = POINTER_PLUS_EXPR; 3378 else 3379 repl_code = PLUS_EXPR; 3380 3381 /* If the increment has an initializer T_0, replace the candidate 3382 statement with an add of the basis name and the initializer. */ 3383 if (incr_vec[i].initializer) 3384 { 3385 tree init_type = TREE_TYPE (incr_vec[i].initializer); 3386 tree orig_type = TREE_TYPE (orig_rhs2); 3387 3388 if (types_compatible_p (orig_type, init_type)) 3389 rhs2 = incr_vec[i].initializer; 3390 else 3391 rhs2 = introduce_cast_before_cand (c, orig_type, 3392 incr_vec[i].initializer); 3393 3394 if (incr_vec[i].incr != cand_incr) 3395 { 3396 gcc_assert (repl_code == PLUS_EXPR); 3397 repl_code = MINUS_EXPR; 3398 } 3399 3400 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2, 3401 orig_code, orig_rhs1, orig_rhs2, 3402 c); 3403 } 3404 3405 /* Otherwise, the increment is one of -1, 0, and 1. Replace 3406 with a subtract of the stride from the basis name, a copy 3407 from the basis name, or an add of the stride to the basis 3408 name, respectively. It may be necessary to introduce a 3409 cast (or reuse an existing cast). 
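That is, X = Y - S for an increment of -1, X = Y for 0, and X = Y + S for 1, where Y is the basis name and S is the stride.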
*/ 3410 else if (cand_incr == 1) 3411 { 3412 tree stride_type = TREE_TYPE (c->stride); 3413 tree orig_type = TREE_TYPE (orig_rhs2); 3414 3415 if (types_compatible_p (orig_type, stride_type)) 3416 rhs2 = c->stride; 3417 else 3418 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride); 3419 3420 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2, 3421 orig_code, orig_rhs1, orig_rhs2, 3422 c); 3423 } 3424 3425 else if (cand_incr == -1) 3426 { 3427 tree stride_type = TREE_TYPE (c->stride); 3428 tree orig_type = TREE_TYPE (orig_rhs2); 3429 gcc_assert (repl_code != POINTER_PLUS_EXPR); 3430 3431 if (types_compatible_p (orig_type, stride_type)) 3432 rhs2 = c->stride; 3433 else 3434 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride); 3435 3436 if (orig_code != MINUS_EXPR 3437 || !operand_equal_p (basis_name, orig_rhs1, 0) 3438 || !operand_equal_p (rhs2, orig_rhs2, 0)) 3439 { 3440 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); 3441 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2); 3442 update_stmt (gsi_stmt (gsi)); 3443 c->cand_stmt = gsi_stmt (gsi); 3444 3445 if (dump_file && (dump_flags & TDF_DETAILS)) 3446 stmt_to_print = gsi_stmt (gsi); 3447 } 3448 else if (dump_file && (dump_flags & TDF_DETAILS)) 3449 fputs (" (duplicate, not actually replacing)\n", dump_file); 3450 } 3451 3452 else if (cand_incr == 0) 3453 { 3454 tree lhs = gimple_assign_lhs (c->cand_stmt); 3455 tree lhs_type = TREE_TYPE (lhs); 3456 tree basis_type = TREE_TYPE (basis_name); 3457 3458 if (types_compatible_p (lhs_type, basis_type)) 3459 { 3460 gassign *copy_stmt = gimple_build_assign (lhs, basis_name); 3461 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); 3462 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt)); 3463 gsi_replace (&gsi, copy_stmt, false); 3464 c->cand_stmt = copy_stmt; 3465 3466 if (dump_file && (dump_flags & TDF_DETAILS)) 3467 stmt_to_print = copy_stmt; 3468 } 3469 else 3470 { 3471 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); 3472 gassign *cast_stmt = gimple_build_assign (lhs, NOP_EXPR, basis_name); 3473 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt)); 3474 gsi_replace (&gsi, cast_stmt, false); 3475 c->cand_stmt = cast_stmt; 3476 3477 if (dump_file && (dump_flags & TDF_DETAILS)) 3478 stmt_to_print = cast_stmt; 3479 } 3480 } 3481 else 3482 gcc_unreachable (); 3483 3484 if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print) 3485 { 3486 fputs ("With: ", dump_file); 3487 print_gimple_stmt (dump_file, stmt_to_print, 0, 0); 3488 fputs ("\n", dump_file); 3489 } 3490 } 3491 3492 /* For each candidate in the tree rooted at C, replace it with 3493 an increment if such has been shown to be profitable. */ 3494 3495 static void 3496 replace_profitable_candidates (slsr_cand_t c) 3497 { 3498 if (!cand_already_replaced (c)) 3499 { 3500 widest_int increment = cand_abs_increment (c); 3501 enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt); 3502 int i; 3503 3504 i = incr_vec_index (increment); 3505 3506 /* Only process profitable increments. Nothing useful can be done 3507 to a cast or copy. */ 3508 if (i >= 0 3509 && profitable_increment_p (i) 3510 && orig_code != MODIFY_EXPR 3511 && !CONVERT_EXPR_CODE_P (orig_code)) 3512 { 3513 if (phi_dependent_cand_p (c)) 3514 { 3515 gimple phi = lookup_cand (c->def_phi)->cand_stmt; 3516 3517 if (all_phi_incrs_profitable (c, phi)) 3518 { 3519 /* Look up the LHS SSA name from C's basis. 
This will be 3520 the RHS1 of the adds we will introduce to create new 3521 phi arguments. */ 3522 slsr_cand_t basis = lookup_cand (c->basis); 3523 tree basis_name = gimple_assign_lhs (basis->cand_stmt); 3524 3525 /* Create a new phi statement that will represent C's true 3526 basis after the transformation is complete. */ 3527 location_t loc = gimple_location (c->cand_stmt); 3528 tree name = create_phi_basis (c, phi, basis_name, 3529 loc, UNKNOWN_STRIDE); 3530 3531 /* Replace C with an add of the new basis phi and the 3532 increment. */ 3533 replace_one_candidate (c, i, name); 3534 } 3535 } 3536 else 3537 { 3538 slsr_cand_t basis = lookup_cand (c->basis); 3539 tree basis_name = gimple_assign_lhs (basis->cand_stmt); 3540 replace_one_candidate (c, i, basis_name); 3541 } 3542 } 3543 } 3544 3545 if (c->sibling) 3546 replace_profitable_candidates (lookup_cand (c->sibling)); 3547 3548 if (c->dependent) 3549 replace_profitable_candidates (lookup_cand (c->dependent)); 3550 } 3551 3552 /* Analyze costs of related candidates in the candidate vector, 3553 and make beneficial replacements. */ 3554 3555 static void 3556 analyze_candidates_and_replace (void) 3557 { 3558 unsigned i; 3559 slsr_cand_t c; 3560 3561 /* Each candidate that has a null basis and a non-null 3562 dependent is the root of a tree of related statements. 3563 Analyze each tree to determine a subset of those 3564 statements that can be replaced with maximum benefit. */ 3565 FOR_EACH_VEC_ELT (cand_vec, i, c) 3566 { 3567 slsr_cand_t first_dep; 3568 3569 if (c->basis != 0 || c->dependent == 0) 3570 continue; 3571 3572 if (dump_file && (dump_flags & TDF_DETAILS)) 3573 fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n", 3574 c->cand_num); 3575 3576 first_dep = lookup_cand (c->dependent); 3577 3578 /* If this is a chain of CAND_REFs, unconditionally replace 3579 each of them with a strength-reduced data reference. */ 3580 if (c->kind == CAND_REF) 3581 replace_refs (c); 3582 3583 /* If the common stride of all related candidates is a known 3584 constant, each candidate without a phi-dependence can be 3585 profitably replaced. Each replaces a multiply by a single 3586 add, with the possibility that a feeding add also goes dead. 3587 A candidate with a phi-dependence is replaced only if the 3588 compensation code it requires is offset by the strength 3589 reduction savings. */ 3590 else if (TREE_CODE (c->stride) == INTEGER_CST) 3591 replace_uncond_cands_and_profitable_phis (first_dep); 3592 3593 /* When the stride is an SSA name, it may still be profitable 3594 to replace some or all of the dependent candidates, depending 3595 on whether the introduced increments can be reused, or are 3596 less expensive to calculate than the replaced statements. */ 3597 else 3598 { 3599 machine_mode mode; 3600 bool speed; 3601 3602 /* Determine whether we'll be generating pointer arithmetic 3603 when replacing candidates. */ 3604 address_arithmetic_p = (c->kind == CAND_ADD 3605 && POINTER_TYPE_P (c->cand_type)); 3606 3607 /* If all candidates have already been replaced under other 3608 interpretations, nothing remains to be done. */ 3609 if (!count_candidates (c)) 3610 continue; 3611 3612 /* Construct an array of increments for this candidate chain. */ 3613 incr_vec = XNEWVEC (incr_info, MAX_INCR_VEC_LEN); 3614 incr_vec_len = 0; 3615 record_increments (c); 3616 3617 /* Determine which increments are profitable to replace. 
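The cost computations use the machine mode of the root candidate's LHS and the speed/size preference of the most dominant candidate not yet replaced.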
*/ 3618 mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt))); 3619 speed = optimize_cands_for_speed_p (c); 3620 analyze_increments (first_dep, mode, speed); 3621 3622 /* Insert initializers of the form T_0 = stride * increment 3623 for use in profitable replacements. */ 3624 insert_initializers (first_dep); 3625 dump_incr_vec (); 3626 3627 /* Perform the replacements. */ 3628 replace_profitable_candidates (first_dep); 3629 free (incr_vec); 3630 } 3631 } 3632 3633 /* For conditional candidates, we may have uncommitted insertions 3634 on edges to clean up. */ 3635 gsi_commit_edge_inserts (); 3636 } 3637 3638 namespace { 3639 3640 const pass_data pass_data_strength_reduction = 3641 { 3642 GIMPLE_PASS, /* type */ 3643 "slsr", /* name */ 3644 OPTGROUP_NONE, /* optinfo_flags */ 3645 TV_GIMPLE_SLSR, /* tv_id */ 3646 ( PROP_cfg | PROP_ssa ), /* properties_required */ 3647 0, /* properties_provided */ 3648 0, /* properties_destroyed */ 3649 0, /* todo_flags_start */ 3650 0, /* todo_flags_finish */ 3651 }; 3652 3653 class pass_strength_reduction : public gimple_opt_pass 3654 { 3655 public: 3656 pass_strength_reduction (gcc::context *ctxt) 3657 : gimple_opt_pass (pass_data_strength_reduction, ctxt) 3658 {} 3659 3660 /* opt_pass methods: */ 3661 virtual bool gate (function *) { return flag_tree_slsr; } 3662 virtual unsigned int execute (function *); 3663 3664 }; // class pass_strength_reduction 3665 3666 unsigned 3667 pass_strength_reduction::execute (function *fun) 3668 { 3669 /* Create the obstack where candidates will reside. */ 3670 gcc_obstack_init (&cand_obstack); 3671 3672 /* Allocate the candidate vector. */ 3673 cand_vec.create (128); 3674 3675 /* Allocate the mapping from statements to candidate indices. */ 3676 stmt_cand_map = new hash_map<gimple, slsr_cand_t>; 3677 3678 /* Create the obstack where candidate chains will reside. */ 3679 gcc_obstack_init (&chain_obstack); 3680 3681 /* Allocate the mapping from base expressions to candidate chains. */ 3682 base_cand_map = new hash_table<cand_chain_hasher> (500); 3683 3684 /* Allocate the mapping from bases to alternative bases. */ 3685 alt_base_map = new hash_map<tree, tree>; 3686 3687 /* Initialize the loop optimizer. We need to detect flow across 3688 back edges, and this gives us dominator information as well. */ 3689 loop_optimizer_init (AVOID_CFG_MODIFICATIONS); 3690 3691 /* Walk the CFG in predominator order looking for strength reduction 3692 candidates. */ 3693 find_candidates_dom_walker (CDI_DOMINATORS) 3694 .walk (fun->cfg->x_entry_block_ptr); 3695 3696 if (dump_file && (dump_flags & TDF_DETAILS)) 3697 { 3698 dump_cand_vec (); 3699 dump_cand_chains (); 3700 } 3701 3702 delete alt_base_map; 3703 free_affine_expand_cache (&name_expansions); 3704 3705 /* Analyze costs and make appropriate replacements. */ 3706 analyze_candidates_and_replace (); 3707 3708 loop_optimizer_finalize (); 3709 delete base_cand_map; 3710 base_cand_map = NULL; 3711 obstack_free (&chain_obstack, NULL); 3712 delete stmt_cand_map; 3713 cand_vec.release (); 3714 obstack_free (&cand_obstack, NULL); 3715 3716 return 0; 3717 } 3718 3719 } // anon namespace 3720 3721 gimple_opt_pass * 3722 make_pass_strength_reduction (gcc::context *ctxt) 3723 { 3724 return new pass_strength_reduction (ctxt); 3725 } 3726