/* Reassociation for trees.
   Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hash-table.h"
#include "tm.h"
#include "rtl.h"
#include "tm_p.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfganal.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-inline.h"
#include "hash-map.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "hashtab.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "target.h"
#include "params.h"
#include "diagnostic-core.h"
#include "builtins.h"
#include "gimplify.h"
#include "insn-codes.h"
#include "optabs.h"

/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (They do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_t

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   3a. Combine repeated factors with the same occurrence counts
   into a __builtin_powi call that will later be optimized into
   an optimal number of multiplies.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   IE if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1),  b (1),  c (1),  d (2), e (2)


   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op.  So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or less ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.
   As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */


/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;

/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
  unsigned int count;
} *operand_entry_t;

static alloc_pool operand_entry_pool;

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, long> *operand_rank;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);

/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}

/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (arg))
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (arg);
          if (gimple_bb (def_stmt)->loop_father == father)
            return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
        }
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}

/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */
static bool
loop_carried_phi (tree exp)
{
  gimple phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}

/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */
static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}

/* Look up the operand rank structure for expression E.  */

static inline long
find_operand_rank (tree e)
{
  long *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, long rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}

/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* Constants have rank 0.  */
  if (is_gimple_min_invariant (e))
    return 0;

  /* SSA_NAMEs have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.
     Suppose we have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      gimple stmt;
      long rank;
      int i, n;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
        return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
        return phi_rank (stmt);

      if (!is_gimple_assign (stmt)
          || gimple_vdef (stmt))
        return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
         exception, remove the bias from loop-carried phis when propagating
         the rank so that dependent operations are not also biased.  */
      rank = 0;
      if (gimple_assign_single_p (stmt))
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          n = TREE_OPERAND_LENGTH (rhs);
          if (n == 0)
            rank = propagate_rank (rank, rhs);
          else
            {
              for (i = 0; i < n; i++)
                {
                  op = TREE_OPERAND (rhs, i);

                  if (op != NULL_TREE)
                    rank = propagate_rank (rank, op);
                }
            }
        }
      else
        {
          n = gimple_num_ops (stmt);
          for (i = 1; i < n; i++)
            {
              op = gimple_op (stmt, i);
              gcc_assert (op);
              rank = propagate_rank (rank, op);
            }
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e, 0);
          fprintf (dump_file, " is %ld\n", (rank + 1));
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Globals, etc, are rank 0 */
  return 0;
}


/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    return FLOAT_CONST_TYPE;
  else
    return OTHER_CONST_TYPE;
}

/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.
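     For example, the constants in a + 3 + b + 5 can only be folded
     into a single 8 once they end up adjacent in the sorted list.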
     Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
        return constant_type (oeb->op) - constant_type (oea->op);
      else
        /* To make sorting result stable, we use unique IDs to determine
           order.  */
        return oeb->id - oea->id;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
         versions of removed SSA_NAMEs, so if possible, prefer to sort
         based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
         See PR60418.  */
      if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
          && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
          && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        {
          gimple stmta = SSA_NAME_DEF_STMT (oea->op);
          gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
          basic_block bba = gimple_bb (stmta);
          basic_block bbb = gimple_bb (stmtb);
          if (bbb != bba)
            {
              if (bb_rank[bbb->index] != bb_rank[bba->index])
                return bb_rank[bbb->index] - bb_rank[bba->index];
            }
          else
            {
              bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
              bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
              if (da != db)
                return da ? 1 : -1;
            }
        }

      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
      else
        return oeb->id - oea->id;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;
  else
    return oeb->id - oea->id;
}

/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
                       HOST_WIDE_INT repeat)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}

/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.
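   For example, with CODE == PLUS_EXPR, a statement t2 = t1 + c inside
   LOOP whose result t2 has a single use is reassociable, provided
   neither operand occurs in an abnormal PHI.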
   */

static bool
is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      tree rhs2 = gimple_assign_rhs2 (stmt);
      if (TREE_CODE (rhs1) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
        return false;
      if (rhs2
          && TREE_CODE (rhs2) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
        return false;
      return true;
    }

  return false;
}


/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of the negate operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple stmt = SSA_NAME_DEF_STMT (name);

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}

/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
                          vec<operand_entry_t> *ops,
                          bool *all_done,
                          unsigned int i,
                          operand_entry_t curr,
                          operand_entry_t last)
{

  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
        {
        case MAX_EXPR:
        case MIN_EXPR:
        case BIT_IOR_EXPR:
        case BIT_AND_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op, 0);
              fprintf (dump_file, " [&|minmax] ");
              print_generic_expr (dump_file, last->op, 0);
              fprintf (dump_file, " -> ");
              print_generic_stmt (dump_file, last->op, 0);
            }

          ops->ordered_remove (i);
          reassociate_stats.ops_eliminated ++;

          return true;

        case BIT_XOR_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op, 0);
              fprintf (dump_file, " ^ ");
              print_generic_expr (dump_file, last->op, 0);
              fprintf (dump_file, " -> nothing\n");
            }

          reassociate_stats.ops_eliminated += 2;

          if (ops->length () == 2)
            {
              ops->create (0);
              add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
              *all_done = true;
            }
          else
            {
              ops->ordered_remove (i-1);
              ops->ordered_remove (i-1);
            }

          return true;

        default:
          break;
        }
    }
  return false;
}

static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
   not expression, look in OPS for a corresponding positive operation to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.
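   For instance, for a PLUS_EXPR chain with operands {a, b, -a}, a and -a
   cancel and the list becomes {b, 0}; with {a, b, ~a}, the pair folds to
   -1 and the list becomes {b, -1}.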
*/ 780 781 static bool 782 eliminate_plus_minus_pair (enum tree_code opcode, 783 vec<operand_entry_t> *ops, 784 unsigned int currindex, 785 operand_entry_t curr) 786 { 787 tree negateop; 788 tree notop; 789 unsigned int i; 790 operand_entry_t oe; 791 792 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME) 793 return false; 794 795 negateop = get_unary_op (curr->op, NEGATE_EXPR); 796 notop = get_unary_op (curr->op, BIT_NOT_EXPR); 797 if (negateop == NULL_TREE && notop == NULL_TREE) 798 return false; 799 800 /* Any non-negated version will have a rank that is one less than 801 the current rank. So once we hit those ranks, if we don't find 802 one, we can stop. */ 803 804 for (i = currindex + 1; 805 ops->iterate (i, &oe) 806 && oe->rank >= curr->rank - 1 ; 807 i++) 808 { 809 if (oe->op == negateop) 810 { 811 812 if (dump_file && (dump_flags & TDF_DETAILS)) 813 { 814 fprintf (dump_file, "Equivalence: "); 815 print_generic_expr (dump_file, negateop, 0); 816 fprintf (dump_file, " + -"); 817 print_generic_expr (dump_file, oe->op, 0); 818 fprintf (dump_file, " -> 0\n"); 819 } 820 821 ops->ordered_remove (i); 822 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op))); 823 ops->ordered_remove (currindex); 824 reassociate_stats.ops_eliminated ++; 825 826 return true; 827 } 828 else if (oe->op == notop) 829 { 830 tree op_type = TREE_TYPE (oe->op); 831 832 if (dump_file && (dump_flags & TDF_DETAILS)) 833 { 834 fprintf (dump_file, "Equivalence: "); 835 print_generic_expr (dump_file, notop, 0); 836 fprintf (dump_file, " + ~"); 837 print_generic_expr (dump_file, oe->op, 0); 838 fprintf (dump_file, " -> -1\n"); 839 } 840 841 ops->ordered_remove (i); 842 add_to_ops_vec (ops, build_int_cst_type (op_type, -1)); 843 ops->ordered_remove (currindex); 844 reassociate_stats.ops_eliminated ++; 845 846 return true; 847 } 848 } 849 850 /* CURR->OP is a negate expr in a plus expr: save it for later 851 inspection in repropagate_negates(). */ 852 if (negateop != NULL_TREE) 853 plus_negates.safe_push (curr->op); 854 855 return false; 856 } 857 858 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a 859 bitwise not expression, look in OPS for a corresponding operand to 860 cancel it out. If we find one, remove the other from OPS, replace 861 OPS[CURRINDEX] with 0, and return true. Otherwise, return 862 false. */ 863 864 static bool 865 eliminate_not_pairs (enum tree_code opcode, 866 vec<operand_entry_t> *ops, 867 unsigned int currindex, 868 operand_entry_t curr) 869 { 870 tree notop; 871 unsigned int i; 872 operand_entry_t oe; 873 874 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) 875 || TREE_CODE (curr->op) != SSA_NAME) 876 return false; 877 878 notop = get_unary_op (curr->op, BIT_NOT_EXPR); 879 if (notop == NULL_TREE) 880 return false; 881 882 /* Any non-not version will have a rank that is one less than 883 the current rank. So once we hit those ranks, if we don't find 884 one, we can stop. 
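     When a match is found, e.g. for x & y & ~x or x | y | ~x, the whole
     list collapses to 0 or -1 respectively.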
*/ 885 886 for (i = currindex + 1; 887 ops->iterate (i, &oe) 888 && oe->rank >= curr->rank - 1; 889 i++) 890 { 891 if (oe->op == notop) 892 { 893 if (dump_file && (dump_flags & TDF_DETAILS)) 894 { 895 fprintf (dump_file, "Equivalence: "); 896 print_generic_expr (dump_file, notop, 0); 897 if (opcode == BIT_AND_EXPR) 898 fprintf (dump_file, " & ~"); 899 else if (opcode == BIT_IOR_EXPR) 900 fprintf (dump_file, " | ~"); 901 print_generic_expr (dump_file, oe->op, 0); 902 if (opcode == BIT_AND_EXPR) 903 fprintf (dump_file, " -> 0\n"); 904 else if (opcode == BIT_IOR_EXPR) 905 fprintf (dump_file, " -> -1\n"); 906 } 907 908 if (opcode == BIT_AND_EXPR) 909 oe->op = build_zero_cst (TREE_TYPE (oe->op)); 910 else if (opcode == BIT_IOR_EXPR) 911 oe->op = build_all_ones_cst (TREE_TYPE (oe->op)); 912 913 reassociate_stats.ops_eliminated += ops->length () - 1; 914 ops->truncate (0); 915 ops->quick_push (oe); 916 return true; 917 } 918 } 919 920 return false; 921 } 922 923 /* Use constant value that may be present in OPS to try to eliminate 924 operands. Note that this function is only really used when we've 925 eliminated ops for other reasons, or merged constants. Across 926 single statements, fold already does all of this, plus more. There 927 is little point in duplicating logic, so I've only included the 928 identities that I could ever construct testcases to trigger. */ 929 930 static void 931 eliminate_using_constants (enum tree_code opcode, 932 vec<operand_entry_t> *ops) 933 { 934 operand_entry_t oelast = ops->last (); 935 tree type = TREE_TYPE (oelast->op); 936 937 if (oelast->rank == 0 938 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type))) 939 { 940 switch (opcode) 941 { 942 case BIT_AND_EXPR: 943 if (integer_zerop (oelast->op)) 944 { 945 if (ops->length () != 1) 946 { 947 if (dump_file && (dump_flags & TDF_DETAILS)) 948 fprintf (dump_file, "Found & 0, removing all other ops\n"); 949 950 reassociate_stats.ops_eliminated += ops->length () - 1; 951 952 ops->truncate (0); 953 ops->quick_push (oelast); 954 return; 955 } 956 } 957 else if (integer_all_onesp (oelast->op)) 958 { 959 if (ops->length () != 1) 960 { 961 if (dump_file && (dump_flags & TDF_DETAILS)) 962 fprintf (dump_file, "Found & -1, removing\n"); 963 ops->pop (); 964 reassociate_stats.ops_eliminated++; 965 } 966 } 967 break; 968 case BIT_IOR_EXPR: 969 if (integer_all_onesp (oelast->op)) 970 { 971 if (ops->length () != 1) 972 { 973 if (dump_file && (dump_flags & TDF_DETAILS)) 974 fprintf (dump_file, "Found | -1, removing all other ops\n"); 975 976 reassociate_stats.ops_eliminated += ops->length () - 1; 977 978 ops->truncate (0); 979 ops->quick_push (oelast); 980 return; 981 } 982 } 983 else if (integer_zerop (oelast->op)) 984 { 985 if (ops->length () != 1) 986 { 987 if (dump_file && (dump_flags & TDF_DETAILS)) 988 fprintf (dump_file, "Found | 0, removing\n"); 989 ops->pop (); 990 reassociate_stats.ops_eliminated++; 991 } 992 } 993 break; 994 case MULT_EXPR: 995 if (integer_zerop (oelast->op) 996 || (FLOAT_TYPE_P (type) 997 && !HONOR_NANS (type) 998 && !HONOR_SIGNED_ZEROS (type) 999 && real_zerop (oelast->op))) 1000 { 1001 if (ops->length () != 1) 1002 { 1003 if (dump_file && (dump_flags & TDF_DETAILS)) 1004 fprintf (dump_file, "Found * 0, removing all other ops\n"); 1005 1006 reassociate_stats.ops_eliminated += ops->length () - 1; 1007 ops->truncate (1); 1008 ops->quick_push (oelast); 1009 return; 1010 } 1011 } 1012 else if (integer_onep (oelast->op) 1013 || (FLOAT_TYPE_P (type) 1014 && !HONOR_SNANS (type) 1015 && real_onep (oelast->op))) 
1016 { 1017 if (ops->length () != 1) 1018 { 1019 if (dump_file && (dump_flags & TDF_DETAILS)) 1020 fprintf (dump_file, "Found * 1, removing\n"); 1021 ops->pop (); 1022 reassociate_stats.ops_eliminated++; 1023 return; 1024 } 1025 } 1026 break; 1027 case BIT_XOR_EXPR: 1028 case PLUS_EXPR: 1029 case MINUS_EXPR: 1030 if (integer_zerop (oelast->op) 1031 || (FLOAT_TYPE_P (type) 1032 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR) 1033 && fold_real_zero_addition_p (type, oelast->op, 1034 opcode == MINUS_EXPR))) 1035 { 1036 if (ops->length () != 1) 1037 { 1038 if (dump_file && (dump_flags & TDF_DETAILS)) 1039 fprintf (dump_file, "Found [|^+] 0, removing\n"); 1040 ops->pop (); 1041 reassociate_stats.ops_eliminated++; 1042 return; 1043 } 1044 } 1045 break; 1046 default: 1047 break; 1048 } 1049 } 1050 } 1051 1052 1053 static void linearize_expr_tree (vec<operand_entry_t> *, gimple, 1054 bool, bool); 1055 1056 /* Structure for tracking and counting operands. */ 1057 typedef struct oecount_s { 1058 int cnt; 1059 int id; 1060 enum tree_code oecode; 1061 tree op; 1062 } oecount; 1063 1064 1065 /* The heap for the oecount hashtable and the sorted list of operands. */ 1066 static vec<oecount> cvec; 1067 1068 1069 /* Oecount hashtable helpers. */ 1070 1071 struct oecount_hasher 1072 { 1073 typedef int value_type; 1074 typedef int compare_type; 1075 typedef int store_values_directly; 1076 static inline hashval_t hash (const value_type &); 1077 static inline bool equal (const value_type &, const compare_type &); 1078 static bool is_deleted (int &v) { return v == 1; } 1079 static void mark_deleted (int &e) { e = 1; } 1080 static bool is_empty (int &v) { return v == 0; } 1081 static void mark_empty (int &e) { e = 0; } 1082 static void remove (int &) {} 1083 }; 1084 1085 /* Hash function for oecount. */ 1086 1087 inline hashval_t 1088 oecount_hasher::hash (const value_type &p) 1089 { 1090 const oecount *c = &cvec[p - 42]; 1091 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode; 1092 } 1093 1094 /* Comparison function for oecount. */ 1095 1096 inline bool 1097 oecount_hasher::equal (const value_type &p1, const compare_type &p2) 1098 { 1099 const oecount *c1 = &cvec[p1 - 42]; 1100 const oecount *c2 = &cvec[p2 - 42]; 1101 return (c1->oecode == c2->oecode 1102 && c1->op == c2->op); 1103 } 1104 1105 /* Comparison function for qsort sorting oecount elements by count. */ 1106 1107 static int 1108 oecount_cmp (const void *p1, const void *p2) 1109 { 1110 const oecount *c1 = (const oecount *)p1; 1111 const oecount *c2 = (const oecount *)p2; 1112 if (c1->cnt != c2->cnt) 1113 return c1->cnt - c2->cnt; 1114 else 1115 /* If counts are identical, use unique IDs to stabilize qsort. */ 1116 return c1->id - c2->id; 1117 } 1118 1119 /* Return TRUE iff STMT represents a builtin call that raises OP 1120 to some exponent. */ 1121 1122 static bool 1123 stmt_is_power_of_op (gimple stmt, tree op) 1124 { 1125 tree fndecl; 1126 1127 if (!is_gimple_call (stmt)) 1128 return false; 1129 1130 fndecl = gimple_call_fndecl (stmt); 1131 1132 if (!fndecl 1133 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) 1134 return false; 1135 1136 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) 1137 { 1138 CASE_FLT_FN (BUILT_IN_POW): 1139 CASE_FLT_FN (BUILT_IN_POWI): 1140 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0)); 1141 1142 default: 1143 return false; 1144 } 1145 } 1146 1147 /* Given STMT which is a __builtin_pow* call, decrement its exponent 1148 in place and return the result. 
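   For example, a call __builtin_powi (x, 3) is rewritten in place to
   __builtin_powi (x, 2) and 2 is returned.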
Assumes that stmt_is_power_of_op 1149 was previously called for STMT and returned TRUE. */ 1150 1151 static HOST_WIDE_INT 1152 decrement_power (gimple stmt) 1153 { 1154 REAL_VALUE_TYPE c, cint; 1155 HOST_WIDE_INT power; 1156 tree arg1; 1157 1158 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) 1159 { 1160 CASE_FLT_FN (BUILT_IN_POW): 1161 arg1 = gimple_call_arg (stmt, 1); 1162 c = TREE_REAL_CST (arg1); 1163 power = real_to_integer (&c) - 1; 1164 real_from_integer (&cint, VOIDmode, power, SIGNED); 1165 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint)); 1166 return power; 1167 1168 CASE_FLT_FN (BUILT_IN_POWI): 1169 arg1 = gimple_call_arg (stmt, 1); 1170 power = TREE_INT_CST_LOW (arg1) - 1; 1171 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power)); 1172 return power; 1173 1174 default: 1175 gcc_unreachable (); 1176 } 1177 } 1178 1179 /* Find the single immediate use of STMT's LHS, and replace it 1180 with OP. Remove STMT. If STMT's LHS is the same as *DEF, 1181 replace *DEF with OP as well. */ 1182 1183 static void 1184 propagate_op_to_single_use (tree op, gimple stmt, tree *def) 1185 { 1186 tree lhs; 1187 gimple use_stmt; 1188 use_operand_p use; 1189 gimple_stmt_iterator gsi; 1190 1191 if (is_gimple_call (stmt)) 1192 lhs = gimple_call_lhs (stmt); 1193 else 1194 lhs = gimple_assign_lhs (stmt); 1195 1196 gcc_assert (has_single_use (lhs)); 1197 single_imm_use (lhs, &use, &use_stmt); 1198 if (lhs == *def) 1199 *def = op; 1200 SET_USE (use, op); 1201 if (TREE_CODE (op) != SSA_NAME) 1202 update_stmt (use_stmt); 1203 gsi = gsi_for_stmt (stmt); 1204 unlink_stmt_vdef (stmt); 1205 reassoc_remove_stmt (&gsi); 1206 release_defs (stmt); 1207 } 1208 1209 /* Walks the linear chain with result *DEF searching for an operation 1210 with operand OP and code OPCODE removing that from the chain. *DEF 1211 is updated if there is only one operand but no operation left. */ 1212 1213 static void 1214 zero_one_operation (tree *def, enum tree_code opcode, tree op) 1215 { 1216 gimple stmt = SSA_NAME_DEF_STMT (*def); 1217 1218 do 1219 { 1220 tree name; 1221 1222 if (opcode == MULT_EXPR 1223 && stmt_is_power_of_op (stmt, op)) 1224 { 1225 if (decrement_power (stmt) == 1) 1226 propagate_op_to_single_use (op, stmt, def); 1227 return; 1228 } 1229 1230 name = gimple_assign_rhs1 (stmt); 1231 1232 /* If this is the operation we look for and one of the operands 1233 is ours simply propagate the other operand into the stmts 1234 single use. */ 1235 if (gimple_assign_rhs_code (stmt) == opcode 1236 && (name == op 1237 || gimple_assign_rhs2 (stmt) == op)) 1238 { 1239 if (name == op) 1240 name = gimple_assign_rhs2 (stmt); 1241 propagate_op_to_single_use (name, stmt, def); 1242 return; 1243 } 1244 1245 /* We might have a multiply of two __builtin_pow* calls, and 1246 the operand might be hiding in the rightmost one. */ 1247 if (opcode == MULT_EXPR 1248 && gimple_assign_rhs_code (stmt) == opcode 1249 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME 1250 && has_single_use (gimple_assign_rhs2 (stmt))) 1251 { 1252 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 1253 if (stmt_is_power_of_op (stmt2, op)) 1254 { 1255 if (decrement_power (stmt2) == 1) 1256 propagate_op_to_single_use (op, stmt2, def); 1257 return; 1258 } 1259 } 1260 1261 /* Continue walking the chain. */ 1262 gcc_assert (name != op 1263 && TREE_CODE (name) == SSA_NAME); 1264 stmt = SSA_NAME_DEF_STMT (name); 1265 } 1266 while (1); 1267 } 1268 1269 /* Returns true if statement S1 dominates statement S2. 
Like 1270 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */ 1271 1272 static bool 1273 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2) 1274 { 1275 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2); 1276 1277 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D) 1278 SSA_NAME. Assume it lives at the beginning of function and 1279 thus dominates everything. */ 1280 if (!bb1 || s1 == s2) 1281 return true; 1282 1283 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */ 1284 if (!bb2) 1285 return false; 1286 1287 if (bb1 == bb2) 1288 { 1289 /* PHIs in the same basic block are assumed to be 1290 executed all in parallel, if only one stmt is a PHI, 1291 it dominates the other stmt in the same basic block. */ 1292 if (gimple_code (s1) == GIMPLE_PHI) 1293 return true; 1294 1295 if (gimple_code (s2) == GIMPLE_PHI) 1296 return false; 1297 1298 gcc_assert (gimple_uid (s1) && gimple_uid (s2)); 1299 1300 if (gimple_uid (s1) < gimple_uid (s2)) 1301 return true; 1302 1303 if (gimple_uid (s1) > gimple_uid (s2)) 1304 return false; 1305 1306 gimple_stmt_iterator gsi = gsi_for_stmt (s1); 1307 unsigned int uid = gimple_uid (s1); 1308 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi)) 1309 { 1310 gimple s = gsi_stmt (gsi); 1311 if (gimple_uid (s) != uid) 1312 break; 1313 if (s == s2) 1314 return true; 1315 } 1316 1317 return false; 1318 } 1319 1320 return dominated_by_p (CDI_DOMINATORS, bb2, bb1); 1321 } 1322 1323 /* Insert STMT after INSERT_POINT. */ 1324 1325 static void 1326 insert_stmt_after (gimple stmt, gimple insert_point) 1327 { 1328 gimple_stmt_iterator gsi; 1329 basic_block bb; 1330 1331 if (gimple_code (insert_point) == GIMPLE_PHI) 1332 bb = gimple_bb (insert_point); 1333 else if (!stmt_ends_bb_p (insert_point)) 1334 { 1335 gsi = gsi_for_stmt (insert_point); 1336 gimple_set_uid (stmt, gimple_uid (insert_point)); 1337 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); 1338 return; 1339 } 1340 else 1341 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME, 1342 thus if it must end a basic block, it should be a call that can 1343 throw, or some assignment that can throw. If it throws, the LHS 1344 of it will not be initialized though, so only valid places using 1345 the SSA_NAME should be dominated by the fallthru edge. */ 1346 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest; 1347 gsi = gsi_after_labels (bb); 1348 if (gsi_end_p (gsi)) 1349 { 1350 gimple_stmt_iterator gsi2 = gsi_last_bb (bb); 1351 gimple_set_uid (stmt, 1352 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2))); 1353 } 1354 else 1355 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi))); 1356 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 1357 } 1358 1359 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for 1360 the result. Places the statement after the definition of either 1361 OP1 or OP2. Returns the new statement. */ 1362 1363 static gimple 1364 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode) 1365 { 1366 gimple op1def = NULL, op2def = NULL; 1367 gimple_stmt_iterator gsi; 1368 tree op; 1369 gassign *sum; 1370 1371 /* Create the addition statement. */ 1372 op = make_ssa_name (type); 1373 sum = gimple_build_assign (op, opcode, op1, op2); 1374 1375 /* Find an insertion place and insert. 
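   The place is after the later of the two operand definitions, or at the
   start of the function's first basic block if neither operand has a real
   defining statement.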
*/ 1376 if (TREE_CODE (op1) == SSA_NAME) 1377 op1def = SSA_NAME_DEF_STMT (op1); 1378 if (TREE_CODE (op2) == SSA_NAME) 1379 op2def = SSA_NAME_DEF_STMT (op2); 1380 if ((!op1def || gimple_nop_p (op1def)) 1381 && (!op2def || gimple_nop_p (op2def))) 1382 { 1383 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); 1384 if (gsi_end_p (gsi)) 1385 { 1386 gimple_stmt_iterator gsi2 1387 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); 1388 gimple_set_uid (sum, 1389 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2))); 1390 } 1391 else 1392 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi))); 1393 gsi_insert_before (&gsi, sum, GSI_NEW_STMT); 1394 } 1395 else 1396 { 1397 gimple insert_point; 1398 if ((!op1def || gimple_nop_p (op1def)) 1399 || (op2def && !gimple_nop_p (op2def) 1400 && reassoc_stmt_dominates_stmt_p (op1def, op2def))) 1401 insert_point = op2def; 1402 else 1403 insert_point = op1def; 1404 insert_stmt_after (sum, insert_point); 1405 } 1406 update_stmt (sum); 1407 1408 return sum; 1409 } 1410 1411 /* Perform un-distribution of divisions and multiplications. 1412 A * X + B * X is transformed into (A + B) * X and A / X + B / X 1413 to (A + B) / X for real X. 1414 1415 The algorithm is organized as follows. 1416 1417 - First we walk the addition chain *OPS looking for summands that 1418 are defined by a multiplication or a real division. This results 1419 in the candidates bitmap with relevant indices into *OPS. 1420 1421 - Second we build the chains of multiplications or divisions for 1422 these candidates, counting the number of occurrences of (operand, code) 1423 pairs in all of the candidates chains. 1424 1425 - Third we sort the (operand, code) pairs by number of occurrence and 1426 process them starting with the pair with the most uses. 1427 1428 * For each such pair we walk the candidates again to build a 1429 second candidate bitmap noting all multiplication/division chains 1430 that have at least one occurrence of (operand, code). 1431 1432 * We build an alternate addition chain only covering these 1433 candidates with one (operand, code) operation removed from their 1434 multiplication/division chain. 1435 1436 * The first candidate gets replaced by the alternate addition chain 1437 multiplied/divided by the operand. 1438 1439 * All candidate chains get disabled for further processing and 1440 processing of (operand, code) pairs continues. 1441 1442 The alternate addition chains built are re-processed by the main 1443 reassociation algorithm which allows optimizing a * x * y + b * y * x 1444 to (a + b ) * x * y in one invocation of the reassociation pass. */ 1445 1446 static bool 1447 undistribute_ops_list (enum tree_code opcode, 1448 vec<operand_entry_t> *ops, struct loop *loop) 1449 { 1450 unsigned int length = ops->length (); 1451 operand_entry_t oe1; 1452 unsigned i, j; 1453 sbitmap candidates, candidates2; 1454 unsigned nr_candidates, nr_candidates2; 1455 sbitmap_iterator sbi0; 1456 vec<operand_entry_t> *subops; 1457 bool changed = false; 1458 int next_oecount_id = 0; 1459 1460 if (length <= 1 1461 || opcode != PLUS_EXPR) 1462 return false; 1463 1464 /* Build a list of candidates to process. 
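     A candidate is a summand of the addition chain that is itself defined
     by a reassociable MULT_EXPR or RDIV_EXPR.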
*/ 1465 candidates = sbitmap_alloc (length); 1466 bitmap_clear (candidates); 1467 nr_candidates = 0; 1468 FOR_EACH_VEC_ELT (*ops, i, oe1) 1469 { 1470 enum tree_code dcode; 1471 gimple oe1def; 1472 1473 if (TREE_CODE (oe1->op) != SSA_NAME) 1474 continue; 1475 oe1def = SSA_NAME_DEF_STMT (oe1->op); 1476 if (!is_gimple_assign (oe1def)) 1477 continue; 1478 dcode = gimple_assign_rhs_code (oe1def); 1479 if ((dcode != MULT_EXPR 1480 && dcode != RDIV_EXPR) 1481 || !is_reassociable_op (oe1def, dcode, loop)) 1482 continue; 1483 1484 bitmap_set_bit (candidates, i); 1485 nr_candidates++; 1486 } 1487 1488 if (nr_candidates < 2) 1489 { 1490 sbitmap_free (candidates); 1491 return false; 1492 } 1493 1494 if (dump_file && (dump_flags & TDF_DETAILS)) 1495 { 1496 fprintf (dump_file, "searching for un-distribute opportunities "); 1497 print_generic_expr (dump_file, 1498 (*ops)[bitmap_first_set_bit (candidates)]->op, 0); 1499 fprintf (dump_file, " %d\n", nr_candidates); 1500 } 1501 1502 /* Build linearized sub-operand lists and the counting table. */ 1503 cvec.create (0); 1504 1505 hash_table<oecount_hasher> ctable (15); 1506 1507 /* ??? Macro arguments cannot have multi-argument template types in 1508 them. This typedef is needed to workaround that limitation. */ 1509 typedef vec<operand_entry_t> vec_operand_entry_t_heap; 1510 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ()); 1511 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) 1512 { 1513 gimple oedef; 1514 enum tree_code oecode; 1515 unsigned j; 1516 1517 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op); 1518 oecode = gimple_assign_rhs_code (oedef); 1519 linearize_expr_tree (&subops[i], oedef, 1520 associative_tree_code (oecode), false); 1521 1522 FOR_EACH_VEC_ELT (subops[i], j, oe1) 1523 { 1524 oecount c; 1525 int *slot; 1526 int idx; 1527 c.oecode = oecode; 1528 c.cnt = 1; 1529 c.id = next_oecount_id++; 1530 c.op = oe1->op; 1531 cvec.safe_push (c); 1532 idx = cvec.length () + 41; 1533 slot = ctable.find_slot (idx, INSERT); 1534 if (!*slot) 1535 { 1536 *slot = idx; 1537 } 1538 else 1539 { 1540 cvec.pop (); 1541 cvec[*slot - 42].cnt++; 1542 } 1543 } 1544 } 1545 1546 /* Sort the counting table. */ 1547 cvec.qsort (oecount_cmp); 1548 1549 if (dump_file && (dump_flags & TDF_DETAILS)) 1550 { 1551 oecount *c; 1552 fprintf (dump_file, "Candidates:\n"); 1553 FOR_EACH_VEC_ELT (cvec, j, c) 1554 { 1555 fprintf (dump_file, " %u %s: ", c->cnt, 1556 c->oecode == MULT_EXPR 1557 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?"); 1558 print_generic_expr (dump_file, c->op, 0); 1559 fprintf (dump_file, "\n"); 1560 } 1561 } 1562 1563 /* Process the (operand, code) pairs in order of most occurrence. */ 1564 candidates2 = sbitmap_alloc (length); 1565 while (!cvec.is_empty ()) 1566 { 1567 oecount *c = &cvec.last (); 1568 if (c->cnt < 2) 1569 break; 1570 1571 /* Now collect the operands in the outer chain that contain 1572 the common operand in their inner chain. */ 1573 bitmap_clear (candidates2); 1574 nr_candidates2 = 0; 1575 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) 1576 { 1577 gimple oedef; 1578 enum tree_code oecode; 1579 unsigned j; 1580 tree op = (*ops)[i]->op; 1581 1582 /* If we undistributed in this chain already this may be 1583 a constant. 
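	     An operand consumed by an earlier (operand, code) pair was
	     replaced by zero in the outer chain.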
*/ 1584 if (TREE_CODE (op) != SSA_NAME) 1585 continue; 1586 1587 oedef = SSA_NAME_DEF_STMT (op); 1588 oecode = gimple_assign_rhs_code (oedef); 1589 if (oecode != c->oecode) 1590 continue; 1591 1592 FOR_EACH_VEC_ELT (subops[i], j, oe1) 1593 { 1594 if (oe1->op == c->op) 1595 { 1596 bitmap_set_bit (candidates2, i); 1597 ++nr_candidates2; 1598 break; 1599 } 1600 } 1601 } 1602 1603 if (nr_candidates2 >= 2) 1604 { 1605 operand_entry_t oe1, oe2; 1606 gimple prod; 1607 int first = bitmap_first_set_bit (candidates2); 1608 1609 /* Build the new addition chain. */ 1610 oe1 = (*ops)[first]; 1611 if (dump_file && (dump_flags & TDF_DETAILS)) 1612 { 1613 fprintf (dump_file, "Building ("); 1614 print_generic_expr (dump_file, oe1->op, 0); 1615 } 1616 zero_one_operation (&oe1->op, c->oecode, c->op); 1617 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0) 1618 { 1619 gimple sum; 1620 oe2 = (*ops)[i]; 1621 if (dump_file && (dump_flags & TDF_DETAILS)) 1622 { 1623 fprintf (dump_file, " + "); 1624 print_generic_expr (dump_file, oe2->op, 0); 1625 } 1626 zero_one_operation (&oe2->op, c->oecode, c->op); 1627 sum = build_and_add_sum (TREE_TYPE (oe1->op), 1628 oe1->op, oe2->op, opcode); 1629 oe2->op = build_zero_cst (TREE_TYPE (oe2->op)); 1630 oe2->rank = 0; 1631 oe1->op = gimple_get_lhs (sum); 1632 } 1633 1634 /* Apply the multiplication/division. */ 1635 prod = build_and_add_sum (TREE_TYPE (oe1->op), 1636 oe1->op, c->op, c->oecode); 1637 if (dump_file && (dump_flags & TDF_DETAILS)) 1638 { 1639 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/"); 1640 print_generic_expr (dump_file, c->op, 0); 1641 fprintf (dump_file, "\n"); 1642 } 1643 1644 /* Record it in the addition chain and disable further 1645 undistribution with this op. */ 1646 oe1->op = gimple_assign_lhs (prod); 1647 oe1->rank = get_rank (oe1->op); 1648 subops[first].release (); 1649 1650 changed = true; 1651 } 1652 1653 cvec.pop (); 1654 } 1655 1656 for (i = 0; i < ops->length (); ++i) 1657 subops[i].release (); 1658 free (subops); 1659 cvec.release (); 1660 sbitmap_free (candidates); 1661 sbitmap_free (candidates2); 1662 1663 return changed; 1664 } 1665 1666 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison 1667 expression, examine the other OPS to see if any of them are comparisons 1668 of the same values, which we may be able to combine or eliminate. 1669 For example, we can rewrite (a < b) | (a == b) as (a <= b). */ 1670 1671 static bool 1672 eliminate_redundant_comparison (enum tree_code opcode, 1673 vec<operand_entry_t> *ops, 1674 unsigned int currindex, 1675 operand_entry_t curr) 1676 { 1677 tree op1, op2; 1678 enum tree_code lcode, rcode; 1679 gimple def1, def2; 1680 int i; 1681 operand_entry_t oe; 1682 1683 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) 1684 return false; 1685 1686 /* Check that CURR is a comparison. */ 1687 if (TREE_CODE (curr->op) != SSA_NAME) 1688 return false; 1689 def1 = SSA_NAME_DEF_STMT (curr->op); 1690 if (!is_gimple_assign (def1)) 1691 return false; 1692 lcode = gimple_assign_rhs_code (def1); 1693 if (TREE_CODE_CLASS (lcode) != tcc_comparison) 1694 return false; 1695 op1 = gimple_assign_rhs1 (def1); 1696 op2 = gimple_assign_rhs2 (def1); 1697 1698 /* Now look for a similar comparison in the remaining OPS. 
*/ 1699 for (i = currindex + 1; ops->iterate (i, &oe); i++) 1700 { 1701 tree t; 1702 1703 if (TREE_CODE (oe->op) != SSA_NAME) 1704 continue; 1705 def2 = SSA_NAME_DEF_STMT (oe->op); 1706 if (!is_gimple_assign (def2)) 1707 continue; 1708 rcode = gimple_assign_rhs_code (def2); 1709 if (TREE_CODE_CLASS (rcode) != tcc_comparison) 1710 continue; 1711 1712 /* If we got here, we have a match. See if we can combine the 1713 two comparisons. */ 1714 if (opcode == BIT_IOR_EXPR) 1715 t = maybe_fold_or_comparisons (lcode, op1, op2, 1716 rcode, gimple_assign_rhs1 (def2), 1717 gimple_assign_rhs2 (def2)); 1718 else 1719 t = maybe_fold_and_comparisons (lcode, op1, op2, 1720 rcode, gimple_assign_rhs1 (def2), 1721 gimple_assign_rhs2 (def2)); 1722 if (!t) 1723 continue; 1724 1725 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons 1726 always give us a boolean_type_node value back. If the original 1727 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type, 1728 we need to convert. */ 1729 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t))) 1730 t = fold_convert (TREE_TYPE (curr->op), t); 1731 1732 if (TREE_CODE (t) != INTEGER_CST 1733 && !operand_equal_p (t, curr->op, 0)) 1734 { 1735 enum tree_code subcode; 1736 tree newop1, newop2; 1737 if (!COMPARISON_CLASS_P (t)) 1738 continue; 1739 extract_ops_from_tree (t, &subcode, &newop1, &newop2); 1740 STRIP_USELESS_TYPE_CONVERSION (newop1); 1741 STRIP_USELESS_TYPE_CONVERSION (newop2); 1742 if (!is_gimple_val (newop1) || !is_gimple_val (newop2)) 1743 continue; 1744 } 1745 1746 if (dump_file && (dump_flags & TDF_DETAILS)) 1747 { 1748 fprintf (dump_file, "Equivalence: "); 1749 print_generic_expr (dump_file, curr->op, 0); 1750 fprintf (dump_file, " %s ", op_symbol_code (opcode)); 1751 print_generic_expr (dump_file, oe->op, 0); 1752 fprintf (dump_file, " -> "); 1753 print_generic_expr (dump_file, t, 0); 1754 fprintf (dump_file, "\n"); 1755 } 1756 1757 /* Now we can delete oe, as it has been subsumed by the new combined 1758 expression t. */ 1759 ops->ordered_remove (i); 1760 reassociate_stats.ops_eliminated ++; 1761 1762 /* If t is the same as curr->op, we're done. Otherwise we must 1763 replace curr->op with t. Special case is if we got a constant 1764 back, in which case we add it to the end instead of in place of 1765 the current entry. */ 1766 if (TREE_CODE (t) == INTEGER_CST) 1767 { 1768 ops->ordered_remove (currindex); 1769 add_to_ops_vec (ops, t); 1770 } 1771 else if (!operand_equal_p (t, curr->op, 0)) 1772 { 1773 gimple sum; 1774 enum tree_code subcode; 1775 tree newop1; 1776 tree newop2; 1777 gcc_assert (COMPARISON_CLASS_P (t)); 1778 extract_ops_from_tree (t, &subcode, &newop1, &newop2); 1779 STRIP_USELESS_TYPE_CONVERSION (newop1); 1780 STRIP_USELESS_TYPE_CONVERSION (newop2); 1781 gcc_checking_assert (is_gimple_val (newop1) 1782 && is_gimple_val (newop2)); 1783 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode); 1784 curr->op = gimple_get_lhs (sum); 1785 } 1786 return true; 1787 } 1788 1789 return false; 1790 } 1791 1792 /* Perform various identities and other optimizations on the list of 1793 operand entries, stored in OPS. The tree code for the binary 1794 operation between all the operands is OPCODE. 
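   For example, for a PLUS_EXPR list the trailing constants are folded into
   one, and pairs such as a and -a cancel to 0.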
*/ 1795 1796 static void 1797 optimize_ops_list (enum tree_code opcode, 1798 vec<operand_entry_t> *ops) 1799 { 1800 unsigned int length = ops->length (); 1801 unsigned int i; 1802 operand_entry_t oe; 1803 operand_entry_t oelast = NULL; 1804 bool iterate = false; 1805 1806 if (length == 1) 1807 return; 1808 1809 oelast = ops->last (); 1810 1811 /* If the last two are constants, pop the constants off, merge them 1812 and try the next two. */ 1813 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) 1814 { 1815 operand_entry_t oelm1 = (*ops)[length - 2]; 1816 1817 if (oelm1->rank == 0 1818 && is_gimple_min_invariant (oelm1->op) 1819 && useless_type_conversion_p (TREE_TYPE (oelm1->op), 1820 TREE_TYPE (oelast->op))) 1821 { 1822 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op), 1823 oelm1->op, oelast->op); 1824 1825 if (folded && is_gimple_min_invariant (folded)) 1826 { 1827 if (dump_file && (dump_flags & TDF_DETAILS)) 1828 fprintf (dump_file, "Merging constants\n"); 1829 1830 ops->pop (); 1831 ops->pop (); 1832 1833 add_to_ops_vec (ops, folded); 1834 reassociate_stats.constants_eliminated++; 1835 1836 optimize_ops_list (opcode, ops); 1837 return; 1838 } 1839 } 1840 } 1841 1842 eliminate_using_constants (opcode, ops); 1843 oelast = NULL; 1844 1845 for (i = 0; ops->iterate (i, &oe);) 1846 { 1847 bool done = false; 1848 1849 if (eliminate_not_pairs (opcode, ops, i, oe)) 1850 return; 1851 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast) 1852 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)) 1853 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe))) 1854 { 1855 if (done) 1856 return; 1857 iterate = true; 1858 oelast = NULL; 1859 continue; 1860 } 1861 oelast = oe; 1862 i++; 1863 } 1864 1865 length = ops->length (); 1866 oelast = ops->last (); 1867 1868 if (iterate) 1869 optimize_ops_list (opcode, ops); 1870 } 1871 1872 /* The following functions are subroutines to optimize_range_tests and allow 1873 it to try to change a logical combination of comparisons into a range 1874 test. 1875 1876 For example, both 1877 X == 2 || X == 5 || X == 3 || X == 4 1878 and 1879 X >= 2 && X <= 5 1880 are converted to 1881 (unsigned) (X - 2) <= 3 1882 1883 For more information see comments above fold_test_range in fold-const.c, 1884 this implementation is for GIMPLE. */ 1885 1886 struct range_entry 1887 { 1888 tree exp; 1889 tree low; 1890 tree high; 1891 bool in_p; 1892 bool strict_overflow_p; 1893 unsigned int idx, next; 1894 }; 1895 1896 /* This is similar to make_range in fold-const.c, but on top of 1897 GIMPLE instead of trees. If EXP is non-NULL, it should be 1898 an SSA_NAME and STMT argument is ignored, otherwise STMT 1899 argument should be a GIMPLE_COND. */ 1900 1901 static void 1902 init_range_entry (struct range_entry *r, tree exp, gimple stmt) 1903 { 1904 int in_p; 1905 tree low, high; 1906 bool is_bool, strict_overflow_p; 1907 1908 r->exp = NULL_TREE; 1909 r->in_p = false; 1910 r->strict_overflow_p = false; 1911 r->low = NULL_TREE; 1912 r->high = NULL_TREE; 1913 if (exp != NULL_TREE 1914 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))) 1915 return; 1916 1917 /* Start with simply saying "EXP != 0" and then look at the code of EXP 1918 and see if we can refine the range. Some of the cases below may not 1919 happen, but it doesn't seem worth worrying about this. We "continue" 1920 the outer loop when we've changed something; otherwise we "break" 1921 the switch, which will "break" the while. */ 1922 low = exp ? 
build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node; 1923 high = low; 1924 in_p = 0; 1925 strict_overflow_p = false; 1926 is_bool = false; 1927 if (exp == NULL_TREE) 1928 is_bool = true; 1929 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) 1930 { 1931 if (TYPE_UNSIGNED (TREE_TYPE (exp))) 1932 is_bool = true; 1933 else 1934 return; 1935 } 1936 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) 1937 is_bool = true; 1938 1939 while (1) 1940 { 1941 enum tree_code code; 1942 tree arg0, arg1, exp_type; 1943 tree nexp; 1944 location_t loc; 1945 1946 if (exp != NULL_TREE) 1947 { 1948 if (TREE_CODE (exp) != SSA_NAME 1949 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp)) 1950 break; 1951 1952 stmt = SSA_NAME_DEF_STMT (exp); 1953 if (!is_gimple_assign (stmt)) 1954 break; 1955 1956 code = gimple_assign_rhs_code (stmt); 1957 arg0 = gimple_assign_rhs1 (stmt); 1958 arg1 = gimple_assign_rhs2 (stmt); 1959 exp_type = TREE_TYPE (exp); 1960 } 1961 else 1962 { 1963 code = gimple_cond_code (stmt); 1964 arg0 = gimple_cond_lhs (stmt); 1965 arg1 = gimple_cond_rhs (stmt); 1966 exp_type = boolean_type_node; 1967 } 1968 1969 if (TREE_CODE (arg0) != SSA_NAME) 1970 break; 1971 loc = gimple_location (stmt); 1972 switch (code) 1973 { 1974 case BIT_NOT_EXPR: 1975 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE 1976 /* Ensure the range is either +[-,0], +[0,0], 1977 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or 1978 -[1,1]. If it is e.g. +[-,-] or -[-,-] 1979 or similar expression of unconditional true or 1980 false, it should not be negated. */ 1981 && ((high && integer_zerop (high)) 1982 || (low && integer_onep (low)))) 1983 { 1984 in_p = !in_p; 1985 exp = arg0; 1986 continue; 1987 } 1988 break; 1989 case SSA_NAME: 1990 exp = arg0; 1991 continue; 1992 CASE_CONVERT: 1993 if (is_bool) 1994 goto do_default; 1995 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) 1996 { 1997 if (TYPE_UNSIGNED (TREE_TYPE (arg0))) 1998 is_bool = true; 1999 else 2000 return; 2001 } 2002 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) 2003 is_bool = true; 2004 goto do_default; 2005 case EQ_EXPR: 2006 case NE_EXPR: 2007 case LT_EXPR: 2008 case LE_EXPR: 2009 case GE_EXPR: 2010 case GT_EXPR: 2011 is_bool = true; 2012 /* FALLTHRU */ 2013 default: 2014 if (!is_bool) 2015 return; 2016 do_default: 2017 nexp = make_range_step (loc, code, arg0, arg1, exp_type, 2018 &low, &high, &in_p, 2019 &strict_overflow_p); 2020 if (nexp != NULL_TREE) 2021 { 2022 exp = nexp; 2023 gcc_assert (TREE_CODE (exp) == SSA_NAME); 2024 continue; 2025 } 2026 break; 2027 } 2028 break; 2029 } 2030 if (is_bool) 2031 { 2032 r->exp = exp; 2033 r->in_p = in_p; 2034 r->low = low; 2035 r->high = high; 2036 r->strict_overflow_p = strict_overflow_p; 2037 } 2038 } 2039 2040 /* Comparison function for qsort. Sort entries 2041 without SSA_NAME exp first, then with SSA_NAMEs sorted 2042 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs 2043 by increasing ->low and if ->low is the same, by increasing 2044 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE 2045 maximum. */ 2046 2047 static int 2048 range_entry_cmp (const void *a, const void *b) 2049 { 2050 const struct range_entry *p = (const struct range_entry *) a; 2051 const struct range_entry *q = (const struct range_entry *) b; 2052 2053 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) 2054 { 2055 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 2056 { 2057 /* Group range_entries for the same SSA_NAME together. 
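That way the merging loop in optimize_range_tests only has to look at neighbouring entries once it reaches the first entry for a given SSA_NAME.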
*/ 2058 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) 2059 return -1; 2060 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) 2061 return 1; 2062 /* If ->low is different, NULL low goes first, then by 2063 ascending low. */ 2064 if (p->low != NULL_TREE) 2065 { 2066 if (q->low != NULL_TREE) 2067 { 2068 tree tem = fold_binary (LT_EXPR, boolean_type_node, 2069 p->low, q->low); 2070 if (tem && integer_onep (tem)) 2071 return -1; 2072 tem = fold_binary (GT_EXPR, boolean_type_node, 2073 p->low, q->low); 2074 if (tem && integer_onep (tem)) 2075 return 1; 2076 } 2077 else 2078 return 1; 2079 } 2080 else if (q->low != NULL_TREE) 2081 return -1; 2082 /* If ->high is different, NULL high goes last, before that by 2083 ascending high. */ 2084 if (p->high != NULL_TREE) 2085 { 2086 if (q->high != NULL_TREE) 2087 { 2088 tree tem = fold_binary (LT_EXPR, boolean_type_node, 2089 p->high, q->high); 2090 if (tem && integer_onep (tem)) 2091 return -1; 2092 tem = fold_binary (GT_EXPR, boolean_type_node, 2093 p->high, q->high); 2094 if (tem && integer_onep (tem)) 2095 return 1; 2096 } 2097 else 2098 return -1; 2099 } 2100 else if (q->high != NULL_TREE) 2101 return 1; 2102 /* If both ranges are the same, sort below by ascending idx. */ 2103 } 2104 else 2105 return 1; 2106 } 2107 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 2108 return -1; 2109 2110 if (p->idx < q->idx) 2111 return -1; 2112 else 2113 { 2114 gcc_checking_assert (p->idx > q->idx); 2115 return 1; 2116 } 2117 } 2118 2119 /* Helper routine of optimize_range_test. 2120 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for 2121 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, 2122 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE 2123 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to 2124 an array of COUNT pointers to other ranges. Return 2125 true if the range merge has been successful. 2126 If OPCODE is ERROR_MARK, this is called from within 2127 maybe_optimize_range_tests and is performing inter-bb range optimization. 2128 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in 2129 oe->rank. */ 2130 2131 static bool 2132 update_range_test (struct range_entry *range, struct range_entry *otherrange, 2133 struct range_entry **otherrangep, 2134 unsigned int count, enum tree_code opcode, 2135 vec<operand_entry_t> *ops, tree exp, gimple_seq seq, 2136 bool in_p, tree low, tree high, bool strict_overflow_p) 2137 { 2138 operand_entry_t oe = (*ops)[range->idx]; 2139 tree op = oe->op; 2140 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : 2141 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 2142 location_t loc = gimple_location (stmt); 2143 tree optype = op ? TREE_TYPE (op) : boolean_type_node; 2144 tree tem = build_range_check (loc, optype, unshare_expr (exp), 2145 in_p, low, high); 2146 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 2147 gimple_stmt_iterator gsi; 2148 unsigned int i, uid; 2149 2150 if (tem == NULL_TREE) 2151 return false; 2152 2153 /* If op is default def SSA_NAME, there is no place to insert the 2154 new comparison. Give up, unless we can use OP itself as the 2155 range test. 
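Using OP itself is only possible when OP has boolean (or one-bit unsigned) type, the merged test degenerates to OP itself or OP == 1, and the tests are combined with BIT_AND_EXPR rather than BIT_IOR_EXPR.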
*/ 2156 if (op && SSA_NAME_IS_DEFAULT_DEF (op)) 2157 { 2158 if (op == range->exp 2159 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype)) 2160 || TREE_CODE (optype) == BOOLEAN_TYPE) 2161 && (op == tem 2162 || (TREE_CODE (tem) == EQ_EXPR 2163 && TREE_OPERAND (tem, 0) == op 2164 && integer_onep (TREE_OPERAND (tem, 1)))) 2165 && opcode != BIT_IOR_EXPR 2166 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR)) 2167 { 2168 stmt = NULL; 2169 tem = op; 2170 } 2171 else 2172 return false; 2173 } 2174 2175 if (strict_overflow_p && issue_strict_overflow_warning (wc)) 2176 warning_at (loc, OPT_Wstrict_overflow, 2177 "assuming signed overflow does not occur " 2178 "when simplifying range test"); 2179 2180 if (dump_file && (dump_flags & TDF_DETAILS)) 2181 { 2182 struct range_entry *r; 2183 fprintf (dump_file, "Optimizing range tests "); 2184 print_generic_expr (dump_file, range->exp, 0); 2185 fprintf (dump_file, " %c[", range->in_p ? '+' : '-'); 2186 print_generic_expr (dump_file, range->low, 0); 2187 fprintf (dump_file, ", "); 2188 print_generic_expr (dump_file, range->high, 0); 2189 fprintf (dump_file, "]"); 2190 for (i = 0; i < count; i++) 2191 { 2192 if (otherrange) 2193 r = otherrange + i; 2194 else 2195 r = otherrangep[i]; 2196 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-'); 2197 print_generic_expr (dump_file, r->low, 0); 2198 fprintf (dump_file, ", "); 2199 print_generic_expr (dump_file, r->high, 0); 2200 fprintf (dump_file, "]"); 2201 } 2202 fprintf (dump_file, "\n into "); 2203 print_generic_expr (dump_file, tem, 0); 2204 fprintf (dump_file, "\n"); 2205 } 2206 2207 if (opcode == BIT_IOR_EXPR 2208 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 2209 tem = invert_truthvalue_loc (loc, tem); 2210 2211 tem = fold_convert_loc (loc, optype, tem); 2212 if (stmt) 2213 { 2214 gsi = gsi_for_stmt (stmt); 2215 uid = gimple_uid (stmt); 2216 } 2217 else 2218 { 2219 gsi = gsi_none (); 2220 uid = 0; 2221 } 2222 if (stmt == NULL) 2223 gcc_checking_assert (tem == op); 2224 /* In rare cases range->exp can be equal to lhs of stmt. 2225 In that case we have to insert after the stmt rather then before 2226 it. If stmt is a PHI, insert it at the start of the basic block. 
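The insertion has to follow STMT in that case because the new range check is built from RANGE->exp, i.e. from the value STMT defines.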
*/ 2227 else if (op != range->exp) 2228 { 2229 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 2230 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true, 2231 GSI_SAME_STMT); 2232 gsi_prev (&gsi); 2233 } 2234 else if (gimple_code (stmt) != GIMPLE_PHI) 2235 { 2236 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); 2237 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false, 2238 GSI_CONTINUE_LINKING); 2239 } 2240 else 2241 { 2242 gsi = gsi_after_labels (gimple_bb (stmt)); 2243 if (!gsi_end_p (gsi)) 2244 uid = gimple_uid (gsi_stmt (gsi)); 2245 else 2246 { 2247 gsi = gsi_start_bb (gimple_bb (stmt)); 2248 uid = 1; 2249 while (!gsi_end_p (gsi)) 2250 { 2251 uid = gimple_uid (gsi_stmt (gsi)); 2252 gsi_next (&gsi); 2253 } 2254 } 2255 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 2256 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true, 2257 GSI_SAME_STMT); 2258 if (gsi_end_p (gsi)) 2259 gsi = gsi_last_bb (gimple_bb (stmt)); 2260 else 2261 gsi_prev (&gsi); 2262 } 2263 for (; !gsi_end_p (gsi); gsi_prev (&gsi)) 2264 if (gimple_uid (gsi_stmt (gsi))) 2265 break; 2266 else 2267 gimple_set_uid (gsi_stmt (gsi), uid); 2268 2269 oe->op = tem; 2270 range->exp = exp; 2271 range->low = low; 2272 range->high = high; 2273 range->in_p = in_p; 2274 range->strict_overflow_p = false; 2275 2276 for (i = 0; i < count; i++) 2277 { 2278 if (otherrange) 2279 range = otherrange + i; 2280 else 2281 range = otherrangep[i]; 2282 oe = (*ops)[range->idx]; 2283 /* Now change all the other range test immediate uses, so that 2284 those tests will be optimized away. */ 2285 if (opcode == ERROR_MARK) 2286 { 2287 if (oe->op) 2288 oe->op = build_int_cst (TREE_TYPE (oe->op), 2289 oe->rank == BIT_IOR_EXPR ? 0 : 1); 2290 else 2291 oe->op = (oe->rank == BIT_IOR_EXPR 2292 ? boolean_false_node : boolean_true_node); 2293 } 2294 else 2295 oe->op = error_mark_node; 2296 range->exp = NULL_TREE; 2297 } 2298 return true; 2299 } 2300 2301 /* Optimize X == CST1 || X == CST2 2302 if popcount (CST1 ^ CST2) == 1 into 2303 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). 2304 Similarly for ranges. E.g. 2305 X != 2 && X != 3 && X != 10 && X != 11 2306 will be transformed by the previous optimization into 2307 !((X - 2U) <= 1U || (X - 10U) <= 1U) 2308 and this loop can transform that into 2309 !(((X & ~8) - 2U) <= 1U). */ 2310 2311 static bool 2312 optimize_range_tests_xor (enum tree_code opcode, tree type, 2313 tree lowi, tree lowj, tree highi, tree highj, 2314 vec<operand_entry_t> *ops, 2315 struct range_entry *rangei, 2316 struct range_entry *rangej) 2317 { 2318 tree lowxor, highxor, tem, exp; 2319 /* Check lowi ^ lowj == highi ^ highj and 2320 popcount (lowi ^ lowj) == 1. 
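E.g. the tests X == 43 and X == 47 have lowi == highi == 43 and lowj == highj == 47; 43 ^ 47 == 4 has a single bit set, so the pair is merged into (X & ~4) == 43.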
*/ 2321 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); 2322 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) 2323 return false; 2324 if (!integer_pow2p (lowxor)) 2325 return false; 2326 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); 2327 if (!tree_int_cst_equal (lowxor, highxor)) 2328 return false; 2329 2330 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); 2331 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem); 2332 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); 2333 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); 2334 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp, 2335 NULL, rangei->in_p, lowj, highj, 2336 rangei->strict_overflow_p 2337 || rangej->strict_overflow_p)) 2338 return true; 2339 return false; 2340 } 2341 2342 /* Optimize X == CST1 || X == CST2 2343 if popcount (CST2 - CST1) == 1 into 2344 ((X - CST1) & ~(CST2 - CST1)) == 0. 2345 Similarly for ranges. E.g. 2346 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46 2347 || X == 75 || X == 45 2348 will be transformed by the previous optimization into 2349 (X - 43U) <= 3U || (X - 75U) <= 3U 2350 and this loop can transform that into 2351 ((X - 43U) & ~(75U - 43U)) <= 3U. */ 2352 static bool 2353 optimize_range_tests_diff (enum tree_code opcode, tree type, 2354 tree lowi, tree lowj, tree highi, tree highj, 2355 vec<operand_entry_t> *ops, 2356 struct range_entry *rangei, 2357 struct range_entry *rangej) 2358 { 2359 tree tem1, tem2, mask; 2360 /* Check highi - lowi == highj - lowj. */ 2361 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi); 2362 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 2363 return false; 2364 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj); 2365 if (!tree_int_cst_equal (tem1, tem2)) 2366 return false; 2367 /* Check popcount (lowj - lowi) == 1. */ 2368 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi); 2369 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 2370 return false; 2371 if (!integer_pow2p (tem1)) 2372 return false; 2373 2374 type = unsigned_type_for (type); 2375 tem1 = fold_convert (type, tem1); 2376 tem2 = fold_convert (type, tem2); 2377 lowi = fold_convert (type, lowi); 2378 mask = fold_build1 (BIT_NOT_EXPR, type, tem1); 2379 tem1 = fold_binary (MINUS_EXPR, type, 2380 fold_convert (type, rangei->exp), lowi); 2381 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask); 2382 lowj = build_int_cst (type, 0); 2383 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1, 2384 NULL, rangei->in_p, lowj, tem2, 2385 rangei->strict_overflow_p 2386 || rangej->strict_overflow_p)) 2387 return true; 2388 return false; 2389 } 2390 2391 /* It does some common checks for function optimize_range_tests_xor and 2392 optimize_range_tests_diff. 2393 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor. 2394 Else it calls optimize_range_tests_diff. 
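The common checks are that both entries test the same integral expression, neither has in_p set, the first entry has a known high bound, the second a known low bound, and the second range lies entirely above the first (lowj > highi).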
*/ 2395 2396 static bool 2397 optimize_range_tests_1 (enum tree_code opcode, int first, int length, 2398 bool optimize_xor, vec<operand_entry_t> *ops, 2399 struct range_entry *ranges) 2400 { 2401 int i, j; 2402 bool any_changes = false; 2403 for (i = first; i < length; i++) 2404 { 2405 tree lowi, highi, lowj, highj, type, tem; 2406 2407 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 2408 continue; 2409 type = TREE_TYPE (ranges[i].exp); 2410 if (!INTEGRAL_TYPE_P (type)) 2411 continue; 2412 lowi = ranges[i].low; 2413 if (lowi == NULL_TREE) 2414 lowi = TYPE_MIN_VALUE (type); 2415 highi = ranges[i].high; 2416 if (highi == NULL_TREE) 2417 continue; 2418 for (j = i + 1; j < length && j < i + 64; j++) 2419 { 2420 bool changes; 2421 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p) 2422 continue; 2423 lowj = ranges[j].low; 2424 if (lowj == NULL_TREE) 2425 continue; 2426 highj = ranges[j].high; 2427 if (highj == NULL_TREE) 2428 highj = TYPE_MAX_VALUE (type); 2429 /* Check lowj > highi. */ 2430 tem = fold_binary (GT_EXPR, boolean_type_node, 2431 lowj, highi); 2432 if (tem == NULL_TREE || !integer_onep (tem)) 2433 continue; 2434 if (optimize_xor) 2435 changes = optimize_range_tests_xor (opcode, type, lowi, lowj, 2436 highi, highj, ops, 2437 ranges + i, ranges + j); 2438 else 2439 changes = optimize_range_tests_diff (opcode, type, lowi, lowj, 2440 highi, highj, ops, 2441 ranges + i, ranges + j); 2442 if (changes) 2443 { 2444 any_changes = true; 2445 break; 2446 } 2447 } 2448 } 2449 return any_changes; 2450 } 2451 2452 /* Helper function of optimize_range_tests_to_bit_test. Handle a single 2453 range, EXP, LOW, HIGH, compute bit mask of bits to test and return 2454 EXP on success, NULL otherwise. */ 2455 2456 static tree 2457 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high, 2458 wide_int *mask, tree *totallowp) 2459 { 2460 tree tem = int_const_binop (MINUS_EXPR, high, low); 2461 if (tem == NULL_TREE 2462 || TREE_CODE (tem) != INTEGER_CST 2463 || TREE_OVERFLOW (tem) 2464 || tree_int_cst_sgn (tem) == -1 2465 || compare_tree_int (tem, prec) != -1) 2466 return NULL_TREE; 2467 2468 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1; 2469 *mask = wi::shifted_mask (0, max, false, prec); 2470 if (TREE_CODE (exp) == BIT_AND_EXPR 2471 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) 2472 { 2473 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1)); 2474 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp))); 2475 if (wi::popcount (msk) == 1 2476 && wi::ltu_p (msk, prec - max)) 2477 { 2478 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec); 2479 max += msk.to_uhwi (); 2480 exp = TREE_OPERAND (exp, 0); 2481 if (integer_zerop (low) 2482 && TREE_CODE (exp) == PLUS_EXPR 2483 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) 2484 { 2485 tree ret = TREE_OPERAND (exp, 0); 2486 STRIP_NOPS (ret); 2487 widest_int bias 2488 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)), 2489 TYPE_PRECISION (TREE_TYPE (low)))); 2490 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias); 2491 if (totallowp) 2492 { 2493 *totallowp = tbias; 2494 return ret; 2495 } 2496 else if (!tree_int_cst_lt (totallow, tbias)) 2497 return NULL_TREE; 2498 bias = wi::to_widest (tbias); 2499 bias -= wi::to_widest (totallow); 2500 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max)) 2501 { 2502 *mask = wi::lshift (*mask, bias); 2503 return ret; 2504 } 2505 } 2506 } 2507 } 2508 if (totallowp) 2509 return exp; 2510 if (!tree_int_cst_lt (totallow, low)) 2511 return exp; 2512 tem = 
int_const_binop (MINUS_EXPR, low, totallow); 2513 if (tem == NULL_TREE 2514 || TREE_CODE (tem) != INTEGER_CST 2515 || TREE_OVERFLOW (tem) 2516 || compare_tree_int (tem, prec - max) == 1) 2517 return NULL_TREE; 2518 2519 *mask = wi::lshift (*mask, wi::to_widest (tem)); 2520 return exp; 2521 } 2522 2523 /* Attempt to optimize small range tests using bit test. 2524 E.g. 2525 X != 43 && X != 76 && X != 44 && X != 78 && X != 49 2526 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82 2527 has been by earlier optimizations optimized into: 2528 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82 2529 As all the 43 through 82 range is less than 64 numbers, 2530 for 64-bit word targets optimize that into: 2531 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */ 2532 2533 static bool 2534 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length, 2535 vec<operand_entry_t> *ops, 2536 struct range_entry *ranges) 2537 { 2538 int i, j; 2539 bool any_changes = false; 2540 int prec = GET_MODE_BITSIZE (word_mode); 2541 auto_vec<struct range_entry *, 64> candidates; 2542 2543 for (i = first; i < length - 2; i++) 2544 { 2545 tree lowi, highi, lowj, highj, type; 2546 2547 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 2548 continue; 2549 type = TREE_TYPE (ranges[i].exp); 2550 if (!INTEGRAL_TYPE_P (type)) 2551 continue; 2552 lowi = ranges[i].low; 2553 if (lowi == NULL_TREE) 2554 lowi = TYPE_MIN_VALUE (type); 2555 highi = ranges[i].high; 2556 if (highi == NULL_TREE) 2557 continue; 2558 wide_int mask; 2559 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi, 2560 highi, &mask, &lowi); 2561 if (exp == NULL_TREE) 2562 continue; 2563 bool strict_overflow_p = ranges[i].strict_overflow_p; 2564 candidates.truncate (0); 2565 int end = MIN (i + 64, length); 2566 for (j = i + 1; j < end; j++) 2567 { 2568 tree exp2; 2569 if (ranges[j].exp == NULL_TREE || ranges[j].in_p) 2570 continue; 2571 if (ranges[j].exp == exp) 2572 ; 2573 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR) 2574 { 2575 exp2 = TREE_OPERAND (ranges[j].exp, 0); 2576 if (exp2 == exp) 2577 ; 2578 else if (TREE_CODE (exp2) == PLUS_EXPR) 2579 { 2580 exp2 = TREE_OPERAND (exp2, 0); 2581 STRIP_NOPS (exp2); 2582 if (exp2 != exp) 2583 continue; 2584 } 2585 else 2586 continue; 2587 } 2588 else 2589 continue; 2590 lowj = ranges[j].low; 2591 if (lowj == NULL_TREE) 2592 continue; 2593 highj = ranges[j].high; 2594 if (highj == NULL_TREE) 2595 highj = TYPE_MAX_VALUE (type); 2596 wide_int mask2; 2597 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj, 2598 highj, &mask2, NULL); 2599 if (exp2 != exp) 2600 continue; 2601 mask |= mask2; 2602 strict_overflow_p |= ranges[j].strict_overflow_p; 2603 candidates.safe_push (&ranges[j]); 2604 } 2605 2606 /* If we need otherwise 3 or more comparisons, use a bit test. */ 2607 if (candidates.length () >= 2) 2608 { 2609 tree high = wide_int_to_tree (TREE_TYPE (lowi), 2610 wi::to_widest (lowi) 2611 + prec - 1 - wi::clz (mask)); 2612 operand_entry_t oe = (*ops)[ranges[i].idx]; 2613 tree op = oe->op; 2614 gimple stmt = op ? SSA_NAME_DEF_STMT (op) 2615 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 2616 location_t loc = gimple_location (stmt); 2617 tree optype = op ? TREE_TYPE (op) : boolean_type_node; 2618 2619 /* See if it isn't cheaper to pretend the minimum value of the 2620 range is 0, if maximum value is small enough. 2621 We can avoid then subtraction of the minimum value, but the 2622 mask constant could be perhaps more expensive. 
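The decision below is made on RTX costs: the cost of (x - lowi) & mask is compared with the cost of x & (mask << lowi), and if the single AND with the shifted mask is cheaper, the subtraction is dropped and zero used as the low bound.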
*/ 2623 if (compare_tree_int (lowi, 0) > 0 2624 && compare_tree_int (high, prec) < 0) 2625 { 2626 int cost_diff; 2627 HOST_WIDE_INT m = tree_to_uhwi (lowi); 2628 rtx reg = gen_raw_REG (word_mode, 10000); 2629 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt)); 2630 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg, 2631 GEN_INT (-m)), speed_p); 2632 rtx r = immed_wide_int_const (mask, word_mode); 2633 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r), 2634 speed_p); 2635 r = immed_wide_int_const (wi::lshift (mask, m), word_mode); 2636 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r), 2637 speed_p); 2638 if (cost_diff > 0) 2639 { 2640 mask = wi::lshift (mask, m); 2641 lowi = build_zero_cst (TREE_TYPE (lowi)); 2642 } 2643 } 2644 2645 tree tem = build_range_check (loc, optype, unshare_expr (exp), 2646 false, lowi, high); 2647 if (tem == NULL_TREE || is_gimple_val (tem)) 2648 continue; 2649 tree etype = unsigned_type_for (TREE_TYPE (exp)); 2650 exp = fold_build2_loc (loc, MINUS_EXPR, etype, 2651 fold_convert_loc (loc, etype, exp), 2652 fold_convert_loc (loc, etype, lowi)); 2653 exp = fold_convert_loc (loc, integer_type_node, exp); 2654 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1); 2655 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type, 2656 build_int_cst (word_type, 1), exp); 2657 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp, 2658 wide_int_to_tree (word_type, mask)); 2659 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp, 2660 build_zero_cst (word_type)); 2661 if (is_gimple_val (exp)) 2662 continue; 2663 2664 /* The shift might have undefined behavior if TEM is true, 2665 but reassociate_bb isn't prepared to have basic blocks 2666 split when it is running. So, temporarily emit a code 2667 with BIT_IOR_EXPR instead of &&, and fix it up in 2668 branch_fixup. */ 2669 gimple_seq seq; 2670 tem = force_gimple_operand (tem, &seq, true, NULL_TREE); 2671 gcc_assert (TREE_CODE (tem) == SSA_NAME); 2672 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true); 2673 gimple_seq seq2; 2674 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE); 2675 gimple_seq_add_seq_without_update (&seq, seq2); 2676 gcc_assert (TREE_CODE (exp) == SSA_NAME); 2677 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true); 2678 gimple g = gimple_build_assign (make_ssa_name (optype), 2679 BIT_IOR_EXPR, tem, exp); 2680 gimple_set_location (g, loc); 2681 gimple_seq_add_stmt_without_update (&seq, g); 2682 exp = gimple_assign_lhs (g); 2683 tree val = build_zero_cst (optype); 2684 if (update_range_test (&ranges[i], NULL, candidates.address (), 2685 candidates.length (), opcode, ops, exp, 2686 seq, false, val, val, strict_overflow_p)) 2687 { 2688 any_changes = true; 2689 reassoc_branch_fixups.safe_push (tem); 2690 } 2691 else 2692 gimple_seq_discard (seq); 2693 } 2694 } 2695 return any_changes; 2696 } 2697 2698 /* Optimize range tests, similarly how fold_range_test optimizes 2699 it on trees. The tree code for the binary 2700 operation between all the operands is OPCODE. 2701 If OPCODE is ERROR_MARK, optimize_range_tests is called from within 2702 maybe_optimize_range_tests for inter-bb range optimization. 2703 In that case if oe->op is NULL, oe->id is bb->index whose 2704 GIMPLE_COND is && or ||ed into the test, and oe->rank says 2705 the actual opcode. 
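The entries are sorted with range_entry_cmp so tests of the same SSA_NAME are adjacent, consecutive ranges are then merged through merge_ranges and update_range_test, and finally the XOR, difference and bit-test transformations above are tried on whatever remains; for BIT_IOR_EXPR each range is inverted up front and the emitted test inverted back in update_range_test.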
*/ 2706 2707 static bool 2708 optimize_range_tests (enum tree_code opcode, 2709 vec<operand_entry_t> *ops) 2710 { 2711 unsigned int length = ops->length (), i, j, first; 2712 operand_entry_t oe; 2713 struct range_entry *ranges; 2714 bool any_changes = false; 2715 2716 if (length == 1) 2717 return false; 2718 2719 ranges = XNEWVEC (struct range_entry, length); 2720 for (i = 0; i < length; i++) 2721 { 2722 oe = (*ops)[i]; 2723 ranges[i].idx = i; 2724 init_range_entry (ranges + i, oe->op, 2725 oe->op ? NULL : 2726 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id))); 2727 /* For | invert it now, we will invert it again before emitting 2728 the optimized expression. */ 2729 if (opcode == BIT_IOR_EXPR 2730 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 2731 ranges[i].in_p = !ranges[i].in_p; 2732 } 2733 2734 qsort (ranges, length, sizeof (*ranges), range_entry_cmp); 2735 for (i = 0; i < length; i++) 2736 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) 2737 break; 2738 2739 /* Try to merge ranges. */ 2740 for (first = i; i < length; i++) 2741 { 2742 tree low = ranges[i].low; 2743 tree high = ranges[i].high; 2744 int in_p = ranges[i].in_p; 2745 bool strict_overflow_p = ranges[i].strict_overflow_p; 2746 int update_fail_count = 0; 2747 2748 for (j = i + 1; j < length; j++) 2749 { 2750 if (ranges[i].exp != ranges[j].exp) 2751 break; 2752 if (!merge_ranges (&in_p, &low, &high, in_p, low, high, 2753 ranges[j].in_p, ranges[j].low, ranges[j].high)) 2754 break; 2755 strict_overflow_p |= ranges[j].strict_overflow_p; 2756 } 2757 2758 if (j == i + 1) 2759 continue; 2760 2761 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1, 2762 opcode, ops, ranges[i].exp, NULL, in_p, 2763 low, high, strict_overflow_p)) 2764 { 2765 i = j - 1; 2766 any_changes = true; 2767 } 2768 /* Avoid quadratic complexity if all merge_ranges calls would succeed, 2769 while update_range_test would fail. */ 2770 else if (update_fail_count == 64) 2771 i = j - 1; 2772 else 2773 ++update_fail_count; 2774 } 2775 2776 any_changes |= optimize_range_tests_1 (opcode, first, length, true, 2777 ops, ranges); 2778 2779 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2) 2780 any_changes |= optimize_range_tests_1 (opcode, first, length, false, 2781 ops, ranges); 2782 if (lshift_cheap_p (optimize_function_for_speed_p (cfun))) 2783 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length, 2784 ops, ranges); 2785 2786 if (any_changes && opcode != ERROR_MARK) 2787 { 2788 j = 0; 2789 FOR_EACH_VEC_ELT (*ops, i, oe) 2790 { 2791 if (oe->op == error_mark_node) 2792 continue; 2793 else if (i != j) 2794 (*ops)[j] = oe; 2795 j++; 2796 } 2797 ops->truncate (j); 2798 } 2799 2800 XDELETEVEC (ranges); 2801 return any_changes; 2802 } 2803 2804 /* Return true if STMT is a cast like: 2805 <bb N>: 2806 ... 2807 _123 = (int) _234; 2808 2809 <bb M>: 2810 # _345 = PHI <_123(N), 1(...), 1(...)> 2811 where _234 has bool type, _123 has single use and 2812 bb N has a single successor M. This is commonly used in 2813 the last block of a range test. 
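In addition the single use of _123 must be the PHI in bb M, and _234 must be defined inside the loop containing STMT.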
*/ 2814 2815 static bool 2816 final_range_test_p (gimple stmt) 2817 { 2818 basic_block bb, rhs_bb; 2819 edge e; 2820 tree lhs, rhs; 2821 use_operand_p use_p; 2822 gimple use_stmt; 2823 2824 if (!gimple_assign_cast_p (stmt)) 2825 return false; 2826 bb = gimple_bb (stmt); 2827 if (!single_succ_p (bb)) 2828 return false; 2829 e = single_succ_edge (bb); 2830 if (e->flags & EDGE_COMPLEX) 2831 return false; 2832 2833 lhs = gimple_assign_lhs (stmt); 2834 rhs = gimple_assign_rhs1 (stmt); 2835 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 2836 || TREE_CODE (rhs) != SSA_NAME 2837 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE) 2838 return false; 2839 2840 /* Test whether lhs is consumed only by a PHI in the only successor bb. */ 2841 if (!single_imm_use (lhs, &use_p, &use_stmt)) 2842 return false; 2843 2844 if (gimple_code (use_stmt) != GIMPLE_PHI 2845 || gimple_bb (use_stmt) != e->dest) 2846 return false; 2847 2848 /* And that the rhs is defined in the same loop. */ 2849 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)); 2850 if (rhs_bb == NULL 2851 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb)) 2852 return false; 2853 2854 return true; 2855 } 2856 2857 /* Return true if BB is suitable basic block for inter-bb range test 2858 optimization. If BACKWARD is true, BB should be the only predecessor 2859 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine, 2860 or compared with to find a common basic block to which all conditions 2861 branch to if true resp. false. If BACKWARD is false, TEST_BB should 2862 be the only predecessor of BB. */ 2863 2864 static bool 2865 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb, 2866 bool backward) 2867 { 2868 edge_iterator ei, ei2; 2869 edge e, e2; 2870 gimple stmt; 2871 gphi_iterator gsi; 2872 bool other_edge_seen = false; 2873 bool is_cond; 2874 2875 if (test_bb == bb) 2876 return false; 2877 /* Check last stmt first. */ 2878 stmt = last_stmt (bb); 2879 if (stmt == NULL 2880 || (gimple_code (stmt) != GIMPLE_COND 2881 && (backward || !final_range_test_p (stmt))) 2882 || gimple_visited_p (stmt) 2883 || stmt_could_throw_p (stmt) 2884 || *other_bb == bb) 2885 return false; 2886 is_cond = gimple_code (stmt) == GIMPLE_COND; 2887 if (is_cond) 2888 { 2889 /* If last stmt is GIMPLE_COND, verify that one of the succ edges 2890 goes to the next bb (if BACKWARD, it is TEST_BB), and the other 2891 to *OTHER_BB (if not set yet, try to find it out). */ 2892 if (EDGE_COUNT (bb->succs) != 2) 2893 return false; 2894 FOR_EACH_EDGE (e, ei, bb->succs) 2895 { 2896 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 2897 return false; 2898 if (e->dest == test_bb) 2899 { 2900 if (backward) 2901 continue; 2902 else 2903 return false; 2904 } 2905 if (e->dest == bb) 2906 return false; 2907 if (*other_bb == NULL) 2908 { 2909 FOR_EACH_EDGE (e2, ei2, test_bb->succs) 2910 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 2911 return false; 2912 else if (e->dest == e2->dest) 2913 *other_bb = e->dest; 2914 if (*other_bb == NULL) 2915 return false; 2916 } 2917 if (e->dest == *other_bb) 2918 other_edge_seen = true; 2919 else if (backward) 2920 return false; 2921 } 2922 if (*other_bb == NULL || !other_edge_seen) 2923 return false; 2924 } 2925 else if (single_succ (bb) != *other_bb) 2926 return false; 2927 2928 /* Now check all PHIs of *OTHER_BB. 
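Each PHI argument coming from BB must be equal to the one coming from TEST_BB; the only exception is when one of the two blocks ends in a cast satisfying final_range_test_p, in which case that block's argument must be the cast's lhs and the other block's argument a constant 0 or 1.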
*/ 2929 e = find_edge (bb, *other_bb); 2930 e2 = find_edge (test_bb, *other_bb); 2931 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) 2932 { 2933 gphi *phi = gsi.phi (); 2934 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments 2935 corresponding to BB and TEST_BB predecessor must be the same. */ 2936 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx), 2937 gimple_phi_arg_def (phi, e2->dest_idx), 0)) 2938 { 2939 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND, 2940 one of the PHIs should have the lhs of the last stmt in 2941 that block as PHI arg and that PHI should have 0 or 1 2942 corresponding to it in all other range test basic blocks 2943 considered. */ 2944 if (!is_cond) 2945 { 2946 if (gimple_phi_arg_def (phi, e->dest_idx) 2947 == gimple_assign_lhs (stmt) 2948 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx)) 2949 || integer_onep (gimple_phi_arg_def (phi, 2950 e2->dest_idx)))) 2951 continue; 2952 } 2953 else 2954 { 2955 gimple test_last = last_stmt (test_bb); 2956 if (gimple_code (test_last) != GIMPLE_COND 2957 && gimple_phi_arg_def (phi, e2->dest_idx) 2958 == gimple_assign_lhs (test_last) 2959 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx)) 2960 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx)))) 2961 continue; 2962 } 2963 2964 return false; 2965 } 2966 } 2967 return true; 2968 } 2969 2970 /* Return true if BB doesn't have side-effects that would disallow 2971 range test optimization, all SSA_NAMEs set in the bb are consumed 2972 in the bb and there are no PHIs. */ 2973 2974 static bool 2975 no_side_effect_bb (basic_block bb) 2976 { 2977 gimple_stmt_iterator gsi; 2978 gimple last; 2979 2980 if (!gimple_seq_empty_p (phi_nodes (bb))) 2981 return false; 2982 last = last_stmt (bb); 2983 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 2984 { 2985 gimple stmt = gsi_stmt (gsi); 2986 tree lhs; 2987 imm_use_iterator imm_iter; 2988 use_operand_p use_p; 2989 2990 if (is_gimple_debug (stmt)) 2991 continue; 2992 if (gimple_has_side_effects (stmt)) 2993 return false; 2994 if (stmt == last) 2995 return true; 2996 if (!is_gimple_assign (stmt)) 2997 return false; 2998 lhs = gimple_assign_lhs (stmt); 2999 if (TREE_CODE (lhs) != SSA_NAME) 3000 return false; 3001 if (gimple_assign_rhs_could_trap_p (stmt)) 3002 return false; 3003 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) 3004 { 3005 gimple use_stmt = USE_STMT (use_p); 3006 if (is_gimple_debug (use_stmt)) 3007 continue; 3008 if (gimple_bb (use_stmt) != bb) 3009 return false; 3010 } 3011 } 3012 return false; 3013 } 3014 3015 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, 3016 return true and fill in *OPS recursively. 
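E.g. given _9 = _3 | _4; _10 = _9 | _5; where _3, _4 and _5 are single-use names not themselves defined by a reassociable BIT_IOR_EXPR, get_ops (_10, BIT_IOR_EXPR, ...) records _3, _4 and _5 as the leaf operands.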
*/ 3017 3018 static bool 3019 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops, 3020 struct loop *loop) 3021 { 3022 gimple stmt = SSA_NAME_DEF_STMT (var); 3023 tree rhs[2]; 3024 int i; 3025 3026 if (!is_reassociable_op (stmt, code, loop)) 3027 return false; 3028 3029 rhs[0] = gimple_assign_rhs1 (stmt); 3030 rhs[1] = gimple_assign_rhs2 (stmt); 3031 gimple_set_visited (stmt, true); 3032 for (i = 0; i < 2; i++) 3033 if (TREE_CODE (rhs[i]) == SSA_NAME 3034 && !get_ops (rhs[i], code, ops, loop) 3035 && has_single_use (rhs[i])) 3036 { 3037 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); 3038 3039 oe->op = rhs[i]; 3040 oe->rank = code; 3041 oe->id = 0; 3042 oe->count = 1; 3043 ops->safe_push (oe); 3044 } 3045 return true; 3046 } 3047 3048 /* Find the ops that were added by get_ops starting from VAR, see if 3049 they were changed during update_range_test and if yes, create new 3050 stmts. */ 3051 3052 static tree 3053 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops, 3054 unsigned int *pidx, struct loop *loop) 3055 { 3056 gimple stmt = SSA_NAME_DEF_STMT (var); 3057 tree rhs[4]; 3058 int i; 3059 3060 if (!is_reassociable_op (stmt, code, loop)) 3061 return NULL; 3062 3063 rhs[0] = gimple_assign_rhs1 (stmt); 3064 rhs[1] = gimple_assign_rhs2 (stmt); 3065 rhs[2] = rhs[0]; 3066 rhs[3] = rhs[1]; 3067 for (i = 0; i < 2; i++) 3068 if (TREE_CODE (rhs[i]) == SSA_NAME) 3069 { 3070 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop); 3071 if (rhs[2 + i] == NULL_TREE) 3072 { 3073 if (has_single_use (rhs[i])) 3074 rhs[2 + i] = ops[(*pidx)++]->op; 3075 else 3076 rhs[2 + i] = rhs[i]; 3077 } 3078 } 3079 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1]) 3080 && (rhs[2] != rhs[1] || rhs[3] != rhs[0])) 3081 { 3082 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 3083 var = make_ssa_name (TREE_TYPE (var)); 3084 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt), 3085 rhs[2], rhs[3]); 3086 gimple_set_uid (g, gimple_uid (stmt)); 3087 gimple_set_visited (g, true); 3088 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3089 } 3090 return var; 3091 } 3092 3093 /* Structure to track the initial value passed to get_ops and 3094 the range in the ops vector for each basic block. */ 3095 3096 struct inter_bb_range_test_entry 3097 { 3098 tree op; 3099 unsigned int first_idx, last_idx; 3100 }; 3101 3102 /* Inter-bb range test optimization. */ 3103 3104 static void 3105 maybe_optimize_range_tests (gimple stmt) 3106 { 3107 basic_block first_bb = gimple_bb (stmt); 3108 basic_block last_bb = first_bb; 3109 basic_block other_bb = NULL; 3110 basic_block bb; 3111 edge_iterator ei; 3112 edge e; 3113 auto_vec<operand_entry_t> ops; 3114 auto_vec<inter_bb_range_test_entry> bbinfo; 3115 bool any_changes = false; 3116 3117 /* Consider only basic blocks that end with GIMPLE_COND or 3118 a cast statement satisfying final_range_test_p. All 3119 but the last bb in the first_bb .. last_bb range 3120 should end with GIMPLE_COND. */ 3121 if (gimple_code (stmt) == GIMPLE_COND) 3122 { 3123 if (EDGE_COUNT (first_bb->succs) != 2) 3124 return; 3125 } 3126 else if (final_range_test_p (stmt)) 3127 other_bb = single_succ (first_bb); 3128 else 3129 return; 3130 3131 if (stmt_could_throw_p (stmt)) 3132 return; 3133 3134 /* As relative ordering of post-dominator sons isn't fixed, 3135 maybe_optimize_range_tests can be called first on any 3136 bb in the range we want to optimize. So, start searching 3137 backwards, if first_bb can be set to a predecessor. 
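For instance, a test like x_1 == 1 || x_1 == 3 split across blocks typically looks like
  <bb 2>: if (x_1 == 1) goto <bb 4>; else goto <bb 3>;
  <bb 3>: if (x_1 == 3) goto <bb 4>; else goto <bb 5>;
with first_bb == bb 2, last_bb == bb 3 and other_bb == bb 4.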
*/ 3138 while (single_pred_p (first_bb)) 3139 { 3140 basic_block pred_bb = single_pred (first_bb); 3141 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true)) 3142 break; 3143 if (!no_side_effect_bb (first_bb)) 3144 break; 3145 first_bb = pred_bb; 3146 } 3147 /* If first_bb is last_bb, other_bb hasn't been computed yet. 3148 Before starting forward search in last_bb successors, find 3149 out the other_bb. */ 3150 if (first_bb == last_bb) 3151 { 3152 other_bb = NULL; 3153 /* As non-GIMPLE_COND last stmt always terminates the range, 3154 if forward search didn't discover anything, just give up. */ 3155 if (gimple_code (stmt) != GIMPLE_COND) 3156 return; 3157 /* Look at both successors. Either it ends with a GIMPLE_COND 3158 and satisfies suitable_cond_bb, or ends with a cast and 3159 other_bb is that cast's successor. */ 3160 FOR_EACH_EDGE (e, ei, first_bb->succs) 3161 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)) 3162 || e->dest == first_bb) 3163 return; 3164 else if (single_pred_p (e->dest)) 3165 { 3166 stmt = last_stmt (e->dest); 3167 if (stmt 3168 && gimple_code (stmt) == GIMPLE_COND 3169 && EDGE_COUNT (e->dest->succs) == 2) 3170 { 3171 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true)) 3172 break; 3173 else 3174 other_bb = NULL; 3175 } 3176 else if (stmt 3177 && final_range_test_p (stmt) 3178 && find_edge (first_bb, single_succ (e->dest))) 3179 { 3180 other_bb = single_succ (e->dest); 3181 if (other_bb == first_bb) 3182 other_bb = NULL; 3183 } 3184 } 3185 if (other_bb == NULL) 3186 return; 3187 } 3188 /* Now do the forward search, moving last_bb to successor bbs 3189 that aren't other_bb. */ 3190 while (EDGE_COUNT (last_bb->succs) == 2) 3191 { 3192 FOR_EACH_EDGE (e, ei, last_bb->succs) 3193 if (e->dest != other_bb) 3194 break; 3195 if (e == NULL) 3196 break; 3197 if (!single_pred_p (e->dest)) 3198 break; 3199 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false)) 3200 break; 3201 if (!no_side_effect_bb (e->dest)) 3202 break; 3203 last_bb = e->dest; 3204 } 3205 if (first_bb == last_bb) 3206 return; 3207 /* Here basic blocks first_bb through last_bb's predecessor 3208 end with GIMPLE_COND, all of them have one of the edges to 3209 other_bb and another to another block in the range, 3210 all blocks except first_bb don't have side-effects and 3211 last_bb ends with either GIMPLE_COND, or cast satisfying 3212 final_range_test_p. */ 3213 for (bb = last_bb; ; bb = single_pred (bb)) 3214 { 3215 enum tree_code code; 3216 tree lhs, rhs; 3217 inter_bb_range_test_entry bb_ent; 3218 3219 bb_ent.op = NULL_TREE; 3220 bb_ent.first_idx = ops.length (); 3221 bb_ent.last_idx = bb_ent.first_idx; 3222 e = find_edge (bb, other_bb); 3223 stmt = last_stmt (bb); 3224 gimple_set_visited (stmt, true); 3225 if (gimple_code (stmt) != GIMPLE_COND) 3226 { 3227 use_operand_p use_p; 3228 gimple phi; 3229 edge e2; 3230 unsigned int d; 3231 3232 lhs = gimple_assign_lhs (stmt); 3233 rhs = gimple_assign_rhs1 (stmt); 3234 gcc_assert (bb == last_bb); 3235 3236 /* stmt is 3237 _123 = (int) _234; 3238 3239 followed by: 3240 <bb M>: 3241 # _345 = PHI <_123(N), 1(...), 1(...)> 3242 3243 or 0 instead of 1. If it is 0, the _234 3244 range test is anded together with all the 3245 other range tests, if it is 1, it is ored with 3246 them. 
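The constant arguments come from the GIMPLE_COND blocks that branch straight to bb M: a constant 1 means such a branch already decides the combined test as true, so the individual tests are ored together; a constant 0 decides it as false, so they are anded.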
*/ 3247 single_imm_use (lhs, &use_p, &phi); 3248 gcc_assert (gimple_code (phi) == GIMPLE_PHI); 3249 e2 = find_edge (first_bb, other_bb); 3250 d = e2->dest_idx; 3251 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs); 3252 if (integer_zerop (gimple_phi_arg_def (phi, d))) 3253 code = BIT_AND_EXPR; 3254 else 3255 { 3256 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d))); 3257 code = BIT_IOR_EXPR; 3258 } 3259 3260 /* If _234 SSA_NAME_DEF_STMT is 3261 _234 = _567 | _789; 3262 (or &, corresponding to 1/0 in the phi arguments, 3263 push into ops the individual range test arguments 3264 of the bitwise or resp. and, recursively. */ 3265 if (!get_ops (rhs, code, &ops, 3266 loop_containing_stmt (stmt)) 3267 && has_single_use (rhs)) 3268 { 3269 /* Otherwise, push the _234 range test itself. */ 3270 operand_entry_t oe 3271 = (operand_entry_t) pool_alloc (operand_entry_pool); 3272 3273 oe->op = rhs; 3274 oe->rank = code; 3275 oe->id = 0; 3276 oe->count = 1; 3277 ops.safe_push (oe); 3278 bb_ent.last_idx++; 3279 } 3280 else 3281 bb_ent.last_idx = ops.length (); 3282 bb_ent.op = rhs; 3283 bbinfo.safe_push (bb_ent); 3284 continue; 3285 } 3286 /* Otherwise stmt is GIMPLE_COND. */ 3287 code = gimple_cond_code (stmt); 3288 lhs = gimple_cond_lhs (stmt); 3289 rhs = gimple_cond_rhs (stmt); 3290 if (TREE_CODE (lhs) == SSA_NAME 3291 && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 3292 && ((code != EQ_EXPR && code != NE_EXPR) 3293 || rhs != boolean_false_node 3294 /* Either push into ops the individual bitwise 3295 or resp. and operands, depending on which 3296 edge is other_bb. */ 3297 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0) 3298 ^ (code == EQ_EXPR)) 3299 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops, 3300 loop_containing_stmt (stmt)))) 3301 { 3302 /* Or push the GIMPLE_COND stmt itself. */ 3303 operand_entry_t oe 3304 = (operand_entry_t) pool_alloc (operand_entry_pool); 3305 3306 oe->op = NULL; 3307 oe->rank = (e->flags & EDGE_TRUE_VALUE) 3308 ? BIT_IOR_EXPR : BIT_AND_EXPR; 3309 /* oe->op = NULL signs that there is no SSA_NAME 3310 for the range test, and oe->id instead is the 3311 basic block number, at which's end the GIMPLE_COND 3312 is. */ 3313 oe->id = bb->index; 3314 oe->count = 1; 3315 ops.safe_push (oe); 3316 bb_ent.op = NULL; 3317 bb_ent.last_idx++; 3318 } 3319 else if (ops.length () > bb_ent.first_idx) 3320 { 3321 bb_ent.op = lhs; 3322 bb_ent.last_idx = ops.length (); 3323 } 3324 bbinfo.safe_push (bb_ent); 3325 if (bb == first_bb) 3326 break; 3327 } 3328 if (ops.length () > 1) 3329 any_changes = optimize_range_tests (ERROR_MARK, &ops); 3330 if (any_changes) 3331 { 3332 unsigned int idx, max_idx = 0; 3333 /* update_ops relies on has_single_use predicates returning the 3334 same values as it did during get_ops earlier. Additionally it 3335 never removes statements, only adds new ones and it should walk 3336 from the single imm use and check the predicate already before 3337 making those changes. 3338 On the other side, the handling of GIMPLE_COND directly can turn 3339 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so 3340 it needs to be done in a separate loop afterwards. 
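This is why the first loop below only rewrites, via update_ops, the blocks whose test was decomposed by get_ops (bbinfo[idx].op != NULL_TREE), and a separate second loop afterwards rewrites the GIMPLE_CONDs that were pushed into OPS as whole operands.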
*/ 3341 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) 3342 { 3343 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx 3344 && bbinfo[idx].op != NULL_TREE) 3345 { 3346 tree new_op; 3347 3348 max_idx = idx; 3349 stmt = last_stmt (bb); 3350 new_op = update_ops (bbinfo[idx].op, 3351 (enum tree_code) 3352 ops[bbinfo[idx].first_idx]->rank, 3353 ops, &bbinfo[idx].first_idx, 3354 loop_containing_stmt (stmt)); 3355 if (new_op == NULL_TREE) 3356 { 3357 gcc_assert (bb == last_bb); 3358 new_op = ops[bbinfo[idx].first_idx++]->op; 3359 } 3360 if (bbinfo[idx].op != new_op) 3361 { 3362 imm_use_iterator iter; 3363 use_operand_p use_p; 3364 gimple use_stmt, cast_stmt = NULL; 3365 3366 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op) 3367 if (is_gimple_debug (use_stmt)) 3368 continue; 3369 else if (gimple_code (use_stmt) == GIMPLE_COND 3370 || gimple_code (use_stmt) == GIMPLE_PHI) 3371 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 3372 SET_USE (use_p, new_op); 3373 else if (gimple_assign_cast_p (use_stmt)) 3374 cast_stmt = use_stmt; 3375 else 3376 gcc_unreachable (); 3377 if (cast_stmt) 3378 { 3379 gcc_assert (bb == last_bb); 3380 tree lhs = gimple_assign_lhs (cast_stmt); 3381 tree new_lhs = make_ssa_name (TREE_TYPE (lhs)); 3382 enum tree_code rhs_code 3383 = gimple_assign_rhs_code (cast_stmt); 3384 gassign *g; 3385 if (is_gimple_min_invariant (new_op)) 3386 { 3387 new_op = fold_convert (TREE_TYPE (lhs), new_op); 3388 g = gimple_build_assign (new_lhs, new_op); 3389 } 3390 else 3391 g = gimple_build_assign (new_lhs, rhs_code, new_op); 3392 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt); 3393 gimple_set_uid (g, gimple_uid (cast_stmt)); 3394 gimple_set_visited (g, true); 3395 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3396 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) 3397 if (is_gimple_debug (use_stmt)) 3398 continue; 3399 else if (gimple_code (use_stmt) == GIMPLE_COND 3400 || gimple_code (use_stmt) == GIMPLE_PHI) 3401 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 3402 SET_USE (use_p, new_lhs); 3403 else 3404 gcc_unreachable (); 3405 } 3406 } 3407 } 3408 if (bb == first_bb) 3409 break; 3410 } 3411 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) 3412 { 3413 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx 3414 && bbinfo[idx].op == NULL_TREE 3415 && ops[bbinfo[idx].first_idx]->op != NULL_TREE) 3416 { 3417 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb)); 3418 3419 if (idx > max_idx) 3420 max_idx = idx; 3421 3422 if (integer_zerop (ops[bbinfo[idx].first_idx]->op)) 3423 gimple_cond_make_false (cond_stmt); 3424 else if (integer_onep (ops[bbinfo[idx].first_idx]->op)) 3425 gimple_cond_make_true (cond_stmt); 3426 else 3427 { 3428 gimple_cond_set_code (cond_stmt, NE_EXPR); 3429 gimple_cond_set_lhs (cond_stmt, 3430 ops[bbinfo[idx].first_idx]->op); 3431 gimple_cond_set_rhs (cond_stmt, boolean_false_node); 3432 } 3433 update_stmt (cond_stmt); 3434 } 3435 if (bb == first_bb) 3436 break; 3437 } 3438 3439 /* The above changes could result in basic blocks after the first 3440 modified one, up to and including last_bb, to be executed even if 3441 they would not be in the original program. If the value ranges of 3442 assignment lhs' in those bbs were dependent on the conditions 3443 guarding those basic blocks which now can change, the VRs might 3444 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs 3445 are only used within the same bb, it should be not a big deal if 3446 we just reset all the VRs in those bbs. See PR68671. 
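MAX_IDX counts the single-predecessor steps from LAST_BB back to the earliest block that was modified, so the loop below resets exactly the blocks following that one, up to and including LAST_BB.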
*/ 3447 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++) 3448 reset_flow_sensitive_info_in_bb (bb); 3449 } 3450 } 3451 3452 /* Return true if OPERAND is defined by a PHI node which uses the LHS 3453 of STMT in it's operands. This is also known as a "destructive 3454 update" operation. */ 3455 3456 static bool 3457 is_phi_for_stmt (gimple stmt, tree operand) 3458 { 3459 gimple def_stmt; 3460 gphi *def_phi; 3461 tree lhs; 3462 use_operand_p arg_p; 3463 ssa_op_iter i; 3464 3465 if (TREE_CODE (operand) != SSA_NAME) 3466 return false; 3467 3468 lhs = gimple_assign_lhs (stmt); 3469 3470 def_stmt = SSA_NAME_DEF_STMT (operand); 3471 def_phi = dyn_cast <gphi *> (def_stmt); 3472 if (!def_phi) 3473 return false; 3474 3475 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE) 3476 if (lhs == USE_FROM_PTR (arg_p)) 3477 return true; 3478 return false; 3479 } 3480 3481 /* Remove def stmt of VAR if VAR has zero uses and recurse 3482 on rhs1 operand if so. */ 3483 3484 static void 3485 remove_visited_stmt_chain (tree var) 3486 { 3487 gimple stmt; 3488 gimple_stmt_iterator gsi; 3489 3490 while (1) 3491 { 3492 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) 3493 return; 3494 stmt = SSA_NAME_DEF_STMT (var); 3495 if (is_gimple_assign (stmt) && gimple_visited_p (stmt)) 3496 { 3497 var = gimple_assign_rhs1 (stmt); 3498 gsi = gsi_for_stmt (stmt); 3499 reassoc_remove_stmt (&gsi); 3500 release_defs (stmt); 3501 } 3502 else 3503 return; 3504 } 3505 } 3506 3507 /* This function checks three consequtive operands in 3508 passed operands vector OPS starting from OPINDEX and 3509 swaps two operands if it is profitable for binary operation 3510 consuming OPINDEX + 1 abnd OPINDEX + 2 operands. 3511 3512 We pair ops with the same rank if possible. 3513 3514 The alternative we try is to see if STMT is a destructive 3515 update style statement, which is like: 3516 b = phi (a, ...) 3517 a = c + b; 3518 In that case, we want to use the destructive update form to 3519 expose the possible vectorizer sum reduction opportunity. 3520 In that case, the third operand will be the phi node. This 3521 check is not performed if STMT is null. 3522 3523 We could, of course, try to be better as noted above, and do a 3524 lot of work to try to find these opportunities in >3 operand 3525 cases, but it is unlikely to be worth it. */ 3526 3527 static void 3528 swap_ops_for_binary_stmt (vec<operand_entry_t> ops, 3529 unsigned int opindex, gimple stmt) 3530 { 3531 operand_entry_t oe1, oe2, oe3; 3532 3533 oe1 = ops[opindex]; 3534 oe2 = ops[opindex + 1]; 3535 oe3 = ops[opindex + 2]; 3536 3537 if ((oe1->rank == oe2->rank 3538 && oe2->rank != oe3->rank) 3539 || (stmt && is_phi_for_stmt (stmt, oe3->op) 3540 && !is_phi_for_stmt (stmt, oe1->op) 3541 && !is_phi_for_stmt (stmt, oe2->op))) 3542 { 3543 struct operand_entry temp = *oe3; 3544 oe3->op = oe1->op; 3545 oe3->rank = oe1->rank; 3546 oe1->op = temp.op; 3547 oe1->rank= temp.rank; 3548 } 3549 else if ((oe1->rank == oe3->rank 3550 && oe2->rank != oe3->rank) 3551 || (stmt && is_phi_for_stmt (stmt, oe2->op) 3552 && !is_phi_for_stmt (stmt, oe1->op) 3553 && !is_phi_for_stmt (stmt, oe3->op))) 3554 { 3555 struct operand_entry temp = *oe2; 3556 oe2->op = oe1->op; 3557 oe2->rank = oe1->rank; 3558 oe1->op = temp.op; 3559 oe1->rank = temp.rank; 3560 } 3561 } 3562 3563 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those 3564 two definitions, otherwise return STMT. 
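Callers use the returned statement as the insertion point for a rebuilt statement, so that it ends up after the definitions of both of its new operands.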
*/ 3565 3566 static inline gimple 3567 find_insert_point (gimple stmt, tree rhs1, tree rhs2) 3568 { 3569 if (TREE_CODE (rhs1) == SSA_NAME 3570 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1))) 3571 stmt = SSA_NAME_DEF_STMT (rhs1); 3572 if (TREE_CODE (rhs2) == SSA_NAME 3573 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2))) 3574 stmt = SSA_NAME_DEF_STMT (rhs2); 3575 return stmt; 3576 } 3577 3578 /* Recursively rewrite our linearized statements so that the operators 3579 match those in OPS[OPINDEX], putting the computation in rank 3580 order. Return new lhs. 3581 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in 3582 the current stmt and during recursive invocations. 3583 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in 3584 recursive invocations. */ 3585 3586 static tree 3587 rewrite_expr_tree (gimple stmt, unsigned int opindex, 3588 vec<operand_entry_t> ops, bool changed, bool next_changed) 3589 { 3590 tree rhs1 = gimple_assign_rhs1 (stmt); 3591 tree rhs2 = gimple_assign_rhs2 (stmt); 3592 tree lhs = gimple_assign_lhs (stmt); 3593 operand_entry_t oe; 3594 3595 /* The final recursion case for this function is that you have 3596 exactly two operations left. 3597 If we had exactly one op in the entire list to start with, we 3598 would have never called this function, and the tail recursion 3599 rewrites them one at a time. */ 3600 if (opindex + 2 == ops.length ()) 3601 { 3602 operand_entry_t oe1, oe2; 3603 3604 oe1 = ops[opindex]; 3605 oe2 = ops[opindex + 1]; 3606 3607 if (rhs1 != oe1->op || rhs2 != oe2->op) 3608 { 3609 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 3610 unsigned int uid = gimple_uid (stmt); 3611 3612 if (dump_file && (dump_flags & TDF_DETAILS)) 3613 { 3614 fprintf (dump_file, "Transforming "); 3615 print_gimple_stmt (dump_file, stmt, 0, 0); 3616 } 3617 3618 /* Even when changed is false, reassociation could have e.g. removed 3619 some redundant operations, so unless we are just swapping the 3620 arguments or unless there is no change at all (then we just 3621 return lhs), force creation of a new SSA_NAME. */ 3622 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex)) 3623 { 3624 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op); 3625 lhs = make_ssa_name (TREE_TYPE (lhs)); 3626 stmt 3627 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt), 3628 oe1->op, oe2->op); 3629 gimple_set_uid (stmt, uid); 3630 gimple_set_visited (stmt, true); 3631 if (insert_point == gsi_stmt (gsi)) 3632 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 3633 else 3634 insert_stmt_after (stmt, insert_point); 3635 } 3636 else 3637 { 3638 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op) 3639 == stmt); 3640 gimple_assign_set_rhs1 (stmt, oe1->op); 3641 gimple_assign_set_rhs2 (stmt, oe2->op); 3642 update_stmt (stmt); 3643 } 3644 3645 if (rhs1 != oe1->op && rhs1 != oe2->op) 3646 remove_visited_stmt_chain (rhs1); 3647 3648 if (dump_file && (dump_flags & TDF_DETAILS)) 3649 { 3650 fprintf (dump_file, " into "); 3651 print_gimple_stmt (dump_file, stmt, 0, 0); 3652 } 3653 } 3654 return lhs; 3655 } 3656 3657 /* If we hit here, we should have 3 or more ops left. */ 3658 gcc_assert (opindex + 2 < ops.length ()); 3659 3660 /* Rewrite the next operator. */ 3661 oe = ops[opindex]; 3662 3663 /* Recurse on the LHS of the binary operator, which is guaranteed to 3664 be the non-leaf side. 
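The deepest statement of the rhs1 chain is therefore rewritten first, consuming the last two entries of OPS, and on the way back out each enclosing statement receives OPS[OPINDEX] as its rhs2 and the freshly rewritten chain as its rhs1.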
*/ 3665 tree new_rhs1 3666 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, 3667 changed || oe->op != rhs2 || next_changed, 3668 false); 3669 3670 if (oe->op != rhs2 || new_rhs1 != rhs1) 3671 { 3672 if (dump_file && (dump_flags & TDF_DETAILS)) 3673 { 3674 fprintf (dump_file, "Transforming "); 3675 print_gimple_stmt (dump_file, stmt, 0, 0); 3676 } 3677 3678 /* If changed is false, this is either opindex == 0 3679 or all outer rhs2's were equal to corresponding oe->op, 3680 and powi_result is NULL. 3681 That means lhs is equivalent before and after reassociation. 3682 Otherwise ensure the old lhs SSA_NAME is not reused and 3683 create a new stmt as well, so that any debug stmts will be 3684 properly adjusted. */ 3685 if (changed) 3686 { 3687 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 3688 unsigned int uid = gimple_uid (stmt); 3689 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op); 3690 3691 lhs = make_ssa_name (TREE_TYPE (lhs)); 3692 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt), 3693 new_rhs1, oe->op); 3694 gimple_set_uid (stmt, uid); 3695 gimple_set_visited (stmt, true); 3696 if (insert_point == gsi_stmt (gsi)) 3697 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 3698 else 3699 insert_stmt_after (stmt, insert_point); 3700 } 3701 else 3702 { 3703 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op) 3704 == stmt); 3705 gimple_assign_set_rhs1 (stmt, new_rhs1); 3706 gimple_assign_set_rhs2 (stmt, oe->op); 3707 update_stmt (stmt); 3708 } 3709 3710 if (dump_file && (dump_flags & TDF_DETAILS)) 3711 { 3712 fprintf (dump_file, " into "); 3713 print_gimple_stmt (dump_file, stmt, 0, 0); 3714 } 3715 } 3716 return lhs; 3717 } 3718 3719 /* Find out how many cycles we need to compute statements chain. 3720 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a 3721 maximum number of independent statements we may execute per cycle. */ 3722 3723 static int 3724 get_required_cycles (int ops_num, int cpu_width) 3725 { 3726 int res; 3727 int elog; 3728 unsigned int rest; 3729 3730 /* While we have more than 2 * cpu_width operands 3731 we may reduce number of operands by cpu_width 3732 per cycle. */ 3733 res = ops_num / (2 * cpu_width); 3734 3735 /* Remained operands count may be reduced twice per cycle 3736 until we have only one operand. */ 3737 rest = (unsigned)(ops_num - res * cpu_width); 3738 elog = exact_log2 (rest); 3739 if (elog >= 0) 3740 res += elog; 3741 else 3742 res += floor_log2 (rest) + 1; 3743 3744 return res; 3745 } 3746 3747 /* Returns an optimal number of registers to use for computation of 3748 given statements. */ 3749 3750 static int 3751 get_reassociation_width (int ops_num, enum tree_code opc, 3752 machine_mode mode) 3753 { 3754 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH); 3755 int width; 3756 int width_min; 3757 int cycles_best; 3758 3759 if (param_width > 0) 3760 width = param_width; 3761 else 3762 width = targetm.sched.reassociation_width (opc, mode); 3763 3764 if (width == 1) 3765 return width; 3766 3767 /* Get the minimal time required for sequence computation. */ 3768 cycles_best = get_required_cycles (ops_num, width); 3769 3770 /* Check if we may use less width and still compute sequence for 3771 the same time. It will allow us to reduce registers usage. 3772 get_required_cycles is monotonically increasing with lower width 3773 so we can perform a binary search for the minimal width that still 3774 results in the optimal cycle count. 
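E.g. for OPS_NUM == 4 and an initial WIDTH of 4, get_required_cycles returns 2 cycles for widths 4 and 2 but 3 cycles for width 1, so the search below settles on a width of 2.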
*/ 3775 width_min = 1; 3776 while (width > width_min) 3777 { 3778 int width_mid = (width + width_min) / 2; 3779 3780 if (get_required_cycles (ops_num, width_mid) == cycles_best) 3781 width = width_mid; 3782 else if (width_min < width_mid) 3783 width_min = width_mid; 3784 else 3785 break; 3786 } 3787 3788 return width; 3789 } 3790 3791 /* Recursively rewrite our linearized statements so that the operators 3792 match those in OPS[OPINDEX], putting the computation in rank 3793 order and trying to allow operations to be executed in 3794 parallel. */ 3795 3796 static void 3797 rewrite_expr_tree_parallel (gassign *stmt, int width, 3798 vec<operand_entry_t> ops) 3799 { 3800 enum tree_code opcode = gimple_assign_rhs_code (stmt); 3801 int op_num = ops.length (); 3802 int stmt_num = op_num - 1; 3803 gimple *stmts = XALLOCAVEC (gimple, stmt_num); 3804 int op_index = op_num - 1; 3805 int stmt_index = 0; 3806 int ready_stmts_end = 0; 3807 int i = 0; 3808 tree last_rhs1 = gimple_assign_rhs1 (stmt); 3809 3810 /* We start expression rewriting from the top statements. 3811 So, in this loop we create a full list of statements 3812 we will work with. */ 3813 stmts[stmt_num - 1] = stmt; 3814 for (i = stmt_num - 2; i >= 0; i--) 3815 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1])); 3816 3817 for (i = 0; i < stmt_num; i++) 3818 { 3819 tree op1, op2; 3820 3821 /* Determine whether we should use results of 3822 already handled statements or not. */ 3823 if (ready_stmts_end == 0 3824 && (i - stmt_index >= width || op_index < 1)) 3825 ready_stmts_end = i; 3826 3827 /* Now we choose operands for the next statement. Non zero 3828 value in ready_stmts_end means here that we should use 3829 the result of already generated statements as new operand. */ 3830 if (ready_stmts_end > 0) 3831 { 3832 op1 = gimple_assign_lhs (stmts[stmt_index++]); 3833 if (ready_stmts_end > stmt_index) 3834 op2 = gimple_assign_lhs (stmts[stmt_index++]); 3835 else if (op_index >= 0) 3836 op2 = ops[op_index--]->op; 3837 else 3838 { 3839 gcc_assert (stmt_index < i); 3840 op2 = gimple_assign_lhs (stmts[stmt_index++]); 3841 } 3842 3843 if (stmt_index >= ready_stmts_end) 3844 ready_stmts_end = 0; 3845 } 3846 else 3847 { 3848 if (op_index > 1) 3849 swap_ops_for_binary_stmt (ops, op_index - 2, NULL); 3850 op2 = ops[op_index--]->op; 3851 op1 = ops[op_index--]->op; 3852 } 3853 3854 /* If we emit the last statement then we should put 3855 operands into the last statement. It will also 3856 break the loop. */ 3857 if (op_index < 0 && stmt_index == i) 3858 i = stmt_num - 1; 3859 3860 if (dump_file && (dump_flags & TDF_DETAILS)) 3861 { 3862 fprintf (dump_file, "Transforming "); 3863 print_gimple_stmt (dump_file, stmts[i], 0, 0); 3864 } 3865 3866 /* We keep original statement only for the last one. All 3867 others are recreated. */ 3868 if (i == stmt_num - 1) 3869 { 3870 gimple_assign_set_rhs1 (stmts[i], op1); 3871 gimple_assign_set_rhs2 (stmts[i], op2); 3872 update_stmt (stmts[i]); 3873 } 3874 else 3875 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); 3876 3877 if (dump_file && (dump_flags & TDF_DETAILS)) 3878 { 3879 fprintf (dump_file, " into "); 3880 print_gimple_stmt (dump_file, stmts[i], 0, 0); 3881 } 3882 } 3883 3884 remove_visited_stmt_chain (last_rhs1); 3885 } 3886 3887 /* Transform STMT, which is really (A +B) + (C + D) into the left 3888 linear form, ((A+B)+C)+D. 3889 Recurse on D if necessary. 
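Concretely, with made-up SSA names, for a_1 = x + y; b_2 = z + w; c_3 = a_1 + b_2 (STMT), we insert a fresh t_4 = a_1 + w before STMT, rewrite STMT into c_3 = t_4 + z, and remove the now-dead definition of b_2. Either left-linear operand order serves our purposes, so the code below does not try to match the exact order shown above; if STMT's new second operand (z here) is itself defined by a reassociable operation of the same code, we recurse on STMT.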
*/ 3890 3891 static void 3892 linearize_expr (gimple stmt) 3893 { 3894 gimple_stmt_iterator gsi; 3895 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); 3896 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 3897 gimple oldbinrhs = binrhs; 3898 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 3899 gimple newbinrhs = NULL; 3900 struct loop *loop = loop_containing_stmt (stmt); 3901 tree lhs = gimple_assign_lhs (stmt); 3902 3903 gcc_assert (is_reassociable_op (binlhs, rhscode, loop) 3904 && is_reassociable_op (binrhs, rhscode, loop)); 3905 3906 gsi = gsi_for_stmt (stmt); 3907 3908 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs)); 3909 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)), 3910 gimple_assign_rhs_code (binrhs), 3911 gimple_assign_lhs (binlhs), 3912 gimple_assign_rhs2 (binrhs)); 3913 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs)); 3914 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT); 3915 gimple_set_uid (binrhs, gimple_uid (stmt)); 3916 3917 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) 3918 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 3919 3920 if (dump_file && (dump_flags & TDF_DETAILS)) 3921 { 3922 fprintf (dump_file, "Linearized: "); 3923 print_gimple_stmt (dump_file, stmt, 0, 0); 3924 } 3925 3926 reassociate_stats.linearized++; 3927 update_stmt (stmt); 3928 3929 gsi = gsi_for_stmt (oldbinrhs); 3930 reassoc_remove_stmt (&gsi); 3931 release_defs (oldbinrhs); 3932 3933 gimple_set_visited (stmt, true); 3934 gimple_set_visited (binlhs, true); 3935 gimple_set_visited (binrhs, true); 3936 3937 /* Tail recurse on the new rhs if it still needs reassociation. */ 3938 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop)) 3939 /* ??? This should probably be linearize_expr (newbinrhs) but I don't 3940 want to change the algorithm while converting to tuples. */ 3941 linearize_expr (stmt); 3942 } 3943 3944 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return 3945 it. Otherwise, return NULL. */ 3946 3947 static gimple 3948 get_single_immediate_use (tree lhs) 3949 { 3950 use_operand_p immuse; 3951 gimple immusestmt; 3952 3953 if (TREE_CODE (lhs) == SSA_NAME 3954 && single_imm_use (lhs, &immuse, &immusestmt) 3955 && is_gimple_assign (immusestmt)) 3956 return immusestmt; 3957 3958 return NULL; 3959 } 3960 3961 /* Recursively negate the value of TONEGATE, and return the SSA_NAME 3962 representing the negated value. Insertions of any necessary 3963 instructions go before GSI. 3964 This function is recursive in that, if you hand it "a_5" as the 3965 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will 3966 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */ 3967 3968 static tree 3969 negate_value (tree tonegate, gimple_stmt_iterator *gsip) 3970 { 3971 gimple negatedefstmt = NULL; 3972 tree resultofnegate; 3973 gimple_stmt_iterator gsi; 3974 unsigned int uid; 3975 3976 /* If we are trying to negate a name, defined by an add, negate the 3977 add operands instead. 
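For example (hypothetical names): asked to negate t_7 where t_7 = x_1 + y_2 and t_7 has a single use, we negate x_1 and y_2 recursively, emit a fresh t_8 = -x_1 + -y_2 before the old definition, and return t_8 for the caller to use instead of -t_7. Anything else (a constant, or a name not defined by such a single-use add) falls through to the code below, which folds a NEGATE_EXPR and gimplifies it before GSI.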
*/ 3978 if (TREE_CODE (tonegate) == SSA_NAME) 3979 negatedefstmt = SSA_NAME_DEF_STMT (tonegate); 3980 if (TREE_CODE (tonegate) == SSA_NAME 3981 && is_gimple_assign (negatedefstmt) 3982 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME 3983 && has_single_use (gimple_assign_lhs (negatedefstmt)) 3984 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR) 3985 { 3986 tree rhs1 = gimple_assign_rhs1 (negatedefstmt); 3987 tree rhs2 = gimple_assign_rhs2 (negatedefstmt); 3988 tree lhs = gimple_assign_lhs (negatedefstmt); 3989 gimple g; 3990 3991 gsi = gsi_for_stmt (negatedefstmt); 3992 rhs1 = negate_value (rhs1, &gsi); 3993 3994 gsi = gsi_for_stmt (negatedefstmt); 3995 rhs2 = negate_value (rhs2, &gsi); 3996 3997 gsi = gsi_for_stmt (negatedefstmt); 3998 lhs = make_ssa_name (TREE_TYPE (lhs)); 3999 gimple_set_visited (negatedefstmt, true); 4000 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2); 4001 gimple_set_uid (g, gimple_uid (negatedefstmt)); 4002 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4003 return lhs; 4004 } 4005 4006 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate); 4007 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true, 4008 NULL_TREE, true, GSI_SAME_STMT); 4009 gsi = *gsip; 4010 uid = gimple_uid (gsi_stmt (gsi)); 4011 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi)) 4012 { 4013 gimple stmt = gsi_stmt (gsi); 4014 if (gimple_uid (stmt) != 0) 4015 break; 4016 gimple_set_uid (stmt, uid); 4017 } 4018 return resultofnegate; 4019 } 4020 4021 /* Return true if we should break up the subtract in STMT into an add 4022 with negate. This is true when the subtract operands are really 4023 adds, or the subtract itself is used in an add expression. In 4024 either case, breaking up the subtract into an add with negate 4025 exposes the adds to reassociation. */ 4026 4027 static bool 4028 should_break_up_subtract (gimple stmt) 4029 { 4030 tree lhs = gimple_assign_lhs (stmt); 4031 tree binlhs = gimple_assign_rhs1 (stmt); 4032 tree binrhs = gimple_assign_rhs2 (stmt); 4033 gimple immusestmt; 4034 struct loop *loop = loop_containing_stmt (stmt); 4035 4036 if (TREE_CODE (binlhs) == SSA_NAME 4037 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) 4038 return true; 4039 4040 if (TREE_CODE (binrhs) == SSA_NAME 4041 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop)) 4042 return true; 4043 4044 if (TREE_CODE (lhs) == SSA_NAME 4045 && (immusestmt = get_single_immediate_use (lhs)) 4046 && is_gimple_assign (immusestmt) 4047 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR 4048 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR)) 4049 return true; 4050 return false; 4051 } 4052 4053 /* Transform STMT from A - B into A + -B. */ 4054 4055 static void 4056 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip) 4057 { 4058 tree rhs1 = gimple_assign_rhs1 (stmt); 4059 tree rhs2 = gimple_assign_rhs2 (stmt); 4060 4061 if (dump_file && (dump_flags & TDF_DETAILS)) 4062 { 4063 fprintf (dump_file, "Breaking up subtract "); 4064 print_gimple_stmt (dump_file, stmt, 0, 0); 4065 } 4066 4067 rhs2 = negate_value (rhs2, gsip); 4068 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2); 4069 update_stmt (stmt); 4070 } 4071 4072 /* Determine whether STMT is a builtin call that raises an SSA name 4073 to an integer power and has only one use. If so, and this is early 4074 reassociation and unsafe math optimizations are permitted, place 4075 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
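For instance (illustrative only), t_5 = __builtin_pow (x_1, 3.0) with a singly-used result yields *BASE = x_1 and *EXPONENT = 3, and t_6 = __builtin_powi (x_1, 5) yields *BASE = x_1 and *EXPONENT = 5; non-integral or negative exponents, and exponents of 0 or 1, are rejected.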
4076 If any of these conditions does not hold, return FALSE. */ 4077 4078 static bool 4079 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent) 4080 { 4081 tree fndecl, arg1; 4082 REAL_VALUE_TYPE c, cint; 4083 4084 if (!first_pass_instance 4085 || !flag_unsafe_math_optimizations 4086 || !is_gimple_call (stmt) 4087 || !has_single_use (gimple_call_lhs (stmt))) 4088 return false; 4089 4090 fndecl = gimple_call_fndecl (stmt); 4091 4092 if (!fndecl 4093 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) 4094 return false; 4095 4096 switch (DECL_FUNCTION_CODE (fndecl)) 4097 { 4098 CASE_FLT_FN (BUILT_IN_POW): 4099 if (flag_errno_math) 4100 return false; 4101 4102 *base = gimple_call_arg (stmt, 0); 4103 arg1 = gimple_call_arg (stmt, 1); 4104 4105 if (TREE_CODE (arg1) != REAL_CST) 4106 return false; 4107 4108 c = TREE_REAL_CST (arg1); 4109 4110 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT) 4111 return false; 4112 4113 *exponent = real_to_integer (&c); 4114 real_from_integer (&cint, VOIDmode, *exponent, SIGNED); 4115 if (!real_identical (&c, &cint)) 4116 return false; 4117 4118 break; 4119 4120 CASE_FLT_FN (BUILT_IN_POWI): 4121 *base = gimple_call_arg (stmt, 0); 4122 arg1 = gimple_call_arg (stmt, 1); 4123 4124 if (!tree_fits_shwi_p (arg1)) 4125 return false; 4126 4127 *exponent = tree_to_shwi (arg1); 4128 break; 4129 4130 default: 4131 return false; 4132 } 4133 4134 /* Expanding negative exponents is generally unproductive, so we don't 4135 complicate matters with those. Exponents of zero and one should 4136 have been handled by expression folding. */ 4137 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME) 4138 return false; 4139 4140 return true; 4141 } 4142 4143 /* Recursively linearize a binary expression that is the RHS of STMT. 4144 Place the operands of the expression tree in the vector named OPS. */ 4145 4146 static void 4147 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt, 4148 bool is_associative, bool set_visited) 4149 { 4150 tree binlhs = gimple_assign_rhs1 (stmt); 4151 tree binrhs = gimple_assign_rhs2 (stmt); 4152 gimple binlhsdef = NULL, binrhsdef = NULL; 4153 bool binlhsisreassoc = false; 4154 bool binrhsisreassoc = false; 4155 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 4156 struct loop *loop = loop_containing_stmt (stmt); 4157 tree base = NULL_TREE; 4158 HOST_WIDE_INT exponent = 0; 4159 4160 if (set_visited) 4161 gimple_set_visited (stmt, true); 4162 4163 if (TREE_CODE (binlhs) == SSA_NAME) 4164 { 4165 binlhsdef = SSA_NAME_DEF_STMT (binlhs); 4166 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop) 4167 && !stmt_could_throw_p (binlhsdef)); 4168 } 4169 4170 if (TREE_CODE (binrhs) == SSA_NAME) 4171 { 4172 binrhsdef = SSA_NAME_DEF_STMT (binrhs); 4173 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop) 4174 && !stmt_could_throw_p (binrhsdef)); 4175 } 4176 4177 /* If the LHS is not reassociable, but the RHS is, we need to swap 4178 them. If neither is reassociable, there is nothing we can do, so 4179 just put them in the ops vector. If the LHS is reassociable, 4180 linearize it. If both are reassociable, then linearize the RHS 4181 and the LHS. */ 4182 4183 if (!binlhsisreassoc) 4184 { 4185 tree temp; 4186 4187 /* If this is not a associative operation like division, give up. 
*/ 4188 if (!is_associative) 4189 { 4190 add_to_ops_vec (ops, binrhs); 4191 return; 4192 } 4193 4194 if (!binrhsisreassoc) 4195 { 4196 if (rhscode == MULT_EXPR 4197 && TREE_CODE (binrhs) == SSA_NAME 4198 && acceptable_pow_call (binrhsdef, &base, &exponent)) 4199 { 4200 add_repeat_to_ops_vec (ops, base, exponent); 4201 gimple_set_visited (binrhsdef, true); 4202 } 4203 else 4204 add_to_ops_vec (ops, binrhs); 4205 4206 if (rhscode == MULT_EXPR 4207 && TREE_CODE (binlhs) == SSA_NAME 4208 && acceptable_pow_call (binlhsdef, &base, &exponent)) 4209 { 4210 add_repeat_to_ops_vec (ops, base, exponent); 4211 gimple_set_visited (binlhsdef, true); 4212 } 4213 else 4214 add_to_ops_vec (ops, binlhs); 4215 4216 return; 4217 } 4218 4219 if (dump_file && (dump_flags & TDF_DETAILS)) 4220 { 4221 fprintf (dump_file, "swapping operands of "); 4222 print_gimple_stmt (dump_file, stmt, 0, 0); 4223 } 4224 4225 swap_ssa_operands (stmt, 4226 gimple_assign_rhs1_ptr (stmt), 4227 gimple_assign_rhs2_ptr (stmt)); 4228 update_stmt (stmt); 4229 4230 if (dump_file && (dump_flags & TDF_DETAILS)) 4231 { 4232 fprintf (dump_file, " is now "); 4233 print_gimple_stmt (dump_file, stmt, 0, 0); 4234 } 4235 4236 /* We want to make it so the lhs is always the reassociative op, 4237 so swap. */ 4238 temp = binlhs; 4239 binlhs = binrhs; 4240 binrhs = temp; 4241 } 4242 else if (binrhsisreassoc) 4243 { 4244 linearize_expr (stmt); 4245 binlhs = gimple_assign_rhs1 (stmt); 4246 binrhs = gimple_assign_rhs2 (stmt); 4247 } 4248 4249 gcc_assert (TREE_CODE (binrhs) != SSA_NAME 4250 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), 4251 rhscode, loop)); 4252 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs), 4253 is_associative, set_visited); 4254 4255 if (rhscode == MULT_EXPR 4256 && TREE_CODE (binrhs) == SSA_NAME 4257 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent)) 4258 { 4259 add_repeat_to_ops_vec (ops, base, exponent); 4260 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true); 4261 } 4262 else 4263 add_to_ops_vec (ops, binrhs); 4264 } 4265 4266 /* Repropagate the negates back into subtracts, since no other pass 4267 currently does it. */ 4268 4269 static void 4270 repropagate_negates (void) 4271 { 4272 unsigned int i = 0; 4273 tree negate; 4274 4275 FOR_EACH_VEC_ELT (plus_negates, i, negate) 4276 { 4277 gimple user = get_single_immediate_use (negate); 4278 4279 if (!user || !is_gimple_assign (user)) 4280 continue; 4281 4282 /* The negate operand can be either operand of a PLUS_EXPR 4283 (it can be the LHS if the RHS is a constant for example). 4284 4285 Force the negate operand to the RHS of the PLUS_EXPR, then 4286 transform the PLUS_EXPR into a MINUS_EXPR. */ 4287 if (gimple_assign_rhs_code (user) == PLUS_EXPR) 4288 { 4289 /* If the negated operand appears on the LHS of the 4290 PLUS_EXPR, exchange the operands of the PLUS_EXPR 4291 to force the negated operand to the RHS of the PLUS_EXPR. */ 4292 if (gimple_assign_rhs1 (user) == negate) 4293 { 4294 swap_ssa_operands (user, 4295 gimple_assign_rhs1_ptr (user), 4296 gimple_assign_rhs2_ptr (user)); 4297 } 4298 4299 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace 4300 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. 
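For example (made-up names): given t_3 = -a_1 and u_4 = b_2 + t_3, the use becomes u_4 = b_2 - a_1, leaving t_3 dead if this was its only use.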
*/ 4301 if (gimple_assign_rhs2 (user) == negate) 4302 { 4303 tree rhs1 = gimple_assign_rhs1 (user); 4304 tree rhs2 = get_unary_op (negate, NEGATE_EXPR); 4305 gimple_stmt_iterator gsi = gsi_for_stmt (user); 4306 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2); 4307 update_stmt (user); 4308 } 4309 } 4310 else if (gimple_assign_rhs_code (user) == MINUS_EXPR) 4311 { 4312 if (gimple_assign_rhs1 (user) == negate) 4313 { 4314 /* We have 4315 x = -a 4316 y = x - b 4317 which we transform into 4318 x = a + b 4319 y = -x . 4320 This pushes down the negate which we possibly can merge 4321 into some other operation, hence insert it into the 4322 plus_negates vector. */ 4323 gimple feed = SSA_NAME_DEF_STMT (negate); 4324 tree a = gimple_assign_rhs1 (feed); 4325 tree b = gimple_assign_rhs2 (user); 4326 gimple_stmt_iterator gsi = gsi_for_stmt (feed); 4327 gimple_stmt_iterator gsi2 = gsi_for_stmt (user); 4328 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed))); 4329 gimple g = gimple_build_assign (x, PLUS_EXPR, a, b); 4330 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); 4331 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x); 4332 user = gsi_stmt (gsi2); 4333 update_stmt (user); 4334 reassoc_remove_stmt (&gsi); 4335 release_defs (feed); 4336 plus_negates.safe_push (gimple_assign_lhs (user)); 4337 } 4338 else 4339 { 4340 /* Transform "x = -a; y = b - x" into "y = b + a", getting 4341 rid of one operation. */ 4342 gimple feed = SSA_NAME_DEF_STMT (negate); 4343 tree a = gimple_assign_rhs1 (feed); 4344 tree rhs1 = gimple_assign_rhs1 (user); 4345 gimple_stmt_iterator gsi = gsi_for_stmt (user); 4346 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a); 4347 update_stmt (gsi_stmt (gsi)); 4348 } 4349 } 4350 } 4351 } 4352 4353 /* Returns true if OP is of a type for which we can do reassociation. 4354 That is for integral or non-saturating fixed-point types, and for 4355 floating point type when associative-math is enabled. */ 4356 4357 static bool 4358 can_reassociate_p (tree op) 4359 { 4360 tree type = TREE_TYPE (op); 4361 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op)) 4362 return false; 4363 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)) 4364 || NON_SAT_FIXED_POINT_TYPE_P (type) 4365 || (flag_associative_math && FLOAT_TYPE_P (type))) 4366 return true; 4367 return false; 4368 } 4369 4370 /* Break up subtract operations in block BB. 4371 4372 We do this top down because we don't know whether the subtract is 4373 part of a possible chain of reassociation except at the top. 4374 4375 IE given 4376 d = f + g 4377 c = a + e 4378 b = c - d 4379 q = b - r 4380 k = t - q 4381 4382 we want to break up k = t - q, but we won't until we've transformed q 4383 = b - r, which won't be broken up until we transform b = c - d. 4384 4385 En passant, clear the GIMPLE visited flag on every statement 4386 and set UIDs within each basic block. */ 4387 4388 static void 4389 break_up_subtract_bb (basic_block bb) 4390 { 4391 gimple_stmt_iterator gsi; 4392 basic_block son; 4393 unsigned int uid = 1; 4394 4395 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 4396 { 4397 gimple stmt = gsi_stmt (gsi); 4398 gimple_set_visited (stmt, false); 4399 gimple_set_uid (stmt, uid++); 4400 4401 if (!is_gimple_assign (stmt) 4402 || !can_reassociate_p (gimple_assign_lhs (stmt))) 4403 continue; 4404 4405 /* Look for simple gimple subtract operations. 
*/ 4406 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR) 4407 { 4408 if (!can_reassociate_p (gimple_assign_rhs1 (stmt)) 4409 || !can_reassociate_p (gimple_assign_rhs2 (stmt))) 4410 continue; 4411 4412 /* Check for a subtract used only in an addition. If this 4413 is the case, transform it into add of a negate for better 4414 reassociation. IE transform C = A-B into C = A + -B if C 4415 is only used in an addition. */ 4416 if (should_break_up_subtract (stmt)) 4417 break_up_subtract (stmt, &gsi); 4418 } 4419 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR 4420 && can_reassociate_p (gimple_assign_rhs1 (stmt))) 4421 plus_negates.safe_push (gimple_assign_lhs (stmt)); 4422 } 4423 for (son = first_dom_son (CDI_DOMINATORS, bb); 4424 son; 4425 son = next_dom_son (CDI_DOMINATORS, son)) 4426 break_up_subtract_bb (son); 4427 } 4428 4429 /* Used for repeated factor analysis. */ 4430 struct repeat_factor_d 4431 { 4432 /* An SSA name that occurs in a multiply chain. */ 4433 tree factor; 4434 4435 /* Cached rank of the factor. */ 4436 unsigned rank; 4437 4438 /* Number of occurrences of the factor in the chain. */ 4439 HOST_WIDE_INT count; 4440 4441 /* An SSA name representing the product of this factor and 4442 all factors appearing later in the repeated factor vector. */ 4443 tree repr; 4444 }; 4445 4446 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t; 4447 typedef const struct repeat_factor_d *const_repeat_factor_t; 4448 4449 4450 static vec<repeat_factor> repeat_factor_vec; 4451 4452 /* Used for sorting the repeat factor vector. Sort primarily by 4453 ascending occurrence count, secondarily by descending rank. */ 4454 4455 static int 4456 compare_repeat_factors (const void *x1, const void *x2) 4457 { 4458 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1; 4459 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2; 4460 4461 if (rf1->count != rf2->count) 4462 return rf1->count - rf2->count; 4463 4464 return rf2->rank - rf1->rank; 4465 } 4466 4467 /* Look for repeated operands in OPS in the multiply tree rooted at 4468 STMT. Replace them with an optimal sequence of multiplies and powi 4469 builtin calls, and remove the used operands from OPS. Return an 4470 SSA name representing the value of the replacement sequence. */ 4471 4472 static tree 4473 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops) 4474 { 4475 unsigned i, j, vec_len; 4476 int ii; 4477 operand_entry_t oe; 4478 repeat_factor_t rf1, rf2; 4479 repeat_factor rfnew; 4480 tree result = NULL_TREE; 4481 tree target_ssa, iter_result; 4482 tree type = TREE_TYPE (gimple_get_lhs (stmt)); 4483 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI); 4484 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 4485 gimple mul_stmt, pow_stmt; 4486 4487 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and 4488 target. */ 4489 if (!powi_fndecl) 4490 return NULL_TREE; 4491 4492 /* Allocate the repeated factor vector. */ 4493 repeat_factor_vec.create (10); 4494 4495 /* Scan the OPS vector for all SSA names in the product and build 4496 up a vector of occurrence counts for each factor. 
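For example, if OPS describes the product x_1 * x_1 * y_2 * y_2 * y_2 * z_3, the counts accumulated here are x_1: 2, y_2: 3 and z_3: 1; the vector is then sorted by ascending occurrence count (and by descending rank within equal counts), giving z_3, x_1, y_2 in this case.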
*/ 4497 FOR_EACH_VEC_ELT (*ops, i, oe) 4498 { 4499 if (TREE_CODE (oe->op) == SSA_NAME) 4500 { 4501 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 4502 { 4503 if (rf1->factor == oe->op) 4504 { 4505 rf1->count += oe->count; 4506 break; 4507 } 4508 } 4509 4510 if (j >= repeat_factor_vec.length ()) 4511 { 4512 rfnew.factor = oe->op; 4513 rfnew.rank = oe->rank; 4514 rfnew.count = oe->count; 4515 rfnew.repr = NULL_TREE; 4516 repeat_factor_vec.safe_push (rfnew); 4517 } 4518 } 4519 } 4520 4521 /* Sort the repeated factor vector by (a) increasing occurrence count, 4522 and (b) decreasing rank. */ 4523 repeat_factor_vec.qsort (compare_repeat_factors); 4524 4525 /* It is generally best to combine as many base factors as possible 4526 into a product before applying __builtin_powi to the result. 4527 However, the sort order chosen for the repeated factor vector 4528 allows us to cache partial results for the product of the base 4529 factors for subsequent use. When we already have a cached partial 4530 result from a previous iteration, it is best to make use of it 4531 before looking for another __builtin_pow opportunity. 4532 4533 As an example, consider x * x * y * y * y * z * z * z * z. 4534 We want to first compose the product x * y * z, raise it to the 4535 second power, then multiply this by y * z, and finally multiply 4536 by z. This can be done in 5 multiplies provided we cache y * z 4537 for use in both expressions: 4538 4539 t1 = y * z 4540 t2 = t1 * x 4541 t3 = t2 * t2 4542 t4 = t1 * t3 4543 result = t4 * z 4544 4545 If we instead ignored the cached y * z and first multiplied by 4546 the __builtin_pow opportunity z * z, we would get the inferior: 4547 4548 t1 = y * z 4549 t2 = t1 * x 4550 t3 = t2 * t2 4551 t4 = z * z 4552 t5 = t3 * t4 4553 result = t5 * y */ 4554 4555 vec_len = repeat_factor_vec.length (); 4556 4557 /* Repeatedly look for opportunities to create a builtin_powi call. */ 4558 while (true) 4559 { 4560 HOST_WIDE_INT power; 4561 4562 /* First look for the largest cached product of factors from 4563 preceding iterations. If found, create a builtin_powi for 4564 it if the minimum occurrence count for its factors is at 4565 least 2, or just use this cached product as our next 4566 multiplicand if the minimum occurrence count is 1. 
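Tracing the x * x * y * y * y * z * z * z * z example above through this loop: the first iteration finds nothing cached, so the code further down caches y*z and x*y*z and emits __builtin_powi (x*y*z, 2); the next iteration finds the cached y*z with a remaining count of 1 and simply multiplies it into the result, and a final iteration multiplies in the one remaining z.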
*/ 4567 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 4568 { 4569 if (rf1->repr && rf1->count > 0) 4570 break; 4571 } 4572 4573 if (j < vec_len) 4574 { 4575 power = rf1->count; 4576 4577 if (power == 1) 4578 { 4579 iter_result = rf1->repr; 4580 4581 if (dump_file && (dump_flags & TDF_DETAILS)) 4582 { 4583 unsigned elt; 4584 repeat_factor_t rf; 4585 fputs ("Multiplying by cached product ", dump_file); 4586 for (elt = j; elt < vec_len; elt++) 4587 { 4588 rf = &repeat_factor_vec[elt]; 4589 print_generic_expr (dump_file, rf->factor, 0); 4590 if (elt < vec_len - 1) 4591 fputs (" * ", dump_file); 4592 } 4593 fputs ("\n", dump_file); 4594 } 4595 } 4596 else 4597 { 4598 iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); 4599 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 4600 build_int_cst (integer_type_node, 4601 power)); 4602 gimple_call_set_lhs (pow_stmt, iter_result); 4603 gimple_set_location (pow_stmt, gimple_location (stmt)); 4604 gimple_set_uid (pow_stmt, gimple_uid (stmt)); 4605 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); 4606 4607 if (dump_file && (dump_flags & TDF_DETAILS)) 4608 { 4609 unsigned elt; 4610 repeat_factor_t rf; 4611 fputs ("Building __builtin_pow call for cached product (", 4612 dump_file); 4613 for (elt = j; elt < vec_len; elt++) 4614 { 4615 rf = &repeat_factor_vec[elt]; 4616 print_generic_expr (dump_file, rf->factor, 0); 4617 if (elt < vec_len - 1) 4618 fputs (" * ", dump_file); 4619 } 4620 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", 4621 power); 4622 } 4623 } 4624 } 4625 else 4626 { 4627 /* Otherwise, find the first factor in the repeated factor 4628 vector whose occurrence count is at least 2. If no such 4629 factor exists, there are no builtin_powi opportunities 4630 remaining. */ 4631 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 4632 { 4633 if (rf1->count >= 2) 4634 break; 4635 } 4636 4637 if (j >= vec_len) 4638 break; 4639 4640 power = rf1->count; 4641 4642 if (dump_file && (dump_flags & TDF_DETAILS)) 4643 { 4644 unsigned elt; 4645 repeat_factor_t rf; 4646 fputs ("Building __builtin_pow call for (", dump_file); 4647 for (elt = j; elt < vec_len; elt++) 4648 { 4649 rf = &repeat_factor_vec[elt]; 4650 print_generic_expr (dump_file, rf->factor, 0); 4651 if (elt < vec_len - 1) 4652 fputs (" * ", dump_file); 4653 } 4654 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power); 4655 } 4656 4657 reassociate_stats.pows_created++; 4658 4659 /* Visit each element of the vector in reverse order (so that 4660 high-occurrence elements are visited first, and within the 4661 same occurrence count, lower-ranked elements are visited 4662 first). Form a linear product of all elements in this order 4663 whose occurrencce count is at least that of element J. 4664 Record the SSA name representing the product of each element 4665 with all subsequent elements in the vector. */ 4666 if (j == vec_len - 1) 4667 rf1->repr = rf1->factor; 4668 else 4669 { 4670 for (ii = vec_len - 2; ii >= (int)j; ii--) 4671 { 4672 tree op1, op2; 4673 4674 rf1 = &repeat_factor_vec[ii]; 4675 rf2 = &repeat_factor_vec[ii + 1]; 4676 4677 /* Init the last factor's representative to be itself. 
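(Over the whole loop, for sorted factors x, y, z this records z->repr = z, y->repr = y * z and x->repr = x * (y * z), each intermediate product being emitted as a MULT_EXPR before STMT.)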
*/ 4678 if (!rf2->repr) 4679 rf2->repr = rf2->factor; 4680 4681 op1 = rf1->factor; 4682 op2 = rf2->repr; 4683 4684 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow"); 4685 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR, 4686 op1, op2); 4687 gimple_set_location (mul_stmt, gimple_location (stmt)); 4688 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 4689 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); 4690 rf1->repr = target_ssa; 4691 4692 /* Don't reprocess the multiply we just introduced. */ 4693 gimple_set_visited (mul_stmt, true); 4694 } 4695 } 4696 4697 /* Form a call to __builtin_powi for the maximum product 4698 just formed, raised to the power obtained earlier. */ 4699 rf1 = &repeat_factor_vec[j]; 4700 iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); 4701 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 4702 build_int_cst (integer_type_node, 4703 power)); 4704 gimple_call_set_lhs (pow_stmt, iter_result); 4705 gimple_set_location (pow_stmt, gimple_location (stmt)); 4706 gimple_set_uid (pow_stmt, gimple_uid (stmt)); 4707 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); 4708 } 4709 4710 /* If we previously formed at least one other builtin_powi call, 4711 form the product of this one and those others. */ 4712 if (result) 4713 { 4714 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow"); 4715 mul_stmt = gimple_build_assign (new_result, MULT_EXPR, 4716 result, iter_result); 4717 gimple_set_location (mul_stmt, gimple_location (stmt)); 4718 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 4719 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); 4720 gimple_set_visited (mul_stmt, true); 4721 result = new_result; 4722 } 4723 else 4724 result = iter_result; 4725 4726 /* Decrement the occurrence count of each element in the product 4727 by the count found above, and remove this many copies of each 4728 factor from OPS. */ 4729 for (i = j; i < vec_len; i++) 4730 { 4731 unsigned k = power; 4732 unsigned n; 4733 4734 rf1 = &repeat_factor_vec[i]; 4735 rf1->count -= power; 4736 4737 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe) 4738 { 4739 if (oe->op == rf1->factor) 4740 { 4741 if (oe->count <= k) 4742 { 4743 ops->ordered_remove (n); 4744 k -= oe->count; 4745 4746 if (k == 0) 4747 break; 4748 } 4749 else 4750 { 4751 oe->count -= k; 4752 break; 4753 } 4754 } 4755 } 4756 } 4757 } 4758 4759 /* At this point all elements in the repeated factor vector have a 4760 remaining occurrence count of 0 or 1, and those with a count of 1 4761 don't have cached representatives. Re-sort the ops vector and 4762 clean up. */ 4763 ops->qsort (sort_by_operand_rank); 4764 repeat_factor_vec.release (); 4765 4766 /* Return the final product computed herein. Note that there may 4767 still be some elements with single occurrence count left in OPS; 4768 those will be handled by the normal reassociation logic. */ 4769 return result; 4770 } 4771 4772 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. 
*/ 4773 4774 static void 4775 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs) 4776 { 4777 tree rhs1; 4778 4779 if (dump_file && (dump_flags & TDF_DETAILS)) 4780 { 4781 fprintf (dump_file, "Transforming "); 4782 print_gimple_stmt (dump_file, stmt, 0, 0); 4783 } 4784 4785 rhs1 = gimple_assign_rhs1 (stmt); 4786 gimple_assign_set_rhs_from_tree (gsi, new_rhs); 4787 update_stmt (stmt); 4788 remove_visited_stmt_chain (rhs1); 4789 4790 if (dump_file && (dump_flags & TDF_DETAILS)) 4791 { 4792 fprintf (dump_file, " into "); 4793 print_gimple_stmt (dump_file, stmt, 0, 0); 4794 } 4795 } 4796 4797 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */ 4798 4799 static void 4800 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt, 4801 tree rhs1, tree rhs2) 4802 { 4803 if (dump_file && (dump_flags & TDF_DETAILS)) 4804 { 4805 fprintf (dump_file, "Transforming "); 4806 print_gimple_stmt (dump_file, stmt, 0, 0); 4807 } 4808 4809 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2); 4810 update_stmt (gsi_stmt (*gsi)); 4811 remove_visited_stmt_chain (rhs1); 4812 4813 if (dump_file && (dump_flags & TDF_DETAILS)) 4814 { 4815 fprintf (dump_file, " into "); 4816 print_gimple_stmt (dump_file, stmt, 0, 0); 4817 } 4818 } 4819 4820 /* Reassociate expressions in basic block BB and its post-dominator as 4821 children. */ 4822 4823 static void 4824 reassociate_bb (basic_block bb) 4825 { 4826 gimple_stmt_iterator gsi; 4827 basic_block son; 4828 gimple stmt = last_stmt (bb); 4829 4830 if (stmt && !gimple_visited_p (stmt)) 4831 maybe_optimize_range_tests (stmt); 4832 4833 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi)) 4834 { 4835 stmt = gsi_stmt (gsi); 4836 4837 if (is_gimple_assign (stmt) 4838 && !stmt_could_throw_p (stmt)) 4839 { 4840 tree lhs, rhs1, rhs2; 4841 enum tree_code rhs_code = gimple_assign_rhs_code (stmt); 4842 4843 /* If this is not a gimple binary expression, there is 4844 nothing for us to do with it. */ 4845 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) 4846 continue; 4847 4848 /* If this was part of an already processed statement, 4849 we don't need to touch it again. */ 4850 if (gimple_visited_p (stmt)) 4851 { 4852 /* This statement might have become dead because of previous 4853 reassociations. */ 4854 if (has_zero_uses (gimple_get_lhs (stmt))) 4855 { 4856 reassoc_remove_stmt (&gsi); 4857 release_defs (stmt); 4858 /* We might end up removing the last stmt above which 4859 places the iterator to the end of the sequence. 4860 Reset it to the last stmt in this case which might 4861 be the end of the sequence as well if we removed 4862 the last statement of the sequence. In which case 4863 we need to bail out. */ 4864 if (gsi_end_p (gsi)) 4865 { 4866 gsi = gsi_last_bb (bb); 4867 if (gsi_end_p (gsi)) 4868 break; 4869 } 4870 } 4871 continue; 4872 } 4873 4874 lhs = gimple_assign_lhs (stmt); 4875 rhs1 = gimple_assign_rhs1 (stmt); 4876 rhs2 = gimple_assign_rhs2 (stmt); 4877 4878 /* For non-bit or min/max operations we can't associate 4879 all types. Verify that here. 
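For example, a signed integer addition whose type has undefined overflow fails can_reassociate_p and is skipped here, whereas the bitwise and min/max codes listed below are safe to reorder in any type and bypass that check.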
*/ 4880 if (rhs_code != BIT_IOR_EXPR 4881 && rhs_code != BIT_AND_EXPR 4882 && rhs_code != BIT_XOR_EXPR 4883 && rhs_code != MIN_EXPR 4884 && rhs_code != MAX_EXPR 4885 && (!can_reassociate_p (lhs) 4886 || !can_reassociate_p (rhs1) 4887 || !can_reassociate_p (rhs2))) 4888 continue; 4889 4890 if (associative_tree_code (rhs_code)) 4891 { 4892 auto_vec<operand_entry_t> ops; 4893 tree powi_result = NULL_TREE; 4894 4895 /* There may be no immediate uses left by the time we 4896 get here because we may have eliminated them all. */ 4897 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs)) 4898 continue; 4899 4900 gimple_set_visited (stmt, true); 4901 linearize_expr_tree (&ops, stmt, true, true); 4902 ops.qsort (sort_by_operand_rank); 4903 int orig_len = ops.length (); 4904 optimize_ops_list (rhs_code, &ops); 4905 if (undistribute_ops_list (rhs_code, &ops, 4906 loop_containing_stmt (stmt))) 4907 { 4908 ops.qsort (sort_by_operand_rank); 4909 optimize_ops_list (rhs_code, &ops); 4910 } 4911 4912 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR) 4913 optimize_range_tests (rhs_code, &ops); 4914 4915 if (first_pass_instance 4916 && rhs_code == MULT_EXPR 4917 && flag_unsafe_math_optimizations) 4918 powi_result = attempt_builtin_powi (stmt, &ops); 4919 4920 /* If the operand vector is now empty, all operands were 4921 consumed by the __builtin_powi optimization. */ 4922 if (ops.length () == 0) 4923 transform_stmt_to_copy (&gsi, stmt, powi_result); 4924 else if (ops.length () == 1) 4925 { 4926 tree last_op = ops.last ()->op; 4927 4928 if (powi_result) 4929 transform_stmt_to_multiply (&gsi, stmt, last_op, 4930 powi_result); 4931 else 4932 transform_stmt_to_copy (&gsi, stmt, last_op); 4933 } 4934 else 4935 { 4936 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); 4937 int ops_num = ops.length (); 4938 int width = get_reassociation_width (ops_num, rhs_code, mode); 4939 tree new_lhs = lhs; 4940 4941 if (dump_file && (dump_flags & TDF_DETAILS)) 4942 fprintf (dump_file, 4943 "Width = %d was chosen for reassociation\n", width); 4944 4945 if (width > 1 4946 && ops.length () > 3) 4947 rewrite_expr_tree_parallel (as_a <gassign *> (stmt), 4948 width, ops); 4949 else 4950 { 4951 /* When there are three operands left, we want 4952 to make sure the ones that get the double 4953 binary op are chosen wisely. */ 4954 int len = ops.length (); 4955 if (len >= 3) 4956 swap_ops_for_binary_stmt (ops, len - 3, stmt); 4957 4958 new_lhs = rewrite_expr_tree (stmt, 0, ops, 4959 powi_result != NULL, 4960 len != orig_len); 4961 } 4962 4963 /* If we combined some repeated factors into a 4964 __builtin_powi call, multiply that result by the 4965 reassociated operands. */ 4966 if (powi_result) 4967 { 4968 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs); 4969 tree type = TREE_TYPE (lhs); 4970 tree target_ssa = make_temp_ssa_name (type, NULL, 4971 "reassocpow"); 4972 gimple_set_lhs (lhs_stmt, target_ssa); 4973 update_stmt (lhs_stmt); 4974 if (lhs != new_lhs) 4975 target_ssa = new_lhs; 4976 mul_stmt = gimple_build_assign (lhs, MULT_EXPR, 4977 powi_result, target_ssa); 4978 gimple_set_location (mul_stmt, gimple_location (stmt)); 4979 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 4980 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT); 4981 } 4982 } 4983 } 4984 } 4985 } 4986 for (son = first_dom_son (CDI_POST_DOMINATORS, bb); 4987 son; 4988 son = next_dom_son (CDI_POST_DOMINATORS, son)) 4989 reassociate_bb (son); 4990 } 4991 4992 /* Add jumps around shifts for range tests turned into bit tests. 
4993 For each SSA_NAME VAR we have code like: 4994 VAR = ...; // final stmt of range comparison 4995 // bit test here...; 4996 OTHERVAR = ...; // final stmt of the bit test sequence 4997 RES = VAR | OTHERVAR; 4998 Turn the above into: 4999 VAR = ...; 5000 if (VAR != 0) 5001 goto <l3>; 5002 else 5003 goto <l2>; 5004 <l2>: 5005 // bit test here...; 5006 OTHERVAR = ...; 5007 <l3>: 5008 # RES = PHI<1(l1), OTHERVAR(l2)>; */ 5009 5010 static void 5011 branch_fixup (void) 5012 { 5013 tree var; 5014 unsigned int i; 5015 5016 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var) 5017 { 5018 gimple def_stmt = SSA_NAME_DEF_STMT (var); 5019 gimple use_stmt; 5020 use_operand_p use; 5021 bool ok = single_imm_use (var, &use, &use_stmt); 5022 gcc_assert (ok 5023 && is_gimple_assign (use_stmt) 5024 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR 5025 && gimple_bb (def_stmt) == gimple_bb (use_stmt)); 5026 5027 basic_block cond_bb = gimple_bb (def_stmt); 5028 basic_block then_bb = split_block (cond_bb, def_stmt)->dest; 5029 basic_block merge_bb = split_block (then_bb, use_stmt)->dest; 5030 5031 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt); 5032 gimple g = gimple_build_cond (NE_EXPR, var, 5033 build_zero_cst (TREE_TYPE (var)), 5034 NULL_TREE, NULL_TREE); 5035 location_t loc = gimple_location (use_stmt); 5036 gimple_set_location (g, loc); 5037 gsi_insert_after (&gsi, g, GSI_NEW_STMT); 5038 5039 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE); 5040 etrue->probability = REG_BR_PROB_BASE / 2; 5041 etrue->count = cond_bb->count / 2; 5042 edge efalse = find_edge (cond_bb, then_bb); 5043 efalse->flags = EDGE_FALSE_VALUE; 5044 efalse->probability -= etrue->probability; 5045 efalse->count -= etrue->count; 5046 then_bb->count -= etrue->count; 5047 5048 tree othervar = NULL_TREE; 5049 if (gimple_assign_rhs1 (use_stmt) == var) 5050 othervar = gimple_assign_rhs2 (use_stmt); 5051 else if (gimple_assign_rhs2 (use_stmt) == var) 5052 othervar = gimple_assign_rhs1 (use_stmt); 5053 else 5054 gcc_unreachable (); 5055 tree lhs = gimple_assign_lhs (use_stmt); 5056 gphi *phi = create_phi_node (lhs, merge_bb); 5057 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc); 5058 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc); 5059 gsi = gsi_for_stmt (use_stmt); 5060 gsi_remove (&gsi, true); 5061 5062 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb); 5063 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb); 5064 } 5065 reassoc_branch_fixups.release (); 5066 } 5067 5068 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops); 5069 void debug_ops_vector (vec<operand_entry_t> ops); 5070 5071 /* Dump the operand entry vector OPS to FILE. */ 5072 5073 void 5074 dump_ops_vector (FILE *file, vec<operand_entry_t> ops) 5075 { 5076 operand_entry_t oe; 5077 unsigned int i; 5078 5079 FOR_EACH_VEC_ELT (ops, i, oe) 5080 { 5081 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank); 5082 print_generic_expr (file, oe->op, 0); 5083 } 5084 } 5085 5086 /* Dump the operand entry vector OPS to STDERR. */ 5087 5088 DEBUG_FUNCTION void 5089 debug_ops_vector (vec<operand_entry_t> ops) 5090 { 5091 dump_ops_vector (stderr, ops); 5092 } 5093 5094 static void 5095 do_reassoc (void) 5096 { 5097 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); 5098 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)); 5099 } 5100 5101 /* Initialize the reassociation pass. 
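The rank numbering set up below works as follows: default definitions (parameters and the static chain) receive small consecutive ranks starting at 3, and then each basic block, visited in reverse post order, receives a block rank of the next counter value shifted left by 16, so blocks that come later in that order (deeper loops, as noted below) get strictly larger block ranks.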
*/ 5102 5103 static void 5104 init_reassoc (void) 5105 { 5106 int i; 5107 long rank = 2; 5108 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); 5109 5110 /* Find the loops, so that we can prevent moving calculations in 5111 them. */ 5112 loop_optimizer_init (AVOID_CFG_MODIFICATIONS); 5113 5114 memset (&reassociate_stats, 0, sizeof (reassociate_stats)); 5115 5116 operand_entry_pool = create_alloc_pool ("operand entry pool", 5117 sizeof (struct operand_entry), 30); 5118 next_operand_entry_id = 0; 5119 5120 /* Reverse RPO (Reverse Post Order) will give us something where 5121 deeper loops come later. */ 5122 pre_and_rev_post_order_compute (NULL, bbs, false); 5123 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun)); 5124 operand_rank = new hash_map<tree, long>; 5125 5126 /* Give each default definition a distinct rank. This includes 5127 parameters and the static chain. Walk backwards over all 5128 SSA names so that we get proper rank ordering according 5129 to tree_swap_operands_p. */ 5130 for (i = num_ssa_names - 1; i > 0; --i) 5131 { 5132 tree name = ssa_name (i); 5133 if (name && SSA_NAME_IS_DEFAULT_DEF (name)) 5134 insert_operand_rank (name, ++rank); 5135 } 5136 5137 /* Set up rank for each BB */ 5138 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++) 5139 bb_rank[bbs[i]] = ++rank << 16; 5140 5141 free (bbs); 5142 calculate_dominance_info (CDI_POST_DOMINATORS); 5143 plus_negates = vNULL; 5144 } 5145 5146 /* Cleanup after the reassociation pass, and print stats if 5147 requested. */ 5148 5149 static void 5150 fini_reassoc (void) 5151 { 5152 statistics_counter_event (cfun, "Linearized", 5153 reassociate_stats.linearized); 5154 statistics_counter_event (cfun, "Constants eliminated", 5155 reassociate_stats.constants_eliminated); 5156 statistics_counter_event (cfun, "Ops eliminated", 5157 reassociate_stats.ops_eliminated); 5158 statistics_counter_event (cfun, "Statements rewritten", 5159 reassociate_stats.rewritten); 5160 statistics_counter_event (cfun, "Built-in pow[i] calls encountered", 5161 reassociate_stats.pows_encountered); 5162 statistics_counter_event (cfun, "Built-in powi calls created", 5163 reassociate_stats.pows_created); 5164 5165 delete operand_rank; 5166 free_alloc_pool (operand_entry_pool); 5167 free (bb_rank); 5168 plus_negates.release (); 5169 free_dominance_info (CDI_POST_DOMINATORS); 5170 loop_optimizer_finalize (); 5171 } 5172 5173 /* Gate and execute functions for Reassociation. 
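The driver below runs the phases in order: init_reassoc sets up ranks and post-dominance information, do_reassoc breaks up subtracts walking dominators from the entry block and then reassociates expressions walking post-dominators from the exit block, repropagate_negates and branch_fixup clean up afterwards, and fini_reassoc reports statistics and releases the data structures.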
*/ 5174 5175 static unsigned int 5176 execute_reassoc (void) 5177 { 5178 init_reassoc (); 5179 5180 do_reassoc (); 5181 repropagate_negates (); 5182 branch_fixup (); 5183 5184 fini_reassoc (); 5185 return 0; 5186 } 5187 5188 namespace { 5189 5190 const pass_data pass_data_reassoc = 5191 { 5192 GIMPLE_PASS, /* type */ 5193 "reassoc", /* name */ 5194 OPTGROUP_NONE, /* optinfo_flags */ 5195 TV_TREE_REASSOC, /* tv_id */ 5196 ( PROP_cfg | PROP_ssa ), /* properties_required */ 5197 0, /* properties_provided */ 5198 0, /* properties_destroyed */ 5199 0, /* todo_flags_start */ 5200 TODO_update_ssa_only_virtuals, /* todo_flags_finish */ 5201 }; 5202 5203 class pass_reassoc : public gimple_opt_pass 5204 { 5205 public: 5206 pass_reassoc (gcc::context *ctxt) 5207 : gimple_opt_pass (pass_data_reassoc, ctxt) 5208 {} 5209 5210 /* opt_pass methods: */ 5211 opt_pass * clone () { return new pass_reassoc (m_ctxt); } 5212 virtual bool gate (function *) { return flag_tree_reassoc != 0; } 5213 virtual unsigned int execute (function *) { return execute_reassoc (); } 5214 5215 }; // class pass_reassoc 5216 5217 } // anon namespace 5218 5219 gimple_opt_pass * 5220 make_pass_reassoc (gcc::context *ctxt) 5221 { 5222 return new pass_reassoc (ctxt); 5223 } 5224