1 /* Reassociation for trees. 2 Copyright (C) 2005-2013 Free Software Foundation, Inc. 3 Contributed by Daniel Berlin <dan@dberlin.org> 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 GCC is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #include "config.h" 22 #include "system.h" 23 #include "coretypes.h" 24 #include "tm.h" 25 #include "tree.h" 26 #include "basic-block.h" 27 #include "gimple-pretty-print.h" 28 #include "tree-inline.h" 29 #include "tree-flow.h" 30 #include "gimple.h" 31 #include "tree-iterator.h" 32 #include "tree-pass.h" 33 #include "alloc-pool.h" 34 #include "vec.h" 35 #include "langhooks.h" 36 #include "pointer-set.h" 37 #include "cfgloop.h" 38 #include "flags.h" 39 #include "target.h" 40 #include "params.h" 41 #include "diagnostic-core.h" 42 43 /* This is a simple global reassociation pass. It is, in part, based 44 on the LLVM pass of the same name (They do some things more/less 45 than we do, in different orders, etc). 46 47 It consists of five steps: 48 49 1. Breaking up subtract operations into addition + negate, where 50 it would promote the reassociation of adds. 51 52 2. Left linearization of the expression trees, so that (A+B)+(C+D) 53 becomes (((A+B)+C)+D), which is easier for us to rewrite later. 54 During linearization, we place the operands of the binary 55 expressions into a vector of operand_entry_t 56 57 3. Optimization of the operand lists, eliminating things like a + 58 -a, a & a, etc. 59 60 3a. Combine repeated factors with the same occurrence counts 61 into a __builtin_powi call that will later be optimized into 62 an optimal number of multiplies. 63 64 4. Rewrite the expression trees we linearized and optimized so 65 they are in proper rank order. 66 67 5. Repropagate negates, as nothing else will clean it up ATM. 68 69 A bit of theory on #4, since nobody seems to write anything down 70 about why it makes sense to do it the way they do it: 71 72 We could do this much nicer theoretically, but don't (for reasons 73 explained after how to do it theoretically nice :P). 74 75 In order to promote the most redundancy elimination, you want 76 binary expressions whose operands are the same rank (or 77 preferably, the same value) exposed to the redundancy eliminator, 78 for possible elimination. 79 80 So the way to do this if we really cared, is to build the new op 81 tree from the leaves to the roots, merging as you go, and putting the 82 new op on the end of the worklist, until you are left with one 83 thing on the worklist. 84 85 IE if you have to rewrite the following set of operands (listed with 86 rank in parentheses), with opcode PLUS_EXPR: 87 88 a (1), b (1), c (1), d (2), e (2) 89 90 91 We start with our merge worklist empty, and the ops list with all of 92 those on it. 93 94 You want to first merge all leaves of the same rank, as much as 95 possible. 96 97 So first build a binary op of 98 99 mergetmp = a + b, and put "mergetmp" on the merge worklist. 
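(At this point the merge worklist is {mergetmp}, and c, d and e remain on the ops list.)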
100 101 Because there is no three operand form of PLUS_EXPR, c is not going to 102 be exposed to redundancy elimination as a rank 1 operand. 103 104 So you might as well throw it on the merge worklist (you could also 105 consider it to now be a rank two operand, and merge it with d and e, 106 but in this case, you then have evicted e from a binary op. So at 107 least in this situation, you can't win.) 108 109 Then build a binary op of d + e 110 mergetmp2 = d + e 111 112 and put mergetmp2 on the merge worklist. 113 114 so merge worklist = {mergetmp, c, mergetmp2} 115 116 Continue building binary ops of these operations until you have only 117 one operation left on the worklist. 118 119 So we have 120 121 build binary op 122 mergetmp3 = mergetmp + c 123 124 worklist = {mergetmp2, mergetmp3} 125 126 mergetmp4 = mergetmp2 + mergetmp3 127 128 worklist = {mergetmp4} 129 130 because we have one operation left, we can now just set the original 131 statement equal to the result of that operation. 132 133 This will at least expose a + b and d + e to redundancy elimination 134 as binary operations. 135 136 For extra points, you can reuse the old statements to build the 137 mergetmps, since you shouldn't run out. 138 139 So why don't we do this? 140 141 Because it's expensive, and rarely will help. Most trees we are 142 reassociating have 3 or less ops. If they have 2 ops, they already 143 will be written into a nice single binary op. If you have 3 ops, a 144 single simple check suffices to tell you whether the first two are of the 145 same rank. If so, you know to order it 146 147 mergetmp = op1 + op2 148 newstmt = mergetmp + op3 149 150 instead of 151 mergetmp = op2 + op3 152 newstmt = mergetmp + op1 153 154 If all three are of the same rank, you can't expose them all in a 155 single binary operator anyway, so the above is *still* the best you 156 can do. 157 158 Thus, this is what we do. When we have three ops left, we check to see 159 what order to put them in, and call it a day. As a nod to vector sum 160 reduction, we check if any of the ops are really a phi node that is a 161 destructive update for the associating op, and keep the destructive 162 update together for vector sum reduction recognition. */ 163 164 165 /* Statistics */ 166 static struct 167 { 168 int linearized; 169 int constants_eliminated; 170 int ops_eliminated; 171 int rewritten; 172 int pows_encountered; 173 int pows_created; 174 } reassociate_stats; 175 176 /* Operator, rank pair. */ 177 typedef struct operand_entry 178 { 179 unsigned int rank; 180 int id; 181 tree op; 182 unsigned int count; 183 } *operand_entry_t; 184 185 static alloc_pool operand_entry_pool; 186 187 /* This is used to assign a unique ID to each struct operand_entry 188 so that qsort results are identical on different hosts. */ 189 static int next_operand_entry_id; 190 191 /* Starting rank number for a given basic block, so that we can rank 192 operations using unmovable instructions in that BB based on the bb 193 depth. */ 194 static long *bb_rank; 195 196 /* Operand->rank hashtable. */ 197 static struct pointer_map_t *operand_rank; 198 199 /* Forward decls. */ 200 static long get_rank (tree); 201 202 203 /* Bias amount for loop-carried phis. We want this to be larger than 204 the depth of any reassociation tree we can see, but not larger than 205 the rank difference between two blocks. */ 206 #define PHI_LOOP_BIAS (1 << 15) 207 208 /* Rank assigned to a phi statement. 
If STMT is a loop-carried phi of 209 an innermost loop, and the phi has only a single use which is inside 210 the loop, then the rank is the block rank of the loop latch plus an 211 extra bias for the loop-carried dependence. This causes expressions 212 calculated into an accumulator variable to be independent for each 213 iteration of the loop. If STMT is some other phi, the rank is the 214 block rank of its containing block. */ 215 static long 216 phi_rank (gimple stmt) 217 { 218 basic_block bb = gimple_bb (stmt); 219 struct loop *father = bb->loop_father; 220 tree res; 221 unsigned i; 222 use_operand_p use; 223 gimple use_stmt; 224 225 /* We only care about real loops (those with a latch). */ 226 if (!father->latch) 227 return bb_rank[bb->index]; 228 229 /* Interesting phis must be in headers of innermost loops. */ 230 if (bb != father->header 231 || father->inner) 232 return bb_rank[bb->index]; 233 234 /* Ignore virtual SSA_NAMEs. */ 235 res = gimple_phi_result (stmt); 236 if (virtual_operand_p (res)) 237 return bb_rank[bb->index]; 238 239 /* The phi definition must have a single use, and that use must be 240 within the loop. Otherwise this isn't an accumulator pattern. */ 241 if (!single_imm_use (res, &use, &use_stmt) 242 || gimple_bb (use_stmt)->loop_father != father) 243 return bb_rank[bb->index]; 244 245 /* Look for phi arguments from within the loop. If found, bias this phi. */ 246 for (i = 0; i < gimple_phi_num_args (stmt); i++) 247 { 248 tree arg = gimple_phi_arg_def (stmt, i); 249 if (TREE_CODE (arg) == SSA_NAME 250 && !SSA_NAME_IS_DEFAULT_DEF (arg)) 251 { 252 gimple def_stmt = SSA_NAME_DEF_STMT (arg); 253 if (gimple_bb (def_stmt)->loop_father == father) 254 return bb_rank[father->latch->index] + PHI_LOOP_BIAS; 255 } 256 } 257 258 /* Must be an uninteresting phi. */ 259 return bb_rank[bb->index]; 260 } 261 262 /* If EXP is an SSA_NAME defined by a PHI statement that represents a 263 loop-carried dependence of an innermost loop, return TRUE; else 264 return FALSE. */ 265 static bool 266 loop_carried_phi (tree exp) 267 { 268 gimple phi_stmt; 269 long block_rank; 270 271 if (TREE_CODE (exp) != SSA_NAME 272 || SSA_NAME_IS_DEFAULT_DEF (exp)) 273 return false; 274 275 phi_stmt = SSA_NAME_DEF_STMT (exp); 276 277 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI) 278 return false; 279 280 /* Non-loop-carried phis have block rank. Loop-carried phis have 281 an additional bias added in. If this phi doesn't have block rank, 282 it's biased and should not be propagated. */ 283 block_rank = bb_rank[gimple_bb (phi_stmt)->index]; 284 285 if (phi_rank (phi_stmt) != block_rank) 286 return true; 287 288 return false; 289 } 290 291 /* Return the maximum of RANK and the rank that should be propagated 292 from expression OP. For most operands, this is just the rank of OP. 293 For loop-carried phis, the value is zero to avoid undoing the bias 294 in favor of the phi. */ 295 static long 296 propagate_rank (long rank, tree op) 297 { 298 long op_rank; 299 300 if (loop_carried_phi (op)) 301 return rank; 302 303 op_rank = get_rank (op); 304 305 return MAX (rank, op_rank); 306 } 307 308 /* Look up the operand rank structure for expression E. */ 309 310 static inline long 311 find_operand_rank (tree e) 312 { 313 void **slot = pointer_map_contains (operand_rank, e); 314 return slot ? (long) (intptr_t) *slot : -1; 315 } 316 317 /* Insert {E,RANK} into the operand rank hashtable. 
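RANK must be positive, and E must not already have a rank recorded.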
*/ 318 319 static inline void 320 insert_operand_rank (tree e, long rank) 321 { 322 void **slot; 323 gcc_assert (rank > 0); 324 slot = pointer_map_insert (operand_rank, e); 325 gcc_assert (!*slot); 326 *slot = (void *) (intptr_t) rank; 327 } 328 329 /* Given an expression E, return the rank of the expression. */ 330 331 static long 332 get_rank (tree e) 333 { 334 /* Constants have rank 0. */ 335 if (is_gimple_min_invariant (e)) 336 return 0; 337 338 /* SSA_NAME's have the rank of the expression they are the result 339 of. 340 For globals and uninitialized values, the rank is 0. 341 For function arguments, use the pre-setup rank. 342 For PHI nodes, stores, asm statements, etc, we use the rank of 343 the BB. 344 For simple operations, the rank is the maximum rank of any of 345 its operands, or the bb_rank, whichever is less. 346 I make no claims that this is optimal, however, it gives good 347 results. */ 348 349 /* We make an exception to the normal ranking system to break 350 dependences of accumulator variables in loops. Suppose we 351 have a simple one-block loop containing: 352 353 x_1 = phi(x_0, x_2) 354 b = a + x_1 355 c = b + d 356 x_2 = c + e 357 358 As shown, each iteration of the calculation into x is fully 359 dependent upon the iteration before it. We would prefer to 360 see this in the form: 361 362 x_1 = phi(x_0, x_2) 363 b = a + d 364 c = b + e 365 x_2 = c + x_1 366 367 If the loop is unrolled, the calculations of b and c from 368 different iterations can be interleaved. 369 370 To obtain this result during reassociation, we bias the rank 371 of the phi definition x_1 upward, when it is recognized as an 372 accumulator pattern. The artificial rank causes it to be 373 added last, providing the desired independence. */ 374 375 if (TREE_CODE (e) == SSA_NAME) 376 { 377 gimple stmt; 378 long rank; 379 int i, n; 380 tree op; 381 382 if (SSA_NAME_IS_DEFAULT_DEF (e)) 383 return find_operand_rank (e); 384 385 stmt = SSA_NAME_DEF_STMT (e); 386 if (gimple_code (stmt) == GIMPLE_PHI) 387 return phi_rank (stmt); 388 389 if (!is_gimple_assign (stmt) 390 || gimple_vdef (stmt)) 391 return bb_rank[gimple_bb (stmt)->index]; 392 393 /* If we already have a rank for this expression, use that. */ 394 rank = find_operand_rank (e); 395 if (rank != -1) 396 return rank; 397 398 /* Otherwise, find the maximum rank for the operands. As an 399 exception, remove the bias from loop-carried phis when propagating 400 the rank so that dependent operations are not also biased. */ 401 rank = 0; 402 if (gimple_assign_single_p (stmt)) 403 { 404 tree rhs = gimple_assign_rhs1 (stmt); 405 n = TREE_OPERAND_LENGTH (rhs); 406 if (n == 0) 407 rank = propagate_rank (rank, rhs); 408 else 409 { 410 for (i = 0; i < n; i++) 411 { 412 op = TREE_OPERAND (rhs, i); 413 414 if (op != NULL_TREE) 415 rank = propagate_rank (rank, op); 416 } 417 } 418 } 419 else 420 { 421 n = gimple_num_ops (stmt); 422 for (i = 1; i < n; i++) 423 { 424 op = gimple_op (stmt, i); 425 gcc_assert (op); 426 rank = propagate_rank (rank, op); 427 } 428 } 429 430 if (dump_file && (dump_flags & TDF_DETAILS)) 431 { 432 fprintf (dump_file, "Rank for "); 433 print_generic_expr (dump_file, e, 0); 434 fprintf (dump_file, " is %ld\n", (rank + 1)); 435 } 436 437 /* Note the rank in the hashtable so we don't recompute it. 
*/ 438 insert_operand_rank (e, (rank + 1)); 439 return (rank + 1); 440 } 441 442 /* Globals, etc, are rank 0 */ 443 return 0; 444 } 445 446 447 /* We want integer ones to end up last no matter what, since they are 448 the ones we can do the most with. */ 449 #define INTEGER_CONST_TYPE 1 << 3 450 #define FLOAT_CONST_TYPE 1 << 2 451 #define OTHER_CONST_TYPE 1 << 1 452 453 /* Classify an invariant tree into integer, float, or other, so that 454 we can sort them to be near other constants of the same type. */ 455 static inline int 456 constant_type (tree t) 457 { 458 if (INTEGRAL_TYPE_P (TREE_TYPE (t))) 459 return INTEGER_CONST_TYPE; 460 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t))) 461 return FLOAT_CONST_TYPE; 462 else 463 return OTHER_CONST_TYPE; 464 } 465 466 /* qsort comparison function to sort operand entries PA and PB by rank 467 so that the sorted array is ordered by rank in decreasing order. */ 468 static int 469 sort_by_operand_rank (const void *pa, const void *pb) 470 { 471 const operand_entry_t oea = *(const operand_entry_t *)pa; 472 const operand_entry_t oeb = *(const operand_entry_t *)pb; 473 474 /* It's nicer for optimize_expression if constants that are likely 475 to fold when added/multiplied//whatever are put next to each 476 other. Since all constants have rank 0, order them by type. */ 477 if (oeb->rank == 0 && oea->rank == 0) 478 { 479 if (constant_type (oeb->op) != constant_type (oea->op)) 480 return constant_type (oeb->op) - constant_type (oea->op); 481 else 482 /* To make sorting result stable, we use unique IDs to determine 483 order. */ 484 return oeb->id - oea->id; 485 } 486 487 /* Lastly, make sure the versions that are the same go next to each 488 other. We use SSA_NAME_VERSION because it's stable. */ 489 if ((oeb->rank - oea->rank == 0) 490 && TREE_CODE (oea->op) == SSA_NAME 491 && TREE_CODE (oeb->op) == SSA_NAME) 492 { 493 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op)) 494 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op); 495 else 496 return oeb->id - oea->id; 497 } 498 499 if (oeb->rank != oea->rank) 500 return oeb->rank - oea->rank; 501 else 502 return oeb->id - oea->id; 503 } 504 505 /* Add an operand entry to *OPS for the tree operand OP. */ 506 507 static void 508 add_to_ops_vec (vec<operand_entry_t> *ops, tree op) 509 { 510 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); 511 512 oe->op = op; 513 oe->rank = get_rank (op); 514 oe->id = next_operand_entry_id++; 515 oe->count = 1; 516 ops->safe_push (oe); 517 } 518 519 /* Add an operand entry to *OPS for the tree operand OP with repeat 520 count REPEAT. */ 521 522 static void 523 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op, 524 HOST_WIDE_INT repeat) 525 { 526 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); 527 528 oe->op = op; 529 oe->rank = get_rank (op); 530 oe->id = next_operand_entry_id++; 531 oe->count = repeat; 532 ops->safe_push (oe); 533 534 reassociate_stats.pows_encountered++; 535 } 536 537 /* Return true if STMT is reassociable operation containing a binary 538 operation with tree code CODE, and is inside LOOP. 
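The statement's result must also have a single use; an intermediate value that is needed elsewhere cannot be absorbed into the chain being rebuilt.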
*/ 539 540 static bool 541 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop) 542 { 543 basic_block bb = gimple_bb (stmt); 544 545 if (gimple_bb (stmt) == NULL) 546 return false; 547 548 if (!flow_bb_inside_loop_p (loop, bb)) 549 return false; 550 551 if (is_gimple_assign (stmt) 552 && gimple_assign_rhs_code (stmt) == code 553 && has_single_use (gimple_assign_lhs (stmt))) 554 return true; 555 556 return false; 557 } 558 559 560 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the 561 operand of the negate operation. Otherwise, return NULL. */ 562 563 static tree 564 get_unary_op (tree name, enum tree_code opcode) 565 { 566 gimple stmt = SSA_NAME_DEF_STMT (name); 567 568 if (!is_gimple_assign (stmt)) 569 return NULL_TREE; 570 571 if (gimple_assign_rhs_code (stmt) == opcode) 572 return gimple_assign_rhs1 (stmt); 573 return NULL_TREE; 574 } 575 576 /* If CURR and LAST are a pair of ops that OPCODE allows us to 577 eliminate through equivalences, do so, remove them from OPS, and 578 return true. Otherwise, return false. */ 579 580 static bool 581 eliminate_duplicate_pair (enum tree_code opcode, 582 vec<operand_entry_t> *ops, 583 bool *all_done, 584 unsigned int i, 585 operand_entry_t curr, 586 operand_entry_t last) 587 { 588 589 /* If we have two of the same op, and the opcode is & |, min, or max, 590 we can eliminate one of them. 591 If we have two of the same op, and the opcode is ^, we can 592 eliminate both of them. */ 593 594 if (last && last->op == curr->op) 595 { 596 switch (opcode) 597 { 598 case MAX_EXPR: 599 case MIN_EXPR: 600 case BIT_IOR_EXPR: 601 case BIT_AND_EXPR: 602 if (dump_file && (dump_flags & TDF_DETAILS)) 603 { 604 fprintf (dump_file, "Equivalence: "); 605 print_generic_expr (dump_file, curr->op, 0); 606 fprintf (dump_file, " [&|minmax] "); 607 print_generic_expr (dump_file, last->op, 0); 608 fprintf (dump_file, " -> "); 609 print_generic_stmt (dump_file, last->op, 0); 610 } 611 612 ops->ordered_remove (i); 613 reassociate_stats.ops_eliminated ++; 614 615 return true; 616 617 case BIT_XOR_EXPR: 618 if (dump_file && (dump_flags & TDF_DETAILS)) 619 { 620 fprintf (dump_file, "Equivalence: "); 621 print_generic_expr (dump_file, curr->op, 0); 622 fprintf (dump_file, " ^ "); 623 print_generic_expr (dump_file, last->op, 0); 624 fprintf (dump_file, " -> nothing\n"); 625 } 626 627 reassociate_stats.ops_eliminated += 2; 628 629 if (ops->length () == 2) 630 { 631 ops->create (0); 632 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op))); 633 *all_done = true; 634 } 635 else 636 { 637 ops->ordered_remove (i-1); 638 ops->ordered_remove (i-1); 639 } 640 641 return true; 642 643 default: 644 break; 645 } 646 } 647 return false; 648 } 649 650 static vec<tree> plus_negates; 651 652 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not 653 expression, look in OPS for a corresponding positive operation to cancel 654 it out. If we find one, remove the other from OPS, replace 655 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise, 656 return false. 
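For example, a + b + -a becomes b + 0, and a + b + ~a becomes b + -1.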
*/ 657 658 static bool 659 eliminate_plus_minus_pair (enum tree_code opcode, 660 vec<operand_entry_t> *ops, 661 unsigned int currindex, 662 operand_entry_t curr) 663 { 664 tree negateop; 665 tree notop; 666 unsigned int i; 667 operand_entry_t oe; 668 669 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME) 670 return false; 671 672 negateop = get_unary_op (curr->op, NEGATE_EXPR); 673 notop = get_unary_op (curr->op, BIT_NOT_EXPR); 674 if (negateop == NULL_TREE && notop == NULL_TREE) 675 return false; 676 677 /* Any non-negated version will have a rank that is one less than 678 the current rank. So once we hit those ranks, if we don't find 679 one, we can stop. */ 680 681 for (i = currindex + 1; 682 ops->iterate (i, &oe) 683 && oe->rank >= curr->rank - 1 ; 684 i++) 685 { 686 if (oe->op == negateop) 687 { 688 689 if (dump_file && (dump_flags & TDF_DETAILS)) 690 { 691 fprintf (dump_file, "Equivalence: "); 692 print_generic_expr (dump_file, negateop, 0); 693 fprintf (dump_file, " + -"); 694 print_generic_expr (dump_file, oe->op, 0); 695 fprintf (dump_file, " -> 0\n"); 696 } 697 698 ops->ordered_remove (i); 699 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op))); 700 ops->ordered_remove (currindex); 701 reassociate_stats.ops_eliminated ++; 702 703 return true; 704 } 705 else if (oe->op == notop) 706 { 707 tree op_type = TREE_TYPE (oe->op); 708 709 if (dump_file && (dump_flags & TDF_DETAILS)) 710 { 711 fprintf (dump_file, "Equivalence: "); 712 print_generic_expr (dump_file, notop, 0); 713 fprintf (dump_file, " + ~"); 714 print_generic_expr (dump_file, oe->op, 0); 715 fprintf (dump_file, " -> -1\n"); 716 } 717 718 ops->ordered_remove (i); 719 add_to_ops_vec (ops, build_int_cst_type (op_type, -1)); 720 ops->ordered_remove (currindex); 721 reassociate_stats.ops_eliminated ++; 722 723 return true; 724 } 725 } 726 727 /* CURR->OP is a negate expr in a plus expr: save it for later 728 inspection in repropagate_negates(). */ 729 if (negateop != NULL_TREE) 730 plus_negates.safe_push (curr->op); 731 732 return false; 733 } 734 735 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a 736 bitwise not expression, look in OPS for a corresponding operand to 737 cancel it out. If we find one, remove the other from OPS, replace 738 OPS[CURRINDEX] with 0, and return true. Otherwise, return 739 false. */ 740 741 static bool 742 eliminate_not_pairs (enum tree_code opcode, 743 vec<operand_entry_t> *ops, 744 unsigned int currindex, 745 operand_entry_t curr) 746 { 747 tree notop; 748 unsigned int i; 749 operand_entry_t oe; 750 751 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) 752 || TREE_CODE (curr->op) != SSA_NAME) 753 return false; 754 755 notop = get_unary_op (curr->op, BIT_NOT_EXPR); 756 if (notop == NULL_TREE) 757 return false; 758 759 /* Any non-not version will have a rank that is one less than 760 the current rank. So once we hit those ranks, if we don't find 761 one, we can stop. 
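(get_rank normally assigns an expression a rank one greater than the highest rank of its operands, so the plain operand of the not cannot appear in OPS with a rank below curr->rank - 1.)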
*/ 762 763 for (i = currindex + 1; 764 ops->iterate (i, &oe) 765 && oe->rank >= curr->rank - 1; 766 i++) 767 { 768 if (oe->op == notop) 769 { 770 if (dump_file && (dump_flags & TDF_DETAILS)) 771 { 772 fprintf (dump_file, "Equivalence: "); 773 print_generic_expr (dump_file, notop, 0); 774 if (opcode == BIT_AND_EXPR) 775 fprintf (dump_file, " & ~"); 776 else if (opcode == BIT_IOR_EXPR) 777 fprintf (dump_file, " | ~"); 778 print_generic_expr (dump_file, oe->op, 0); 779 if (opcode == BIT_AND_EXPR) 780 fprintf (dump_file, " -> 0\n"); 781 else if (opcode == BIT_IOR_EXPR) 782 fprintf (dump_file, " -> -1\n"); 783 } 784 785 if (opcode == BIT_AND_EXPR) 786 oe->op = build_zero_cst (TREE_TYPE (oe->op)); 787 else if (opcode == BIT_IOR_EXPR) 788 oe->op = build_all_ones_cst (TREE_TYPE (oe->op)); 789 790 reassociate_stats.ops_eliminated += ops->length () - 1; 791 ops->truncate (0); 792 ops->quick_push (oe); 793 return true; 794 } 795 } 796 797 return false; 798 } 799 800 /* Use constant value that may be present in OPS to try to eliminate 801 operands. Note that this function is only really used when we've 802 eliminated ops for other reasons, or merged constants. Across 803 single statements, fold already does all of this, plus more. There 804 is little point in duplicating logic, so I've only included the 805 identities that I could ever construct testcases to trigger. */ 806 807 static void 808 eliminate_using_constants (enum tree_code opcode, 809 vec<operand_entry_t> *ops) 810 { 811 operand_entry_t oelast = ops->last (); 812 tree type = TREE_TYPE (oelast->op); 813 814 if (oelast->rank == 0 815 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type))) 816 { 817 switch (opcode) 818 { 819 case BIT_AND_EXPR: 820 if (integer_zerop (oelast->op)) 821 { 822 if (ops->length () != 1) 823 { 824 if (dump_file && (dump_flags & TDF_DETAILS)) 825 fprintf (dump_file, "Found & 0, removing all other ops\n"); 826 827 reassociate_stats.ops_eliminated += ops->length () - 1; 828 829 ops->truncate (0); 830 ops->quick_push (oelast); 831 return; 832 } 833 } 834 else if (integer_all_onesp (oelast->op)) 835 { 836 if (ops->length () != 1) 837 { 838 if (dump_file && (dump_flags & TDF_DETAILS)) 839 fprintf (dump_file, "Found & -1, removing\n"); 840 ops->pop (); 841 reassociate_stats.ops_eliminated++; 842 } 843 } 844 break; 845 case BIT_IOR_EXPR: 846 if (integer_all_onesp (oelast->op)) 847 { 848 if (ops->length () != 1) 849 { 850 if (dump_file && (dump_flags & TDF_DETAILS)) 851 fprintf (dump_file, "Found | -1, removing all other ops\n"); 852 853 reassociate_stats.ops_eliminated += ops->length () - 1; 854 855 ops->truncate (0); 856 ops->quick_push (oelast); 857 return; 858 } 859 } 860 else if (integer_zerop (oelast->op)) 861 { 862 if (ops->length () != 1) 863 { 864 if (dump_file && (dump_flags & TDF_DETAILS)) 865 fprintf (dump_file, "Found | 0, removing\n"); 866 ops->pop (); 867 reassociate_stats.ops_eliminated++; 868 } 869 } 870 break; 871 case MULT_EXPR: 872 if (integer_zerop (oelast->op) 873 || (FLOAT_TYPE_P (type) 874 && !HONOR_NANS (TYPE_MODE (type)) 875 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type)) 876 && real_zerop (oelast->op))) 877 { 878 if (ops->length () != 1) 879 { 880 if (dump_file && (dump_flags & TDF_DETAILS)) 881 fprintf (dump_file, "Found * 0, removing all other ops\n"); 882 883 reassociate_stats.ops_eliminated += ops->length () - 1; 884 ops->truncate (1); 885 ops->quick_push (oelast); 886 return; 887 } 888 } 889 else if (integer_onep (oelast->op) 890 || (FLOAT_TYPE_P (type) 891 && !HONOR_SNANS (TYPE_MODE (type)) 892 && 
real_onep (oelast->op))) 893 { 894 if (ops->length () != 1) 895 { 896 if (dump_file && (dump_flags & TDF_DETAILS)) 897 fprintf (dump_file, "Found * 1, removing\n"); 898 ops->pop (); 899 reassociate_stats.ops_eliminated++; 900 return; 901 } 902 } 903 break; 904 case BIT_XOR_EXPR: 905 case PLUS_EXPR: 906 case MINUS_EXPR: 907 if (integer_zerop (oelast->op) 908 || (FLOAT_TYPE_P (type) 909 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR) 910 && fold_real_zero_addition_p (type, oelast->op, 911 opcode == MINUS_EXPR))) 912 { 913 if (ops->length () != 1) 914 { 915 if (dump_file && (dump_flags & TDF_DETAILS)) 916 fprintf (dump_file, "Found [|^+] 0, removing\n"); 917 ops->pop (); 918 reassociate_stats.ops_eliminated++; 919 return; 920 } 921 } 922 break; 923 default: 924 break; 925 } 926 } 927 } 928 929 930 static void linearize_expr_tree (vec<operand_entry_t> *, gimple, 931 bool, bool); 932 933 /* Structure for tracking and counting operands. */ 934 typedef struct oecount_s { 935 int cnt; 936 int id; 937 enum tree_code oecode; 938 tree op; 939 } oecount; 940 941 942 /* The heap for the oecount hashtable and the sorted list of operands. */ 943 static vec<oecount> cvec; 944 945 /* Hash function for oecount. */ 946 947 static hashval_t 948 oecount_hash (const void *p) 949 { 950 const oecount *c = &cvec[(size_t)p - 42]; 951 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode; 952 } 953 954 /* Comparison function for oecount. */ 955 956 static int 957 oecount_eq (const void *p1, const void *p2) 958 { 959 const oecount *c1 = &cvec[(size_t)p1 - 42]; 960 const oecount *c2 = &cvec[(size_t)p2 - 42]; 961 return (c1->oecode == c2->oecode 962 && c1->op == c2->op); 963 } 964 965 /* Comparison function for qsort sorting oecount elements by count. */ 966 967 static int 968 oecount_cmp (const void *p1, const void *p2) 969 { 970 const oecount *c1 = (const oecount *)p1; 971 const oecount *c2 = (const oecount *)p2; 972 if (c1->cnt != c2->cnt) 973 return c1->cnt - c2->cnt; 974 else 975 /* If counts are identical, use unique IDs to stabilize qsort. */ 976 return c1->id - c2->id; 977 } 978 979 /* Return TRUE iff STMT represents a builtin call that raises OP 980 to some exponent. */ 981 982 static bool 983 stmt_is_power_of_op (gimple stmt, tree op) 984 { 985 tree fndecl; 986 987 if (!is_gimple_call (stmt)) 988 return false; 989 990 fndecl = gimple_call_fndecl (stmt); 991 992 if (!fndecl 993 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) 994 return false; 995 996 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) 997 { 998 CASE_FLT_FN (BUILT_IN_POW): 999 CASE_FLT_FN (BUILT_IN_POWI): 1000 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0)); 1001 1002 default: 1003 return false; 1004 } 1005 } 1006 1007 /* Given STMT which is a __builtin_pow* call, decrement its exponent 1008 in place and return the result. Assumes that stmt_is_power_of_op 1009 was previously called for STMT and returned TRUE. 
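For example, __builtin_pow (x, 3.0) is rewritten to __builtin_pow (x, 2.0) and 2 is returned.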
*/ 1010 1011 static HOST_WIDE_INT 1012 decrement_power (gimple stmt) 1013 { 1014 REAL_VALUE_TYPE c, cint; 1015 HOST_WIDE_INT power; 1016 tree arg1; 1017 1018 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) 1019 { 1020 CASE_FLT_FN (BUILT_IN_POW): 1021 arg1 = gimple_call_arg (stmt, 1); 1022 c = TREE_REAL_CST (arg1); 1023 power = real_to_integer (&c) - 1; 1024 real_from_integer (&cint, VOIDmode, power, 0, 0); 1025 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint)); 1026 return power; 1027 1028 CASE_FLT_FN (BUILT_IN_POWI): 1029 arg1 = gimple_call_arg (stmt, 1); 1030 power = TREE_INT_CST_LOW (arg1) - 1; 1031 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power)); 1032 return power; 1033 1034 default: 1035 gcc_unreachable (); 1036 } 1037 } 1038 1039 /* Find the single immediate use of STMT's LHS, and replace it 1040 with OP. Remove STMT. If STMT's LHS is the same as *DEF, 1041 replace *DEF with OP as well. */ 1042 1043 static void 1044 propagate_op_to_single_use (tree op, gimple stmt, tree *def) 1045 { 1046 tree lhs; 1047 gimple use_stmt; 1048 use_operand_p use; 1049 gimple_stmt_iterator gsi; 1050 1051 if (is_gimple_call (stmt)) 1052 lhs = gimple_call_lhs (stmt); 1053 else 1054 lhs = gimple_assign_lhs (stmt); 1055 1056 gcc_assert (has_single_use (lhs)); 1057 single_imm_use (lhs, &use, &use_stmt); 1058 if (lhs == *def) 1059 *def = op; 1060 SET_USE (use, op); 1061 if (TREE_CODE (op) != SSA_NAME) 1062 update_stmt (use_stmt); 1063 gsi = gsi_for_stmt (stmt); 1064 unlink_stmt_vdef (stmt); 1065 gsi_remove (&gsi, true); 1066 release_defs (stmt); 1067 } 1068 1069 /* Walks the linear chain with result *DEF searching for an operation 1070 with operand OP and code OPCODE removing that from the chain. *DEF 1071 is updated if there is only one operand but no operation left. */ 1072 1073 static void 1074 zero_one_operation (tree *def, enum tree_code opcode, tree op) 1075 { 1076 gimple stmt = SSA_NAME_DEF_STMT (*def); 1077 1078 do 1079 { 1080 tree name; 1081 1082 if (opcode == MULT_EXPR 1083 && stmt_is_power_of_op (stmt, op)) 1084 { 1085 if (decrement_power (stmt) == 1) 1086 propagate_op_to_single_use (op, stmt, def); 1087 return; 1088 } 1089 1090 name = gimple_assign_rhs1 (stmt); 1091 1092 /* If this is the operation we look for and one of the operands 1093 is ours simply propagate the other operand into the stmts 1094 single use. */ 1095 if (gimple_assign_rhs_code (stmt) == opcode 1096 && (name == op 1097 || gimple_assign_rhs2 (stmt) == op)) 1098 { 1099 if (name == op) 1100 name = gimple_assign_rhs2 (stmt); 1101 propagate_op_to_single_use (name, stmt, def); 1102 return; 1103 } 1104 1105 /* We might have a multiply of two __builtin_pow* calls, and 1106 the operand might be hiding in the rightmost one. */ 1107 if (opcode == MULT_EXPR 1108 && gimple_assign_rhs_code (stmt) == opcode 1109 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME 1110 && has_single_use (gimple_assign_rhs2 (stmt))) 1111 { 1112 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 1113 if (stmt_is_power_of_op (stmt2, op)) 1114 { 1115 if (decrement_power (stmt2) == 1) 1116 propagate_op_to_single_use (op, stmt2, def); 1117 return; 1118 } 1119 } 1120 1121 /* Continue walking the chain. */ 1122 gcc_assert (name != op 1123 && TREE_CODE (name) == SSA_NAME); 1124 stmt = SSA_NAME_DEF_STMT (name); 1125 } 1126 while (1); 1127 } 1128 1129 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for 1130 the result. Places the statement after the definition of either 1131 OP1 or OP2. 
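The insertion point is chosen so that both operands are already defined where the new statement is placed.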
Returns the new statement. */ 1132 1133 static gimple 1134 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode) 1135 { 1136 gimple op1def = NULL, op2def = NULL; 1137 gimple_stmt_iterator gsi; 1138 tree op; 1139 gimple sum; 1140 1141 /* Create the addition statement. */ 1142 op = make_ssa_name (type, NULL); 1143 sum = gimple_build_assign_with_ops (opcode, op, op1, op2); 1144 1145 /* Find an insertion place and insert. */ 1146 if (TREE_CODE (op1) == SSA_NAME) 1147 op1def = SSA_NAME_DEF_STMT (op1); 1148 if (TREE_CODE (op2) == SSA_NAME) 1149 op2def = SSA_NAME_DEF_STMT (op2); 1150 if ((!op1def || gimple_nop_p (op1def)) 1151 && (!op2def || gimple_nop_p (op2def))) 1152 { 1153 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR)); 1154 gsi_insert_before (&gsi, sum, GSI_NEW_STMT); 1155 } 1156 else if ((!op1def || gimple_nop_p (op1def)) 1157 || (op2def && !gimple_nop_p (op2def) 1158 && stmt_dominates_stmt_p (op1def, op2def))) 1159 { 1160 if (gimple_code (op2def) == GIMPLE_PHI) 1161 { 1162 gsi = gsi_after_labels (gimple_bb (op2def)); 1163 gsi_insert_before (&gsi, sum, GSI_NEW_STMT); 1164 } 1165 else 1166 { 1167 if (!stmt_ends_bb_p (op2def)) 1168 { 1169 gsi = gsi_for_stmt (op2def); 1170 gsi_insert_after (&gsi, sum, GSI_NEW_STMT); 1171 } 1172 else 1173 { 1174 edge e; 1175 edge_iterator ei; 1176 1177 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs) 1178 if (e->flags & EDGE_FALLTHRU) 1179 gsi_insert_on_edge_immediate (e, sum); 1180 } 1181 } 1182 } 1183 else 1184 { 1185 if (gimple_code (op1def) == GIMPLE_PHI) 1186 { 1187 gsi = gsi_after_labels (gimple_bb (op1def)); 1188 gsi_insert_before (&gsi, sum, GSI_NEW_STMT); 1189 } 1190 else 1191 { 1192 if (!stmt_ends_bb_p (op1def)) 1193 { 1194 gsi = gsi_for_stmt (op1def); 1195 gsi_insert_after (&gsi, sum, GSI_NEW_STMT); 1196 } 1197 else 1198 { 1199 edge e; 1200 edge_iterator ei; 1201 1202 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs) 1203 if (e->flags & EDGE_FALLTHRU) 1204 gsi_insert_on_edge_immediate (e, sum); 1205 } 1206 } 1207 } 1208 update_stmt (sum); 1209 1210 return sum; 1211 } 1212 1213 /* Perform un-distribution of divisions and multiplications. 1214 A * X + B * X is transformed into (A + B) * X and A / X + B / X 1215 to (A + B) / X for real X. 1216 1217 The algorithm is organized as follows. 1218 1219 - First we walk the addition chain *OPS looking for summands that 1220 are defined by a multiplication or a real division. This results 1221 in the candidates bitmap with relevant indices into *OPS. 1222 1223 - Second we build the chains of multiplications or divisions for 1224 these candidates, counting the number of occurrences of (operand, code) 1225 pairs in all of the candidates chains. 1226 1227 - Third we sort the (operand, code) pairs by number of occurrence and 1228 process them starting with the pair with the most uses. 1229 1230 * For each such pair we walk the candidates again to build a 1231 second candidate bitmap noting all multiplication/division chains 1232 that have at least one occurrence of (operand, code). 1233 1234 * We build an alternate addition chain only covering these 1235 candidates with one (operand, code) operation removed from their 1236 multiplication/division chain. 1237 1238 * The first candidate gets replaced by the alternate addition chain 1239 multiplied/divided by the operand. 1240 1241 * All candidate chains get disabled for further processing and 1242 processing of (operand, code) pairs continues. 
1243 1244 The alternate addition chains built are re-processed by the main 1245 reassociation algorithm which allows optimizing a * x * y + b * y * x 1246 to (a + b ) * x * y in one invocation of the reassociation pass. */ 1247 1248 static bool 1249 undistribute_ops_list (enum tree_code opcode, 1250 vec<operand_entry_t> *ops, struct loop *loop) 1251 { 1252 unsigned int length = ops->length (); 1253 operand_entry_t oe1; 1254 unsigned i, j; 1255 sbitmap candidates, candidates2; 1256 unsigned nr_candidates, nr_candidates2; 1257 sbitmap_iterator sbi0; 1258 vec<operand_entry_t> *subops; 1259 htab_t ctable; 1260 bool changed = false; 1261 int next_oecount_id = 0; 1262 1263 if (length <= 1 1264 || opcode != PLUS_EXPR) 1265 return false; 1266 1267 /* Build a list of candidates to process. */ 1268 candidates = sbitmap_alloc (length); 1269 bitmap_clear (candidates); 1270 nr_candidates = 0; 1271 FOR_EACH_VEC_ELT (*ops, i, oe1) 1272 { 1273 enum tree_code dcode; 1274 gimple oe1def; 1275 1276 if (TREE_CODE (oe1->op) != SSA_NAME) 1277 continue; 1278 oe1def = SSA_NAME_DEF_STMT (oe1->op); 1279 if (!is_gimple_assign (oe1def)) 1280 continue; 1281 dcode = gimple_assign_rhs_code (oe1def); 1282 if ((dcode != MULT_EXPR 1283 && dcode != RDIV_EXPR) 1284 || !is_reassociable_op (oe1def, dcode, loop)) 1285 continue; 1286 1287 bitmap_set_bit (candidates, i); 1288 nr_candidates++; 1289 } 1290 1291 if (nr_candidates < 2) 1292 { 1293 sbitmap_free (candidates); 1294 return false; 1295 } 1296 1297 if (dump_file && (dump_flags & TDF_DETAILS)) 1298 { 1299 fprintf (dump_file, "searching for un-distribute opportunities "); 1300 print_generic_expr (dump_file, 1301 (*ops)[bitmap_first_set_bit (candidates)]->op, 0); 1302 fprintf (dump_file, " %d\n", nr_candidates); 1303 } 1304 1305 /* Build linearized sub-operand lists and the counting table. */ 1306 cvec.create (0); 1307 ctable = htab_create (15, oecount_hash, oecount_eq, NULL); 1308 /* ??? Macro arguments cannot have multi-argument template types in 1309 them. This typedef is needed to workaround that limitation. */ 1310 typedef vec<operand_entry_t> vec_operand_entry_t_heap; 1311 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ()); 1312 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) 1313 { 1314 gimple oedef; 1315 enum tree_code oecode; 1316 unsigned j; 1317 1318 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op); 1319 oecode = gimple_assign_rhs_code (oedef); 1320 linearize_expr_tree (&subops[i], oedef, 1321 associative_tree_code (oecode), false); 1322 1323 FOR_EACH_VEC_ELT (subops[i], j, oe1) 1324 { 1325 oecount c; 1326 void **slot; 1327 size_t idx; 1328 c.oecode = oecode; 1329 c.cnt = 1; 1330 c.id = next_oecount_id++; 1331 c.op = oe1->op; 1332 cvec.safe_push (c); 1333 idx = cvec.length () + 41; 1334 slot = htab_find_slot (ctable, (void *)idx, INSERT); 1335 if (!*slot) 1336 { 1337 *slot = (void *)idx; 1338 } 1339 else 1340 { 1341 cvec.pop (); 1342 cvec[(size_t)*slot - 42].cnt++; 1343 } 1344 } 1345 } 1346 htab_delete (ctable); 1347 1348 /* Sort the counting table. */ 1349 cvec.qsort (oecount_cmp); 1350 1351 if (dump_file && (dump_flags & TDF_DETAILS)) 1352 { 1353 oecount *c; 1354 fprintf (dump_file, "Candidates:\n"); 1355 FOR_EACH_VEC_ELT (cvec, j, c) 1356 { 1357 fprintf (dump_file, " %u %s: ", c->cnt, 1358 c->oecode == MULT_EXPR 1359 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?"); 1360 print_generic_expr (dump_file, c->op, 0); 1361 fprintf (dump_file, "\n"); 1362 } 1363 } 1364 1365 /* Process the (operand, code) pairs in order of most occurence. 
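CVEC is sorted by increasing count, so the most frequent pair sits at the end of the vector and is popped off once it has been handled.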
*/ 1366 candidates2 = sbitmap_alloc (length); 1367 while (!cvec.is_empty ()) 1368 { 1369 oecount *c = &cvec.last (); 1370 if (c->cnt < 2) 1371 break; 1372 1373 /* Now collect the operands in the outer chain that contain 1374 the common operand in their inner chain. */ 1375 bitmap_clear (candidates2); 1376 nr_candidates2 = 0; 1377 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) 1378 { 1379 gimple oedef; 1380 enum tree_code oecode; 1381 unsigned j; 1382 tree op = (*ops)[i]->op; 1383 1384 /* If we undistributed in this chain already this may be 1385 a constant. */ 1386 if (TREE_CODE (op) != SSA_NAME) 1387 continue; 1388 1389 oedef = SSA_NAME_DEF_STMT (op); 1390 oecode = gimple_assign_rhs_code (oedef); 1391 if (oecode != c->oecode) 1392 continue; 1393 1394 FOR_EACH_VEC_ELT (subops[i], j, oe1) 1395 { 1396 if (oe1->op == c->op) 1397 { 1398 bitmap_set_bit (candidates2, i); 1399 ++nr_candidates2; 1400 break; 1401 } 1402 } 1403 } 1404 1405 if (nr_candidates2 >= 2) 1406 { 1407 operand_entry_t oe1, oe2; 1408 gimple prod; 1409 int first = bitmap_first_set_bit (candidates2); 1410 1411 /* Build the new addition chain. */ 1412 oe1 = (*ops)[first]; 1413 if (dump_file && (dump_flags & TDF_DETAILS)) 1414 { 1415 fprintf (dump_file, "Building ("); 1416 print_generic_expr (dump_file, oe1->op, 0); 1417 } 1418 zero_one_operation (&oe1->op, c->oecode, c->op); 1419 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0) 1420 { 1421 gimple sum; 1422 oe2 = (*ops)[i]; 1423 if (dump_file && (dump_flags & TDF_DETAILS)) 1424 { 1425 fprintf (dump_file, " + "); 1426 print_generic_expr (dump_file, oe2->op, 0); 1427 } 1428 zero_one_operation (&oe2->op, c->oecode, c->op); 1429 sum = build_and_add_sum (TREE_TYPE (oe1->op), 1430 oe1->op, oe2->op, opcode); 1431 oe2->op = build_zero_cst (TREE_TYPE (oe2->op)); 1432 oe2->rank = 0; 1433 oe1->op = gimple_get_lhs (sum); 1434 } 1435 1436 /* Apply the multiplication/division. */ 1437 prod = build_and_add_sum (TREE_TYPE (oe1->op), 1438 oe1->op, c->op, c->oecode); 1439 if (dump_file && (dump_flags & TDF_DETAILS)) 1440 { 1441 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/"); 1442 print_generic_expr (dump_file, c->op, 0); 1443 fprintf (dump_file, "\n"); 1444 } 1445 1446 /* Record it in the addition chain and disable further 1447 undistribution with this op. */ 1448 oe1->op = gimple_assign_lhs (prod); 1449 oe1->rank = get_rank (oe1->op); 1450 subops[first].release (); 1451 1452 changed = true; 1453 } 1454 1455 cvec.pop (); 1456 } 1457 1458 for (i = 0; i < ops->length (); ++i) 1459 subops[i].release (); 1460 free (subops); 1461 cvec.release (); 1462 sbitmap_free (candidates); 1463 sbitmap_free (candidates2); 1464 1465 return changed; 1466 } 1467 1468 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison 1469 expression, examine the other OPS to see if any of them are comparisons 1470 of the same values, which we may be able to combine or eliminate. 1471 For example, we can rewrite (a < b) | (a == b) as (a <= b). */ 1472 1473 static bool 1474 eliminate_redundant_comparison (enum tree_code opcode, 1475 vec<operand_entry_t> *ops, 1476 unsigned int currindex, 1477 operand_entry_t curr) 1478 { 1479 tree op1, op2; 1480 enum tree_code lcode, rcode; 1481 gimple def1, def2; 1482 int i; 1483 operand_entry_t oe; 1484 1485 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) 1486 return false; 1487 1488 /* Check that CURR is a comparison. 
*/ 1489 if (TREE_CODE (curr->op) != SSA_NAME) 1490 return false; 1491 def1 = SSA_NAME_DEF_STMT (curr->op); 1492 if (!is_gimple_assign (def1)) 1493 return false; 1494 lcode = gimple_assign_rhs_code (def1); 1495 if (TREE_CODE_CLASS (lcode) != tcc_comparison) 1496 return false; 1497 op1 = gimple_assign_rhs1 (def1); 1498 op2 = gimple_assign_rhs2 (def1); 1499 1500 /* Now look for a similar comparison in the remaining OPS. */ 1501 for (i = currindex + 1; ops->iterate (i, &oe); i++) 1502 { 1503 tree t; 1504 1505 if (TREE_CODE (oe->op) != SSA_NAME) 1506 continue; 1507 def2 = SSA_NAME_DEF_STMT (oe->op); 1508 if (!is_gimple_assign (def2)) 1509 continue; 1510 rcode = gimple_assign_rhs_code (def2); 1511 if (TREE_CODE_CLASS (rcode) != tcc_comparison) 1512 continue; 1513 1514 /* If we got here, we have a match. See if we can combine the 1515 two comparisons. */ 1516 if (opcode == BIT_IOR_EXPR) 1517 t = maybe_fold_or_comparisons (lcode, op1, op2, 1518 rcode, gimple_assign_rhs1 (def2), 1519 gimple_assign_rhs2 (def2)); 1520 else 1521 t = maybe_fold_and_comparisons (lcode, op1, op2, 1522 rcode, gimple_assign_rhs1 (def2), 1523 gimple_assign_rhs2 (def2)); 1524 if (!t) 1525 continue; 1526 1527 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons 1528 always give us a boolean_type_node value back. If the original 1529 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type, 1530 we need to convert. */ 1531 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t))) 1532 t = fold_convert (TREE_TYPE (curr->op), t); 1533 1534 if (TREE_CODE (t) != INTEGER_CST 1535 && !operand_equal_p (t, curr->op, 0)) 1536 { 1537 enum tree_code subcode; 1538 tree newop1, newop2; 1539 if (!COMPARISON_CLASS_P (t)) 1540 continue; 1541 extract_ops_from_tree (t, &subcode, &newop1, &newop2); 1542 STRIP_USELESS_TYPE_CONVERSION (newop1); 1543 STRIP_USELESS_TYPE_CONVERSION (newop2); 1544 if (!is_gimple_val (newop1) || !is_gimple_val (newop2)) 1545 continue; 1546 } 1547 1548 if (dump_file && (dump_flags & TDF_DETAILS)) 1549 { 1550 fprintf (dump_file, "Equivalence: "); 1551 print_generic_expr (dump_file, curr->op, 0); 1552 fprintf (dump_file, " %s ", op_symbol_code (opcode)); 1553 print_generic_expr (dump_file, oe->op, 0); 1554 fprintf (dump_file, " -> "); 1555 print_generic_expr (dump_file, t, 0); 1556 fprintf (dump_file, "\n"); 1557 } 1558 1559 /* Now we can delete oe, as it has been subsumed by the new combined 1560 expression t. */ 1561 ops->ordered_remove (i); 1562 reassociate_stats.ops_eliminated ++; 1563 1564 /* If t is the same as curr->op, we're done. Otherwise we must 1565 replace curr->op with t. Special case is if we got a constant 1566 back, in which case we add it to the end instead of in place of 1567 the current entry. 
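Constants belong at the end of the operand list, where the constant handling in optimize_ops_list expects to find them.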
*/ 1568 if (TREE_CODE (t) == INTEGER_CST) 1569 { 1570 ops->ordered_remove (currindex); 1571 add_to_ops_vec (ops, t); 1572 } 1573 else if (!operand_equal_p (t, curr->op, 0)) 1574 { 1575 gimple sum; 1576 enum tree_code subcode; 1577 tree newop1; 1578 tree newop2; 1579 gcc_assert (COMPARISON_CLASS_P (t)); 1580 extract_ops_from_tree (t, &subcode, &newop1, &newop2); 1581 STRIP_USELESS_TYPE_CONVERSION (newop1); 1582 STRIP_USELESS_TYPE_CONVERSION (newop2); 1583 gcc_checking_assert (is_gimple_val (newop1) 1584 && is_gimple_val (newop2)); 1585 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode); 1586 curr->op = gimple_get_lhs (sum); 1587 } 1588 return true; 1589 } 1590 1591 return false; 1592 } 1593 1594 /* Perform various identities and other optimizations on the list of 1595 operand entries, stored in OPS. The tree code for the binary 1596 operation between all the operands is OPCODE. */ 1597 1598 static void 1599 optimize_ops_list (enum tree_code opcode, 1600 vec<operand_entry_t> *ops) 1601 { 1602 unsigned int length = ops->length (); 1603 unsigned int i; 1604 operand_entry_t oe; 1605 operand_entry_t oelast = NULL; 1606 bool iterate = false; 1607 1608 if (length == 1) 1609 return; 1610 1611 oelast = ops->last (); 1612 1613 /* If the last two are constants, pop the constants off, merge them 1614 and try the next two. */ 1615 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) 1616 { 1617 operand_entry_t oelm1 = (*ops)[length - 2]; 1618 1619 if (oelm1->rank == 0 1620 && is_gimple_min_invariant (oelm1->op) 1621 && useless_type_conversion_p (TREE_TYPE (oelm1->op), 1622 TREE_TYPE (oelast->op))) 1623 { 1624 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op), 1625 oelm1->op, oelast->op); 1626 1627 if (folded && is_gimple_min_invariant (folded)) 1628 { 1629 if (dump_file && (dump_flags & TDF_DETAILS)) 1630 fprintf (dump_file, "Merging constants\n"); 1631 1632 ops->pop (); 1633 ops->pop (); 1634 1635 add_to_ops_vec (ops, folded); 1636 reassociate_stats.constants_eliminated++; 1637 1638 optimize_ops_list (opcode, ops); 1639 return; 1640 } 1641 } 1642 } 1643 1644 eliminate_using_constants (opcode, ops); 1645 oelast = NULL; 1646 1647 for (i = 0; ops->iterate (i, &oe);) 1648 { 1649 bool done = false; 1650 1651 if (eliminate_not_pairs (opcode, ops, i, oe)) 1652 return; 1653 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast) 1654 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)) 1655 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe))) 1656 { 1657 if (done) 1658 return; 1659 iterate = true; 1660 oelast = NULL; 1661 continue; 1662 } 1663 oelast = oe; 1664 i++; 1665 } 1666 1667 length = ops->length (); 1668 oelast = ops->last (); 1669 1670 if (iterate) 1671 optimize_ops_list (opcode, ops); 1672 } 1673 1674 /* The following functions are subroutines to optimize_range_tests and allow 1675 it to try to change a logical combination of comparisons into a range 1676 test. 1677 1678 For example, both 1679 X == 2 || X == 5 || X == 3 || X == 4 1680 and 1681 X >= 2 && X <= 5 1682 are converted to 1683 (unsigned) (X - 2) <= 3 1684 1685 For more information see comments above fold_test_range in fold-const.c, 1686 this implementation is for GIMPLE. */ 1687 1688 struct range_entry 1689 { 1690 tree exp; 1691 tree low; 1692 tree high; 1693 bool in_p; 1694 bool strict_overflow_p; 1695 unsigned int idx, next; 1696 }; 1697 1698 /* This is similar to make_range in fold-const.c, but on top of 1699 GIMPLE instead of trees. 
If EXP is non-NULL, it should be 1700 an SSA_NAME and STMT argument is ignored, otherwise STMT 1701 argument should be a GIMPLE_COND. */ 1702 1703 static void 1704 init_range_entry (struct range_entry *r, tree exp, gimple stmt) 1705 { 1706 int in_p; 1707 tree low, high; 1708 bool is_bool, strict_overflow_p; 1709 1710 r->exp = NULL_TREE; 1711 r->in_p = false; 1712 r->strict_overflow_p = false; 1713 r->low = NULL_TREE; 1714 r->high = NULL_TREE; 1715 if (exp != NULL_TREE 1716 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))) 1717 return; 1718 1719 /* Start with simply saying "EXP != 0" and then look at the code of EXP 1720 and see if we can refine the range. Some of the cases below may not 1721 happen, but it doesn't seem worth worrying about this. We "continue" 1722 the outer loop when we've changed something; otherwise we "break" 1723 the switch, which will "break" the while. */ 1724 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node; 1725 high = low; 1726 in_p = 0; 1727 strict_overflow_p = false; 1728 is_bool = false; 1729 if (exp == NULL_TREE) 1730 is_bool = true; 1731 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) 1732 { 1733 if (TYPE_UNSIGNED (TREE_TYPE (exp))) 1734 is_bool = true; 1735 else 1736 return; 1737 } 1738 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) 1739 is_bool = true; 1740 1741 while (1) 1742 { 1743 enum tree_code code; 1744 tree arg0, arg1, exp_type; 1745 tree nexp; 1746 location_t loc; 1747 1748 if (exp != NULL_TREE) 1749 { 1750 if (TREE_CODE (exp) != SSA_NAME) 1751 break; 1752 1753 stmt = SSA_NAME_DEF_STMT (exp); 1754 if (!is_gimple_assign (stmt)) 1755 break; 1756 1757 code = gimple_assign_rhs_code (stmt); 1758 arg0 = gimple_assign_rhs1 (stmt); 1759 arg1 = gimple_assign_rhs2 (stmt); 1760 exp_type = TREE_TYPE (exp); 1761 } 1762 else 1763 { 1764 code = gimple_cond_code (stmt); 1765 arg0 = gimple_cond_lhs (stmt); 1766 arg1 = gimple_cond_rhs (stmt); 1767 exp_type = boolean_type_node; 1768 } 1769 1770 if (TREE_CODE (arg0) != SSA_NAME) 1771 break; 1772 loc = gimple_location (stmt); 1773 switch (code) 1774 { 1775 case BIT_NOT_EXPR: 1776 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE 1777 /* Ensure the range is either +[-,0], +[0,0], 1778 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or 1779 -[1,1]. If it is e.g. +[-,-] or -[-,-] 1780 or similar expression of unconditional true or 1781 false, it should not be negated. 
*/ 1782 && ((high && integer_zerop (high)) 1783 || (low && integer_onep (low)))) 1784 { 1785 in_p = !in_p; 1786 exp = arg0; 1787 continue; 1788 } 1789 break; 1790 case SSA_NAME: 1791 exp = arg0; 1792 continue; 1793 CASE_CONVERT: 1794 if (is_bool) 1795 goto do_default; 1796 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) 1797 { 1798 if (TYPE_UNSIGNED (TREE_TYPE (arg0))) 1799 is_bool = true; 1800 else 1801 return; 1802 } 1803 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) 1804 is_bool = true; 1805 goto do_default; 1806 case EQ_EXPR: 1807 case NE_EXPR: 1808 case LT_EXPR: 1809 case LE_EXPR: 1810 case GE_EXPR: 1811 case GT_EXPR: 1812 is_bool = true; 1813 /* FALLTHRU */ 1814 default: 1815 if (!is_bool) 1816 return; 1817 do_default: 1818 nexp = make_range_step (loc, code, arg0, arg1, exp_type, 1819 &low, &high, &in_p, 1820 &strict_overflow_p); 1821 if (nexp != NULL_TREE) 1822 { 1823 exp = nexp; 1824 gcc_assert (TREE_CODE (exp) == SSA_NAME); 1825 continue; 1826 } 1827 break; 1828 } 1829 break; 1830 } 1831 if (is_bool) 1832 { 1833 r->exp = exp; 1834 r->in_p = in_p; 1835 r->low = low; 1836 r->high = high; 1837 r->strict_overflow_p = strict_overflow_p; 1838 } 1839 } 1840 1841 /* Comparison function for qsort. Sort entries 1842 without SSA_NAME exp first, then with SSA_NAMEs sorted 1843 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs 1844 by increasing ->low and if ->low is the same, by increasing 1845 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE 1846 maximum. */ 1847 1848 static int 1849 range_entry_cmp (const void *a, const void *b) 1850 { 1851 const struct range_entry *p = (const struct range_entry *) a; 1852 const struct range_entry *q = (const struct range_entry *) b; 1853 1854 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) 1855 { 1856 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 1857 { 1858 /* Group range_entries for the same SSA_NAME together. */ 1859 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) 1860 return -1; 1861 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) 1862 return 1; 1863 /* If ->low is different, NULL low goes first, then by 1864 ascending low. */ 1865 if (p->low != NULL_TREE) 1866 { 1867 if (q->low != NULL_TREE) 1868 { 1869 tree tem = fold_binary (LT_EXPR, boolean_type_node, 1870 p->low, q->low); 1871 if (tem && integer_onep (tem)) 1872 return -1; 1873 tem = fold_binary (GT_EXPR, boolean_type_node, 1874 p->low, q->low); 1875 if (tem && integer_onep (tem)) 1876 return 1; 1877 } 1878 else 1879 return 1; 1880 } 1881 else if (q->low != NULL_TREE) 1882 return -1; 1883 /* If ->high is different, NULL high goes last, before that by 1884 ascending high. */ 1885 if (p->high != NULL_TREE) 1886 { 1887 if (q->high != NULL_TREE) 1888 { 1889 tree tem = fold_binary (LT_EXPR, boolean_type_node, 1890 p->high, q->high); 1891 if (tem && integer_onep (tem)) 1892 return -1; 1893 tem = fold_binary (GT_EXPR, boolean_type_node, 1894 p->high, q->high); 1895 if (tem && integer_onep (tem)) 1896 return 1; 1897 } 1898 else 1899 return -1; 1900 } 1901 else if (q->high != NULL_TREE) 1902 return 1; 1903 /* If both ranges are the same, sort below by ascending idx. */ 1904 } 1905 else 1906 return 1; 1907 } 1908 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 1909 return -1; 1910 1911 if (p->idx < q->idx) 1912 return -1; 1913 else 1914 { 1915 gcc_checking_assert (p->idx > q->idx); 1916 return 1; 1917 } 1918 } 1919 1920 /* Helper routine of optimize_range_test. 
1921 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for 1922 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, 1923 OPCODE and OPS are arguments of optimize_range_tests. Return 1924 true if the range merge has been successful. 1925 If OPCODE is ERROR_MARK, this is called from within 1926 maybe_optimize_range_tests and is performing inter-bb range optimization. 1927 Changes should be then performed right away, and whether an op is 1928 BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */ 1929 1930 static bool 1931 update_range_test (struct range_entry *range, struct range_entry *otherrange, 1932 unsigned int count, enum tree_code opcode, 1933 vec<operand_entry_t> *ops, tree exp, bool in_p, 1934 tree low, tree high, bool strict_overflow_p) 1935 { 1936 operand_entry_t oe = (*ops)[range->idx]; 1937 tree op = oe->op; 1938 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id)); 1939 location_t loc = gimple_location (stmt); 1940 tree optype = op ? TREE_TYPE (op) : boolean_type_node; 1941 tree tem = build_range_check (loc, optype, exp, in_p, low, high); 1942 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 1943 gimple_stmt_iterator gsi; 1944 1945 if (tem == NULL_TREE) 1946 return false; 1947 1948 if (strict_overflow_p && issue_strict_overflow_warning (wc)) 1949 warning_at (loc, OPT_Wstrict_overflow, 1950 "assuming signed overflow does not occur " 1951 "when simplifying range test"); 1952 1953 if (dump_file && (dump_flags & TDF_DETAILS)) 1954 { 1955 struct range_entry *r; 1956 fprintf (dump_file, "Optimizing range tests "); 1957 print_generic_expr (dump_file, range->exp, 0); 1958 fprintf (dump_file, " %c[", range->in_p ? '+' : '-'); 1959 print_generic_expr (dump_file, range->low, 0); 1960 fprintf (dump_file, ", "); 1961 print_generic_expr (dump_file, range->high, 0); 1962 fprintf (dump_file, "]"); 1963 for (r = otherrange; r < otherrange + count; r++) 1964 { 1965 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-'); 1966 print_generic_expr (dump_file, r->low, 0); 1967 fprintf (dump_file, ", "); 1968 print_generic_expr (dump_file, r->high, 0); 1969 fprintf (dump_file, "]"); 1970 } 1971 fprintf (dump_file, "\n into "); 1972 print_generic_expr (dump_file, tem, 0); 1973 fprintf (dump_file, "\n"); 1974 } 1975 1976 if (opcode == BIT_IOR_EXPR 1977 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 1978 tem = invert_truthvalue_loc (loc, tem); 1979 1980 tem = fold_convert_loc (loc, optype, tem); 1981 gsi = gsi_for_stmt (stmt); 1982 /* In rare cases range->exp can be equal to lhs of stmt. 1983 In that case we have to insert after the stmt rather then before 1984 it. */ 1985 if (op == range->exp) 1986 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false, 1987 GSI_SAME_STMT); 1988 else 1989 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true, 1990 GSI_SAME_STMT); 1991 1992 /* If doing inter-bb range test optimization, update the 1993 stmts immediately. Start with changing the first range test 1994 immediate use to the new value (TEM), or, if the first range 1995 test is a GIMPLE_COND stmt, change that condition. 
*/ 1996 if (opcode == ERROR_MARK) 1997 { 1998 if (op) 1999 { 2000 imm_use_iterator iter; 2001 use_operand_p use_p; 2002 gimple use_stmt; 2003 2004 FOR_EACH_IMM_USE_STMT (use_stmt, iter, op) 2005 { 2006 if (is_gimple_debug (use_stmt)) 2007 continue; 2008 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 2009 SET_USE (use_p, tem); 2010 update_stmt (use_stmt); 2011 } 2012 } 2013 else 2014 { 2015 gimple_cond_set_code (stmt, NE_EXPR); 2016 gimple_cond_set_lhs (stmt, tem); 2017 gimple_cond_set_rhs (stmt, boolean_false_node); 2018 update_stmt (stmt); 2019 } 2020 } 2021 oe->op = tem; 2022 range->exp = exp; 2023 range->low = low; 2024 range->high = high; 2025 range->in_p = in_p; 2026 range->strict_overflow_p = false; 2027 2028 for (range = otherrange; range < otherrange + count; range++) 2029 { 2030 oe = (*ops)[range->idx]; 2031 /* Now change all the other range test immediate uses, so that 2032 those tests will be optimized away. */ 2033 if (opcode == ERROR_MARK) 2034 { 2035 if (oe->op) 2036 { 2037 imm_use_iterator iter; 2038 use_operand_p use_p; 2039 gimple use_stmt; 2040 2041 FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op) 2042 { 2043 if (is_gimple_debug (use_stmt)) 2044 continue; 2045 /* If imm use of _8 is a statement like _7 = _8 | _9;, 2046 adjust it into _7 = _9;. */ 2047 if (is_gimple_assign (use_stmt) 2048 && gimple_assign_rhs_code (use_stmt) == oe->rank) 2049 { 2050 tree expr = NULL_TREE; 2051 if (oe->op == gimple_assign_rhs1 (use_stmt)) 2052 expr = gimple_assign_rhs2 (use_stmt); 2053 else if (oe->op == gimple_assign_rhs2 (use_stmt)) 2054 expr = gimple_assign_rhs1 (use_stmt); 2055 if (expr 2056 && expr != oe->op 2057 && TREE_CODE (expr) == SSA_NAME) 2058 { 2059 gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt); 2060 gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME, 2061 expr, NULL_TREE); 2062 update_stmt (use_stmt); 2063 continue; 2064 } 2065 } 2066 /* If imm use of _8 is a statement like _7 = (int) _8;, 2067 adjust it into _7 = 0; or _7 = 1;. */ 2068 if (gimple_assign_cast_p (use_stmt) 2069 && oe->op == gimple_assign_rhs1 (use_stmt)) 2070 { 2071 tree lhs = gimple_assign_lhs (use_stmt); 2072 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))) 2073 { 2074 gimple_stmt_iterator gsi2 2075 = gsi_for_stmt (use_stmt); 2076 tree expr = build_int_cst (TREE_TYPE (lhs), 2077 oe->rank == BIT_IOR_EXPR 2078 ? 0 : 1); 2079 gimple_assign_set_rhs_with_ops (&gsi2, 2080 INTEGER_CST, 2081 expr, NULL_TREE); 2082 update_stmt (use_stmt); 2083 continue; 2084 } 2085 } 2086 /* Otherwise replace the use with 0 or 1. */ 2087 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 2088 SET_USE (use_p, 2089 build_int_cst (TREE_TYPE (oe->op), 2090 oe->rank == BIT_IOR_EXPR 2091 ? 0 : 1)); 2092 update_stmt (use_stmt); 2093 } 2094 } 2095 else 2096 { 2097 /* If range test was a GIMPLE_COND, simply change it 2098 into an always false or always true condition. */ 2099 stmt = last_stmt (BASIC_BLOCK (oe->id)); 2100 if (oe->rank == BIT_IOR_EXPR) 2101 gimple_cond_make_false (stmt); 2102 else 2103 gimple_cond_make_true (stmt); 2104 update_stmt (stmt); 2105 } 2106 } 2107 oe->op = error_mark_node; 2108 range->exp = NULL_TREE; 2109 } 2110 return true; 2111 } 2112 2113 /* Optimize range tests, similarly how fold_range_test optimizes 2114 it on trees. The tree code for the binary 2115 operation between all the operands is OPCODE. 2116 If OPCODE is ERROR_MARK, optimize_range_tests is called from within 2117 maybe_optimize_range_tests for inter-bb range optimization. 
2118 In that case if oe->op is NULL, oe->id is bb->index whose 2119 GIMPLE_COND is && or ||ed into the test, and oe->rank says 2120 the actual opcode. */ 2121 2122 static void 2123 optimize_range_tests (enum tree_code opcode, 2124 vec<operand_entry_t> *ops) 2125 { 2126 unsigned int length = ops->length (), i, j, first; 2127 operand_entry_t oe; 2128 struct range_entry *ranges; 2129 bool any_changes = false; 2130 2131 if (length == 1) 2132 return; 2133 2134 ranges = XNEWVEC (struct range_entry, length); 2135 for (i = 0; i < length; i++) 2136 { 2137 oe = (*ops)[i]; 2138 ranges[i].idx = i; 2139 init_range_entry (ranges + i, oe->op, 2140 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id))); 2141 /* For | invert it now, we will invert it again before emitting 2142 the optimized expression. */ 2143 if (opcode == BIT_IOR_EXPR 2144 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 2145 ranges[i].in_p = !ranges[i].in_p; 2146 } 2147 2148 qsort (ranges, length, sizeof (*ranges), range_entry_cmp); 2149 for (i = 0; i < length; i++) 2150 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) 2151 break; 2152 2153 /* Try to merge ranges. */ 2154 for (first = i; i < length; i++) 2155 { 2156 tree low = ranges[i].low; 2157 tree high = ranges[i].high; 2158 int in_p = ranges[i].in_p; 2159 bool strict_overflow_p = ranges[i].strict_overflow_p; 2160 int update_fail_count = 0; 2161 2162 for (j = i + 1; j < length; j++) 2163 { 2164 if (ranges[i].exp != ranges[j].exp) 2165 break; 2166 if (!merge_ranges (&in_p, &low, &high, in_p, low, high, 2167 ranges[j].in_p, ranges[j].low, ranges[j].high)) 2168 break; 2169 strict_overflow_p |= ranges[j].strict_overflow_p; 2170 } 2171 2172 if (j == i + 1) 2173 continue; 2174 2175 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode, 2176 ops, ranges[i].exp, in_p, low, high, 2177 strict_overflow_p)) 2178 { 2179 i = j - 1; 2180 any_changes = true; 2181 } 2182 /* Avoid quadratic complexity if all merge_ranges calls would succeed, 2183 while update_range_test would fail. */ 2184 else if (update_fail_count == 64) 2185 i = j - 1; 2186 else 2187 ++update_fail_count; 2188 } 2189 2190 /* Optimize X == CST1 || X == CST2 2191 if popcount (CST1 ^ CST2) == 1 into 2192 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). 2193 Similarly for ranges. E.g. 2194 X != 2 && X != 3 && X != 10 && X != 11 2195 will be transformed by the above loop into 2196 (X - 2U) <= 1U && (X - 10U) <= 1U 2197 and this loop can transform that into 2198 ((X & ~8) - 2U) <= 1U. 
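     As a quick check on the example above: lowi = 2, highi = 3,
     lowj = 10, highj = 11, so lowxor = 2 ^ 10 = 8 and
     highxor = 3 ^ 11 = 8; the xor is a single set bit and identical
     for both bounds, which is what the loop below verifies before
     masking that bit away with ~8.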
*/ 2199 for (i = first; i < length; i++) 2200 { 2201 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp; 2202 2203 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 2204 continue; 2205 type = TREE_TYPE (ranges[i].exp); 2206 if (!INTEGRAL_TYPE_P (type)) 2207 continue; 2208 lowi = ranges[i].low; 2209 if (lowi == NULL_TREE) 2210 lowi = TYPE_MIN_VALUE (type); 2211 highi = ranges[i].high; 2212 if (highi == NULL_TREE) 2213 continue; 2214 for (j = i + 1; j < length && j < i + 64; j++) 2215 { 2216 if (ranges[j].exp == NULL_TREE) 2217 continue; 2218 if (ranges[i].exp != ranges[j].exp) 2219 break; 2220 if (ranges[j].in_p) 2221 continue; 2222 lowj = ranges[j].low; 2223 if (lowj == NULL_TREE) 2224 continue; 2225 highj = ranges[j].high; 2226 if (highj == NULL_TREE) 2227 highj = TYPE_MAX_VALUE (type); 2228 tem = fold_binary (GT_EXPR, boolean_type_node, 2229 lowj, highi); 2230 if (tem == NULL_TREE || !integer_onep (tem)) 2231 continue; 2232 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); 2233 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) 2234 continue; 2235 gcc_checking_assert (!integer_zerop (lowxor)); 2236 tem = fold_binary (MINUS_EXPR, type, lowxor, 2237 build_int_cst (type, 1)); 2238 if (tem == NULL_TREE) 2239 continue; 2240 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem); 2241 if (tem == NULL_TREE || !integer_zerop (tem)) 2242 continue; 2243 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); 2244 if (!tree_int_cst_equal (lowxor, highxor)) 2245 continue; 2246 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); 2247 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem); 2248 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); 2249 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); 2250 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp, 2251 ranges[i].in_p, lowj, highj, 2252 ranges[i].strict_overflow_p 2253 || ranges[j].strict_overflow_p)) 2254 { 2255 any_changes = true; 2256 break; 2257 } 2258 } 2259 } 2260 2261 if (any_changes && opcode != ERROR_MARK) 2262 { 2263 j = 0; 2264 FOR_EACH_VEC_ELT (*ops, i, oe) 2265 { 2266 if (oe->op == error_mark_node) 2267 continue; 2268 else if (i != j) 2269 (*ops)[j] = oe; 2270 j++; 2271 } 2272 ops->truncate (j); 2273 } 2274 2275 XDELETEVEC (ranges); 2276 } 2277 2278 /* Return true if STMT is a cast like: 2279 <bb N>: 2280 ... 2281 _123 = (int) _234; 2282 2283 <bb M>: 2284 # _345 = PHI <_123(N), 1(...), 1(...)> 2285 where _234 has bool type, _123 has single use and 2286 bb N has a single successor M. This is commonly used in 2287 the last block of a range test. */ 2288 2289 static bool 2290 final_range_test_p (gimple stmt) 2291 { 2292 basic_block bb, rhs_bb; 2293 edge e; 2294 tree lhs, rhs; 2295 use_operand_p use_p; 2296 gimple use_stmt; 2297 2298 if (!gimple_assign_cast_p (stmt)) 2299 return false; 2300 bb = gimple_bb (stmt); 2301 if (!single_succ_p (bb)) 2302 return false; 2303 e = single_succ_edge (bb); 2304 if (e->flags & EDGE_COMPLEX) 2305 return false; 2306 2307 lhs = gimple_assign_lhs (stmt); 2308 rhs = gimple_assign_rhs1 (stmt); 2309 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 2310 || TREE_CODE (rhs) != SSA_NAME 2311 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE) 2312 return false; 2313 2314 /* Test whether lhs is consumed only by a PHI in the only successor bb. 
*/ 2315 if (!single_imm_use (lhs, &use_p, &use_stmt)) 2316 return false; 2317 2318 if (gimple_code (use_stmt) != GIMPLE_PHI 2319 || gimple_bb (use_stmt) != e->dest) 2320 return false; 2321 2322 /* And that the rhs is defined in the same loop. */ 2323 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)); 2324 if (rhs_bb == NULL 2325 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb)) 2326 return false; 2327 2328 return true; 2329 } 2330 2331 /* Return true if BB is suitable basic block for inter-bb range test 2332 optimization. If BACKWARD is true, BB should be the only predecessor 2333 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine, 2334 or compared with to find a common basic block to which all conditions 2335 branch to if true resp. false. If BACKWARD is false, TEST_BB should 2336 be the only predecessor of BB. */ 2337 2338 static bool 2339 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb, 2340 bool backward) 2341 { 2342 edge_iterator ei, ei2; 2343 edge e, e2; 2344 gimple stmt; 2345 gimple_stmt_iterator gsi; 2346 bool other_edge_seen = false; 2347 bool is_cond; 2348 2349 if (test_bb == bb) 2350 return false; 2351 /* Check last stmt first. */ 2352 stmt = last_stmt (bb); 2353 if (stmt == NULL 2354 || (gimple_code (stmt) != GIMPLE_COND 2355 && (backward || !final_range_test_p (stmt))) 2356 || gimple_visited_p (stmt) 2357 || stmt_could_throw_p (stmt) 2358 || *other_bb == bb) 2359 return false; 2360 is_cond = gimple_code (stmt) == GIMPLE_COND; 2361 if (is_cond) 2362 { 2363 /* If last stmt is GIMPLE_COND, verify that one of the succ edges 2364 goes to the next bb (if BACKWARD, it is TEST_BB), and the other 2365 to *OTHER_BB (if not set yet, try to find it out). */ 2366 if (EDGE_COUNT (bb->succs) != 2) 2367 return false; 2368 FOR_EACH_EDGE (e, ei, bb->succs) 2369 { 2370 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 2371 return false; 2372 if (e->dest == test_bb) 2373 { 2374 if (backward) 2375 continue; 2376 else 2377 return false; 2378 } 2379 if (e->dest == bb) 2380 return false; 2381 if (*other_bb == NULL) 2382 { 2383 FOR_EACH_EDGE (e2, ei2, test_bb->succs) 2384 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 2385 return false; 2386 else if (e->dest == e2->dest) 2387 *other_bb = e->dest; 2388 if (*other_bb == NULL) 2389 return false; 2390 } 2391 if (e->dest == *other_bb) 2392 other_edge_seen = true; 2393 else if (backward) 2394 return false; 2395 } 2396 if (*other_bb == NULL || !other_edge_seen) 2397 return false; 2398 } 2399 else if (single_succ (bb) != *other_bb) 2400 return false; 2401 2402 /* Now check all PHIs of *OTHER_BB. */ 2403 e = find_edge (bb, *other_bb); 2404 e2 = find_edge (test_bb, *other_bb); 2405 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) 2406 { 2407 gimple phi = gsi_stmt (gsi); 2408 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments 2409 corresponding to BB and TEST_BB predecessor must be the same. */ 2410 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx), 2411 gimple_phi_arg_def (phi, e2->dest_idx), 0)) 2412 { 2413 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND, 2414 one of the PHIs should have the lhs of the last stmt in 2415 that block as PHI arg and that PHI should have 0 or 1 2416 corresponding to it in all other range test basic blocks 2417 considered. 
*/ 2418 if (!is_cond) 2419 { 2420 if (gimple_phi_arg_def (phi, e->dest_idx) 2421 == gimple_assign_lhs (stmt) 2422 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx)) 2423 || integer_onep (gimple_phi_arg_def (phi, 2424 e2->dest_idx)))) 2425 continue; 2426 } 2427 else 2428 { 2429 gimple test_last = last_stmt (test_bb); 2430 if (gimple_code (test_last) != GIMPLE_COND 2431 && gimple_phi_arg_def (phi, e2->dest_idx) 2432 == gimple_assign_lhs (test_last) 2433 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx)) 2434 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx)))) 2435 continue; 2436 } 2437 2438 return false; 2439 } 2440 } 2441 return true; 2442 } 2443 2444 /* Return true if BB doesn't have side-effects that would disallow 2445 range test optimization, all SSA_NAMEs set in the bb are consumed 2446 in the bb and there are no PHIs. */ 2447 2448 static bool 2449 no_side_effect_bb (basic_block bb) 2450 { 2451 gimple_stmt_iterator gsi; 2452 gimple last; 2453 2454 if (!gimple_seq_empty_p (phi_nodes (bb))) 2455 return false; 2456 last = last_stmt (bb); 2457 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 2458 { 2459 gimple stmt = gsi_stmt (gsi); 2460 tree lhs; 2461 imm_use_iterator imm_iter; 2462 use_operand_p use_p; 2463 2464 if (is_gimple_debug (stmt)) 2465 continue; 2466 if (gimple_has_side_effects (stmt)) 2467 return false; 2468 if (stmt == last) 2469 return true; 2470 if (!is_gimple_assign (stmt)) 2471 return false; 2472 lhs = gimple_assign_lhs (stmt); 2473 if (TREE_CODE (lhs) != SSA_NAME) 2474 return false; 2475 if (gimple_assign_rhs_could_trap_p (stmt)) 2476 return false; 2477 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) 2478 { 2479 gimple use_stmt = USE_STMT (use_p); 2480 if (is_gimple_debug (use_stmt)) 2481 continue; 2482 if (gimple_bb (use_stmt) != bb) 2483 return false; 2484 } 2485 } 2486 return false; 2487 } 2488 2489 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, 2490 return true and fill in *OPS recursively. */ 2491 2492 static bool 2493 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops, 2494 struct loop *loop) 2495 { 2496 gimple stmt = SSA_NAME_DEF_STMT (var); 2497 tree rhs[2]; 2498 int i; 2499 2500 if (!is_reassociable_op (stmt, code, loop)) 2501 return false; 2502 2503 rhs[0] = gimple_assign_rhs1 (stmt); 2504 rhs[1] = gimple_assign_rhs2 (stmt); 2505 gimple_set_visited (stmt, true); 2506 for (i = 0; i < 2; i++) 2507 if (TREE_CODE (rhs[i]) == SSA_NAME 2508 && !get_ops (rhs[i], code, ops, loop) 2509 && has_single_use (rhs[i])) 2510 { 2511 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); 2512 2513 oe->op = rhs[i]; 2514 oe->rank = code; 2515 oe->id = 0; 2516 oe->count = 1; 2517 ops->safe_push (oe); 2518 } 2519 return true; 2520 } 2521 2522 /* Inter-bb range test optimization. */ 2523 2524 static void 2525 maybe_optimize_range_tests (gimple stmt) 2526 { 2527 basic_block first_bb = gimple_bb (stmt); 2528 basic_block last_bb = first_bb; 2529 basic_block other_bb = NULL; 2530 basic_block bb; 2531 edge_iterator ei; 2532 edge e; 2533 vec<operand_entry_t> ops = vNULL; 2534 2535 /* Consider only basic blocks that end with GIMPLE_COND or 2536 a cast statement satisfying final_range_test_p. All 2537 but the last bb in the first_bb .. last_bb range 2538 should end with GIMPLE_COND. 
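     Purely as an illustration of the shape being looked for (block
     layout and names made up), source like
       if (a == 3 || a == 5)
         foo ();
     usually becomes two blocks, each ending in a GIMPLE_COND on a,
     with one edge of each going to a common block (other_bb) and the
     other edge continuing to the next test.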
*/ 2539 if (gimple_code (stmt) == GIMPLE_COND) 2540 { 2541 if (EDGE_COUNT (first_bb->succs) != 2) 2542 return; 2543 } 2544 else if (final_range_test_p (stmt)) 2545 other_bb = single_succ (first_bb); 2546 else 2547 return; 2548 2549 if (stmt_could_throw_p (stmt)) 2550 return; 2551 2552 /* As relative ordering of post-dominator sons isn't fixed, 2553 maybe_optimize_range_tests can be called first on any 2554 bb in the range we want to optimize. So, start searching 2555 backwards, if first_bb can be set to a predecessor. */ 2556 while (single_pred_p (first_bb)) 2557 { 2558 basic_block pred_bb = single_pred (first_bb); 2559 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true)) 2560 break; 2561 if (!no_side_effect_bb (first_bb)) 2562 break; 2563 first_bb = pred_bb; 2564 } 2565 /* If first_bb is last_bb, other_bb hasn't been computed yet. 2566 Before starting forward search in last_bb successors, find 2567 out the other_bb. */ 2568 if (first_bb == last_bb) 2569 { 2570 other_bb = NULL; 2571 /* As non-GIMPLE_COND last stmt always terminates the range, 2572 if forward search didn't discover anything, just give up. */ 2573 if (gimple_code (stmt) != GIMPLE_COND) 2574 return; 2575 /* Look at both successors. Either it ends with a GIMPLE_COND 2576 and satisfies suitable_cond_bb, or ends with a cast and 2577 other_bb is that cast's successor. */ 2578 FOR_EACH_EDGE (e, ei, first_bb->succs) 2579 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)) 2580 || e->dest == first_bb) 2581 return; 2582 else if (single_pred_p (e->dest)) 2583 { 2584 stmt = last_stmt (e->dest); 2585 if (stmt 2586 && gimple_code (stmt) == GIMPLE_COND 2587 && EDGE_COUNT (e->dest->succs) == 2) 2588 { 2589 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true)) 2590 break; 2591 else 2592 other_bb = NULL; 2593 } 2594 else if (stmt 2595 && final_range_test_p (stmt) 2596 && find_edge (first_bb, single_succ (e->dest))) 2597 { 2598 other_bb = single_succ (e->dest); 2599 if (other_bb == first_bb) 2600 other_bb = NULL; 2601 } 2602 } 2603 if (other_bb == NULL) 2604 return; 2605 } 2606 /* Now do the forward search, moving last_bb to successor bbs 2607 that aren't other_bb. */ 2608 while (EDGE_COUNT (last_bb->succs) == 2) 2609 { 2610 FOR_EACH_EDGE (e, ei, last_bb->succs) 2611 if (e->dest != other_bb) 2612 break; 2613 if (e == NULL) 2614 break; 2615 if (!single_pred_p (e->dest)) 2616 break; 2617 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false)) 2618 break; 2619 if (!no_side_effect_bb (e->dest)) 2620 break; 2621 last_bb = e->dest; 2622 } 2623 if (first_bb == last_bb) 2624 return; 2625 /* Here basic blocks first_bb through last_bb's predecessor 2626 end with GIMPLE_COND, all of them have one of the edges to 2627 other_bb and another to another block in the range, 2628 all blocks except first_bb don't have side-effects and 2629 last_bb ends with either GIMPLE_COND, or cast satisfying 2630 final_range_test_p. */ 2631 for (bb = last_bb; ; bb = single_pred (bb)) 2632 { 2633 enum tree_code code; 2634 tree lhs, rhs; 2635 2636 e = find_edge (bb, other_bb); 2637 stmt = last_stmt (bb); 2638 gimple_set_visited (stmt, true); 2639 if (gimple_code (stmt) != GIMPLE_COND) 2640 { 2641 use_operand_p use_p; 2642 gimple phi; 2643 edge e2; 2644 unsigned int d; 2645 2646 lhs = gimple_assign_lhs (stmt); 2647 rhs = gimple_assign_rhs1 (stmt); 2648 gcc_assert (bb == last_bb); 2649 2650 /* stmt is 2651 _123 = (int) _234; 2652 2653 followed by: 2654 <bb M>: 2655 # _345 = PHI <_123(N), 1(...), 1(...)> 2656 2657 or 0 instead of 1. 
If it is 0, the _234 2658 range test is anded together with all the 2659 other range tests, if it is 1, it is ored with 2660 them. */ 2661 single_imm_use (lhs, &use_p, &phi); 2662 gcc_assert (gimple_code (phi) == GIMPLE_PHI); 2663 e2 = find_edge (first_bb, other_bb); 2664 d = e2->dest_idx; 2665 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs); 2666 if (integer_zerop (gimple_phi_arg_def (phi, d))) 2667 code = BIT_AND_EXPR; 2668 else 2669 { 2670 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d))); 2671 code = BIT_IOR_EXPR; 2672 } 2673 2674 /* If _234 SSA_NAME_DEF_STMT is 2675 _234 = _567 | _789; 2676 (or &, corresponding to 1/0 in the phi arguments, 2677 push into ops the individual range test arguments 2678 of the bitwise or resp. and, recursively. */ 2679 if (!get_ops (rhs, code, &ops, 2680 loop_containing_stmt (stmt)) 2681 && has_single_use (rhs)) 2682 { 2683 /* Otherwise, push the _234 range test itself. */ 2684 operand_entry_t oe 2685 = (operand_entry_t) pool_alloc (operand_entry_pool); 2686 2687 oe->op = rhs; 2688 oe->rank = code; 2689 oe->id = 0; 2690 oe->count = 1; 2691 ops.safe_push (oe); 2692 } 2693 continue; 2694 } 2695 /* Otherwise stmt is GIMPLE_COND. */ 2696 code = gimple_cond_code (stmt); 2697 lhs = gimple_cond_lhs (stmt); 2698 rhs = gimple_cond_rhs (stmt); 2699 if (TREE_CODE (lhs) == SSA_NAME 2700 && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 2701 && ((code != EQ_EXPR && code != NE_EXPR) 2702 || rhs != boolean_false_node 2703 /* Either push into ops the individual bitwise 2704 or resp. and operands, depending on which 2705 edge is other_bb. */ 2706 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0) 2707 ^ (code == EQ_EXPR)) 2708 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops, 2709 loop_containing_stmt (stmt)))) 2710 { 2711 /* Or push the GIMPLE_COND stmt itself. */ 2712 operand_entry_t oe 2713 = (operand_entry_t) pool_alloc (operand_entry_pool); 2714 2715 oe->op = NULL; 2716 oe->rank = (e->flags & EDGE_TRUE_VALUE) 2717 ? BIT_IOR_EXPR : BIT_AND_EXPR; 2718 /* oe->op = NULL signs that there is no SSA_NAME 2719 for the range test, and oe->id instead is the 2720 basic block number, at which's end the GIMPLE_COND 2721 is. */ 2722 oe->id = bb->index; 2723 oe->count = 1; 2724 ops.safe_push (oe); 2725 } 2726 if (bb == first_bb) 2727 break; 2728 } 2729 if (ops.length () > 1) 2730 optimize_range_tests (ERROR_MARK, &ops); 2731 ops.release (); 2732 } 2733 2734 /* Return true if OPERAND is defined by a PHI node which uses the LHS 2735 of STMT in it's operands. This is also known as a "destructive 2736 update" operation. */ 2737 2738 static bool 2739 is_phi_for_stmt (gimple stmt, tree operand) 2740 { 2741 gimple def_stmt; 2742 tree lhs; 2743 use_operand_p arg_p; 2744 ssa_op_iter i; 2745 2746 if (TREE_CODE (operand) != SSA_NAME) 2747 return false; 2748 2749 lhs = gimple_assign_lhs (stmt); 2750 2751 def_stmt = SSA_NAME_DEF_STMT (operand); 2752 if (gimple_code (def_stmt) != GIMPLE_PHI) 2753 return false; 2754 2755 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE) 2756 if (lhs == USE_FROM_PTR (arg_p)) 2757 return true; 2758 return false; 2759 } 2760 2761 /* Remove def stmt of VAR if VAR has zero uses and recurse 2762 on rhs1 operand if so. 
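     For instance (made-up SSA names), if the already visited chain
       x_3 = a_1 + b_2;
       y_5 = x_3 + c_4;
     was rewritten so that x_3 ended up with zero uses, the definition
     of x_3 is removed and the walk continues with a_1.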
*/ 2763 2764 static void 2765 remove_visited_stmt_chain (tree var) 2766 { 2767 gimple stmt; 2768 gimple_stmt_iterator gsi; 2769 2770 while (1) 2771 { 2772 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) 2773 return; 2774 stmt = SSA_NAME_DEF_STMT (var); 2775 if (is_gimple_assign (stmt) && gimple_visited_p (stmt)) 2776 { 2777 var = gimple_assign_rhs1 (stmt); 2778 gsi = gsi_for_stmt (stmt); 2779 gsi_remove (&gsi, true); 2780 release_defs (stmt); 2781 } 2782 else 2783 return; 2784 } 2785 } 2786 2787 /* This function checks three consecutive operands in 2788 passed operands vector OPS starting from OPINDEX and 2789 swaps two operands if it is profitable for binary operation 2790 consuming OPINDEX + 1 and OPINDEX + 2 operands. 2791 2792 We pair ops with the same rank if possible. 2793 2794 The alternative we try is to see if STMT is a destructive 2795 update style statement, which is like: 2796 b = phi (a, ...) 2797 a = c + b; 2798 In that case, we want to use the destructive update form to 2799 expose the possible vectorizer sum reduction opportunity. 2800 In that case, the third operand will be the phi node. This 2801 check is not performed if STMT is null. 2802 2803 We could, of course, try to be better as noted above, and do a 2804 lot of work to try to find these opportunities in >3 operand 2805 cases, but it is unlikely to be worth it. */ 2806 2807 static void 2808 swap_ops_for_binary_stmt (vec<operand_entry_t> ops, 2809 unsigned int opindex, gimple stmt) 2810 { 2811 operand_entry_t oe1, oe2, oe3; 2812 2813 oe1 = ops[opindex]; 2814 oe2 = ops[opindex + 1]; 2815 oe3 = ops[opindex + 2]; 2816 2817 if ((oe1->rank == oe2->rank 2818 && oe2->rank != oe3->rank) 2819 || (stmt && is_phi_for_stmt (stmt, oe3->op) 2820 && !is_phi_for_stmt (stmt, oe1->op) 2821 && !is_phi_for_stmt (stmt, oe2->op))) 2822 { 2823 struct operand_entry temp = *oe3; 2824 oe3->op = oe1->op; 2825 oe3->rank = oe1->rank; 2826 oe1->op = temp.op; 2827 oe1->rank = temp.rank; 2828 } 2829 else if ((oe1->rank == oe3->rank 2830 && oe2->rank != oe3->rank) 2831 || (stmt && is_phi_for_stmt (stmt, oe2->op) 2832 && !is_phi_for_stmt (stmt, oe1->op) 2833 && !is_phi_for_stmt (stmt, oe3->op))) 2834 { 2835 struct operand_entry temp = *oe2; 2836 oe2->op = oe1->op; 2837 oe2->rank = oe1->rank; 2838 oe1->op = temp.op; 2839 oe1->rank = temp.rank; 2840 } 2841 } 2842 2843 /* Recursively rewrite our linearized statements so that the operators 2844 match those in OPS[OPINDEX], putting the computation in rank 2845 order. */ 2846 2847 static void 2848 rewrite_expr_tree (gimple stmt, unsigned int opindex, 2849 vec<operand_entry_t> ops, bool moved) 2850 { 2851 tree rhs1 = gimple_assign_rhs1 (stmt); 2852 tree rhs2 = gimple_assign_rhs2 (stmt); 2853 operand_entry_t oe; 2854 2855 /* If we have three operands left, then we want to make sure the ones 2856 that get the double binary op are chosen wisely. */ 2857 if (opindex + 3 == ops.length ()) 2858 swap_ops_for_binary_stmt (ops, opindex, stmt); 2859 2860 /* The final recursion case for this function is that you have 2861 exactly two operations left. 2862 If we had exactly one op in the entire list to start with, we 2863 would have never called this function, and the tail recursion 2864 rewrites them one at a time.
*/ 2865 if (opindex + 2 == ops.length ()) 2866 { 2867 operand_entry_t oe1, oe2; 2868 2869 oe1 = ops[opindex]; 2870 oe2 = ops[opindex + 1]; 2871 2872 if (rhs1 != oe1->op || rhs2 != oe2->op) 2873 { 2874 if (dump_file && (dump_flags & TDF_DETAILS)) 2875 { 2876 fprintf (dump_file, "Transforming "); 2877 print_gimple_stmt (dump_file, stmt, 0, 0); 2878 } 2879 2880 gimple_assign_set_rhs1 (stmt, oe1->op); 2881 gimple_assign_set_rhs2 (stmt, oe2->op); 2882 update_stmt (stmt); 2883 if (rhs1 != oe1->op && rhs1 != oe2->op) 2884 remove_visited_stmt_chain (rhs1); 2885 2886 if (dump_file && (dump_flags & TDF_DETAILS)) 2887 { 2888 fprintf (dump_file, " into "); 2889 print_gimple_stmt (dump_file, stmt, 0, 0); 2890 } 2891 } 2892 return; 2893 } 2894 2895 /* If we hit here, we should have 3 or more ops left. */ 2896 gcc_assert (opindex + 2 < ops.length ()); 2897 2898 /* Rewrite the next operator. */ 2899 oe = ops[opindex]; 2900 2901 if (oe->op != rhs2) 2902 { 2903 if (!moved) 2904 { 2905 gimple_stmt_iterator gsinow, gsirhs1; 2906 gimple stmt1 = stmt, stmt2; 2907 unsigned int count; 2908 2909 gsinow = gsi_for_stmt (stmt); 2910 count = ops.length () - opindex - 2; 2911 while (count-- != 0) 2912 { 2913 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1)); 2914 gsirhs1 = gsi_for_stmt (stmt2); 2915 gsi_move_before (&gsirhs1, &gsinow); 2916 gsi_prev (&gsinow); 2917 stmt1 = stmt2; 2918 } 2919 moved = true; 2920 } 2921 2922 if (dump_file && (dump_flags & TDF_DETAILS)) 2923 { 2924 fprintf (dump_file, "Transforming "); 2925 print_gimple_stmt (dump_file, stmt, 0, 0); 2926 } 2927 2928 gimple_assign_set_rhs2 (stmt, oe->op); 2929 update_stmt (stmt); 2930 2931 if (dump_file && (dump_flags & TDF_DETAILS)) 2932 { 2933 fprintf (dump_file, " into "); 2934 print_gimple_stmt (dump_file, stmt, 0, 0); 2935 } 2936 } 2937 /* Recurse on the LHS of the binary operator, which is guaranteed to 2938 be the non-leaf side. */ 2939 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved); 2940 } 2941 2942 /* Find out how many cycles we need to compute the statement chain. 2943 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the 2944 maximum number of independent statements we may execute per cycle. */ 2945 2946 static int 2947 get_required_cycles (int ops_num, int cpu_width) 2948 { 2949 int res; 2950 int elog; 2951 unsigned int rest; 2952 2953 /* While we have more than 2 * cpu_width operands 2954 we may reduce the number of operands by cpu_width 2955 per cycle. */ 2956 res = ops_num / (2 * cpu_width); 2957 2958 /* The remaining operand count can be halved each cycle 2959 until we have only one operand. */ 2960 rest = (unsigned)(ops_num - res * cpu_width); 2961 elog = exact_log2 (rest); 2962 if (elog >= 0) 2963 res += elog; 2964 else 2965 res += floor_log2 (rest) + 1; 2966 2967 return res; 2968 } 2969 2970 /* Returns an optimal number of registers to use for computation of 2971 the given statements. */ 2972 2973 static int 2974 get_reassociation_width (int ops_num, enum tree_code opc, 2975 enum machine_mode mode) 2976 { 2977 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH); 2978 int width; 2979 int width_min; 2980 int cycles_best; 2981 2982 if (param_width > 0) 2983 width = param_width; 2984 else 2985 width = targetm.sched.reassociation_width (opc, mode); 2986 2987 if (width == 1) 2988 return width; 2989 2990 /* Get the minimal time required for sequence computation. */ 2991 cycles_best = get_required_cycles (ops_num, width); 2992 2993 /* Check if we may use a smaller width and still compute the sequence in 2994 the same time.
It will allow us to reduce registers usage. 2995 get_required_cycles is monotonically increasing with lower width 2996 so we can perform a binary search for the minimal width that still 2997 results in the optimal cycle count. */ 2998 width_min = 1; 2999 while (width > width_min) 3000 { 3001 int width_mid = (width + width_min) / 2; 3002 3003 if (get_required_cycles (ops_num, width_mid) == cycles_best) 3004 width = width_mid; 3005 else if (width_min < width_mid) 3006 width_min = width_mid; 3007 else 3008 break; 3009 } 3010 3011 return width; 3012 } 3013 3014 /* Recursively rewrite our linearized statements so that the operators 3015 match those in OPS[OPINDEX], putting the computation in rank 3016 order and trying to allow operations to be executed in 3017 parallel. */ 3018 3019 static void 3020 rewrite_expr_tree_parallel (gimple stmt, int width, 3021 vec<operand_entry_t> ops) 3022 { 3023 enum tree_code opcode = gimple_assign_rhs_code (stmt); 3024 int op_num = ops.length (); 3025 int stmt_num = op_num - 1; 3026 gimple *stmts = XALLOCAVEC (gimple, stmt_num); 3027 int op_index = op_num - 1; 3028 int stmt_index = 0; 3029 int ready_stmts_end = 0; 3030 int i = 0; 3031 tree last_rhs1 = gimple_assign_rhs1 (stmt); 3032 3033 /* We start expression rewriting from the top statements. 3034 So, in this loop we create a full list of statements 3035 we will work with. */ 3036 stmts[stmt_num - 1] = stmt; 3037 for (i = stmt_num - 2; i >= 0; i--) 3038 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1])); 3039 3040 for (i = 0; i < stmt_num; i++) 3041 { 3042 tree op1, op2; 3043 3044 /* Determine whether we should use results of 3045 already handled statements or not. */ 3046 if (ready_stmts_end == 0 3047 && (i - stmt_index >= width || op_index < 1)) 3048 ready_stmts_end = i; 3049 3050 /* Now we choose operands for the next statement. Non zero 3051 value in ready_stmts_end means here that we should use 3052 the result of already generated statements as new operand. */ 3053 if (ready_stmts_end > 0) 3054 { 3055 op1 = gimple_assign_lhs (stmts[stmt_index++]); 3056 if (ready_stmts_end > stmt_index) 3057 op2 = gimple_assign_lhs (stmts[stmt_index++]); 3058 else if (op_index >= 0) 3059 op2 = ops[op_index--]->op; 3060 else 3061 { 3062 gcc_assert (stmt_index < i); 3063 op2 = gimple_assign_lhs (stmts[stmt_index++]); 3064 } 3065 3066 if (stmt_index >= ready_stmts_end) 3067 ready_stmts_end = 0; 3068 } 3069 else 3070 { 3071 if (op_index > 1) 3072 swap_ops_for_binary_stmt (ops, op_index - 2, NULL); 3073 op2 = ops[op_index--]->op; 3074 op1 = ops[op_index--]->op; 3075 } 3076 3077 /* If we emit the last statement then we should put 3078 operands into the last statement. It will also 3079 break the loop. */ 3080 if (op_index < 0 && stmt_index == i) 3081 i = stmt_num - 1; 3082 3083 if (dump_file && (dump_flags & TDF_DETAILS)) 3084 { 3085 fprintf (dump_file, "Transforming "); 3086 print_gimple_stmt (dump_file, stmts[i], 0, 0); 3087 } 3088 3089 /* We keep original statement only for the last one. All 3090 others are recreated. 
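     For example (illustrative), with four operands a, b, c, d and
     width 2 the loop emits two fresh statements of the form
       t1 = a + b;
       t2 = c + d;
     via build_and_add_sum, and only the final t3 = t1 + t2 reuses and
     updates the original statement.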
*/ 3091 if (i == stmt_num - 1) 3092 { 3093 gimple_assign_set_rhs1 (stmts[i], op1); 3094 gimple_assign_set_rhs2 (stmts[i], op2); 3095 update_stmt (stmts[i]); 3096 } 3097 else 3098 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); 3099 3100 if (dump_file && (dump_flags & TDF_DETAILS)) 3101 { 3102 fprintf (dump_file, " into "); 3103 print_gimple_stmt (dump_file, stmts[i], 0, 0); 3104 } 3105 } 3106 3107 remove_visited_stmt_chain (last_rhs1); 3108 } 3109 3110 /* Transform STMT, which is really (A +B) + (C + D) into the left 3111 linear form, ((A+B)+C)+D. 3112 Recurse on D if necessary. */ 3113 3114 static void 3115 linearize_expr (gimple stmt) 3116 { 3117 gimple_stmt_iterator gsinow, gsirhs; 3118 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); 3119 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 3120 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 3121 gimple newbinrhs = NULL; 3122 struct loop *loop = loop_containing_stmt (stmt); 3123 3124 gcc_assert (is_reassociable_op (binlhs, rhscode, loop) 3125 && is_reassociable_op (binrhs, rhscode, loop)); 3126 3127 gsinow = gsi_for_stmt (stmt); 3128 gsirhs = gsi_for_stmt (binrhs); 3129 gsi_move_before (&gsirhs, &gsinow); 3130 3131 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs)); 3132 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs)); 3133 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs)); 3134 3135 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) 3136 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 3137 3138 if (dump_file && (dump_flags & TDF_DETAILS)) 3139 { 3140 fprintf (dump_file, "Linearized: "); 3141 print_gimple_stmt (dump_file, stmt, 0, 0); 3142 } 3143 3144 reassociate_stats.linearized++; 3145 update_stmt (binrhs); 3146 update_stmt (binlhs); 3147 update_stmt (stmt); 3148 3149 gimple_set_visited (stmt, true); 3150 gimple_set_visited (binlhs, true); 3151 gimple_set_visited (binrhs, true); 3152 3153 /* Tail recurse on the new rhs if it still needs reassociation. */ 3154 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop)) 3155 /* ??? This should probably be linearize_expr (newbinrhs) but I don't 3156 want to change the algorithm while converting to tuples. */ 3157 linearize_expr (stmt); 3158 } 3159 3160 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return 3161 it. Otherwise, return NULL. */ 3162 3163 static gimple 3164 get_single_immediate_use (tree lhs) 3165 { 3166 use_operand_p immuse; 3167 gimple immusestmt; 3168 3169 if (TREE_CODE (lhs) == SSA_NAME 3170 && single_imm_use (lhs, &immuse, &immusestmt) 3171 && is_gimple_assign (immusestmt)) 3172 return immusestmt; 3173 3174 return NULL; 3175 } 3176 3177 /* Recursively negate the value of TONEGATE, and return the SSA_NAME 3178 representing the negated value. Insertions of any necessary 3179 instructions go before GSI. 3180 This function is recursive in that, if you hand it "a_5" as the 3181 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will 3182 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */ 3183 3184 static tree 3185 negate_value (tree tonegate, gimple_stmt_iterator *gsi) 3186 { 3187 gimple negatedefstmt= NULL; 3188 tree resultofnegate; 3189 3190 /* If we are trying to negate a name, defined by an add, negate the 3191 add operands instead. 
*/ 3192 if (TREE_CODE (tonegate) == SSA_NAME) 3193 negatedefstmt = SSA_NAME_DEF_STMT (tonegate); 3194 if (TREE_CODE (tonegate) == SSA_NAME 3195 && is_gimple_assign (negatedefstmt) 3196 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME 3197 && has_single_use (gimple_assign_lhs (negatedefstmt)) 3198 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR) 3199 { 3200 gimple_stmt_iterator gsi; 3201 tree rhs1 = gimple_assign_rhs1 (negatedefstmt); 3202 tree rhs2 = gimple_assign_rhs2 (negatedefstmt); 3203 3204 gsi = gsi_for_stmt (negatedefstmt); 3205 rhs1 = negate_value (rhs1, &gsi); 3206 gimple_assign_set_rhs1 (negatedefstmt, rhs1); 3207 3208 gsi = gsi_for_stmt (negatedefstmt); 3209 rhs2 = negate_value (rhs2, &gsi); 3210 gimple_assign_set_rhs2 (negatedefstmt, rhs2); 3211 3212 update_stmt (negatedefstmt); 3213 return gimple_assign_lhs (negatedefstmt); 3214 } 3215 3216 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate); 3217 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true, 3218 NULL_TREE, true, GSI_SAME_STMT); 3219 return resultofnegate; 3220 } 3221 3222 /* Return true if we should break up the subtract in STMT into an add 3223 with negate. This is true when we the subtract operands are really 3224 adds, or the subtract itself is used in an add expression. In 3225 either case, breaking up the subtract into an add with negate 3226 exposes the adds to reassociation. */ 3227 3228 static bool 3229 should_break_up_subtract (gimple stmt) 3230 { 3231 tree lhs = gimple_assign_lhs (stmt); 3232 tree binlhs = gimple_assign_rhs1 (stmt); 3233 tree binrhs = gimple_assign_rhs2 (stmt); 3234 gimple immusestmt; 3235 struct loop *loop = loop_containing_stmt (stmt); 3236 3237 if (TREE_CODE (binlhs) == SSA_NAME 3238 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) 3239 return true; 3240 3241 if (TREE_CODE (binrhs) == SSA_NAME 3242 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop)) 3243 return true; 3244 3245 if (TREE_CODE (lhs) == SSA_NAME 3246 && (immusestmt = get_single_immediate_use (lhs)) 3247 && is_gimple_assign (immusestmt) 3248 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR 3249 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR)) 3250 return true; 3251 return false; 3252 } 3253 3254 /* Transform STMT from A - B into A + -B. */ 3255 3256 static void 3257 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip) 3258 { 3259 tree rhs1 = gimple_assign_rhs1 (stmt); 3260 tree rhs2 = gimple_assign_rhs2 (stmt); 3261 3262 if (dump_file && (dump_flags & TDF_DETAILS)) 3263 { 3264 fprintf (dump_file, "Breaking up subtract "); 3265 print_gimple_stmt (dump_file, stmt, 0, 0); 3266 } 3267 3268 rhs2 = negate_value (rhs2, gsip); 3269 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2); 3270 update_stmt (stmt); 3271 } 3272 3273 /* Determine whether STMT is a builtin call that raises an SSA name 3274 to an integer power and has only one use. If so, and this is early 3275 reassociation and unsafe math optimizations are permitted, place 3276 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE. 3277 If any of these conditions does not hold, return FALSE. 
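     For instance (illustrative), a call t_1 = __builtin_pow (x_2, 3.0)
     whose result has a single use yields *BASE == x_2 and
     *EXPONENT == 3, while a non-integral or negative constant exponent
     (or an exponent of 0 or 1) makes this function return FALSE.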
*/ 3278 3279 static bool 3280 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent) 3281 { 3282 tree fndecl, arg1; 3283 REAL_VALUE_TYPE c, cint; 3284 3285 if (!first_pass_instance 3286 || !flag_unsafe_math_optimizations 3287 || !is_gimple_call (stmt) 3288 || !has_single_use (gimple_call_lhs (stmt))) 3289 return false; 3290 3291 fndecl = gimple_call_fndecl (stmt); 3292 3293 if (!fndecl 3294 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) 3295 return false; 3296 3297 switch (DECL_FUNCTION_CODE (fndecl)) 3298 { 3299 CASE_FLT_FN (BUILT_IN_POW): 3300 if (flag_errno_math) 3301 return false; 3302 3303 *base = gimple_call_arg (stmt, 0); 3304 arg1 = gimple_call_arg (stmt, 1); 3305 3306 if (TREE_CODE (arg1) != REAL_CST) 3307 return false; 3308 3309 c = TREE_REAL_CST (arg1); 3310 3311 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT) 3312 return false; 3313 3314 *exponent = real_to_integer (&c); 3315 real_from_integer (&cint, VOIDmode, *exponent, 3316 *exponent < 0 ? -1 : 0, 0); 3317 if (!real_identical (&c, &cint)) 3318 return false; 3319 3320 break; 3321 3322 CASE_FLT_FN (BUILT_IN_POWI): 3323 *base = gimple_call_arg (stmt, 0); 3324 arg1 = gimple_call_arg (stmt, 1); 3325 3326 if (!host_integerp (arg1, 0)) 3327 return false; 3328 3329 *exponent = TREE_INT_CST_LOW (arg1); 3330 break; 3331 3332 default: 3333 return false; 3334 } 3335 3336 /* Expanding negative exponents is generally unproductive, so we don't 3337 complicate matters with those. Exponents of zero and one should 3338 have been handled by expression folding. */ 3339 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME) 3340 return false; 3341 3342 return true; 3343 } 3344 3345 /* Recursively linearize a binary expression that is the RHS of STMT. 3346 Place the operands of the expression tree in the vector named OPS. */ 3347 3348 static void 3349 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt, 3350 bool is_associative, bool set_visited) 3351 { 3352 tree binlhs = gimple_assign_rhs1 (stmt); 3353 tree binrhs = gimple_assign_rhs2 (stmt); 3354 gimple binlhsdef = NULL, binrhsdef = NULL; 3355 bool binlhsisreassoc = false; 3356 bool binrhsisreassoc = false; 3357 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 3358 struct loop *loop = loop_containing_stmt (stmt); 3359 tree base = NULL_TREE; 3360 HOST_WIDE_INT exponent = 0; 3361 3362 if (set_visited) 3363 gimple_set_visited (stmt, true); 3364 3365 if (TREE_CODE (binlhs) == SSA_NAME) 3366 { 3367 binlhsdef = SSA_NAME_DEF_STMT (binlhs); 3368 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop) 3369 && !stmt_could_throw_p (binlhsdef)); 3370 } 3371 3372 if (TREE_CODE (binrhs) == SSA_NAME) 3373 { 3374 binrhsdef = SSA_NAME_DEF_STMT (binrhs); 3375 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop) 3376 && !stmt_could_throw_p (binrhsdef)); 3377 } 3378 3379 /* If the LHS is not reassociable, but the RHS is, we need to swap 3380 them. If neither is reassociable, there is nothing we can do, so 3381 just put them in the ops vector. If the LHS is reassociable, 3382 linearize it. If both are reassociable, then linearize the RHS 3383 and the LHS. */ 3384 3385 if (!binlhsisreassoc) 3386 { 3387 tree temp; 3388 3389 /* If this is not a associative operation like division, give up. 
*/ 3390 if (!is_associative) 3391 { 3392 add_to_ops_vec (ops, binrhs); 3393 return; 3394 } 3395 3396 if (!binrhsisreassoc) 3397 { 3398 if (rhscode == MULT_EXPR 3399 && TREE_CODE (binrhs) == SSA_NAME 3400 && acceptable_pow_call (binrhsdef, &base, &exponent)) 3401 { 3402 add_repeat_to_ops_vec (ops, base, exponent); 3403 gimple_set_visited (binrhsdef, true); 3404 } 3405 else 3406 add_to_ops_vec (ops, binrhs); 3407 3408 if (rhscode == MULT_EXPR 3409 && TREE_CODE (binlhs) == SSA_NAME 3410 && acceptable_pow_call (binlhsdef, &base, &exponent)) 3411 { 3412 add_repeat_to_ops_vec (ops, base, exponent); 3413 gimple_set_visited (binlhsdef, true); 3414 } 3415 else 3416 add_to_ops_vec (ops, binlhs); 3417 3418 return; 3419 } 3420 3421 if (dump_file && (dump_flags & TDF_DETAILS)) 3422 { 3423 fprintf (dump_file, "swapping operands of "); 3424 print_gimple_stmt (dump_file, stmt, 0, 0); 3425 } 3426 3427 swap_tree_operands (stmt, 3428 gimple_assign_rhs1_ptr (stmt), 3429 gimple_assign_rhs2_ptr (stmt)); 3430 update_stmt (stmt); 3431 3432 if (dump_file && (dump_flags & TDF_DETAILS)) 3433 { 3434 fprintf (dump_file, " is now "); 3435 print_gimple_stmt (dump_file, stmt, 0, 0); 3436 } 3437 3438 /* We want to make it so the lhs is always the reassociative op, 3439 so swap. */ 3440 temp = binlhs; 3441 binlhs = binrhs; 3442 binrhs = temp; 3443 } 3444 else if (binrhsisreassoc) 3445 { 3446 linearize_expr (stmt); 3447 binlhs = gimple_assign_rhs1 (stmt); 3448 binrhs = gimple_assign_rhs2 (stmt); 3449 } 3450 3451 gcc_assert (TREE_CODE (binrhs) != SSA_NAME 3452 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), 3453 rhscode, loop)); 3454 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs), 3455 is_associative, set_visited); 3456 3457 if (rhscode == MULT_EXPR 3458 && TREE_CODE (binrhs) == SSA_NAME 3459 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent)) 3460 { 3461 add_repeat_to_ops_vec (ops, base, exponent); 3462 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true); 3463 } 3464 else 3465 add_to_ops_vec (ops, binrhs); 3466 } 3467 3468 /* Repropagate the negates back into subtracts, since no other pass 3469 currently does it. */ 3470 3471 static void 3472 repropagate_negates (void) 3473 { 3474 unsigned int i = 0; 3475 tree negate; 3476 3477 FOR_EACH_VEC_ELT (plus_negates, i, negate) 3478 { 3479 gimple user = get_single_immediate_use (negate); 3480 3481 if (!user || !is_gimple_assign (user)) 3482 continue; 3483 3484 /* The negate operand can be either operand of a PLUS_EXPR 3485 (it can be the LHS if the RHS is a constant for example). 3486 3487 Force the negate operand to the RHS of the PLUS_EXPR, then 3488 transform the PLUS_EXPR into a MINUS_EXPR. */ 3489 if (gimple_assign_rhs_code (user) == PLUS_EXPR) 3490 { 3491 /* If the negated operand appears on the LHS of the 3492 PLUS_EXPR, exchange the operands of the PLUS_EXPR 3493 to force the negated operand to the RHS of the PLUS_EXPR. */ 3494 if (gimple_assign_rhs1 (user) == negate) 3495 { 3496 swap_tree_operands (user, 3497 gimple_assign_rhs1_ptr (user), 3498 gimple_assign_rhs2_ptr (user)); 3499 } 3500 3501 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace 3502 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. 
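     Illustratively (made-up names): with t_3 = -a_1 recorded in
     plus_negates and a user y_4 = b_2 + t_3, the user becomes
     y_4 = b_2 - a_1.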
*/ 3503 if (gimple_assign_rhs2 (user) == negate) 3504 { 3505 tree rhs1 = gimple_assign_rhs1 (user); 3506 tree rhs2 = get_unary_op (negate, NEGATE_EXPR); 3507 gimple_stmt_iterator gsi = gsi_for_stmt (user); 3508 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2); 3509 update_stmt (user); 3510 } 3511 } 3512 else if (gimple_assign_rhs_code (user) == MINUS_EXPR) 3513 { 3514 if (gimple_assign_rhs1 (user) == negate) 3515 { 3516 /* We have 3517 x = -a 3518 y = x - b 3519 which we transform into 3520 x = a + b 3521 y = -x . 3522 This pushes down the negate which we possibly can merge 3523 into some other operation, hence insert it into the 3524 plus_negates vector. */ 3525 gimple feed = SSA_NAME_DEF_STMT (negate); 3526 tree a = gimple_assign_rhs1 (feed); 3527 tree rhs2 = gimple_assign_rhs2 (user); 3528 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2; 3529 gimple_replace_lhs (feed, negate); 3530 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2); 3531 update_stmt (gsi_stmt (gsi)); 3532 gsi2 = gsi_for_stmt (user); 3533 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL); 3534 update_stmt (gsi_stmt (gsi2)); 3535 gsi_move_before (&gsi, &gsi2); 3536 plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2))); 3537 } 3538 else 3539 { 3540 /* Transform "x = -a; y = b - x" into "y = b + a", getting 3541 rid of one operation. */ 3542 gimple feed = SSA_NAME_DEF_STMT (negate); 3543 tree a = gimple_assign_rhs1 (feed); 3544 tree rhs1 = gimple_assign_rhs1 (user); 3545 gimple_stmt_iterator gsi = gsi_for_stmt (user); 3546 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a); 3547 update_stmt (gsi_stmt (gsi)); 3548 } 3549 } 3550 } 3551 } 3552 3553 /* Returns true if OP is of a type for which we can do reassociation. 3554 That is for integral or non-saturating fixed-point types, and for 3555 floating point type when associative-math is enabled. */ 3556 3557 static bool 3558 can_reassociate_p (tree op) 3559 { 3560 tree type = TREE_TYPE (op); 3561 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)) 3562 || NON_SAT_FIXED_POINT_TYPE_P (type) 3563 || (flag_associative_math && FLOAT_TYPE_P (type))) 3564 return true; 3565 return false; 3566 } 3567 3568 /* Break up subtract operations in block BB. 3569 3570 We do this top down because we don't know whether the subtract is 3571 part of a possible chain of reassociation except at the top. 3572 3573 IE given 3574 d = f + g 3575 c = a + e 3576 b = c - d 3577 q = b - r 3578 k = t - q 3579 3580 we want to break up k = t - q, but we won't until we've transformed q 3581 = b - r, which won't be broken up until we transform b = c - d. 3582 3583 En passant, clear the GIMPLE visited flag on every statement. */ 3584 3585 static void 3586 break_up_subtract_bb (basic_block bb) 3587 { 3588 gimple_stmt_iterator gsi; 3589 basic_block son; 3590 3591 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 3592 { 3593 gimple stmt = gsi_stmt (gsi); 3594 gimple_set_visited (stmt, false); 3595 3596 if (!is_gimple_assign (stmt) 3597 || !can_reassociate_p (gimple_assign_lhs (stmt))) 3598 continue; 3599 3600 /* Look for simple gimple subtract operations. */ 3601 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR) 3602 { 3603 if (!can_reassociate_p (gimple_assign_rhs1 (stmt)) 3604 || !can_reassociate_p (gimple_assign_rhs2 (stmt))) 3605 continue; 3606 3607 /* Check for a subtract used only in an addition. If this 3608 is the case, transform it into add of a negate for better 3609 reassociation. 
IE transform C = A-B into C = A + -B if C 3610 is only used in an addition. */ 3611 if (should_break_up_subtract (stmt)) 3612 break_up_subtract (stmt, &gsi); 3613 } 3614 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR 3615 && can_reassociate_p (gimple_assign_rhs1 (stmt))) 3616 plus_negates.safe_push (gimple_assign_lhs (stmt)); 3617 } 3618 for (son = first_dom_son (CDI_DOMINATORS, bb); 3619 son; 3620 son = next_dom_son (CDI_DOMINATORS, son)) 3621 break_up_subtract_bb (son); 3622 } 3623 3624 /* Used for repeated factor analysis. */ 3625 struct repeat_factor_d 3626 { 3627 /* An SSA name that occurs in a multiply chain. */ 3628 tree factor; 3629 3630 /* Cached rank of the factor. */ 3631 unsigned rank; 3632 3633 /* Number of occurrences of the factor in the chain. */ 3634 HOST_WIDE_INT count; 3635 3636 /* An SSA name representing the product of this factor and 3637 all factors appearing later in the repeated factor vector. */ 3638 tree repr; 3639 }; 3640 3641 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t; 3642 typedef const struct repeat_factor_d *const_repeat_factor_t; 3643 3644 3645 static vec<repeat_factor> repeat_factor_vec; 3646 3647 /* Used for sorting the repeat factor vector. Sort primarily by 3648 ascending occurrence count, secondarily by descending rank. */ 3649 3650 static int 3651 compare_repeat_factors (const void *x1, const void *x2) 3652 { 3653 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1; 3654 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2; 3655 3656 if (rf1->count != rf2->count) 3657 return rf1->count - rf2->count; 3658 3659 return rf2->rank - rf1->rank; 3660 } 3661 3662 /* Look for repeated operands in OPS in the multiply tree rooted at 3663 STMT. Replace them with an optimal sequence of multiplies and powi 3664 builtin calls, and remove the used operands from OPS. Return an 3665 SSA name representing the value of the replacement sequence. */ 3666 3667 static tree 3668 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops) 3669 { 3670 unsigned i, j, vec_len; 3671 int ii; 3672 operand_entry_t oe; 3673 repeat_factor_t rf1, rf2; 3674 repeat_factor rfnew; 3675 tree result = NULL_TREE; 3676 tree target_ssa, iter_result; 3677 tree type = TREE_TYPE (gimple_get_lhs (stmt)); 3678 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI); 3679 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 3680 gimple mul_stmt, pow_stmt; 3681 3682 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and 3683 target. */ 3684 if (!powi_fndecl) 3685 return NULL_TREE; 3686 3687 /* Allocate the repeated factor vector. */ 3688 repeat_factor_vec.create (10); 3689 3690 /* Scan the OPS vector for all SSA names in the product and build 3691 up a vector of occurrence counts for each factor. */ 3692 FOR_EACH_VEC_ELT (*ops, i, oe) 3693 { 3694 if (TREE_CODE (oe->op) == SSA_NAME) 3695 { 3696 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 3697 { 3698 if (rf1->factor == oe->op) 3699 { 3700 rf1->count += oe->count; 3701 break; 3702 } 3703 } 3704 3705 if (j >= repeat_factor_vec.length ()) 3706 { 3707 rfnew.factor = oe->op; 3708 rfnew.rank = oe->rank; 3709 rfnew.count = oe->count; 3710 rfnew.repr = NULL_TREE; 3711 repeat_factor_vec.safe_push (rfnew); 3712 } 3713 } 3714 } 3715 3716 /* Sort the repeated factor vector by (a) increasing occurrence count, 3717 and (b) decreasing rank. 
*/ 3718 repeat_factor_vec.qsort (compare_repeat_factors); 3719 3720 /* It is generally best to combine as many base factors as possible 3721 into a product before applying __builtin_powi to the result. 3722 However, the sort order chosen for the repeated factor vector 3723 allows us to cache partial results for the product of the base 3724 factors for subsequent use. When we already have a cached partial 3725 result from a previous iteration, it is best to make use of it 3726 before looking for another __builtin_pow opportunity. 3727 3728 As an example, consider x * x * y * y * y * z * z * z * z. 3729 We want to first compose the product x * y * z, raise it to the 3730 second power, then multiply this by y * z, and finally multiply 3731 by z. This can be done in 5 multiplies provided we cache y * z 3732 for use in both expressions: 3733 3734 t1 = y * z 3735 t2 = t1 * x 3736 t3 = t2 * t2 3737 t4 = t1 * t3 3738 result = t4 * z 3739 3740 If we instead ignored the cached y * z and first multiplied by 3741 the __builtin_pow opportunity z * z, we would get the inferior: 3742 3743 t1 = y * z 3744 t2 = t1 * x 3745 t3 = t2 * t2 3746 t4 = z * z 3747 t5 = t3 * t4 3748 result = t5 * y */ 3749 3750 vec_len = repeat_factor_vec.length (); 3751 3752 /* Repeatedly look for opportunities to create a builtin_powi call. */ 3753 while (true) 3754 { 3755 HOST_WIDE_INT power; 3756 3757 /* First look for the largest cached product of factors from 3758 preceding iterations. If found, create a builtin_powi for 3759 it if the minimum occurrence count for its factors is at 3760 least 2, or just use this cached product as our next 3761 multiplicand if the minimum occurrence count is 1. */ 3762 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 3763 { 3764 if (rf1->repr && rf1->count > 0) 3765 break; 3766 } 3767 3768 if (j < vec_len) 3769 { 3770 power = rf1->count; 3771 3772 if (power == 1) 3773 { 3774 iter_result = rf1->repr; 3775 3776 if (dump_file && (dump_flags & TDF_DETAILS)) 3777 { 3778 unsigned elt; 3779 repeat_factor_t rf; 3780 fputs ("Multiplying by cached product ", dump_file); 3781 for (elt = j; elt < vec_len; elt++) 3782 { 3783 rf = &repeat_factor_vec[elt]; 3784 print_generic_expr (dump_file, rf->factor, 0); 3785 if (elt < vec_len - 1) 3786 fputs (" * ", dump_file); 3787 } 3788 fputs ("\n", dump_file); 3789 } 3790 } 3791 else 3792 { 3793 iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); 3794 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 3795 build_int_cst (integer_type_node, 3796 power)); 3797 gimple_call_set_lhs (pow_stmt, iter_result); 3798 gimple_set_location (pow_stmt, gimple_location (stmt)); 3799 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); 3800 3801 if (dump_file && (dump_flags & TDF_DETAILS)) 3802 { 3803 unsigned elt; 3804 repeat_factor_t rf; 3805 fputs ("Building __builtin_pow call for cached product (", 3806 dump_file); 3807 for (elt = j; elt < vec_len; elt++) 3808 { 3809 rf = &repeat_factor_vec[elt]; 3810 print_generic_expr (dump_file, rf->factor, 0); 3811 if (elt < vec_len - 1) 3812 fputs (" * ", dump_file); 3813 } 3814 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", 3815 power); 3816 } 3817 } 3818 } 3819 else 3820 { 3821 /* Otherwise, find the first factor in the repeated factor 3822 vector whose occurrence count is at least 2. If no such 3823 factor exists, there are no builtin_powi opportunities 3824 remaining. 
*/ 3825 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 3826 { 3827 if (rf1->count >= 2) 3828 break; 3829 } 3830 3831 if (j >= vec_len) 3832 break; 3833 3834 power = rf1->count; 3835 3836 if (dump_file && (dump_flags & TDF_DETAILS)) 3837 { 3838 unsigned elt; 3839 repeat_factor_t rf; 3840 fputs ("Building __builtin_pow call for (", dump_file); 3841 for (elt = j; elt < vec_len; elt++) 3842 { 3843 rf = &repeat_factor_vec[elt]; 3844 print_generic_expr (dump_file, rf->factor, 0); 3845 if (elt < vec_len - 1) 3846 fputs (" * ", dump_file); 3847 } 3848 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power); 3849 } 3850 3851 reassociate_stats.pows_created++; 3852 3853 /* Visit each element of the vector in reverse order (so that 3854 high-occurrence elements are visited first, and within the 3855 same occurrence count, lower-ranked elements are visited 3856 first). Form a linear product of all elements in this order 3857 whose occurrencce count is at least that of element J. 3858 Record the SSA name representing the product of each element 3859 with all subsequent elements in the vector. */ 3860 if (j == vec_len - 1) 3861 rf1->repr = rf1->factor; 3862 else 3863 { 3864 for (ii = vec_len - 2; ii >= (int)j; ii--) 3865 { 3866 tree op1, op2; 3867 3868 rf1 = &repeat_factor_vec[ii]; 3869 rf2 = &repeat_factor_vec[ii + 1]; 3870 3871 /* Init the last factor's representative to be itself. */ 3872 if (!rf2->repr) 3873 rf2->repr = rf2->factor; 3874 3875 op1 = rf1->factor; 3876 op2 = rf2->repr; 3877 3878 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow"); 3879 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, 3880 target_ssa, 3881 op1, op2); 3882 gimple_set_location (mul_stmt, gimple_location (stmt)); 3883 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); 3884 rf1->repr = target_ssa; 3885 3886 /* Don't reprocess the multiply we just introduced. */ 3887 gimple_set_visited (mul_stmt, true); 3888 } 3889 } 3890 3891 /* Form a call to __builtin_powi for the maximum product 3892 just formed, raised to the power obtained earlier. */ 3893 rf1 = &repeat_factor_vec[j]; 3894 iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); 3895 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 3896 build_int_cst (integer_type_node, 3897 power)); 3898 gimple_call_set_lhs (pow_stmt, iter_result); 3899 gimple_set_location (pow_stmt, gimple_location (stmt)); 3900 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); 3901 } 3902 3903 /* If we previously formed at least one other builtin_powi call, 3904 form the product of this one and those others. */ 3905 if (result) 3906 { 3907 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow"); 3908 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result, 3909 result, iter_result); 3910 gimple_set_location (mul_stmt, gimple_location (stmt)); 3911 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); 3912 gimple_set_visited (mul_stmt, true); 3913 result = new_result; 3914 } 3915 else 3916 result = iter_result; 3917 3918 /* Decrement the occurrence count of each element in the product 3919 by the count found above, and remove this many copies of each 3920 factor from OPS. 
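     Continuing the x * x * y * y * y * z * z * z * z illustration
     from above: once __builtin_powi (x * y * z, 2) has been emitted,
     the counts drop to x:0, y:1, z:2 and two copies of each of x, y
     and z are removed from OPS; later iterations then pick up the
     cached y * z product and the leftover z.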
         */
      for (i = j; i < vec_len; i++)
        {
          unsigned k = power;
          unsigned n;

          rf1 = &repeat_factor_vec[i];
          rf1->count -= power;

          FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
            {
              if (oe->op == rf1->factor)
                {
                  if (oe->count <= k)
                    {
                      ops->ordered_remove (n);
                      k -= oe->count;

                      if (k == 0)
                        break;
                    }
                  else
                    {
                      oe->count -= k;
                      break;
                    }
                }
            }
        }
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}

/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (stmt);
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}

/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
                            tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}

/* Reassociate expressions in basic block BB and its post-dominator
   children.  */

static void
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple stmt = last_stmt (bb);

  if (stmt && !gimple_visited_p (stmt))
    maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.
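                 If it did, remove it now; otherwise just skip it, since
                 it was already handled as part of a previously rewritten
                 expression tree.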
                 */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  gsi_remove (&gsi, true);
                  release_defs (stmt);
                  /* We might end up removing the last stmt above which
                     places the iterator to the end of the sequence.
                     Reset it to the last stmt in this case which might
                     be the end of the sequence as well if we removed
                     the last statement of the sequence.  In which case
                     we need to bail out.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      if (gsi_end_p (gsi))
                        break;
                    }
                }
              continue;
            }

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);

          /* For non-bit or min/max operations we can't associate
             all types.  Verify that here.  */
          if (rhs_code != BIT_IOR_EXPR
              && rhs_code != BIT_AND_EXPR
              && rhs_code != BIT_XOR_EXPR
              && rhs_code != MIN_EXPR
              && rhs_code != MAX_EXPR
              && (!can_reassociate_p (lhs)
                  || !can_reassociate_p (rhs1)
                  || !can_reassociate_p (rhs2)))
            continue;

          if (associative_tree_code (rhs_code))
            {
              vec<operand_entry_t> ops = vNULL;
              tree powi_result = NULL_TREE;

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }

              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                optimize_range_tests (rhs_code, &ops);

              if (first_pass_instance
                  && rhs_code == MULT_EXPR
                  && flag_unsafe_math_optimizations)
                powi_result = attempt_builtin_powi (stmt, &ops);

              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width = get_reassociation_width (ops_num, rhs_code, mode);

                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file,
                             "Width = %d was chosen for reassociation\n", width);

                  if (width > 1
                      && ops.length () > 3)
                    rewrite_expr_tree_parallel (stmt, width, ops);
                  else
                    rewrite_expr_tree (stmt, 0, ops, false);

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.
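                     This is done by pointing the rewritten statement at
                     a fresh SSA name and appending a multiply of that
                     name with the powi result into the original LHS.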
                     */
                  if (powi_result)
                    {
                      gimple mul_stmt;
                      tree type = TREE_TYPE (gimple_get_lhs (stmt));
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocpow");
                      gimple_set_lhs (stmt, target_ssa);
                      update_stmt (stmt);
                      mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
                                                               powi_result,
                                                               target_ssa);
                      gimple_set_location (mul_stmt, gimple_location (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }

              ops.release ();
            }
        }
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}

void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
void debug_ops_vector (vec<operand_entry_t> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
{
  operand_entry_t oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry_t> ops)
{
  dump_ops_vector (stderr, ops);
}

static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR);
  reassociate_bb (EXIT_BLOCK_PTR);
}

/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  operand_entry_pool = create_alloc_pool ("operand entry pool",
                                          sizeof (struct operand_entry), 30);
  next_operand_entry_id = 0;

  /* RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block);
  operand_rank = pointer_map_create ();

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}

/* Cleanup after the reassociation pass, and print stats if
   requested.
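   The counters recorded here via statistics_counter_event are
   reported when statistics dumping (e.g. -fdump-statistics) is
   enabled.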
   */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  pointer_map_destroy (operand_rank);
  free_alloc_pool (operand_entry_pool);
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}

/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();

  fini_reassoc ();
  return 0;
}

static bool
gate_tree_ssa_reassoc (void)
{
  return flag_tree_reassoc != 0;
}

struct gimple_opt_pass pass_reassoc =
{
 {
  GIMPLE_PASS,
  "reassoc",                            /* name */
  OPTGROUP_NONE,                        /* optinfo_flags */
  gate_tree_ssa_reassoc,                /* gate */
  execute_reassoc,                      /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_TREE_REASSOC,                      /* tv_id */
  PROP_cfg | PROP_ssa,                  /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_verify_ssa
    | TODO_update_ssa_only_virtuals
    | TODO_verify_flow
    | TODO_ggc_collect                  /* todo_flags_finish */
 }
};