1 /* Reassociation for trees. 2 Copyright (C) 2005-2017 Free Software Foundation, Inc. 3 Contributed by Daniel Berlin <dan@dberlin.org> 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 GCC is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #include "config.h" 22 #include "system.h" 23 #include "coretypes.h" 24 #include "backend.h" 25 #include "target.h" 26 #include "rtl.h" 27 #include "tree.h" 28 #include "gimple.h" 29 #include "cfghooks.h" 30 #include "alloc-pool.h" 31 #include "tree-pass.h" 32 #include "memmodel.h" 33 #include "tm_p.h" 34 #include "ssa.h" 35 #include "optabs-tree.h" 36 #include "gimple-pretty-print.h" 37 #include "diagnostic-core.h" 38 #include "fold-const.h" 39 #include "stor-layout.h" 40 #include "cfganal.h" 41 #include "gimple-fold.h" 42 #include "tree-eh.h" 43 #include "gimple-iterator.h" 44 #include "gimplify-me.h" 45 #include "tree-cfg.h" 46 #include "tree-ssa-loop.h" 47 #include "flags.h" 48 #include "tree-ssa.h" 49 #include "langhooks.h" 50 #include "cfgloop.h" 51 #include "params.h" 52 #include "builtins.h" 53 #include "gimplify.h" 54 #include "case-cfn-macros.h" 55 56 /* This is a simple global reassociation pass. It is, in part, based 57 on the LLVM pass of the same name (They do some things more/less 58 than we do, in different orders, etc). 59 60 It consists of five steps: 61 62 1. Breaking up subtract operations into addition + negate, where 63 it would promote the reassociation of adds. 64 65 2. Left linearization of the expression trees, so that (A+B)+(C+D) 66 becomes (((A+B)+C)+D), which is easier for us to rewrite later. 67 During linearization, we place the operands of the binary 68 expressions into a vector of operand_entry_* 69 70 3. Optimization of the operand lists, eliminating things like a + 71 -a, a & a, etc. 72 73 3a. Combine repeated factors with the same occurrence counts 74 into a __builtin_powi call that will later be optimized into 75 an optimal number of multiplies. 76 77 4. Rewrite the expression trees we linearized and optimized so 78 they are in proper rank order. 79 80 5. Repropagate negates, as nothing else will clean it up ATM. 81 82 A bit of theory on #4, since nobody seems to write anything down 83 about why it makes sense to do it the way they do it: 84 85 We could do this much nicer theoretically, but don't (for reasons 86 explained after how to do it theoretically nice :P). 87 88 In order to promote the most redundancy elimination, you want 89 binary expressions whose operands are the same rank (or 90 preferably, the same value) exposed to the redundancy eliminator, 91 for possible elimination. 92 93 So the way to do this if we really cared, is to build the new op 94 tree from the leaves to the roots, merging as you go, and putting the 95 new op on the end of the worklist, until you are left with one 96 thing on the worklist. 
97 98 IE if you have to rewrite the following set of operands (listed with 99 rank in parentheses), with opcode PLUS_EXPR: 100 101 a (1), b (1), c (1), d (2), e (2) 102 103 104 We start with our merge worklist empty, and the ops list with all of 105 those on it. 106 107 You want to first merge all leaves of the same rank, as much as 108 possible. 109 110 So first build a binary op of 111 112 mergetmp = a + b, and put "mergetmp" on the merge worklist. 113 114 Because there is no three operand form of PLUS_EXPR, c is not going to 115 be exposed to redundancy elimination as a rank 1 operand. 116 117 So you might as well throw it on the merge worklist (you could also 118 consider it to now be a rank two operand, and merge it with d and e, 119 but in this case, you then have evicted e from a binary op. So at 120 least in this situation, you can't win.) 121 122 Then build a binary op of d + e 123 mergetmp2 = d + e 124 125 and put mergetmp2 on the merge worklist. 126 127 so merge worklist = {mergetmp, c, mergetmp2} 128 129 Continue building binary ops of these operations until you have only 130 one operation left on the worklist. 131 132 So we have 133 134 build binary op 135 mergetmp3 = mergetmp + c 136 137 worklist = {mergetmp2, mergetmp3} 138 139 mergetmp4 = mergetmp2 + mergetmp3 140 141 worklist = {mergetmp4} 142 143 because we have one operation left, we can now just set the original 144 statement equal to the result of that operation. 145 146 This will at least expose a + b and d + e to redundancy elimination 147 as binary operations. 148 149 For extra points, you can reuse the old statements to build the 150 mergetmps, since you shouldn't run out. 151 152 So why don't we do this? 153 154 Because it's expensive, and rarely will help. Most trees we are 155 reassociating have 3 or less ops. If they have 2 ops, they already 156 will be written into a nice single binary op. If you have 3 ops, a 157 single simple check suffices to tell you whether the first two are of the 158 same rank. If so, you know to order it 159 160 mergetmp = op1 + op2 161 newstmt = mergetmp + op3 162 163 instead of 164 mergetmp = op2 + op3 165 newstmt = mergetmp + op1 166 167 If all three are of the same rank, you can't expose them all in a 168 single binary operator anyway, so the above is *still* the best you 169 can do. 170 171 Thus, this is what we do. When we have three ops left, we check to see 172 what order to put them in, and call it a day. As a nod to vector sum 173 reduction, we check if any of the ops are really a phi node that is a 174 destructive update for the associating op, and keep the destructive 175 update together for vector sum reduction recognition. */ 176 177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See 178 point 3a in the pass header comment. */ 179 static bool reassoc_insert_powi_p; 180 181 /* Statistics */ 182 static struct 183 { 184 int linearized; 185 int constants_eliminated; 186 int ops_eliminated; 187 int rewritten; 188 int pows_encountered; 189 int pows_created; 190 } reassociate_stats; 191 192 /* Operator, rank pair. */ 193 struct operand_entry 194 { 195 unsigned int rank; 196 unsigned int id; 197 tree op; 198 unsigned int count; 199 gimple *stmt_to_insert; 200 }; 201 202 static object_allocator<operand_entry> operand_entry_pool 203 ("operand entry pool"); 204 205 /* This is used to assign a unique ID to each struct operand_entry 206 so that qsort results are identical on different hosts. 
*/ 207 static unsigned int next_operand_entry_id; 208 209 /* Starting rank number for a given basic block, so that we can rank 210 operations using unmovable instructions in that BB based on the bb 211 depth. */ 212 static long *bb_rank; 213 214 /* Operand->rank hashtable. */ 215 static hash_map<tree, long> *operand_rank; 216 217 /* Vector of SSA_NAMEs on which after reassociate_bb is done with 218 all basic blocks the CFG should be adjusted - basic blocks 219 split right after that SSA_NAME's definition statement and before 220 the only use, which must be a bit ior. */ 221 static vec<tree> reassoc_branch_fixups; 222 223 /* Forward decls. */ 224 static long get_rank (tree); 225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *); 226 227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts 228 possibly added by gsi_remove. */ 229 230 bool 231 reassoc_remove_stmt (gimple_stmt_iterator *gsi) 232 { 233 gimple *stmt = gsi_stmt (*gsi); 234 235 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI) 236 return gsi_remove (gsi, true); 237 238 gimple_stmt_iterator prev = *gsi; 239 gsi_prev (&prev); 240 unsigned uid = gimple_uid (stmt); 241 basic_block bb = gimple_bb (stmt); 242 bool ret = gsi_remove (gsi, true); 243 if (!gsi_end_p (prev)) 244 gsi_next (&prev); 245 else 246 prev = gsi_start_bb (bb); 247 gimple *end_stmt = gsi_stmt (*gsi); 248 while ((stmt = gsi_stmt (prev)) != end_stmt) 249 { 250 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0); 251 gimple_set_uid (stmt, uid); 252 gsi_next (&prev); 253 } 254 return ret; 255 } 256 257 /* Bias amount for loop-carried phis. We want this to be larger than 258 the depth of any reassociation tree we can see, but not larger than 259 the rank difference between two blocks. */ 260 #define PHI_LOOP_BIAS (1 << 15) 261 262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of 263 an innermost loop, and the phi has only a single use which is inside 264 the loop, then the rank is the block rank of the loop latch plus an 265 extra bias for the loop-carried dependence. This causes expressions 266 calculated into an accumulator variable to be independent for each 267 iteration of the loop. If STMT is some other phi, the rank is the 268 block rank of its containing block. */ 269 static long 270 phi_rank (gimple *stmt) 271 { 272 basic_block bb = gimple_bb (stmt); 273 struct loop *father = bb->loop_father; 274 tree res; 275 unsigned i; 276 use_operand_p use; 277 gimple *use_stmt; 278 279 /* We only care about real loops (those with a latch). */ 280 if (!father->latch) 281 return bb_rank[bb->index]; 282 283 /* Interesting phis must be in headers of innermost loops. */ 284 if (bb != father->header 285 || father->inner) 286 return bb_rank[bb->index]; 287 288 /* Ignore virtual SSA_NAMEs. */ 289 res = gimple_phi_result (stmt); 290 if (virtual_operand_p (res)) 291 return bb_rank[bb->index]; 292 293 /* The phi definition must have a single use, and that use must be 294 within the loop. Otherwise this isn't an accumulator pattern. */ 295 if (!single_imm_use (res, &use, &use_stmt) 296 || gimple_bb (use_stmt)->loop_father != father) 297 return bb_rank[bb->index]; 298 299 /* Look for phi arguments from within the loop. If found, bias this phi. 
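     For example (made-up SSA names): for x_1 = PHI <x_0(preheader), x_2(latch)>
     in the header of an innermost loop, where the single use of x_1 is inside
     the loop and x_2 is defined inside the loop, we return
     bb_rank[latch] + PHI_LOOP_BIAS below instead of the plain block rank.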
*/ 300 for (i = 0; i < gimple_phi_num_args (stmt); i++) 301 { 302 tree arg = gimple_phi_arg_def (stmt, i); 303 if (TREE_CODE (arg) == SSA_NAME 304 && !SSA_NAME_IS_DEFAULT_DEF (arg)) 305 { 306 gimple *def_stmt = SSA_NAME_DEF_STMT (arg); 307 if (gimple_bb (def_stmt)->loop_father == father) 308 return bb_rank[father->latch->index] + PHI_LOOP_BIAS; 309 } 310 } 311 312 /* Must be an uninteresting phi. */ 313 return bb_rank[bb->index]; 314 } 315 316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a 317 loop-carried dependence of an innermost loop, return TRUE; else 318 return FALSE. */ 319 static bool 320 loop_carried_phi (tree exp) 321 { 322 gimple *phi_stmt; 323 long block_rank; 324 325 if (TREE_CODE (exp) != SSA_NAME 326 || SSA_NAME_IS_DEFAULT_DEF (exp)) 327 return false; 328 329 phi_stmt = SSA_NAME_DEF_STMT (exp); 330 331 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI) 332 return false; 333 334 /* Non-loop-carried phis have block rank. Loop-carried phis have 335 an additional bias added in. If this phi doesn't have block rank, 336 it's biased and should not be propagated. */ 337 block_rank = bb_rank[gimple_bb (phi_stmt)->index]; 338 339 if (phi_rank (phi_stmt) != block_rank) 340 return true; 341 342 return false; 343 } 344 345 /* Return the maximum of RANK and the rank that should be propagated 346 from expression OP. For most operands, this is just the rank of OP. 347 For loop-carried phis, the value is zero to avoid undoing the bias 348 in favor of the phi. */ 349 static long 350 propagate_rank (long rank, tree op) 351 { 352 long op_rank; 353 354 if (loop_carried_phi (op)) 355 return rank; 356 357 op_rank = get_rank (op); 358 359 return MAX (rank, op_rank); 360 } 361 362 /* Look up the operand rank structure for expression E. */ 363 364 static inline long 365 find_operand_rank (tree e) 366 { 367 long *slot = operand_rank->get (e); 368 return slot ? *slot : -1; 369 } 370 371 /* Insert {E,RANK} into the operand rank hashtable. */ 372 373 static inline void 374 insert_operand_rank (tree e, long rank) 375 { 376 gcc_assert (rank > 0); 377 gcc_assert (!operand_rank->put (e, rank)); 378 } 379 380 /* Given an expression E, return the rank of the expression. */ 381 382 static long 383 get_rank (tree e) 384 { 385 /* SSA_NAME's have the rank of the expression they are the result 386 of. 387 For globals and uninitialized values, the rank is 0. 388 For function arguments, use the pre-setup rank. 389 For PHI nodes, stores, asm statements, etc, we use the rank of 390 the BB. 391 For simple operations, the rank is the maximum rank of any of 392 its operands, or the bb_rank, whichever is less. 393 I make no claims that this is optimal, however, it gives good 394 results. */ 395 396 /* We make an exception to the normal ranking system to break 397 dependences of accumulator variables in loops. Suppose we 398 have a simple one-block loop containing: 399 400 x_1 = phi(x_0, x_2) 401 b = a + x_1 402 c = b + d 403 x_2 = c + e 404 405 As shown, each iteration of the calculation into x is fully 406 dependent upon the iteration before it. We would prefer to 407 see this in the form: 408 409 x_1 = phi(x_0, x_2) 410 b = a + d 411 c = b + e 412 x_2 = c + x_1 413 414 If the loop is unrolled, the calculations of b and c from 415 different iterations can be interleaved. 416 417 To obtain this result during reassociation, we bias the rank 418 of the phi definition x_1 upward, when it is recognized as an 419 accumulator pattern. 
     The artificial rank causes it to be added last, providing the
     desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      long rank;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
        return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
        return phi_rank (stmt);

      if (!is_gimple_assign (stmt))
        return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
         exception, remove the bias from loop-carried phis when propagating
         the rank so that dependent operations are not also biased.  */
      /* Simply walk over all SSA uses - this takes advantage of the
         fact that non-SSA operands are is_gimple_min_invariant and
         thus have rank 0.  */
      rank = 0;
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
        rank = propagate_rank (rank, op);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e, 0);
          fprintf (dump_file, " is %ld\n", (rank + 1));
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Constants, globals, etc., are rank 0.  */
  return 0;
}


/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 4
#define FLOAT_ONE_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    {
      /* Sort -1.0 and 1.0 constants last, while in some cases
         const_binop can't optimize some inexact operations, multiplication
         by -1.0 or 1.0 can always be merged with others.  */
      if (real_onep (t) || real_minus_onep (t))
        return FLOAT_ONE_CONST_TYPE;
      return FLOAT_CONST_TYPE;
    }
  else
    return OTHER_CONST_TYPE;
}

/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
        return constant_type (oea->op) - constant_type (oeb->op);
      else
        /* To make sorting result stable, we use unique IDs to determine
           order.  */
        return oeb->id > oea->id ? 1 : -1;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.
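     To illustrate the overall ordering with made-up operands: for PLUS_EXPR
     over { x_3 (rank 3), y_2 (rank 2), 5, 1.0 } the sorted result is
     x_3, y_2, 1.0, 5 - higher ranks come first, constants come last, and
     integer constants sort after floating-point ones.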
     */
  if (oeb->rank == oea->rank
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
         versions of removed SSA_NAMEs, so if possible prefer to sort
         based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
         See PR60418.  */
      if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
          && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
          && !oea->stmt_to_insert
          && !oeb->stmt_to_insert
          && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        {
          gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
          gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
          basic_block bba = gimple_bb (stmta);
          basic_block bbb = gimple_bb (stmtb);
          if (bbb != bba)
            {
              if (bb_rank[bbb->index] != bb_rank[bba->index])
                return bb_rank[bbb->index] - bb_rank[bba->index];
            }
          else
            {
              bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
              bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
              if (da != db)
                return da ? 1 : -1;
            }
        }

      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
      else
        return oeb->id > oea->id ? 1 : -1;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank > oea->rank ? 1 : -1;
  else
    return oeb->id > oea->id ? 1 : -1;
}

/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  oe->stmt_to_insert = stmt_to_insert;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
                       HOST_WIDE_INT repeat)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  oe->stmt_to_insert = NULL;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}

/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      tree rhs2 = gimple_assign_rhs2 (stmt);
      if (TREE_CODE (rhs1) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
        return false;
      if (rhs2
          && TREE_CODE (rhs2) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
        return false;
      return true;
    }

  return false;
}


/* Return true if STMT is a nop-conversion.
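   For example, a cast between integer types with the same precision, such
   as from int to unsigned int, where tree_nop_conversion_p holds.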
*/ 634 635 static bool 636 gimple_nop_conversion_p (gimple *stmt) 637 { 638 if (gassign *ass = dyn_cast <gassign *> (stmt)) 639 { 640 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass)) 641 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)), 642 TREE_TYPE (gimple_assign_rhs1 (ass)))) 643 return true; 644 } 645 return false; 646 } 647 648 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the 649 operand of the negate operation. Otherwise, return NULL. */ 650 651 static tree 652 get_unary_op (tree name, enum tree_code opcode) 653 { 654 gimple *stmt = SSA_NAME_DEF_STMT (name); 655 656 /* Look through nop conversions (sign changes). */ 657 if (gimple_nop_conversion_p (stmt) 658 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME) 659 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); 660 661 if (!is_gimple_assign (stmt)) 662 return NULL_TREE; 663 664 if (gimple_assign_rhs_code (stmt) == opcode) 665 return gimple_assign_rhs1 (stmt); 666 return NULL_TREE; 667 } 668 669 /* Return true if OP1 and OP2 have the same value if casted to either type. */ 670 671 static bool 672 ops_equal_values_p (tree op1, tree op2) 673 { 674 if (op1 == op2) 675 return true; 676 677 tree orig_op1 = op1; 678 if (TREE_CODE (op1) == SSA_NAME) 679 { 680 gimple *stmt = SSA_NAME_DEF_STMT (op1); 681 if (gimple_nop_conversion_p (stmt)) 682 { 683 op1 = gimple_assign_rhs1 (stmt); 684 if (op1 == op2) 685 return true; 686 } 687 } 688 689 if (TREE_CODE (op2) == SSA_NAME) 690 { 691 gimple *stmt = SSA_NAME_DEF_STMT (op2); 692 if (gimple_nop_conversion_p (stmt)) 693 { 694 op2 = gimple_assign_rhs1 (stmt); 695 if (op1 == op2 696 || orig_op1 == op2) 697 return true; 698 } 699 } 700 701 return false; 702 } 703 704 705 /* If CURR and LAST are a pair of ops that OPCODE allows us to 706 eliminate through equivalences, do so, remove them from OPS, and 707 return true. Otherwise, return false. */ 708 709 static bool 710 eliminate_duplicate_pair (enum tree_code opcode, 711 vec<operand_entry *> *ops, 712 bool *all_done, 713 unsigned int i, 714 operand_entry *curr, 715 operand_entry *last) 716 { 717 718 /* If we have two of the same op, and the opcode is & |, min, or max, 719 we can eliminate one of them. 720 If we have two of the same op, and the opcode is ^, we can 721 eliminate both of them. 
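     For example (made-up operands), a & a & b becomes a & b, while
     a ^ a ^ b collapses to just b; a zero constant is substituted only
     when nothing else would be left.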
*/ 722 723 if (last && last->op == curr->op) 724 { 725 switch (opcode) 726 { 727 case MAX_EXPR: 728 case MIN_EXPR: 729 case BIT_IOR_EXPR: 730 case BIT_AND_EXPR: 731 if (dump_file && (dump_flags & TDF_DETAILS)) 732 { 733 fprintf (dump_file, "Equivalence: "); 734 print_generic_expr (dump_file, curr->op, 0); 735 fprintf (dump_file, " [&|minmax] "); 736 print_generic_expr (dump_file, last->op, 0); 737 fprintf (dump_file, " -> "); 738 print_generic_stmt (dump_file, last->op, 0); 739 } 740 741 ops->ordered_remove (i); 742 reassociate_stats.ops_eliminated ++; 743 744 return true; 745 746 case BIT_XOR_EXPR: 747 if (dump_file && (dump_flags & TDF_DETAILS)) 748 { 749 fprintf (dump_file, "Equivalence: "); 750 print_generic_expr (dump_file, curr->op, 0); 751 fprintf (dump_file, " ^ "); 752 print_generic_expr (dump_file, last->op, 0); 753 fprintf (dump_file, " -> nothing\n"); 754 } 755 756 reassociate_stats.ops_eliminated += 2; 757 758 if (ops->length () == 2) 759 { 760 ops->truncate (0); 761 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op))); 762 *all_done = true; 763 } 764 else 765 { 766 ops->ordered_remove (i-1); 767 ops->ordered_remove (i-1); 768 } 769 770 return true; 771 772 default: 773 break; 774 } 775 } 776 return false; 777 } 778 779 static vec<tree> plus_negates; 780 781 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not 782 expression, look in OPS for a corresponding positive operation to cancel 783 it out. If we find one, remove the other from OPS, replace 784 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise, 785 return false. */ 786 787 static bool 788 eliminate_plus_minus_pair (enum tree_code opcode, 789 vec<operand_entry *> *ops, 790 unsigned int currindex, 791 operand_entry *curr) 792 { 793 tree negateop; 794 tree notop; 795 unsigned int i; 796 operand_entry *oe; 797 798 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME) 799 return false; 800 801 negateop = get_unary_op (curr->op, NEGATE_EXPR); 802 notop = get_unary_op (curr->op, BIT_NOT_EXPR); 803 if (negateop == NULL_TREE && notop == NULL_TREE) 804 return false; 805 806 /* Any non-negated version will have a rank that is one less than 807 the current rank. So once we hit those ranks, if we don't find 808 one, we can stop. 
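     For example (made-up operands), in t_1 + b with t_1 = -b the pair
     cancels and a 0 takes its place, while with t_1 = ~b the pair is
     replaced by -1, since b + ~b is all ones.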
*/ 809 810 for (i = currindex + 1; 811 ops->iterate (i, &oe) 812 && oe->rank >= curr->rank - 1 ; 813 i++) 814 { 815 if (negateop 816 && ops_equal_values_p (oe->op, negateop)) 817 { 818 if (dump_file && (dump_flags & TDF_DETAILS)) 819 { 820 fprintf (dump_file, "Equivalence: "); 821 print_generic_expr (dump_file, negateop, 0); 822 fprintf (dump_file, " + -"); 823 print_generic_expr (dump_file, oe->op, 0); 824 fprintf (dump_file, " -> 0\n"); 825 } 826 827 ops->ordered_remove (i); 828 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op))); 829 ops->ordered_remove (currindex); 830 reassociate_stats.ops_eliminated ++; 831 832 return true; 833 } 834 else if (notop 835 && ops_equal_values_p (oe->op, notop)) 836 { 837 tree op_type = TREE_TYPE (oe->op); 838 839 if (dump_file && (dump_flags & TDF_DETAILS)) 840 { 841 fprintf (dump_file, "Equivalence: "); 842 print_generic_expr (dump_file, notop, 0); 843 fprintf (dump_file, " + ~"); 844 print_generic_expr (dump_file, oe->op, 0); 845 fprintf (dump_file, " -> -1\n"); 846 } 847 848 ops->ordered_remove (i); 849 add_to_ops_vec (ops, build_all_ones_cst (op_type)); 850 ops->ordered_remove (currindex); 851 reassociate_stats.ops_eliminated ++; 852 853 return true; 854 } 855 } 856 857 /* If CURR->OP is a negate expr without nop conversion in a plus expr: 858 save it for later inspection in repropagate_negates(). */ 859 if (negateop != NULL_TREE 860 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR) 861 plus_negates.safe_push (curr->op); 862 863 return false; 864 } 865 866 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a 867 bitwise not expression, look in OPS for a corresponding operand to 868 cancel it out. If we find one, remove the other from OPS, replace 869 OPS[CURRINDEX] with 0, and return true. Otherwise, return 870 false. */ 871 872 static bool 873 eliminate_not_pairs (enum tree_code opcode, 874 vec<operand_entry *> *ops, 875 unsigned int currindex, 876 operand_entry *curr) 877 { 878 tree notop; 879 unsigned int i; 880 operand_entry *oe; 881 882 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) 883 || TREE_CODE (curr->op) != SSA_NAME) 884 return false; 885 886 notop = get_unary_op (curr->op, BIT_NOT_EXPR); 887 if (notop == NULL_TREE) 888 return false; 889 890 /* Any non-not version will have a rank that is one less than 891 the current rank. So once we hit those ranks, if we don't find 892 one, we can stop. */ 893 894 for (i = currindex + 1; 895 ops->iterate (i, &oe) 896 && oe->rank >= curr->rank - 1; 897 i++) 898 { 899 if (oe->op == notop) 900 { 901 if (dump_file && (dump_flags & TDF_DETAILS)) 902 { 903 fprintf (dump_file, "Equivalence: "); 904 print_generic_expr (dump_file, notop, 0); 905 if (opcode == BIT_AND_EXPR) 906 fprintf (dump_file, " & ~"); 907 else if (opcode == BIT_IOR_EXPR) 908 fprintf (dump_file, " | ~"); 909 print_generic_expr (dump_file, oe->op, 0); 910 if (opcode == BIT_AND_EXPR) 911 fprintf (dump_file, " -> 0\n"); 912 else if (opcode == BIT_IOR_EXPR) 913 fprintf (dump_file, " -> -1\n"); 914 } 915 916 if (opcode == BIT_AND_EXPR) 917 oe->op = build_zero_cst (TREE_TYPE (oe->op)); 918 else if (opcode == BIT_IOR_EXPR) 919 oe->op = build_all_ones_cst (TREE_TYPE (oe->op)); 920 921 reassociate_stats.ops_eliminated += ops->length () - 1; 922 ops->truncate (0); 923 ops->quick_push (oe); 924 return true; 925 } 926 } 927 928 return false; 929 } 930 931 /* Use constant value that may be present in OPS to try to eliminate 932 operands. 
Note that this function is only really used when we've 933 eliminated ops for other reasons, or merged constants. Across 934 single statements, fold already does all of this, plus more. There 935 is little point in duplicating logic, so I've only included the 936 identities that I could ever construct testcases to trigger. */ 937 938 static void 939 eliminate_using_constants (enum tree_code opcode, 940 vec<operand_entry *> *ops) 941 { 942 operand_entry *oelast = ops->last (); 943 tree type = TREE_TYPE (oelast->op); 944 945 if (oelast->rank == 0 946 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type))) 947 { 948 switch (opcode) 949 { 950 case BIT_AND_EXPR: 951 if (integer_zerop (oelast->op)) 952 { 953 if (ops->length () != 1) 954 { 955 if (dump_file && (dump_flags & TDF_DETAILS)) 956 fprintf (dump_file, "Found & 0, removing all other ops\n"); 957 958 reassociate_stats.ops_eliminated += ops->length () - 1; 959 960 ops->truncate (0); 961 ops->quick_push (oelast); 962 return; 963 } 964 } 965 else if (integer_all_onesp (oelast->op)) 966 { 967 if (ops->length () != 1) 968 { 969 if (dump_file && (dump_flags & TDF_DETAILS)) 970 fprintf (dump_file, "Found & -1, removing\n"); 971 ops->pop (); 972 reassociate_stats.ops_eliminated++; 973 } 974 } 975 break; 976 case BIT_IOR_EXPR: 977 if (integer_all_onesp (oelast->op)) 978 { 979 if (ops->length () != 1) 980 { 981 if (dump_file && (dump_flags & TDF_DETAILS)) 982 fprintf (dump_file, "Found | -1, removing all other ops\n"); 983 984 reassociate_stats.ops_eliminated += ops->length () - 1; 985 986 ops->truncate (0); 987 ops->quick_push (oelast); 988 return; 989 } 990 } 991 else if (integer_zerop (oelast->op)) 992 { 993 if (ops->length () != 1) 994 { 995 if (dump_file && (dump_flags & TDF_DETAILS)) 996 fprintf (dump_file, "Found | 0, removing\n"); 997 ops->pop (); 998 reassociate_stats.ops_eliminated++; 999 } 1000 } 1001 break; 1002 case MULT_EXPR: 1003 if (integer_zerop (oelast->op) 1004 || (FLOAT_TYPE_P (type) 1005 && !HONOR_NANS (type) 1006 && !HONOR_SIGNED_ZEROS (type) 1007 && real_zerop (oelast->op))) 1008 { 1009 if (ops->length () != 1) 1010 { 1011 if (dump_file && (dump_flags & TDF_DETAILS)) 1012 fprintf (dump_file, "Found * 0, removing all other ops\n"); 1013 1014 reassociate_stats.ops_eliminated += ops->length () - 1; 1015 ops->truncate (1); 1016 ops->quick_push (oelast); 1017 return; 1018 } 1019 } 1020 else if (integer_onep (oelast->op) 1021 || (FLOAT_TYPE_P (type) 1022 && !HONOR_SNANS (type) 1023 && real_onep (oelast->op))) 1024 { 1025 if (ops->length () != 1) 1026 { 1027 if (dump_file && (dump_flags & TDF_DETAILS)) 1028 fprintf (dump_file, "Found * 1, removing\n"); 1029 ops->pop (); 1030 reassociate_stats.ops_eliminated++; 1031 return; 1032 } 1033 } 1034 break; 1035 case BIT_XOR_EXPR: 1036 case PLUS_EXPR: 1037 case MINUS_EXPR: 1038 if (integer_zerop (oelast->op) 1039 || (FLOAT_TYPE_P (type) 1040 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR) 1041 && fold_real_zero_addition_p (type, oelast->op, 1042 opcode == MINUS_EXPR))) 1043 { 1044 if (ops->length () != 1) 1045 { 1046 if (dump_file && (dump_flags & TDF_DETAILS)) 1047 fprintf (dump_file, "Found [|^+] 0, removing\n"); 1048 ops->pop (); 1049 reassociate_stats.ops_eliminated++; 1050 return; 1051 } 1052 } 1053 break; 1054 default: 1055 break; 1056 } 1057 } 1058 } 1059 1060 1061 static void linearize_expr_tree (vec<operand_entry *> *, gimple *, 1062 bool, bool); 1063 1064 /* Structure for tracking and counting operands. 
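   For example, when undistributing a*x + b*x (made-up operands), the
   counting table ends up with an entry roughly like
   { op = x, oecode = MULT_EXPR, cnt = 2 } plus cnt = 1 entries for a and b.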
*/ 1065 struct oecount { 1066 unsigned int cnt; 1067 unsigned int id; 1068 enum tree_code oecode; 1069 tree op; 1070 }; 1071 1072 1073 /* The heap for the oecount hashtable and the sorted list of operands. */ 1074 static vec<oecount> cvec; 1075 1076 1077 /* Oecount hashtable helpers. */ 1078 1079 struct oecount_hasher : int_hash <int, 0, 1> 1080 { 1081 static inline hashval_t hash (int); 1082 static inline bool equal (int, int); 1083 }; 1084 1085 /* Hash function for oecount. */ 1086 1087 inline hashval_t 1088 oecount_hasher::hash (int p) 1089 { 1090 const oecount *c = &cvec[p - 42]; 1091 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode; 1092 } 1093 1094 /* Comparison function for oecount. */ 1095 1096 inline bool 1097 oecount_hasher::equal (int p1, int p2) 1098 { 1099 const oecount *c1 = &cvec[p1 - 42]; 1100 const oecount *c2 = &cvec[p2 - 42]; 1101 return c1->oecode == c2->oecode && c1->op == c2->op; 1102 } 1103 1104 /* Comparison function for qsort sorting oecount elements by count. */ 1105 1106 static int 1107 oecount_cmp (const void *p1, const void *p2) 1108 { 1109 const oecount *c1 = (const oecount *)p1; 1110 const oecount *c2 = (const oecount *)p2; 1111 if (c1->cnt != c2->cnt) 1112 return c1->cnt > c2->cnt ? 1 : -1; 1113 else 1114 /* If counts are identical, use unique IDs to stabilize qsort. */ 1115 return c1->id > c2->id ? 1 : -1; 1116 } 1117 1118 /* Return TRUE iff STMT represents a builtin call that raises OP 1119 to some exponent. */ 1120 1121 static bool 1122 stmt_is_power_of_op (gimple *stmt, tree op) 1123 { 1124 if (!is_gimple_call (stmt)) 1125 return false; 1126 1127 switch (gimple_call_combined_fn (stmt)) 1128 { 1129 CASE_CFN_POW: 1130 CASE_CFN_POWI: 1131 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0)); 1132 1133 default: 1134 return false; 1135 } 1136 } 1137 1138 /* Given STMT which is a __builtin_pow* call, decrement its exponent 1139 in place and return the result. Assumes that stmt_is_power_of_op 1140 was previously called for STMT and returned TRUE. */ 1141 1142 static HOST_WIDE_INT 1143 decrement_power (gimple *stmt) 1144 { 1145 REAL_VALUE_TYPE c, cint; 1146 HOST_WIDE_INT power; 1147 tree arg1; 1148 1149 switch (gimple_call_combined_fn (stmt)) 1150 { 1151 CASE_CFN_POW: 1152 arg1 = gimple_call_arg (stmt, 1); 1153 c = TREE_REAL_CST (arg1); 1154 power = real_to_integer (&c) - 1; 1155 real_from_integer (&cint, VOIDmode, power, SIGNED); 1156 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint)); 1157 return power; 1158 1159 CASE_CFN_POWI: 1160 arg1 = gimple_call_arg (stmt, 1); 1161 power = TREE_INT_CST_LOW (arg1) - 1; 1162 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power)); 1163 return power; 1164 1165 default: 1166 gcc_unreachable (); 1167 } 1168 } 1169 1170 /* Replace SSA defined by STMT and replace all its uses with new 1171 SSA. Also return the new SSA. */ 1172 1173 static tree 1174 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op) 1175 { 1176 gimple *use_stmt; 1177 use_operand_p use; 1178 imm_use_iterator iter; 1179 tree new_lhs, new_debug_lhs = NULL_TREE; 1180 tree lhs = gimple_get_lhs (stmt); 1181 1182 new_lhs = make_ssa_name (TREE_TYPE (lhs)); 1183 gimple_set_lhs (stmt, new_lhs); 1184 1185 /* Also need to update GIMPLE_DEBUGs. 
*/ 1186 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) 1187 { 1188 tree repl = new_lhs; 1189 if (is_gimple_debug (use_stmt)) 1190 { 1191 if (new_debug_lhs == NULL_TREE) 1192 { 1193 new_debug_lhs = make_node (DEBUG_EXPR_DECL); 1194 gdebug *def_temp 1195 = gimple_build_debug_bind (new_debug_lhs, 1196 build2 (opcode, TREE_TYPE (lhs), 1197 new_lhs, op), 1198 stmt); 1199 DECL_ARTIFICIAL (new_debug_lhs) = 1; 1200 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs); 1201 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs))); 1202 gimple_set_uid (def_temp, gimple_uid (stmt)); 1203 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 1204 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT); 1205 } 1206 repl = new_debug_lhs; 1207 } 1208 FOR_EACH_IMM_USE_ON_STMT (use, iter) 1209 SET_USE (use, repl); 1210 update_stmt (use_stmt); 1211 } 1212 return new_lhs; 1213 } 1214 1215 /* Replace all SSAs defined in STMTS_TO_FIX and replace its 1216 uses with new SSAs. Also do this for the stmt that defines DEF 1217 if *DEF is not OP. */ 1218 1219 static void 1220 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op, 1221 vec<gimple *> &stmts_to_fix) 1222 { 1223 unsigned i; 1224 gimple *stmt; 1225 1226 if (*def != op 1227 && TREE_CODE (*def) == SSA_NAME 1228 && (stmt = SSA_NAME_DEF_STMT (*def)) 1229 && gimple_code (stmt) != GIMPLE_NOP) 1230 *def = make_new_ssa_for_def (stmt, opcode, op); 1231 1232 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt) 1233 make_new_ssa_for_def (stmt, opcode, op); 1234 } 1235 1236 /* Find the single immediate use of STMT's LHS, and replace it 1237 with OP. Remove STMT. If STMT's LHS is the same as *DEF, 1238 replace *DEF with OP as well. */ 1239 1240 static void 1241 propagate_op_to_single_use (tree op, gimple *stmt, tree *def) 1242 { 1243 tree lhs; 1244 gimple *use_stmt; 1245 use_operand_p use; 1246 gimple_stmt_iterator gsi; 1247 1248 if (is_gimple_call (stmt)) 1249 lhs = gimple_call_lhs (stmt); 1250 else 1251 lhs = gimple_assign_lhs (stmt); 1252 1253 gcc_assert (has_single_use (lhs)); 1254 single_imm_use (lhs, &use, &use_stmt); 1255 if (lhs == *def) 1256 *def = op; 1257 SET_USE (use, op); 1258 if (TREE_CODE (op) != SSA_NAME) 1259 update_stmt (use_stmt); 1260 gsi = gsi_for_stmt (stmt); 1261 unlink_stmt_vdef (stmt); 1262 reassoc_remove_stmt (&gsi); 1263 release_defs (stmt); 1264 } 1265 1266 /* Walks the linear chain with result *DEF searching for an operation 1267 with operand OP and code OPCODE removing that from the chain. *DEF 1268 is updated if there is only one operand but no operation left. */ 1269 1270 static void 1271 zero_one_operation (tree *def, enum tree_code opcode, tree op) 1272 { 1273 tree orig_def = *def; 1274 gimple *stmt = SSA_NAME_DEF_STMT (*def); 1275 /* PR72835 - Record the stmt chain that has to be updated such that 1276 we dont use the same LHS when the values computed are different. 
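     As an illustration with made-up names: for *DEF = t_2 where
     t_2 = t_1 * b and t_1 = a * x, removing OP = x under MULT_EXPR
     rewrites the chain so that it computes a * b, and the statements
     recorded below are then given fresh SSA names.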
*/ 1277 auto_vec<gimple *, 64> stmts_to_fix; 1278 1279 do 1280 { 1281 tree name; 1282 1283 if (opcode == MULT_EXPR) 1284 { 1285 if (stmt_is_power_of_op (stmt, op)) 1286 { 1287 if (decrement_power (stmt) == 1) 1288 { 1289 if (stmts_to_fix.length () > 0) 1290 stmts_to_fix.pop (); 1291 propagate_op_to_single_use (op, stmt, def); 1292 } 1293 break; 1294 } 1295 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR) 1296 { 1297 if (gimple_assign_rhs1 (stmt) == op) 1298 { 1299 tree cst = build_minus_one_cst (TREE_TYPE (op)); 1300 if (stmts_to_fix.length () > 0) 1301 stmts_to_fix.pop (); 1302 propagate_op_to_single_use (cst, stmt, def); 1303 break; 1304 } 1305 else if (integer_minus_onep (op) 1306 || real_minus_onep (op)) 1307 { 1308 gimple_assign_set_rhs_code 1309 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt))); 1310 break; 1311 } 1312 } 1313 } 1314 1315 name = gimple_assign_rhs1 (stmt); 1316 1317 /* If this is the operation we look for and one of the operands 1318 is ours simply propagate the other operand into the stmts 1319 single use. */ 1320 if (gimple_assign_rhs_code (stmt) == opcode 1321 && (name == op 1322 || gimple_assign_rhs2 (stmt) == op)) 1323 { 1324 if (name == op) 1325 name = gimple_assign_rhs2 (stmt); 1326 if (stmts_to_fix.length () > 0) 1327 stmts_to_fix.pop (); 1328 propagate_op_to_single_use (name, stmt, def); 1329 break; 1330 } 1331 1332 /* We might have a multiply of two __builtin_pow* calls, and 1333 the operand might be hiding in the rightmost one. Likewise 1334 this can happen for a negate. */ 1335 if (opcode == MULT_EXPR 1336 && gimple_assign_rhs_code (stmt) == opcode 1337 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME 1338 && has_single_use (gimple_assign_rhs2 (stmt))) 1339 { 1340 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 1341 if (stmt_is_power_of_op (stmt2, op)) 1342 { 1343 if (decrement_power (stmt2) == 1) 1344 propagate_op_to_single_use (op, stmt2, def); 1345 else 1346 stmts_to_fix.safe_push (stmt2); 1347 break; 1348 } 1349 else if (is_gimple_assign (stmt2) 1350 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR) 1351 { 1352 if (gimple_assign_rhs1 (stmt2) == op) 1353 { 1354 tree cst = build_minus_one_cst (TREE_TYPE (op)); 1355 propagate_op_to_single_use (cst, stmt2, def); 1356 break; 1357 } 1358 else if (integer_minus_onep (op) 1359 || real_minus_onep (op)) 1360 { 1361 stmts_to_fix.safe_push (stmt2); 1362 gimple_assign_set_rhs_code 1363 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2))); 1364 break; 1365 } 1366 } 1367 } 1368 1369 /* Continue walking the chain. */ 1370 gcc_assert (name != op 1371 && TREE_CODE (name) == SSA_NAME); 1372 stmt = SSA_NAME_DEF_STMT (name); 1373 stmts_to_fix.safe_push (stmt); 1374 } 1375 while (1); 1376 1377 if (stmts_to_fix.length () > 0 || *def == orig_def) 1378 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix); 1379 } 1380 1381 /* Returns true if statement S1 dominates statement S2. Like 1382 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */ 1383 1384 static bool 1385 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2) 1386 { 1387 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2); 1388 1389 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D) 1390 SSA_NAME. Assume it lives at the beginning of function and 1391 thus dominates everything. */ 1392 if (!bb1 || s1 == s2) 1393 return true; 1394 1395 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. 
*/ 1396 if (!bb2) 1397 return false; 1398 1399 if (bb1 == bb2) 1400 { 1401 /* PHIs in the same basic block are assumed to be 1402 executed all in parallel, if only one stmt is a PHI, 1403 it dominates the other stmt in the same basic block. */ 1404 if (gimple_code (s1) == GIMPLE_PHI) 1405 return true; 1406 1407 if (gimple_code (s2) == GIMPLE_PHI) 1408 return false; 1409 1410 gcc_assert (gimple_uid (s1) && gimple_uid (s2)); 1411 1412 if (gimple_uid (s1) < gimple_uid (s2)) 1413 return true; 1414 1415 if (gimple_uid (s1) > gimple_uid (s2)) 1416 return false; 1417 1418 gimple_stmt_iterator gsi = gsi_for_stmt (s1); 1419 unsigned int uid = gimple_uid (s1); 1420 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi)) 1421 { 1422 gimple *s = gsi_stmt (gsi); 1423 if (gimple_uid (s) != uid) 1424 break; 1425 if (s == s2) 1426 return true; 1427 } 1428 1429 return false; 1430 } 1431 1432 return dominated_by_p (CDI_DOMINATORS, bb2, bb1); 1433 } 1434 1435 /* Insert STMT after INSERT_POINT. */ 1436 1437 static void 1438 insert_stmt_after (gimple *stmt, gimple *insert_point) 1439 { 1440 gimple_stmt_iterator gsi; 1441 basic_block bb; 1442 1443 if (gimple_code (insert_point) == GIMPLE_PHI) 1444 bb = gimple_bb (insert_point); 1445 else if (!stmt_ends_bb_p (insert_point)) 1446 { 1447 gsi = gsi_for_stmt (insert_point); 1448 gimple_set_uid (stmt, gimple_uid (insert_point)); 1449 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); 1450 return; 1451 } 1452 else 1453 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME, 1454 thus if it must end a basic block, it should be a call that can 1455 throw, or some assignment that can throw. If it throws, the LHS 1456 of it will not be initialized though, so only valid places using 1457 the SSA_NAME should be dominated by the fallthru edge. */ 1458 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest; 1459 gsi = gsi_after_labels (bb); 1460 if (gsi_end_p (gsi)) 1461 { 1462 gimple_stmt_iterator gsi2 = gsi_last_bb (bb); 1463 gimple_set_uid (stmt, 1464 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2))); 1465 } 1466 else 1467 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi))); 1468 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 1469 } 1470 1471 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for 1472 the result. Places the statement after the definition of either 1473 OP1 or OP2. Returns the new statement. */ 1474 1475 static gimple * 1476 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode) 1477 { 1478 gimple *op1def = NULL, *op2def = NULL; 1479 gimple_stmt_iterator gsi; 1480 tree op; 1481 gassign *sum; 1482 1483 /* Create the addition statement. */ 1484 op = make_ssa_name (type); 1485 sum = gimple_build_assign (op, opcode, op1, op2); 1486 1487 /* Find an insertion place and insert. */ 1488 if (TREE_CODE (op1) == SSA_NAME) 1489 op1def = SSA_NAME_DEF_STMT (op1); 1490 if (TREE_CODE (op2) == SSA_NAME) 1491 op2def = SSA_NAME_DEF_STMT (op2); 1492 if ((!op1def || gimple_nop_p (op1def)) 1493 && (!op2def || gimple_nop_p (op2def))) 1494 { 1495 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); 1496 if (gsi_end_p (gsi)) 1497 { 1498 gimple_stmt_iterator gsi2 1499 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); 1500 gimple_set_uid (sum, 1501 gsi_end_p (gsi2) ? 
1 : gimple_uid (gsi_stmt (gsi2))); 1502 } 1503 else 1504 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi))); 1505 gsi_insert_before (&gsi, sum, GSI_NEW_STMT); 1506 } 1507 else 1508 { 1509 gimple *insert_point; 1510 if ((!op1def || gimple_nop_p (op1def)) 1511 || (op2def && !gimple_nop_p (op2def) 1512 && reassoc_stmt_dominates_stmt_p (op1def, op2def))) 1513 insert_point = op2def; 1514 else 1515 insert_point = op1def; 1516 insert_stmt_after (sum, insert_point); 1517 } 1518 update_stmt (sum); 1519 1520 return sum; 1521 } 1522 1523 /* Perform un-distribution of divisions and multiplications. 1524 A * X + B * X is transformed into (A + B) * X and A / X + B / X 1525 to (A + B) / X for real X. 1526 1527 The algorithm is organized as follows. 1528 1529 - First we walk the addition chain *OPS looking for summands that 1530 are defined by a multiplication or a real division. This results 1531 in the candidates bitmap with relevant indices into *OPS. 1532 1533 - Second we build the chains of multiplications or divisions for 1534 these candidates, counting the number of occurrences of (operand, code) 1535 pairs in all of the candidates chains. 1536 1537 - Third we sort the (operand, code) pairs by number of occurrence and 1538 process them starting with the pair with the most uses. 1539 1540 * For each such pair we walk the candidates again to build a 1541 second candidate bitmap noting all multiplication/division chains 1542 that have at least one occurrence of (operand, code). 1543 1544 * We build an alternate addition chain only covering these 1545 candidates with one (operand, code) operation removed from their 1546 multiplication/division chain. 1547 1548 * The first candidate gets replaced by the alternate addition chain 1549 multiplied/divided by the operand. 1550 1551 * All candidate chains get disabled for further processing and 1552 processing of (operand, code) pairs continues. 1553 1554 The alternate addition chains built are re-processed by the main 1555 reassociation algorithm which allows optimizing a * x * y + b * y * x 1556 to (a + b ) * x * y in one invocation of the reassociation pass. */ 1557 1558 static bool 1559 undistribute_ops_list (enum tree_code opcode, 1560 vec<operand_entry *> *ops, struct loop *loop) 1561 { 1562 unsigned int length = ops->length (); 1563 operand_entry *oe1; 1564 unsigned i, j; 1565 unsigned nr_candidates, nr_candidates2; 1566 sbitmap_iterator sbi0; 1567 vec<operand_entry *> *subops; 1568 bool changed = false; 1569 unsigned int next_oecount_id = 0; 1570 1571 if (length <= 1 1572 || opcode != PLUS_EXPR) 1573 return false; 1574 1575 /* Build a list of candidates to process. 
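     For example (made-up operands), in a*x + b*x + c only the summands
     defined by a MULT_EXPR or RDIV_EXPR - here a*x and b*x - become
     candidates; c is left alone.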
*/ 1576 auto_sbitmap candidates (length); 1577 bitmap_clear (candidates); 1578 nr_candidates = 0; 1579 FOR_EACH_VEC_ELT (*ops, i, oe1) 1580 { 1581 enum tree_code dcode; 1582 gimple *oe1def; 1583 1584 if (TREE_CODE (oe1->op) != SSA_NAME) 1585 continue; 1586 oe1def = SSA_NAME_DEF_STMT (oe1->op); 1587 if (!is_gimple_assign (oe1def)) 1588 continue; 1589 dcode = gimple_assign_rhs_code (oe1def); 1590 if ((dcode != MULT_EXPR 1591 && dcode != RDIV_EXPR) 1592 || !is_reassociable_op (oe1def, dcode, loop)) 1593 continue; 1594 1595 bitmap_set_bit (candidates, i); 1596 nr_candidates++; 1597 } 1598 1599 if (nr_candidates < 2) 1600 return false; 1601 1602 if (dump_file && (dump_flags & TDF_DETAILS)) 1603 { 1604 fprintf (dump_file, "searching for un-distribute opportunities "); 1605 print_generic_expr (dump_file, 1606 (*ops)[bitmap_first_set_bit (candidates)]->op, 0); 1607 fprintf (dump_file, " %d\n", nr_candidates); 1608 } 1609 1610 /* Build linearized sub-operand lists and the counting table. */ 1611 cvec.create (0); 1612 1613 hash_table<oecount_hasher> ctable (15); 1614 1615 /* ??? Macro arguments cannot have multi-argument template types in 1616 them. This typedef is needed to workaround that limitation. */ 1617 typedef vec<operand_entry *> vec_operand_entry_t_heap; 1618 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ()); 1619 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) 1620 { 1621 gimple *oedef; 1622 enum tree_code oecode; 1623 unsigned j; 1624 1625 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op); 1626 oecode = gimple_assign_rhs_code (oedef); 1627 linearize_expr_tree (&subops[i], oedef, 1628 associative_tree_code (oecode), false); 1629 1630 FOR_EACH_VEC_ELT (subops[i], j, oe1) 1631 { 1632 oecount c; 1633 int *slot; 1634 int idx; 1635 c.oecode = oecode; 1636 c.cnt = 1; 1637 c.id = next_oecount_id++; 1638 c.op = oe1->op; 1639 cvec.safe_push (c); 1640 idx = cvec.length () + 41; 1641 slot = ctable.find_slot (idx, INSERT); 1642 if (!*slot) 1643 { 1644 *slot = idx; 1645 } 1646 else 1647 { 1648 cvec.pop (); 1649 cvec[*slot - 42].cnt++; 1650 } 1651 } 1652 } 1653 1654 /* Sort the counting table. */ 1655 cvec.qsort (oecount_cmp); 1656 1657 if (dump_file && (dump_flags & TDF_DETAILS)) 1658 { 1659 oecount *c; 1660 fprintf (dump_file, "Candidates:\n"); 1661 FOR_EACH_VEC_ELT (cvec, j, c) 1662 { 1663 fprintf (dump_file, " %u %s: ", c->cnt, 1664 c->oecode == MULT_EXPR 1665 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?"); 1666 print_generic_expr (dump_file, c->op, 0); 1667 fprintf (dump_file, "\n"); 1668 } 1669 } 1670 1671 /* Process the (operand, code) pairs in order of most occurrence. */ 1672 auto_sbitmap candidates2 (length); 1673 while (!cvec.is_empty ()) 1674 { 1675 oecount *c = &cvec.last (); 1676 if (c->cnt < 2) 1677 break; 1678 1679 /* Now collect the operands in the outer chain that contain 1680 the common operand in their inner chain. */ 1681 bitmap_clear (candidates2); 1682 nr_candidates2 = 0; 1683 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) 1684 { 1685 gimple *oedef; 1686 enum tree_code oecode; 1687 unsigned j; 1688 tree op = (*ops)[i]->op; 1689 1690 /* If we undistributed in this chain already this may be 1691 a constant. 
*/ 1692 if (TREE_CODE (op) != SSA_NAME) 1693 continue; 1694 1695 oedef = SSA_NAME_DEF_STMT (op); 1696 oecode = gimple_assign_rhs_code (oedef); 1697 if (oecode != c->oecode) 1698 continue; 1699 1700 FOR_EACH_VEC_ELT (subops[i], j, oe1) 1701 { 1702 if (oe1->op == c->op) 1703 { 1704 bitmap_set_bit (candidates2, i); 1705 ++nr_candidates2; 1706 break; 1707 } 1708 } 1709 } 1710 1711 if (nr_candidates2 >= 2) 1712 { 1713 operand_entry *oe1, *oe2; 1714 gimple *prod; 1715 int first = bitmap_first_set_bit (candidates2); 1716 1717 /* Build the new addition chain. */ 1718 oe1 = (*ops)[first]; 1719 if (dump_file && (dump_flags & TDF_DETAILS)) 1720 { 1721 fprintf (dump_file, "Building ("); 1722 print_generic_expr (dump_file, oe1->op, 0); 1723 } 1724 zero_one_operation (&oe1->op, c->oecode, c->op); 1725 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0) 1726 { 1727 gimple *sum; 1728 oe2 = (*ops)[i]; 1729 if (dump_file && (dump_flags & TDF_DETAILS)) 1730 { 1731 fprintf (dump_file, " + "); 1732 print_generic_expr (dump_file, oe2->op, 0); 1733 } 1734 zero_one_operation (&oe2->op, c->oecode, c->op); 1735 sum = build_and_add_sum (TREE_TYPE (oe1->op), 1736 oe1->op, oe2->op, opcode); 1737 oe2->op = build_zero_cst (TREE_TYPE (oe2->op)); 1738 oe2->rank = 0; 1739 oe1->op = gimple_get_lhs (sum); 1740 } 1741 1742 /* Apply the multiplication/division. */ 1743 prod = build_and_add_sum (TREE_TYPE (oe1->op), 1744 oe1->op, c->op, c->oecode); 1745 if (dump_file && (dump_flags & TDF_DETAILS)) 1746 { 1747 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/"); 1748 print_generic_expr (dump_file, c->op, 0); 1749 fprintf (dump_file, "\n"); 1750 } 1751 1752 /* Record it in the addition chain and disable further 1753 undistribution with this op. */ 1754 oe1->op = gimple_assign_lhs (prod); 1755 oe1->rank = get_rank (oe1->op); 1756 subops[first].release (); 1757 1758 changed = true; 1759 } 1760 1761 cvec.pop (); 1762 } 1763 1764 for (i = 0; i < ops->length (); ++i) 1765 subops[i].release (); 1766 free (subops); 1767 cvec.release (); 1768 1769 return changed; 1770 } 1771 1772 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison 1773 expression, examine the other OPS to see if any of them are comparisons 1774 of the same values, which we may be able to combine or eliminate. 1775 For example, we can rewrite (a < b) | (a == b) as (a <= b). */ 1776 1777 static bool 1778 eliminate_redundant_comparison (enum tree_code opcode, 1779 vec<operand_entry *> *ops, 1780 unsigned int currindex, 1781 operand_entry *curr) 1782 { 1783 tree op1, op2; 1784 enum tree_code lcode, rcode; 1785 gimple *def1, *def2; 1786 int i; 1787 operand_entry *oe; 1788 1789 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) 1790 return false; 1791 1792 /* Check that CURR is a comparison. */ 1793 if (TREE_CODE (curr->op) != SSA_NAME) 1794 return false; 1795 def1 = SSA_NAME_DEF_STMT (curr->op); 1796 if (!is_gimple_assign (def1)) 1797 return false; 1798 lcode = gimple_assign_rhs_code (def1); 1799 if (TREE_CODE_CLASS (lcode) != tcc_comparison) 1800 return false; 1801 op1 = gimple_assign_rhs1 (def1); 1802 op2 = gimple_assign_rhs2 (def1); 1803 1804 /* Now look for a similar comparison in the remaining OPS. 
*/ 1805 for (i = currindex + 1; ops->iterate (i, &oe); i++) 1806 { 1807 tree t; 1808 1809 if (TREE_CODE (oe->op) != SSA_NAME) 1810 continue; 1811 def2 = SSA_NAME_DEF_STMT (oe->op); 1812 if (!is_gimple_assign (def2)) 1813 continue; 1814 rcode = gimple_assign_rhs_code (def2); 1815 if (TREE_CODE_CLASS (rcode) != tcc_comparison) 1816 continue; 1817 1818 /* If we got here, we have a match. See if we can combine the 1819 two comparisons. */ 1820 if (opcode == BIT_IOR_EXPR) 1821 t = maybe_fold_or_comparisons (lcode, op1, op2, 1822 rcode, gimple_assign_rhs1 (def2), 1823 gimple_assign_rhs2 (def2)); 1824 else 1825 t = maybe_fold_and_comparisons (lcode, op1, op2, 1826 rcode, gimple_assign_rhs1 (def2), 1827 gimple_assign_rhs2 (def2)); 1828 if (!t) 1829 continue; 1830 1831 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons 1832 always give us a boolean_type_node value back. If the original 1833 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type, 1834 we need to convert. */ 1835 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t))) 1836 t = fold_convert (TREE_TYPE (curr->op), t); 1837 1838 if (TREE_CODE (t) != INTEGER_CST 1839 && !operand_equal_p (t, curr->op, 0)) 1840 { 1841 enum tree_code subcode; 1842 tree newop1, newop2; 1843 if (!COMPARISON_CLASS_P (t)) 1844 continue; 1845 extract_ops_from_tree (t, &subcode, &newop1, &newop2); 1846 STRIP_USELESS_TYPE_CONVERSION (newop1); 1847 STRIP_USELESS_TYPE_CONVERSION (newop2); 1848 if (!is_gimple_val (newop1) || !is_gimple_val (newop2)) 1849 continue; 1850 } 1851 1852 if (dump_file && (dump_flags & TDF_DETAILS)) 1853 { 1854 fprintf (dump_file, "Equivalence: "); 1855 print_generic_expr (dump_file, curr->op, 0); 1856 fprintf (dump_file, " %s ", op_symbol_code (opcode)); 1857 print_generic_expr (dump_file, oe->op, 0); 1858 fprintf (dump_file, " -> "); 1859 print_generic_expr (dump_file, t, 0); 1860 fprintf (dump_file, "\n"); 1861 } 1862 1863 /* Now we can delete oe, as it has been subsumed by the new combined 1864 expression t. */ 1865 ops->ordered_remove (i); 1866 reassociate_stats.ops_eliminated ++; 1867 1868 /* If t is the same as curr->op, we're done. Otherwise we must 1869 replace curr->op with t. Special case is if we got a constant 1870 back, in which case we add it to the end instead of in place of 1871 the current entry. */ 1872 if (TREE_CODE (t) == INTEGER_CST) 1873 { 1874 ops->ordered_remove (currindex); 1875 add_to_ops_vec (ops, t); 1876 } 1877 else if (!operand_equal_p (t, curr->op, 0)) 1878 { 1879 gimple *sum; 1880 enum tree_code subcode; 1881 tree newop1; 1882 tree newop2; 1883 gcc_assert (COMPARISON_CLASS_P (t)); 1884 extract_ops_from_tree (t, &subcode, &newop1, &newop2); 1885 STRIP_USELESS_TYPE_CONVERSION (newop1); 1886 STRIP_USELESS_TYPE_CONVERSION (newop2); 1887 gcc_checking_assert (is_gimple_val (newop1) 1888 && is_gimple_val (newop2)); 1889 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode); 1890 curr->op = gimple_get_lhs (sum); 1891 } 1892 return true; 1893 } 1894 1895 return false; 1896 } 1897 1898 1899 /* Transform repeated addition of same values into multiply with 1900 constant. 
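   For example (made-up operands), x + x + x + y has the three copies of x
   removed and replaced by one new operand defined as x * 3.  For
   floating-point types this is only done under -funsafe-math-optimizations.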
*/ 1901 static bool 1902 transform_add_to_multiply (vec<operand_entry *> *ops) 1903 { 1904 operand_entry *oe; 1905 tree op = NULL_TREE; 1906 int j; 1907 int i, start = -1, end = 0, count = 0; 1908 auto_vec<std::pair <int, int> > indxs; 1909 bool changed = false; 1910 1911 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op)) 1912 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op)) 1913 || !flag_unsafe_math_optimizations)) 1914 return false; 1915 1916 /* Look for repeated operands. */ 1917 FOR_EACH_VEC_ELT (*ops, i, oe) 1918 { 1919 if (start == -1) 1920 { 1921 count = 1; 1922 op = oe->op; 1923 start = i; 1924 } 1925 else if (operand_equal_p (oe->op, op, 0)) 1926 { 1927 count++; 1928 end = i; 1929 } 1930 else 1931 { 1932 if (count > 1) 1933 indxs.safe_push (std::make_pair (start, end)); 1934 count = 1; 1935 op = oe->op; 1936 start = i; 1937 } 1938 } 1939 1940 if (count > 1) 1941 indxs.safe_push (std::make_pair (start, end)); 1942 1943 for (j = indxs.length () - 1; j >= 0; --j) 1944 { 1945 /* Convert repeated operand addition to multiplication. */ 1946 start = indxs[j].first; 1947 end = indxs[j].second; 1948 op = (*ops)[start]->op; 1949 count = end - start + 1; 1950 for (i = end; i >= start; --i) 1951 ops->unordered_remove (i); 1952 tree tmp = make_ssa_name (TREE_TYPE (op)); 1953 tree cst = build_int_cst (integer_type_node, count); 1954 gassign *mul_stmt 1955 = gimple_build_assign (tmp, MULT_EXPR, 1956 op, fold_convert (TREE_TYPE (op), cst)); 1957 gimple_set_visited (mul_stmt, true); 1958 add_to_ops_vec (ops, tmp, mul_stmt); 1959 changed = true; 1960 } 1961 1962 return changed; 1963 } 1964 1965 1966 /* Perform various identities and other optimizations on the list of 1967 operand entries, stored in OPS. The tree code for the binary 1968 operation between all the operands is OPCODE. */ 1969 1970 static void 1971 optimize_ops_list (enum tree_code opcode, 1972 vec<operand_entry *> *ops) 1973 { 1974 unsigned int length = ops->length (); 1975 unsigned int i; 1976 operand_entry *oe; 1977 operand_entry *oelast = NULL; 1978 bool iterate = false; 1979 1980 if (length == 1) 1981 return; 1982 1983 oelast = ops->last (); 1984 1985 /* If the last two are constants, pop the constants off, merge them 1986 and try the next two. 
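   For example (an illustrative operand list), with OPCODE PLUS_EXPR and
   ops {a_1, 3, 5} the two trailing constants fold to 8; both entries are
   popped and the folded constant is pushed back, giving {a_1, 8}.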
*/ 1987 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) 1988 { 1989 operand_entry *oelm1 = (*ops)[length - 2]; 1990 1991 if (oelm1->rank == 0 1992 && is_gimple_min_invariant (oelm1->op) 1993 && useless_type_conversion_p (TREE_TYPE (oelm1->op), 1994 TREE_TYPE (oelast->op))) 1995 { 1996 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op), 1997 oelm1->op, oelast->op); 1998 1999 if (folded && is_gimple_min_invariant (folded)) 2000 { 2001 if (dump_file && (dump_flags & TDF_DETAILS)) 2002 fprintf (dump_file, "Merging constants\n"); 2003 2004 ops->pop (); 2005 ops->pop (); 2006 2007 add_to_ops_vec (ops, folded); 2008 reassociate_stats.constants_eliminated++; 2009 2010 optimize_ops_list (opcode, ops); 2011 return; 2012 } 2013 } 2014 } 2015 2016 eliminate_using_constants (opcode, ops); 2017 oelast = NULL; 2018 2019 for (i = 0; ops->iterate (i, &oe);) 2020 { 2021 bool done = false; 2022 2023 if (eliminate_not_pairs (opcode, ops, i, oe)) 2024 return; 2025 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast) 2026 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)) 2027 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe))) 2028 { 2029 if (done) 2030 return; 2031 iterate = true; 2032 oelast = NULL; 2033 continue; 2034 } 2035 oelast = oe; 2036 i++; 2037 } 2038 2039 length = ops->length (); 2040 oelast = ops->last (); 2041 2042 if (iterate) 2043 optimize_ops_list (opcode, ops); 2044 } 2045 2046 /* The following functions are subroutines to optimize_range_tests and allow 2047 it to try to change a logical combination of comparisons into a range 2048 test. 2049 2050 For example, both 2051 X == 2 || X == 5 || X == 3 || X == 4 2052 and 2053 X >= 2 && X <= 5 2054 are converted to 2055 (unsigned) (X - 2) <= 3 2056 2057 For more information see comments above fold_test_range in fold-const.c, 2058 this implementation is for GIMPLE. */ 2059 2060 struct range_entry 2061 { 2062 tree exp; 2063 tree low; 2064 tree high; 2065 bool in_p; 2066 bool strict_overflow_p; 2067 unsigned int idx, next; 2068 }; 2069 2070 /* This is similar to make_range in fold-const.c, but on top of 2071 GIMPLE instead of trees. If EXP is non-NULL, it should be 2072 an SSA_NAME and STMT argument is ignored, otherwise STMT 2073 argument should be a GIMPLE_COND. */ 2074 2075 static void 2076 init_range_entry (struct range_entry *r, tree exp, gimple *stmt) 2077 { 2078 int in_p; 2079 tree low, high; 2080 bool is_bool, strict_overflow_p; 2081 2082 r->exp = NULL_TREE; 2083 r->in_p = false; 2084 r->strict_overflow_p = false; 2085 r->low = NULL_TREE; 2086 r->high = NULL_TREE; 2087 if (exp != NULL_TREE 2088 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))) 2089 return; 2090 2091 /* Start with simply saying "EXP != 0" and then look at the code of EXP 2092 and see if we can refine the range. Some of the cases below may not 2093 happen, but it doesn't seem worth worrying about this. We "continue" 2094 the outer loop when we've changed something; otherwise we "break" 2095 the switch, which will "break" the while. */ 2096 low = exp ? 
build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node; 2097 high = low; 2098 in_p = 0; 2099 strict_overflow_p = false; 2100 is_bool = false; 2101 if (exp == NULL_TREE) 2102 is_bool = true; 2103 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) 2104 { 2105 if (TYPE_UNSIGNED (TREE_TYPE (exp))) 2106 is_bool = true; 2107 else 2108 return; 2109 } 2110 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) 2111 is_bool = true; 2112 2113 while (1) 2114 { 2115 enum tree_code code; 2116 tree arg0, arg1, exp_type; 2117 tree nexp; 2118 location_t loc; 2119 2120 if (exp != NULL_TREE) 2121 { 2122 if (TREE_CODE (exp) != SSA_NAME 2123 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp)) 2124 break; 2125 2126 stmt = SSA_NAME_DEF_STMT (exp); 2127 if (!is_gimple_assign (stmt)) 2128 break; 2129 2130 code = gimple_assign_rhs_code (stmt); 2131 arg0 = gimple_assign_rhs1 (stmt); 2132 arg1 = gimple_assign_rhs2 (stmt); 2133 exp_type = TREE_TYPE (exp); 2134 } 2135 else 2136 { 2137 code = gimple_cond_code (stmt); 2138 arg0 = gimple_cond_lhs (stmt); 2139 arg1 = gimple_cond_rhs (stmt); 2140 exp_type = boolean_type_node; 2141 } 2142 2143 if (TREE_CODE (arg0) != SSA_NAME) 2144 break; 2145 loc = gimple_location (stmt); 2146 switch (code) 2147 { 2148 case BIT_NOT_EXPR: 2149 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE 2150 /* Ensure the range is either +[-,0], +[0,0], 2151 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or 2152 -[1,1]. If it is e.g. +[-,-] or -[-,-] 2153 or similar expression of unconditional true or 2154 false, it should not be negated. */ 2155 && ((high && integer_zerop (high)) 2156 || (low && integer_onep (low)))) 2157 { 2158 in_p = !in_p; 2159 exp = arg0; 2160 continue; 2161 } 2162 break; 2163 case SSA_NAME: 2164 exp = arg0; 2165 continue; 2166 CASE_CONVERT: 2167 if (is_bool) 2168 goto do_default; 2169 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) 2170 { 2171 if (TYPE_UNSIGNED (TREE_TYPE (arg0))) 2172 is_bool = true; 2173 else 2174 return; 2175 } 2176 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) 2177 is_bool = true; 2178 goto do_default; 2179 case EQ_EXPR: 2180 case NE_EXPR: 2181 case LT_EXPR: 2182 case LE_EXPR: 2183 case GE_EXPR: 2184 case GT_EXPR: 2185 is_bool = true; 2186 /* FALLTHRU */ 2187 default: 2188 if (!is_bool) 2189 return; 2190 do_default: 2191 nexp = make_range_step (loc, code, arg0, arg1, exp_type, 2192 &low, &high, &in_p, 2193 &strict_overflow_p); 2194 if (nexp != NULL_TREE) 2195 { 2196 exp = nexp; 2197 gcc_assert (TREE_CODE (exp) == SSA_NAME); 2198 continue; 2199 } 2200 break; 2201 } 2202 break; 2203 } 2204 if (is_bool) 2205 { 2206 r->exp = exp; 2207 r->in_p = in_p; 2208 r->low = low; 2209 r->high = high; 2210 r->strict_overflow_p = strict_overflow_p; 2211 } 2212 } 2213 2214 /* Comparison function for qsort. Sort entries 2215 without SSA_NAME exp first, then with SSA_NAMEs sorted 2216 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs 2217 by increasing ->low and if ->low is the same, by increasing 2218 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE 2219 maximum. */ 2220 2221 static int 2222 range_entry_cmp (const void *a, const void *b) 2223 { 2224 const struct range_entry *p = (const struct range_entry *) a; 2225 const struct range_entry *q = (const struct range_entry *) b; 2226 2227 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) 2228 { 2229 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 2230 { 2231 /* Group range_entries for the same SSA_NAME together. 
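   For example (illustrative versions), entries for x_5 [2, 2], y_7 [0, 9]
   and x_5 [4, 6] sort as x_5 [2, 2], x_5 [4, 6], y_7 [0, 9], assuming
   SSA_NAME_VERSION (x_5) < SSA_NAME_VERSION (y_7), so the merging loop in
   optimize_range_tests only has to look at neighbouring entries.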
*/ 2232 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) 2233 return -1; 2234 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) 2235 return 1; 2236 /* If ->low is different, NULL low goes first, then by 2237 ascending low. */ 2238 if (p->low != NULL_TREE) 2239 { 2240 if (q->low != NULL_TREE) 2241 { 2242 tree tem = fold_binary (LT_EXPR, boolean_type_node, 2243 p->low, q->low); 2244 if (tem && integer_onep (tem)) 2245 return -1; 2246 tem = fold_binary (GT_EXPR, boolean_type_node, 2247 p->low, q->low); 2248 if (tem && integer_onep (tem)) 2249 return 1; 2250 } 2251 else 2252 return 1; 2253 } 2254 else if (q->low != NULL_TREE) 2255 return -1; 2256 /* If ->high is different, NULL high goes last, before that by 2257 ascending high. */ 2258 if (p->high != NULL_TREE) 2259 { 2260 if (q->high != NULL_TREE) 2261 { 2262 tree tem = fold_binary (LT_EXPR, boolean_type_node, 2263 p->high, q->high); 2264 if (tem && integer_onep (tem)) 2265 return -1; 2266 tem = fold_binary (GT_EXPR, boolean_type_node, 2267 p->high, q->high); 2268 if (tem && integer_onep (tem)) 2269 return 1; 2270 } 2271 else 2272 return -1; 2273 } 2274 else if (q->high != NULL_TREE) 2275 return 1; 2276 /* If both ranges are the same, sort below by ascending idx. */ 2277 } 2278 else 2279 return 1; 2280 } 2281 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 2282 return -1; 2283 2284 if (p->idx < q->idx) 2285 return -1; 2286 else 2287 { 2288 gcc_checking_assert (p->idx > q->idx); 2289 return 1; 2290 } 2291 } 2292 2293 /* Helper routine of optimize_range_test. 2294 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for 2295 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, 2296 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE 2297 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to 2298 an array of COUNT pointers to other ranges. Return 2299 true if the range merge has been successful. 2300 If OPCODE is ERROR_MARK, this is called from within 2301 maybe_optimize_range_tests and is performing inter-bb range optimization. 2302 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in 2303 oe->rank. */ 2304 2305 static bool 2306 update_range_test (struct range_entry *range, struct range_entry *otherrange, 2307 struct range_entry **otherrangep, 2308 unsigned int count, enum tree_code opcode, 2309 vec<operand_entry *> *ops, tree exp, gimple_seq seq, 2310 bool in_p, tree low, tree high, bool strict_overflow_p) 2311 { 2312 operand_entry *oe = (*ops)[range->idx]; 2313 tree op = oe->op; 2314 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) 2315 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 2316 location_t loc = gimple_location (stmt); 2317 tree optype = op ? TREE_TYPE (op) : boolean_type_node; 2318 tree tem = build_range_check (loc, optype, unshare_expr (exp), 2319 in_p, low, high); 2320 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 2321 gimple_stmt_iterator gsi; 2322 unsigned int i, uid; 2323 2324 if (tem == NULL_TREE) 2325 return false; 2326 2327 /* If op is default def SSA_NAME, there is no place to insert the 2328 new comparison. Give up, unless we can use OP itself as the 2329 range test. 
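   For example (an illustrative sketch), if OP is a boolean default
   definition such as a parameter b_1(D) and the computed test TEM is
   either OP itself or b_1(D) == 1, OP can be reused directly and no new
   statement has to be inserted.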
*/ 2330 if (op && SSA_NAME_IS_DEFAULT_DEF (op)) 2331 { 2332 if (op == range->exp 2333 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype)) 2334 || TREE_CODE (optype) == BOOLEAN_TYPE) 2335 && (op == tem 2336 || (TREE_CODE (tem) == EQ_EXPR 2337 && TREE_OPERAND (tem, 0) == op 2338 && integer_onep (TREE_OPERAND (tem, 1)))) 2339 && opcode != BIT_IOR_EXPR 2340 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR)) 2341 { 2342 stmt = NULL; 2343 tem = op; 2344 } 2345 else 2346 return false; 2347 } 2348 2349 if (strict_overflow_p && issue_strict_overflow_warning (wc)) 2350 warning_at (loc, OPT_Wstrict_overflow, 2351 "assuming signed overflow does not occur " 2352 "when simplifying range test"); 2353 2354 if (dump_file && (dump_flags & TDF_DETAILS)) 2355 { 2356 struct range_entry *r; 2357 fprintf (dump_file, "Optimizing range tests "); 2358 print_generic_expr (dump_file, range->exp, 0); 2359 fprintf (dump_file, " %c[", range->in_p ? '+' : '-'); 2360 print_generic_expr (dump_file, range->low, 0); 2361 fprintf (dump_file, ", "); 2362 print_generic_expr (dump_file, range->high, 0); 2363 fprintf (dump_file, "]"); 2364 for (i = 0; i < count; i++) 2365 { 2366 if (otherrange) 2367 r = otherrange + i; 2368 else 2369 r = otherrangep[i]; 2370 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-'); 2371 print_generic_expr (dump_file, r->low, 0); 2372 fprintf (dump_file, ", "); 2373 print_generic_expr (dump_file, r->high, 0); 2374 fprintf (dump_file, "]"); 2375 } 2376 fprintf (dump_file, "\n into "); 2377 print_generic_expr (dump_file, tem, 0); 2378 fprintf (dump_file, "\n"); 2379 } 2380 2381 if (opcode == BIT_IOR_EXPR 2382 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 2383 tem = invert_truthvalue_loc (loc, tem); 2384 2385 tem = fold_convert_loc (loc, optype, tem); 2386 if (stmt) 2387 { 2388 gsi = gsi_for_stmt (stmt); 2389 uid = gimple_uid (stmt); 2390 } 2391 else 2392 { 2393 gsi = gsi_none (); 2394 uid = 0; 2395 } 2396 if (stmt == NULL) 2397 gcc_checking_assert (tem == op); 2398 /* In rare cases range->exp can be equal to lhs of stmt. 2399 In that case we have to insert after the stmt rather than before 2400 it. If stmt is a PHI, insert it at the start of the basic block.
*/ 2401 else if (op != range->exp) 2402 { 2403 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 2404 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true, 2405 GSI_SAME_STMT); 2406 gsi_prev (&gsi); 2407 } 2408 else if (gimple_code (stmt) != GIMPLE_PHI) 2409 { 2410 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); 2411 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false, 2412 GSI_CONTINUE_LINKING); 2413 } 2414 else 2415 { 2416 gsi = gsi_after_labels (gimple_bb (stmt)); 2417 if (!gsi_end_p (gsi)) 2418 uid = gimple_uid (gsi_stmt (gsi)); 2419 else 2420 { 2421 gsi = gsi_start_bb (gimple_bb (stmt)); 2422 uid = 1; 2423 while (!gsi_end_p (gsi)) 2424 { 2425 uid = gimple_uid (gsi_stmt (gsi)); 2426 gsi_next (&gsi); 2427 } 2428 } 2429 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 2430 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true, 2431 GSI_SAME_STMT); 2432 if (gsi_end_p (gsi)) 2433 gsi = gsi_last_bb (gimple_bb (stmt)); 2434 else 2435 gsi_prev (&gsi); 2436 } 2437 for (; !gsi_end_p (gsi); gsi_prev (&gsi)) 2438 if (gimple_uid (gsi_stmt (gsi))) 2439 break; 2440 else 2441 gimple_set_uid (gsi_stmt (gsi), uid); 2442 2443 oe->op = tem; 2444 range->exp = exp; 2445 range->low = low; 2446 range->high = high; 2447 range->in_p = in_p; 2448 range->strict_overflow_p = false; 2449 2450 for (i = 0; i < count; i++) 2451 { 2452 if (otherrange) 2453 range = otherrange + i; 2454 else 2455 range = otherrangep[i]; 2456 oe = (*ops)[range->idx]; 2457 /* Now change all the other range test immediate uses, so that 2458 those tests will be optimized away. */ 2459 if (opcode == ERROR_MARK) 2460 { 2461 if (oe->op) 2462 oe->op = build_int_cst (TREE_TYPE (oe->op), 2463 oe->rank == BIT_IOR_EXPR ? 0 : 1); 2464 else 2465 oe->op = (oe->rank == BIT_IOR_EXPR 2466 ? boolean_false_node : boolean_true_node); 2467 } 2468 else 2469 oe->op = error_mark_node; 2470 range->exp = NULL_TREE; 2471 range->low = NULL_TREE; 2472 range->high = NULL_TREE; 2473 } 2474 return true; 2475 } 2476 2477 /* Optimize X == CST1 || X == CST2 2478 if popcount (CST1 ^ CST2) == 1 into 2479 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). 2480 Similarly for ranges. E.g. 2481 X != 2 && X != 3 && X != 10 && X != 11 2482 will be transformed by the previous optimization into 2483 !((X - 2U) <= 1U || (X - 10U) <= 1U) 2484 and this loop can transform that into 2485 !(((X & ~8) - 2U) <= 1U). */ 2486 2487 static bool 2488 optimize_range_tests_xor (enum tree_code opcode, tree type, 2489 tree lowi, tree lowj, tree highi, tree highj, 2490 vec<operand_entry *> *ops, 2491 struct range_entry *rangei, 2492 struct range_entry *rangej) 2493 { 2494 tree lowxor, highxor, tem, exp; 2495 /* Check lowi ^ lowj == highi ^ highj and 2496 popcount (lowi ^ lowj) == 1. 
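   E.g. (illustrative constants) for X == 43 || X == 47 we have
   lowi == highi == 43 and lowj == highj == 47, so lowxor == highxor == 4,
   a single bit, and the two tests combine into (X & ~4) == 43.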
*/ 2497 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); 2498 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) 2499 return false; 2500 if (!integer_pow2p (lowxor)) 2501 return false; 2502 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); 2503 if (!tree_int_cst_equal (lowxor, highxor)) 2504 return false; 2505 2506 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); 2507 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem); 2508 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); 2509 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); 2510 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp, 2511 NULL, rangei->in_p, lowj, highj, 2512 rangei->strict_overflow_p 2513 || rangej->strict_overflow_p)) 2514 return true; 2515 return false; 2516 } 2517 2518 /* Optimize X == CST1 || X == CST2 2519 if popcount (CST2 - CST1) == 1 into 2520 ((X - CST1) & ~(CST2 - CST1)) == 0. 2521 Similarly for ranges. E.g. 2522 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46 2523 || X == 75 || X == 45 2524 will be transformed by the previous optimization into 2525 (X - 43U) <= 3U || (X - 75U) <= 3U 2526 and this loop can transform that into 2527 ((X - 43U) & ~(75U - 43U)) <= 3U. */ 2528 static bool 2529 optimize_range_tests_diff (enum tree_code opcode, tree type, 2530 tree lowi, tree lowj, tree highi, tree highj, 2531 vec<operand_entry *> *ops, 2532 struct range_entry *rangei, 2533 struct range_entry *rangej) 2534 { 2535 tree tem1, tem2, mask; 2536 /* Check highi - lowi == highj - lowj. */ 2537 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi); 2538 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 2539 return false; 2540 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj); 2541 if (!tree_int_cst_equal (tem1, tem2)) 2542 return false; 2543 /* Check popcount (lowj - lowi) == 1. */ 2544 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi); 2545 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 2546 return false; 2547 if (!integer_pow2p (tem1)) 2548 return false; 2549 2550 type = unsigned_type_for (type); 2551 tem1 = fold_convert (type, tem1); 2552 tem2 = fold_convert (type, tem2); 2553 lowi = fold_convert (type, lowi); 2554 mask = fold_build1 (BIT_NOT_EXPR, type, tem1); 2555 tem1 = fold_binary (MINUS_EXPR, type, 2556 fold_convert (type, rangei->exp), lowi); 2557 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask); 2558 lowj = build_int_cst (type, 0); 2559 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1, 2560 NULL, rangei->in_p, lowj, tem2, 2561 rangei->strict_overflow_p 2562 || rangej->strict_overflow_p)) 2563 return true; 2564 return false; 2565 } 2566 2567 /* It does some common checks for function optimize_range_tests_xor and 2568 optimize_range_tests_diff. 2569 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor. 2570 Else it calls optimize_range_tests_diff. 
*/ 2571 2572 static bool 2573 optimize_range_tests_1 (enum tree_code opcode, int first, int length, 2574 bool optimize_xor, vec<operand_entry *> *ops, 2575 struct range_entry *ranges) 2576 { 2577 int i, j; 2578 bool any_changes = false; 2579 for (i = first; i < length; i++) 2580 { 2581 tree lowi, highi, lowj, highj, type, tem; 2582 2583 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 2584 continue; 2585 type = TREE_TYPE (ranges[i].exp); 2586 if (!INTEGRAL_TYPE_P (type)) 2587 continue; 2588 lowi = ranges[i].low; 2589 if (lowi == NULL_TREE) 2590 lowi = TYPE_MIN_VALUE (type); 2591 highi = ranges[i].high; 2592 if (highi == NULL_TREE) 2593 continue; 2594 for (j = i + 1; j < length && j < i + 64; j++) 2595 { 2596 bool changes; 2597 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p) 2598 continue; 2599 lowj = ranges[j].low; 2600 if (lowj == NULL_TREE) 2601 continue; 2602 highj = ranges[j].high; 2603 if (highj == NULL_TREE) 2604 highj = TYPE_MAX_VALUE (type); 2605 /* Check lowj > highi. */ 2606 tem = fold_binary (GT_EXPR, boolean_type_node, 2607 lowj, highi); 2608 if (tem == NULL_TREE || !integer_onep (tem)) 2609 continue; 2610 if (optimize_xor) 2611 changes = optimize_range_tests_xor (opcode, type, lowi, lowj, 2612 highi, highj, ops, 2613 ranges + i, ranges + j); 2614 else 2615 changes = optimize_range_tests_diff (opcode, type, lowi, lowj, 2616 highi, highj, ops, 2617 ranges + i, ranges + j); 2618 if (changes) 2619 { 2620 any_changes = true; 2621 break; 2622 } 2623 } 2624 } 2625 return any_changes; 2626 } 2627 2628 /* Helper function of optimize_range_tests_to_bit_test. Handle a single 2629 range, EXP, LOW, HIGH, compute bit mask of bits to test and return 2630 EXP on success, NULL otherwise. */ 2631 2632 static tree 2633 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high, 2634 wide_int *mask, tree *totallowp) 2635 { 2636 tree tem = int_const_binop (MINUS_EXPR, high, low); 2637 if (tem == NULL_TREE 2638 || TREE_CODE (tem) != INTEGER_CST 2639 || TREE_OVERFLOW (tem) 2640 || tree_int_cst_sgn (tem) == -1 2641 || compare_tree_int (tem, prec) != -1) 2642 return NULL_TREE; 2643 2644 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1; 2645 *mask = wi::shifted_mask (0, max, false, prec); 2646 if (TREE_CODE (exp) == BIT_AND_EXPR 2647 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) 2648 { 2649 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1)); 2650 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp))); 2651 if (wi::popcount (msk) == 1 2652 && wi::ltu_p (msk, prec - max)) 2653 { 2654 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec); 2655 max += msk.to_uhwi (); 2656 exp = TREE_OPERAND (exp, 0); 2657 if (integer_zerop (low) 2658 && TREE_CODE (exp) == PLUS_EXPR 2659 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) 2660 { 2661 tree ret = TREE_OPERAND (exp, 0); 2662 STRIP_NOPS (ret); 2663 widest_int bias 2664 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)), 2665 TYPE_PRECISION (TREE_TYPE (low)))); 2666 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias); 2667 if (totallowp) 2668 { 2669 *totallowp = tbias; 2670 return ret; 2671 } 2672 else if (!tree_int_cst_lt (totallow, tbias)) 2673 return NULL_TREE; 2674 bias = wi::to_widest (tbias); 2675 bias -= wi::to_widest (totallow); 2676 if (bias >= 0 && bias < prec - max) 2677 { 2678 *mask = wi::lshift (*mask, bias); 2679 return ret; 2680 } 2681 } 2682 } 2683 } 2684 if (totallowp) 2685 return exp; 2686 if (!tree_int_cst_lt (totallow, low)) 2687 return exp; 2688 tem = int_const_binop (MINUS_EXPR, 
low, totallow); 2689 if (tem == NULL_TREE 2690 || TREE_CODE (tem) != INTEGER_CST 2691 || TREE_OVERFLOW (tem) 2692 || compare_tree_int (tem, prec - max) == 1) 2693 return NULL_TREE; 2694 2695 *mask = wi::lshift (*mask, wi::to_widest (tem)); 2696 return exp; 2697 } 2698 2699 /* Attempt to optimize small range tests using bit test. 2700 E.g. 2701 X != 43 && X != 76 && X != 44 && X != 78 && X != 49 2702 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82 2703 has been by earlier optimizations optimized into: 2704 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82 2705 As all the 43 through 82 range is less than 64 numbers, 2706 for 64-bit word targets optimize that into: 2707 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */ 2708 2709 static bool 2710 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length, 2711 vec<operand_entry *> *ops, 2712 struct range_entry *ranges) 2713 { 2714 int i, j; 2715 bool any_changes = false; 2716 int prec = GET_MODE_BITSIZE (word_mode); 2717 auto_vec<struct range_entry *, 64> candidates; 2718 2719 for (i = first; i < length - 2; i++) 2720 { 2721 tree lowi, highi, lowj, highj, type; 2722 2723 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 2724 continue; 2725 type = TREE_TYPE (ranges[i].exp); 2726 if (!INTEGRAL_TYPE_P (type)) 2727 continue; 2728 lowi = ranges[i].low; 2729 if (lowi == NULL_TREE) 2730 lowi = TYPE_MIN_VALUE (type); 2731 highi = ranges[i].high; 2732 if (highi == NULL_TREE) 2733 continue; 2734 wide_int mask; 2735 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi, 2736 highi, &mask, &lowi); 2737 if (exp == NULL_TREE) 2738 continue; 2739 bool strict_overflow_p = ranges[i].strict_overflow_p; 2740 candidates.truncate (0); 2741 int end = MIN (i + 64, length); 2742 for (j = i + 1; j < end; j++) 2743 { 2744 tree exp2; 2745 if (ranges[j].exp == NULL_TREE || ranges[j].in_p) 2746 continue; 2747 if (ranges[j].exp == exp) 2748 ; 2749 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR) 2750 { 2751 exp2 = TREE_OPERAND (ranges[j].exp, 0); 2752 if (exp2 == exp) 2753 ; 2754 else if (TREE_CODE (exp2) == PLUS_EXPR) 2755 { 2756 exp2 = TREE_OPERAND (exp2, 0); 2757 STRIP_NOPS (exp2); 2758 if (exp2 != exp) 2759 continue; 2760 } 2761 else 2762 continue; 2763 } 2764 else 2765 continue; 2766 lowj = ranges[j].low; 2767 if (lowj == NULL_TREE) 2768 continue; 2769 highj = ranges[j].high; 2770 if (highj == NULL_TREE) 2771 highj = TYPE_MAX_VALUE (type); 2772 wide_int mask2; 2773 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj, 2774 highj, &mask2, NULL); 2775 if (exp2 != exp) 2776 continue; 2777 mask |= mask2; 2778 strict_overflow_p |= ranges[j].strict_overflow_p; 2779 candidates.safe_push (&ranges[j]); 2780 } 2781 2782 /* If we need otherwise 3 or more comparisons, use a bit test. */ 2783 if (candidates.length () >= 2) 2784 { 2785 tree high = wide_int_to_tree (TREE_TYPE (lowi), 2786 wi::to_widest (lowi) 2787 + prec - 1 - wi::clz (mask)); 2788 operand_entry *oe = (*ops)[ranges[i].idx]; 2789 tree op = oe->op; 2790 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) 2791 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 2792 location_t loc = gimple_location (stmt); 2793 tree optype = op ? TREE_TYPE (op) : boolean_type_node; 2794 2795 /* See if it isn't cheaper to pretend the minimum value of the 2796 range is 0, if maximum value is small enough. 2797 We can avoid then subtraction of the minimum value, but the 2798 mask constant could be perhaps more expensive. 
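   For example (an illustrative sketch), for X == 1 || X == 3 || X == 4
   the mask has bits 0, 2 and 3 set and lowi is 1, so normally
   (1 << (X - 1)) would be tested against the mask.  If, by the RTX costs,
   an AND with the mask shifted left by 1 is cheaper than the subtraction
   plus the AND with the original mask, lowi is treated as 0 and (1 << X)
   is tested against (mask << 1) instead.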
*/ 2799 if (compare_tree_int (lowi, 0) > 0 2800 && compare_tree_int (high, prec) < 0) 2801 { 2802 int cost_diff; 2803 HOST_WIDE_INT m = tree_to_uhwi (lowi); 2804 rtx reg = gen_raw_REG (word_mode, 10000); 2805 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt)); 2806 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg, 2807 GEN_INT (-m)), speed_p); 2808 rtx r = immed_wide_int_const (mask, word_mode); 2809 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r), 2810 word_mode, speed_p); 2811 r = immed_wide_int_const (wi::lshift (mask, m), word_mode); 2812 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r), 2813 word_mode, speed_p); 2814 if (cost_diff > 0) 2815 { 2816 mask = wi::lshift (mask, m); 2817 lowi = build_zero_cst (TREE_TYPE (lowi)); 2818 } 2819 } 2820 2821 tree tem = build_range_check (loc, optype, unshare_expr (exp), 2822 false, lowi, high); 2823 if (tem == NULL_TREE || is_gimple_val (tem)) 2824 continue; 2825 tree etype = unsigned_type_for (TREE_TYPE (exp)); 2826 exp = fold_build2_loc (loc, MINUS_EXPR, etype, 2827 fold_convert_loc (loc, etype, exp), 2828 fold_convert_loc (loc, etype, lowi)); 2829 exp = fold_convert_loc (loc, integer_type_node, exp); 2830 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1); 2831 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type, 2832 build_int_cst (word_type, 1), exp); 2833 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp, 2834 wide_int_to_tree (word_type, mask)); 2835 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp, 2836 build_zero_cst (word_type)); 2837 if (is_gimple_val (exp)) 2838 continue; 2839 2840 /* The shift might have undefined behavior if TEM is true, 2841 but reassociate_bb isn't prepared to have basic blocks 2842 split when it is running. So, temporarily emit a code 2843 with BIT_IOR_EXPR instead of &&, and fix it up in 2844 branch_fixup. 
*/ 2845 gimple_seq seq; 2846 tem = force_gimple_operand (tem, &seq, true, NULL_TREE); 2847 gcc_assert (TREE_CODE (tem) == SSA_NAME); 2848 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true); 2849 gimple_seq seq2; 2850 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE); 2851 gimple_seq_add_seq_without_update (&seq, seq2); 2852 gcc_assert (TREE_CODE (exp) == SSA_NAME); 2853 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true); 2854 gimple *g = gimple_build_assign (make_ssa_name (optype), 2855 BIT_IOR_EXPR, tem, exp); 2856 gimple_set_location (g, loc); 2857 gimple_seq_add_stmt_without_update (&seq, g); 2858 exp = gimple_assign_lhs (g); 2859 tree val = build_zero_cst (optype); 2860 if (update_range_test (&ranges[i], NULL, candidates.address (), 2861 candidates.length (), opcode, ops, exp, 2862 seq, false, val, val, strict_overflow_p)) 2863 { 2864 any_changes = true; 2865 reassoc_branch_fixups.safe_push (tem); 2866 } 2867 else 2868 gimple_seq_discard (seq); 2869 } 2870 } 2871 return any_changes; 2872 } 2873 2874 /* Attempt to optimize for signed a and b where b is known to be >= 0: 2875 a >= 0 && a < b into (unsigned) a < (unsigned) b 2876 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */ 2877 2878 static bool 2879 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length, 2880 vec<operand_entry *> *ops, 2881 struct range_entry *ranges, 2882 basic_block first_bb) 2883 { 2884 int i; 2885 bool any_changes = false; 2886 hash_map<tree, int> *map = NULL; 2887 2888 for (i = first; i < length; i++) 2889 { 2890 if (ranges[i].exp == NULL_TREE 2891 || TREE_CODE (ranges[i].exp) != SSA_NAME 2892 || !ranges[i].in_p) 2893 continue; 2894 2895 tree type = TREE_TYPE (ranges[i].exp); 2896 if (!INTEGRAL_TYPE_P (type) 2897 || TYPE_UNSIGNED (type) 2898 || ranges[i].low == NULL_TREE 2899 || !integer_zerop (ranges[i].low) 2900 || ranges[i].high != NULL_TREE) 2901 continue; 2902 /* EXP >= 0 here. 
*/ 2903 if (map == NULL) 2904 map = new hash_map <tree, int>; 2905 map->put (ranges[i].exp, i); 2906 } 2907 2908 if (map == NULL) 2909 return false; 2910 2911 for (i = 0; i < length; i++) 2912 { 2913 if (ranges[i].low == NULL_TREE 2914 || ranges[i].high == NULL_TREE 2915 || !integer_zerop (ranges[i].low) 2916 || !integer_zerop (ranges[i].high)) 2917 continue; 2918 2919 gimple *stmt; 2920 tree_code ccode; 2921 tree rhs1, rhs2; 2922 if (ranges[i].exp) 2923 { 2924 if (TREE_CODE (ranges[i].exp) != SSA_NAME) 2925 continue; 2926 stmt = SSA_NAME_DEF_STMT (ranges[i].exp); 2927 if (!is_gimple_assign (stmt)) 2928 continue; 2929 ccode = gimple_assign_rhs_code (stmt); 2930 rhs1 = gimple_assign_rhs1 (stmt); 2931 rhs2 = gimple_assign_rhs2 (stmt); 2932 } 2933 else 2934 { 2935 operand_entry *oe = (*ops)[ranges[i].idx]; 2936 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 2937 if (gimple_code (stmt) != GIMPLE_COND) 2938 continue; 2939 ccode = gimple_cond_code (stmt); 2940 rhs1 = gimple_cond_lhs (stmt); 2941 rhs2 = gimple_cond_rhs (stmt); 2942 } 2943 2944 if (TREE_CODE (rhs1) != SSA_NAME 2945 || rhs2 == NULL_TREE 2946 || TREE_CODE (rhs2) != SSA_NAME) 2947 continue; 2948 2949 switch (ccode) 2950 { 2951 case GT_EXPR: 2952 case GE_EXPR: 2953 case LT_EXPR: 2954 case LE_EXPR: 2955 break; 2956 default: 2957 continue; 2958 } 2959 if (ranges[i].in_p) 2960 ccode = invert_tree_comparison (ccode, false); 2961 switch (ccode) 2962 { 2963 case GT_EXPR: 2964 case GE_EXPR: 2965 std::swap (rhs1, rhs2); 2966 ccode = swap_tree_comparison (ccode); 2967 break; 2968 case LT_EXPR: 2969 case LE_EXPR: 2970 break; 2971 default: 2972 gcc_unreachable (); 2973 } 2974 2975 int *idx = map->get (rhs1); 2976 if (idx == NULL) 2977 continue; 2978 2979 /* maybe_optimize_range_tests allows statements without side-effects 2980 in the basic blocks as long as they are consumed in the same bb. 2981 Make sure rhs2's def stmt is not among them, otherwise we can't 2982 use safely get_nonzero_bits on it. E.g. in: 2983 # RANGE [-83, 1] NONZERO 173 2984 # k_32 = PHI <k_47(13), k_12(9)> 2985 ... 2986 if (k_32 >= 0) 2987 goto <bb 5>; [26.46%] 2988 else 2989 goto <bb 9>; [73.54%] 2990 2991 <bb 5> [local count: 140323371]: 2992 # RANGE [0, 1] NONZERO 1 2993 _5 = (int) k_32; 2994 # RANGE [0, 4] NONZERO 4 2995 _21 = _5 << 2; 2996 # RANGE [0, 4] NONZERO 4 2997 iftmp.0_44 = (char) _21; 2998 if (k_32 < iftmp.0_44) 2999 goto <bb 6>; [84.48%] 3000 else 3001 goto <bb 9>; [15.52%] 3002 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that 3003 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44 3004 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute 3005 those stmts even for negative k_32 and the value ranges would be no 3006 longer guaranteed and so the optimization would be invalid. */ 3007 if (opcode == ERROR_MARK) 3008 { 3009 gimple *g = SSA_NAME_DEF_STMT (rhs2); 3010 basic_block bb2 = gimple_bb (g); 3011 if (bb2 3012 && bb2 != first_bb 3013 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb)) 3014 { 3015 /* As an exception, handle a few common cases. */ 3016 if (gimple_assign_cast_p (g) 3017 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))) 3018 && TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g))) 3019 && (TYPE_PRECISION (TREE_TYPE (rhs2)) 3020 > TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (g))))) 3021 /* Zero-extension is always ok. 
*/ ; 3022 else if (is_gimple_assign (g) 3023 && gimple_assign_rhs_code (g) == BIT_AND_EXPR 3024 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST 3025 && !wi::neg_p (gimple_assign_rhs2 (g))) 3026 /* Masking with INTEGER_CST with MSB clear is always ok 3027 too. */ ; 3028 else 3029 continue; 3030 } 3031 } 3032 3033 wide_int nz = get_nonzero_bits (rhs2); 3034 if (wi::neg_p (nz)) 3035 continue; 3036 3037 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0 3038 and RHS2 is known to be RHS2 >= 0. */ 3039 tree utype = unsigned_type_for (TREE_TYPE (rhs1)); 3040 3041 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 3042 if ((ranges[*idx].strict_overflow_p 3043 || ranges[i].strict_overflow_p) 3044 && issue_strict_overflow_warning (wc)) 3045 warning_at (gimple_location (stmt), OPT_Wstrict_overflow, 3046 "assuming signed overflow does not occur " 3047 "when simplifying range test"); 3048 3049 if (dump_file && (dump_flags & TDF_DETAILS)) 3050 { 3051 struct range_entry *r = &ranges[*idx]; 3052 fprintf (dump_file, "Optimizing range test "); 3053 print_generic_expr (dump_file, r->exp, 0); 3054 fprintf (dump_file, " +["); 3055 print_generic_expr (dump_file, r->low, 0); 3056 fprintf (dump_file, ", "); 3057 print_generic_expr (dump_file, r->high, 0); 3058 fprintf (dump_file, "] and comparison "); 3059 print_generic_expr (dump_file, rhs1, 0); 3060 fprintf (dump_file, " %s ", op_symbol_code (ccode)); 3061 print_generic_expr (dump_file, rhs2, 0); 3062 fprintf (dump_file, "\n into ("); 3063 print_generic_expr (dump_file, utype, 0); 3064 fprintf (dump_file, ") "); 3065 print_generic_expr (dump_file, rhs1, 0); 3066 fprintf (dump_file, " %s (", op_symbol_code (ccode)); 3067 print_generic_expr (dump_file, utype, 0); 3068 fprintf (dump_file, ") "); 3069 print_generic_expr (dump_file, rhs2, 0); 3070 fprintf (dump_file, "\n"); 3071 } 3072 3073 operand_entry *oe = (*ops)[ranges[i].idx]; 3074 ranges[i].in_p = 0; 3075 if (opcode == BIT_IOR_EXPR 3076 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 3077 { 3078 ranges[i].in_p = 1; 3079 ccode = invert_tree_comparison (ccode, false); 3080 } 3081 3082 unsigned int uid = gimple_uid (stmt); 3083 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 3084 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1); 3085 gimple_set_uid (g, uid); 3086 rhs1 = gimple_assign_lhs (g); 3087 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3088 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2); 3089 gimple_set_uid (g, uid); 3090 rhs2 = gimple_assign_lhs (g); 3091 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3092 if (tree_swap_operands_p (rhs1, rhs2)) 3093 { 3094 std::swap (rhs1, rhs2); 3095 ccode = swap_tree_comparison (ccode); 3096 } 3097 if (gimple_code (stmt) == GIMPLE_COND) 3098 { 3099 gcond *c = as_a <gcond *> (stmt); 3100 gimple_cond_set_code (c, ccode); 3101 gimple_cond_set_lhs (c, rhs1); 3102 gimple_cond_set_rhs (c, rhs2); 3103 update_stmt (stmt); 3104 } 3105 else 3106 { 3107 tree ctype = oe->op ? 
TREE_TYPE (oe->op) : boolean_type_node; 3108 if (!INTEGRAL_TYPE_P (ctype) 3109 || (TREE_CODE (ctype) != BOOLEAN_TYPE 3110 && TYPE_PRECISION (ctype) != 1)) 3111 ctype = boolean_type_node; 3112 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2); 3113 gimple_set_uid (g, uid); 3114 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3115 if (oe->op && ctype != TREE_TYPE (oe->op)) 3116 { 3117 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)), 3118 NOP_EXPR, gimple_assign_lhs (g)); 3119 gimple_set_uid (g, uid); 3120 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3121 } 3122 ranges[i].exp = gimple_assign_lhs (g); 3123 oe->op = ranges[i].exp; 3124 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp)); 3125 ranges[i].high = ranges[i].low; 3126 } 3127 ranges[i].strict_overflow_p = false; 3128 oe = (*ops)[ranges[*idx].idx]; 3129 /* Now change all the other range test immediate uses, so that 3130 those tests will be optimized away. */ 3131 if (opcode == ERROR_MARK) 3132 { 3133 if (oe->op) 3134 oe->op = build_int_cst (TREE_TYPE (oe->op), 3135 oe->rank == BIT_IOR_EXPR ? 0 : 1); 3136 else 3137 oe->op = (oe->rank == BIT_IOR_EXPR 3138 ? boolean_false_node : boolean_true_node); 3139 } 3140 else 3141 oe->op = error_mark_node; 3142 ranges[*idx].exp = NULL_TREE; 3143 ranges[*idx].low = NULL_TREE; 3144 ranges[*idx].high = NULL_TREE; 3145 any_changes = true; 3146 } 3147 3148 delete map; 3149 return any_changes; 3150 } 3151 3152 /* Optimize range tests, similarly how fold_range_test optimizes 3153 it on trees. The tree code for the binary 3154 operation between all the operands is OPCODE. 3155 If OPCODE is ERROR_MARK, optimize_range_tests is called from within 3156 maybe_optimize_range_tests for inter-bb range optimization. 3157 In that case if oe->op is NULL, oe->id is bb->index whose 3158 GIMPLE_COND is && or ||ed into the test, and oe->rank says 3159 the actual opcode. 3160 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */ 3161 3162 static bool 3163 optimize_range_tests (enum tree_code opcode, 3164 vec<operand_entry *> *ops, basic_block first_bb) 3165 { 3166 unsigned int length = ops->length (), i, j, first; 3167 operand_entry *oe; 3168 struct range_entry *ranges; 3169 bool any_changes = false; 3170 3171 if (length == 1) 3172 return false; 3173 3174 ranges = XNEWVEC (struct range_entry, length); 3175 for (i = 0; i < length; i++) 3176 { 3177 oe = (*ops)[i]; 3178 ranges[i].idx = i; 3179 init_range_entry (ranges + i, oe->op, 3180 oe->op 3181 ? NULL 3182 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id))); 3183 /* For | invert it now, we will invert it again before emitting 3184 the optimized expression. */ 3185 if (opcode == BIT_IOR_EXPR 3186 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 3187 ranges[i].in_p = !ranges[i].in_p; 3188 } 3189 3190 qsort (ranges, length, sizeof (*ranges), range_entry_cmp); 3191 for (i = 0; i < length; i++) 3192 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) 3193 break; 3194 3195 /* Try to merge ranges. 
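   For example (an illustrative sketch), two neighbouring entries for the
   same SSA_NAME describing X == 2 and X == 3 are merged by merge_ranges
   into the single range [2, 3], and update_range_test then emits one
   range check in place of the two original tests.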
*/ 3196 for (first = i; i < length; i++) 3197 { 3198 tree low = ranges[i].low; 3199 tree high = ranges[i].high; 3200 int in_p = ranges[i].in_p; 3201 bool strict_overflow_p = ranges[i].strict_overflow_p; 3202 int update_fail_count = 0; 3203 3204 for (j = i + 1; j < length; j++) 3205 { 3206 if (ranges[i].exp != ranges[j].exp) 3207 break; 3208 if (!merge_ranges (&in_p, &low, &high, in_p, low, high, 3209 ranges[j].in_p, ranges[j].low, ranges[j].high)) 3210 break; 3211 strict_overflow_p |= ranges[j].strict_overflow_p; 3212 } 3213 3214 if (j == i + 1) 3215 continue; 3216 3217 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1, 3218 opcode, ops, ranges[i].exp, NULL, in_p, 3219 low, high, strict_overflow_p)) 3220 { 3221 i = j - 1; 3222 any_changes = true; 3223 } 3224 /* Avoid quadratic complexity if all merge_ranges calls would succeed, 3225 while update_range_test would fail. */ 3226 else if (update_fail_count == 64) 3227 i = j - 1; 3228 else 3229 ++update_fail_count; 3230 } 3231 3232 any_changes |= optimize_range_tests_1 (opcode, first, length, true, 3233 ops, ranges); 3234 3235 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2) 3236 any_changes |= optimize_range_tests_1 (opcode, first, length, false, 3237 ops, ranges); 3238 if (lshift_cheap_p (optimize_function_for_speed_p (cfun))) 3239 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length, 3240 ops, ranges); 3241 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops, 3242 ranges, first_bb); 3243 3244 if (any_changes && opcode != ERROR_MARK) 3245 { 3246 j = 0; 3247 FOR_EACH_VEC_ELT (*ops, i, oe) 3248 { 3249 if (oe->op == error_mark_node) 3250 continue; 3251 else if (i != j) 3252 (*ops)[j] = oe; 3253 j++; 3254 } 3255 ops->truncate (j); 3256 } 3257 3258 XDELETEVEC (ranges); 3259 return any_changes; 3260 } 3261 3262 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize 3263 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure, 3264 otherwise the comparison code. */ 3265 3266 static tree_code 3267 ovce_extract_ops (tree var, gassign **rets, bool *reti) 3268 { 3269 if (TREE_CODE (var) != SSA_NAME) 3270 return ERROR_MARK; 3271 3272 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var)); 3273 if (stmt == NULL) 3274 return ERROR_MARK; 3275 3276 /* ??? If we start creating more COND_EXPR, we could perform 3277 this same optimization with them. For now, simplify. */ 3278 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR) 3279 return ERROR_MARK; 3280 3281 tree cond = gimple_assign_rhs1 (stmt); 3282 tree_code cmp = TREE_CODE (cond); 3283 if (TREE_CODE_CLASS (cmp) != tcc_comparison) 3284 return ERROR_MARK; 3285 3286 /* ??? For now, allow only canonical true and false result vectors. 3287 We could expand this to other constants should the need arise, 3288 but at the moment we don't create them. */ 3289 tree t = gimple_assign_rhs2 (stmt); 3290 tree f = gimple_assign_rhs3 (stmt); 3291 bool inv; 3292 if (integer_all_onesp (t)) 3293 inv = false; 3294 else if (integer_all_onesp (f)) 3295 { 3296 cmp = invert_tree_comparison (cmp, false); 3297 inv = true; 3298 } 3299 else 3300 return ERROR_MARK; 3301 if (!integer_zerop (f)) 3302 return ERROR_MARK; 3303 3304 /* Success! */ 3305 if (rets) 3306 *rets = stmt; 3307 if (reti) 3308 *reti = inv; 3309 return cmp; 3310 } 3311 3312 /* Optimize the condition of VEC_COND_EXPRs which have been combined 3313 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). 
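   For example (an illustrative GIMPLE sketch), given

       c_1 = VEC_COND_EXPR <a_2 < b_3, { -1, ... }, { 0, ... }>;
       c_4 = VEC_COND_EXPR <a_2 <= b_3, { -1, ... }, { 0, ... }>;
       x_5 = c_1 & c_4;

   the two conditions fold to a_2 < b_3, which replaces the condition of
   the first VEC_COND_EXPR, and c_4 is dropped from the operand list.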
*/ 3314 3315 static bool 3316 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops) 3317 { 3318 unsigned int length = ops->length (), i, j; 3319 bool any_changes = false; 3320 3321 if (length == 1) 3322 return false; 3323 3324 for (i = 0; i < length; ++i) 3325 { 3326 tree elt0 = (*ops)[i]->op; 3327 3328 gassign *stmt0; 3329 bool invert; 3330 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert); 3331 if (cmp0 == ERROR_MARK) 3332 continue; 3333 3334 for (j = i + 1; j < length; ++j) 3335 { 3336 tree &elt1 = (*ops)[j]->op; 3337 3338 gassign *stmt1; 3339 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL); 3340 if (cmp1 == ERROR_MARK) 3341 continue; 3342 3343 tree cond0 = gimple_assign_rhs1 (stmt0); 3344 tree x0 = TREE_OPERAND (cond0, 0); 3345 tree y0 = TREE_OPERAND (cond0, 1); 3346 3347 tree cond1 = gimple_assign_rhs1 (stmt1); 3348 tree x1 = TREE_OPERAND (cond1, 0); 3349 tree y1 = TREE_OPERAND (cond1, 1); 3350 3351 tree comb; 3352 if (opcode == BIT_AND_EXPR) 3353 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1); 3354 else if (opcode == BIT_IOR_EXPR) 3355 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1); 3356 else 3357 gcc_unreachable (); 3358 if (comb == NULL) 3359 continue; 3360 3361 /* Success! */ 3362 if (dump_file && (dump_flags & TDF_DETAILS)) 3363 { 3364 fprintf (dump_file, "Transforming "); 3365 print_generic_expr (dump_file, cond0, 0); 3366 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|'); 3367 print_generic_expr (dump_file, cond1, 0); 3368 fprintf (dump_file, " into "); 3369 print_generic_expr (dump_file, comb, 0); 3370 fputc ('\n', dump_file); 3371 } 3372 3373 gimple_assign_set_rhs1 (stmt0, comb); 3374 if (invert) 3375 std::swap (*gimple_assign_rhs2_ptr (stmt0), 3376 *gimple_assign_rhs3_ptr (stmt0)); 3377 update_stmt (stmt0); 3378 3379 elt1 = error_mark_node; 3380 any_changes = true; 3381 } 3382 } 3383 3384 if (any_changes) 3385 { 3386 operand_entry *oe; 3387 j = 0; 3388 FOR_EACH_VEC_ELT (*ops, i, oe) 3389 { 3390 if (oe->op == error_mark_node) 3391 continue; 3392 else if (i != j) 3393 (*ops)[j] = oe; 3394 j++; 3395 } 3396 ops->truncate (j); 3397 } 3398 3399 return any_changes; 3400 } 3401 3402 /* Return true if STMT is a cast like: 3403 <bb N>: 3404 ... 3405 _123 = (int) _234; 3406 3407 <bb M>: 3408 # _345 = PHI <_123(N), 1(...), 1(...)> 3409 where _234 has bool type, _123 has single use and 3410 bb N has a single successor M. This is commonly used in 3411 the last block of a range test. 3412 3413 Also Return true if STMT is tcc_compare like: 3414 <bb N>: 3415 ... 3416 _234 = a_2(D) == 2; 3417 3418 <bb M>: 3419 # _345 = PHI <_234(N), 1(...), 1(...)> 3420 _346 = (int) _345; 3421 where _234 has booltype, single use and 3422 bb N has a single successor M. This is commonly used in 3423 the last block of a range test. 
*/ 3424 3425 static bool 3426 final_range_test_p (gimple *stmt) 3427 { 3428 basic_block bb, rhs_bb, lhs_bb; 3429 edge e; 3430 tree lhs, rhs; 3431 use_operand_p use_p; 3432 gimple *use_stmt; 3433 3434 if (!gimple_assign_cast_p (stmt) 3435 && (!is_gimple_assign (stmt) 3436 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 3437 != tcc_comparison))) 3438 return false; 3439 bb = gimple_bb (stmt); 3440 if (!single_succ_p (bb)) 3441 return false; 3442 e = single_succ_edge (bb); 3443 if (e->flags & EDGE_COMPLEX) 3444 return false; 3445 3446 lhs = gimple_assign_lhs (stmt); 3447 rhs = gimple_assign_rhs1 (stmt); 3448 if (gimple_assign_cast_p (stmt) 3449 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 3450 || TREE_CODE (rhs) != SSA_NAME 3451 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)) 3452 return false; 3453 3454 if (!gimple_assign_cast_p (stmt) 3455 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)) 3456 return false; 3457 3458 /* Test whether lhs is consumed only by a PHI in the only successor bb. */ 3459 if (!single_imm_use (lhs, &use_p, &use_stmt)) 3460 return false; 3461 3462 if (gimple_code (use_stmt) != GIMPLE_PHI 3463 || gimple_bb (use_stmt) != e->dest) 3464 return false; 3465 3466 /* And that the rhs is defined in the same loop. */ 3467 if (gimple_assign_cast_p (stmt)) 3468 { 3469 if (TREE_CODE (rhs) != SSA_NAME 3470 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs))) 3471 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb)) 3472 return false; 3473 } 3474 else 3475 { 3476 if (TREE_CODE (lhs) != SSA_NAME 3477 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs))) 3478 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb)) 3479 return false; 3480 } 3481 3482 return true; 3483 } 3484 3485 /* Return true if BB is suitable basic block for inter-bb range test 3486 optimization. If BACKWARD is true, BB should be the only predecessor 3487 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine, 3488 or compared with to find a common basic block to which all conditions 3489 branch to if true resp. false. If BACKWARD is false, TEST_BB should 3490 be the only predecessor of BB. */ 3491 3492 static bool 3493 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb, 3494 bool backward) 3495 { 3496 edge_iterator ei, ei2; 3497 edge e, e2; 3498 gimple *stmt; 3499 gphi_iterator gsi; 3500 bool other_edge_seen = false; 3501 bool is_cond; 3502 3503 if (test_bb == bb) 3504 return false; 3505 /* Check last stmt first. */ 3506 stmt = last_stmt (bb); 3507 if (stmt == NULL 3508 || (gimple_code (stmt) != GIMPLE_COND 3509 && (backward || !final_range_test_p (stmt))) 3510 || gimple_visited_p (stmt) 3511 || stmt_could_throw_p (stmt) 3512 || *other_bb == bb) 3513 return false; 3514 is_cond = gimple_code (stmt) == GIMPLE_COND; 3515 if (is_cond) 3516 { 3517 /* If last stmt is GIMPLE_COND, verify that one of the succ edges 3518 goes to the next bb (if BACKWARD, it is TEST_BB), and the other 3519 to *OTHER_BB (if not set yet, try to find it out). 
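   For example (an illustrative CFG sketch), in

       <bb 3>: if (a_1 > 5) goto <bb 4>; else goto <bb 6>;
       <bb 4>: if (b_2 < 7) goto <bb 5>; else goto <bb 6>;

   with BACKWARD true, BB is bb 3, TEST_BB is bb 4 and *OTHER_BB is (or is
   found to be) bb 6, the block both conditions branch to.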
*/ 3520 if (EDGE_COUNT (bb->succs) != 2) 3521 return false; 3522 FOR_EACH_EDGE (e, ei, bb->succs) 3523 { 3524 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 3525 return false; 3526 if (e->dest == test_bb) 3527 { 3528 if (backward) 3529 continue; 3530 else 3531 return false; 3532 } 3533 if (e->dest == bb) 3534 return false; 3535 if (*other_bb == NULL) 3536 { 3537 FOR_EACH_EDGE (e2, ei2, test_bb->succs) 3538 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 3539 return false; 3540 else if (e->dest == e2->dest) 3541 *other_bb = e->dest; 3542 if (*other_bb == NULL) 3543 return false; 3544 } 3545 if (e->dest == *other_bb) 3546 other_edge_seen = true; 3547 else if (backward) 3548 return false; 3549 } 3550 if (*other_bb == NULL || !other_edge_seen) 3551 return false; 3552 } 3553 else if (single_succ (bb) != *other_bb) 3554 return false; 3555 3556 /* Now check all PHIs of *OTHER_BB. */ 3557 e = find_edge (bb, *other_bb); 3558 e2 = find_edge (test_bb, *other_bb); 3559 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) 3560 { 3561 gphi *phi = gsi.phi (); 3562 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments 3563 corresponding to BB and TEST_BB predecessor must be the same. */ 3564 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx), 3565 gimple_phi_arg_def (phi, e2->dest_idx), 0)) 3566 { 3567 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND, 3568 one of the PHIs should have the lhs of the last stmt in 3569 that block as PHI arg and that PHI should have 0 or 1 3570 corresponding to it in all other range test basic blocks 3571 considered. */ 3572 if (!is_cond) 3573 { 3574 if (gimple_phi_arg_def (phi, e->dest_idx) 3575 == gimple_assign_lhs (stmt) 3576 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx)) 3577 || integer_onep (gimple_phi_arg_def (phi, 3578 e2->dest_idx)))) 3579 continue; 3580 } 3581 else 3582 { 3583 gimple *test_last = last_stmt (test_bb); 3584 if (gimple_code (test_last) != GIMPLE_COND 3585 && gimple_phi_arg_def (phi, e2->dest_idx) 3586 == gimple_assign_lhs (test_last) 3587 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx)) 3588 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx)))) 3589 continue; 3590 } 3591 3592 return false; 3593 } 3594 } 3595 return true; 3596 } 3597 3598 /* Return true if BB doesn't have side-effects that would disallow 3599 range test optimization, all SSA_NAMEs set in the bb are consumed 3600 in the bb and there are no PHIs. 
*/ 3601 3602 static bool 3603 no_side_effect_bb (basic_block bb) 3604 { 3605 gimple_stmt_iterator gsi; 3606 gimple *last; 3607 3608 if (!gimple_seq_empty_p (phi_nodes (bb))) 3609 return false; 3610 last = last_stmt (bb); 3611 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 3612 { 3613 gimple *stmt = gsi_stmt (gsi); 3614 tree lhs; 3615 imm_use_iterator imm_iter; 3616 use_operand_p use_p; 3617 3618 if (is_gimple_debug (stmt)) 3619 continue; 3620 if (gimple_has_side_effects (stmt)) 3621 return false; 3622 if (stmt == last) 3623 return true; 3624 if (!is_gimple_assign (stmt)) 3625 return false; 3626 lhs = gimple_assign_lhs (stmt); 3627 if (TREE_CODE (lhs) != SSA_NAME) 3628 return false; 3629 if (gimple_assign_rhs_could_trap_p (stmt)) 3630 return false; 3631 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) 3632 { 3633 gimple *use_stmt = USE_STMT (use_p); 3634 if (is_gimple_debug (use_stmt)) 3635 continue; 3636 if (gimple_bb (use_stmt) != bb) 3637 return false; 3638 } 3639 } 3640 return false; 3641 } 3642 3643 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, 3644 return true and fill in *OPS recursively. */ 3645 3646 static bool 3647 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops, 3648 struct loop *loop) 3649 { 3650 gimple *stmt = SSA_NAME_DEF_STMT (var); 3651 tree rhs[2]; 3652 int i; 3653 3654 if (!is_reassociable_op (stmt, code, loop)) 3655 return false; 3656 3657 rhs[0] = gimple_assign_rhs1 (stmt); 3658 rhs[1] = gimple_assign_rhs2 (stmt); 3659 gimple_set_visited (stmt, true); 3660 for (i = 0; i < 2; i++) 3661 if (TREE_CODE (rhs[i]) == SSA_NAME 3662 && !get_ops (rhs[i], code, ops, loop) 3663 && has_single_use (rhs[i])) 3664 { 3665 operand_entry *oe = operand_entry_pool.allocate (); 3666 3667 oe->op = rhs[i]; 3668 oe->rank = code; 3669 oe->id = 0; 3670 oe->count = 1; 3671 oe->stmt_to_insert = NULL; 3672 ops->safe_push (oe); 3673 } 3674 return true; 3675 } 3676 3677 /* Find the ops that were added by get_ops starting from VAR, see if 3678 they were changed during update_range_test and if yes, create new 3679 stmts. */ 3680 3681 static tree 3682 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops, 3683 unsigned int *pidx, struct loop *loop) 3684 { 3685 gimple *stmt = SSA_NAME_DEF_STMT (var); 3686 tree rhs[4]; 3687 int i; 3688 3689 if (!is_reassociable_op (stmt, code, loop)) 3690 return NULL; 3691 3692 rhs[0] = gimple_assign_rhs1 (stmt); 3693 rhs[1] = gimple_assign_rhs2 (stmt); 3694 rhs[2] = rhs[0]; 3695 rhs[3] = rhs[1]; 3696 for (i = 0; i < 2; i++) 3697 if (TREE_CODE (rhs[i]) == SSA_NAME) 3698 { 3699 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop); 3700 if (rhs[2 + i] == NULL_TREE) 3701 { 3702 if (has_single_use (rhs[i])) 3703 rhs[2 + i] = ops[(*pidx)++]->op; 3704 else 3705 rhs[2 + i] = rhs[i]; 3706 } 3707 } 3708 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1]) 3709 && (rhs[2] != rhs[1] || rhs[3] != rhs[0])) 3710 { 3711 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 3712 var = make_ssa_name (TREE_TYPE (var)); 3713 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt), 3714 rhs[2], rhs[3]); 3715 gimple_set_uid (g, gimple_uid (stmt)); 3716 gimple_set_visited (g, true); 3717 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3718 } 3719 return var; 3720 } 3721 3722 /* Structure to track the initial value passed to get_ops and 3723 the range in the ops vector for each basic block. 
*/ 3724 3725 struct inter_bb_range_test_entry 3726 { 3727 tree op; 3728 unsigned int first_idx, last_idx; 3729 }; 3730 3731 /* Inter-bb range test optimization. 3732 3733 Returns TRUE if a gimple conditional is optimized to a true/false, 3734 otherwise return FALSE. 3735 3736 This indicates to the caller that it should run a CFG cleanup pass 3737 once reassociation is completed. */ 3738 3739 static bool 3740 maybe_optimize_range_tests (gimple *stmt) 3741 { 3742 basic_block first_bb = gimple_bb (stmt); 3743 basic_block last_bb = first_bb; 3744 basic_block other_bb = NULL; 3745 basic_block bb; 3746 edge_iterator ei; 3747 edge e; 3748 auto_vec<operand_entry *> ops; 3749 auto_vec<inter_bb_range_test_entry> bbinfo; 3750 bool any_changes = false; 3751 bool cfg_cleanup_needed = false; 3752 3753 /* Consider only basic blocks that end with GIMPLE_COND or 3754 a cast statement satisfying final_range_test_p. All 3755 but the last bb in the first_bb .. last_bb range 3756 should end with GIMPLE_COND. */ 3757 if (gimple_code (stmt) == GIMPLE_COND) 3758 { 3759 if (EDGE_COUNT (first_bb->succs) != 2) 3760 return cfg_cleanup_needed; 3761 } 3762 else if (final_range_test_p (stmt)) 3763 other_bb = single_succ (first_bb); 3764 else 3765 return cfg_cleanup_needed; 3766 3767 if (stmt_could_throw_p (stmt)) 3768 return cfg_cleanup_needed; 3769 3770 /* As relative ordering of post-dominator sons isn't fixed, 3771 maybe_optimize_range_tests can be called first on any 3772 bb in the range we want to optimize. So, start searching 3773 backwards, if first_bb can be set to a predecessor. */ 3774 while (single_pred_p (first_bb)) 3775 { 3776 basic_block pred_bb = single_pred (first_bb); 3777 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true)) 3778 break; 3779 if (!no_side_effect_bb (first_bb)) 3780 break; 3781 first_bb = pred_bb; 3782 } 3783 /* If first_bb is last_bb, other_bb hasn't been computed yet. 3784 Before starting forward search in last_bb successors, find 3785 out the other_bb. */ 3786 if (first_bb == last_bb) 3787 { 3788 other_bb = NULL; 3789 /* As non-GIMPLE_COND last stmt always terminates the range, 3790 if forward search didn't discover anything, just give up. */ 3791 if (gimple_code (stmt) != GIMPLE_COND) 3792 return cfg_cleanup_needed; 3793 /* Look at both successors. Either it ends with a GIMPLE_COND 3794 and satisfies suitable_cond_bb, or ends with a cast and 3795 other_bb is that cast's successor. */ 3796 FOR_EACH_EDGE (e, ei, first_bb->succs) 3797 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)) 3798 || e->dest == first_bb) 3799 return cfg_cleanup_needed; 3800 else if (single_pred_p (e->dest)) 3801 { 3802 stmt = last_stmt (e->dest); 3803 if (stmt 3804 && gimple_code (stmt) == GIMPLE_COND 3805 && EDGE_COUNT (e->dest->succs) == 2) 3806 { 3807 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true)) 3808 break; 3809 else 3810 other_bb = NULL; 3811 } 3812 else if (stmt 3813 && final_range_test_p (stmt) 3814 && find_edge (first_bb, single_succ (e->dest))) 3815 { 3816 other_bb = single_succ (e->dest); 3817 if (other_bb == first_bb) 3818 other_bb = NULL; 3819 } 3820 } 3821 if (other_bb == NULL) 3822 return cfg_cleanup_needed; 3823 } 3824 /* Now do the forward search, moving last_bb to successor bbs 3825 that aren't other_bb. 
*/ 3826 while (EDGE_COUNT (last_bb->succs) == 2) 3827 { 3828 FOR_EACH_EDGE (e, ei, last_bb->succs) 3829 if (e->dest != other_bb) 3830 break; 3831 if (e == NULL) 3832 break; 3833 if (!single_pred_p (e->dest)) 3834 break; 3835 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false)) 3836 break; 3837 if (!no_side_effect_bb (e->dest)) 3838 break; 3839 last_bb = e->dest; 3840 } 3841 if (first_bb == last_bb) 3842 return cfg_cleanup_needed; 3843 /* Here basic blocks first_bb through last_bb's predecessor 3844 end with GIMPLE_COND, all of them have one of the edges to 3845 other_bb and another to another block in the range, 3846 all blocks except first_bb don't have side-effects and 3847 last_bb ends with either GIMPLE_COND, or cast satisfying 3848 final_range_test_p. */ 3849 for (bb = last_bb; ; bb = single_pred (bb)) 3850 { 3851 enum tree_code code; 3852 tree lhs, rhs; 3853 inter_bb_range_test_entry bb_ent; 3854 3855 bb_ent.op = NULL_TREE; 3856 bb_ent.first_idx = ops.length (); 3857 bb_ent.last_idx = bb_ent.first_idx; 3858 e = find_edge (bb, other_bb); 3859 stmt = last_stmt (bb); 3860 gimple_set_visited (stmt, true); 3861 if (gimple_code (stmt) != GIMPLE_COND) 3862 { 3863 use_operand_p use_p; 3864 gimple *phi; 3865 edge e2; 3866 unsigned int d; 3867 3868 lhs = gimple_assign_lhs (stmt); 3869 rhs = gimple_assign_rhs1 (stmt); 3870 gcc_assert (bb == last_bb); 3871 3872 /* stmt is 3873 _123 = (int) _234; 3874 OR 3875 _234 = a_2(D) == 2; 3876 3877 followed by: 3878 <bb M>: 3879 # _345 = PHI <_123(N), 1(...), 1(...)> 3880 3881 or 0 instead of 1. If it is 0, the _234 3882 range test is anded together with all the 3883 other range tests, if it is 1, it is ored with 3884 them. */ 3885 single_imm_use (lhs, &use_p, &phi); 3886 gcc_assert (gimple_code (phi) == GIMPLE_PHI); 3887 e2 = find_edge (first_bb, other_bb); 3888 d = e2->dest_idx; 3889 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs); 3890 if (integer_zerop (gimple_phi_arg_def (phi, d))) 3891 code = BIT_AND_EXPR; 3892 else 3893 { 3894 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d))); 3895 code = BIT_IOR_EXPR; 3896 } 3897 3898 /* If _234 SSA_NAME_DEF_STMT is 3899 _234 = _567 | _789; 3900 (or &, corresponding to 1/0 in the phi arguments, 3901 push into ops the individual range test arguments 3902 of the bitwise or resp. and, recursively. */ 3903 if (TREE_CODE (rhs) == SSA_NAME 3904 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 3905 != tcc_comparison) 3906 && !get_ops (rhs, code, &ops, 3907 loop_containing_stmt (stmt)) 3908 && has_single_use (rhs)) 3909 { 3910 /* Otherwise, push the _234 range test itself. */ 3911 operand_entry *oe = operand_entry_pool.allocate (); 3912 3913 oe->op = rhs; 3914 oe->rank = code; 3915 oe->id = 0; 3916 oe->count = 1; 3917 oe->stmt_to_insert = NULL; 3918 ops.safe_push (oe); 3919 bb_ent.last_idx++; 3920 bb_ent.op = rhs; 3921 } 3922 else if (is_gimple_assign (stmt) 3923 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 3924 == tcc_comparison) 3925 && !get_ops (lhs, code, &ops, 3926 loop_containing_stmt (stmt)) 3927 && has_single_use (lhs)) 3928 { 3929 operand_entry *oe = operand_entry_pool.allocate (); 3930 oe->op = lhs; 3931 oe->rank = code; 3932 oe->id = 0; 3933 oe->count = 1; 3934 ops.safe_push (oe); 3935 bb_ent.last_idx++; 3936 bb_ent.op = lhs; 3937 } 3938 else 3939 { 3940 bb_ent.last_idx = ops.length (); 3941 bb_ent.op = rhs; 3942 } 3943 bbinfo.safe_push (bb_ent); 3944 continue; 3945 } 3946 /* Otherwise stmt is GIMPLE_COND. 
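     E.g. the block ends in something like  if (x_5 != 0)  (SSA name illustrative), where x_5 may itself be defined as a bitwise | or & of individual range tests that get_ops below can split back into separate operands.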
*/ 3947 code = gimple_cond_code (stmt); 3948 lhs = gimple_cond_lhs (stmt); 3949 rhs = gimple_cond_rhs (stmt); 3950 if (TREE_CODE (lhs) == SSA_NAME 3951 && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 3952 && ((code != EQ_EXPR && code != NE_EXPR) 3953 || rhs != boolean_false_node 3954 /* Either push into ops the individual bitwise 3955 or resp. and operands, depending on which 3956 edge is other_bb. */ 3957 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0) 3958 ^ (code == EQ_EXPR)) 3959 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops, 3960 loop_containing_stmt (stmt)))) 3961 { 3962 /* Or push the GIMPLE_COND stmt itself. */ 3963 operand_entry *oe = operand_entry_pool.allocate (); 3964 3965 oe->op = NULL; 3966 oe->rank = (e->flags & EDGE_TRUE_VALUE) 3967 ? BIT_IOR_EXPR : BIT_AND_EXPR; 3968 /* oe->op = NULL signs that there is no SSA_NAME 3969 for the range test, and oe->id instead is the 3970 basic block number, at which's end the GIMPLE_COND 3971 is. */ 3972 oe->id = bb->index; 3973 oe->count = 1; 3974 oe->stmt_to_insert = NULL; 3975 ops.safe_push (oe); 3976 bb_ent.op = NULL; 3977 bb_ent.last_idx++; 3978 } 3979 else if (ops.length () > bb_ent.first_idx) 3980 { 3981 bb_ent.op = lhs; 3982 bb_ent.last_idx = ops.length (); 3983 } 3984 bbinfo.safe_push (bb_ent); 3985 if (bb == first_bb) 3986 break; 3987 } 3988 if (ops.length () > 1) 3989 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb); 3990 if (any_changes) 3991 { 3992 unsigned int idx, max_idx = 0; 3993 /* update_ops relies on has_single_use predicates returning the 3994 same values as it did during get_ops earlier. Additionally it 3995 never removes statements, only adds new ones and it should walk 3996 from the single imm use and check the predicate already before 3997 making those changes. 3998 On the other side, the handling of GIMPLE_COND directly can turn 3999 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so 4000 it needs to be done in a separate loop afterwards. */ 4001 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) 4002 { 4003 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx 4004 && bbinfo[idx].op != NULL_TREE) 4005 { 4006 tree new_op; 4007 4008 max_idx = idx; 4009 stmt = last_stmt (bb); 4010 new_op = update_ops (bbinfo[idx].op, 4011 (enum tree_code) 4012 ops[bbinfo[idx].first_idx]->rank, 4013 ops, &bbinfo[idx].first_idx, 4014 loop_containing_stmt (stmt)); 4015 if (new_op == NULL_TREE) 4016 { 4017 gcc_assert (bb == last_bb); 4018 new_op = ops[bbinfo[idx].first_idx++]->op; 4019 } 4020 if (bbinfo[idx].op != new_op) 4021 { 4022 imm_use_iterator iter; 4023 use_operand_p use_p; 4024 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL; 4025 4026 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op) 4027 if (is_gimple_debug (use_stmt)) 4028 continue; 4029 else if (gimple_code (use_stmt) == GIMPLE_COND 4030 || gimple_code (use_stmt) == GIMPLE_PHI) 4031 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 4032 SET_USE (use_p, new_op); 4033 else if ((is_gimple_assign (use_stmt) 4034 && (TREE_CODE_CLASS 4035 (gimple_assign_rhs_code (use_stmt)) 4036 == tcc_comparison))) 4037 cast_or_tcc_cmp_stmt = use_stmt; 4038 else if (gimple_assign_cast_p (use_stmt)) 4039 cast_or_tcc_cmp_stmt = use_stmt; 4040 else 4041 gcc_unreachable (); 4042 4043 if (cast_or_tcc_cmp_stmt) 4044 { 4045 gcc_assert (bb == last_bb); 4046 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt); 4047 tree new_lhs = make_ssa_name (TREE_TYPE (lhs)); 4048 enum tree_code rhs_code 4049 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt) 4050 ? 
gimple_assign_rhs_code (cast_or_tcc_cmp_stmt) 4051 : CONVERT_EXPR; 4052 gassign *g; 4053 if (is_gimple_min_invariant (new_op)) 4054 { 4055 new_op = fold_convert (TREE_TYPE (lhs), new_op); 4056 g = gimple_build_assign (new_lhs, new_op); 4057 } 4058 else 4059 g = gimple_build_assign (new_lhs, rhs_code, new_op); 4060 gimple_stmt_iterator gsi 4061 = gsi_for_stmt (cast_or_tcc_cmp_stmt); 4062 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt)); 4063 gimple_set_visited (g, true); 4064 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4065 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) 4066 if (is_gimple_debug (use_stmt)) 4067 continue; 4068 else if (gimple_code (use_stmt) == GIMPLE_COND 4069 || gimple_code (use_stmt) == GIMPLE_PHI) 4070 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 4071 SET_USE (use_p, new_lhs); 4072 else 4073 gcc_unreachable (); 4074 } 4075 } 4076 } 4077 if (bb == first_bb) 4078 break; 4079 } 4080 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) 4081 { 4082 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx 4083 && bbinfo[idx].op == NULL_TREE 4084 && ops[bbinfo[idx].first_idx]->op != NULL_TREE) 4085 { 4086 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb)); 4087 4088 if (idx > max_idx) 4089 max_idx = idx; 4090 4091 /* If we collapse the conditional to a true/false 4092 condition, then bubble that knowledge up to our caller. */ 4093 if (integer_zerop (ops[bbinfo[idx].first_idx]->op)) 4094 { 4095 gimple_cond_make_false (cond_stmt); 4096 cfg_cleanup_needed = true; 4097 } 4098 else if (integer_onep (ops[bbinfo[idx].first_idx]->op)) 4099 { 4100 gimple_cond_make_true (cond_stmt); 4101 cfg_cleanup_needed = true; 4102 } 4103 else 4104 { 4105 gimple_cond_set_code (cond_stmt, NE_EXPR); 4106 gimple_cond_set_lhs (cond_stmt, 4107 ops[bbinfo[idx].first_idx]->op); 4108 gimple_cond_set_rhs (cond_stmt, boolean_false_node); 4109 } 4110 update_stmt (cond_stmt); 4111 } 4112 if (bb == first_bb) 4113 break; 4114 } 4115 4116 /* The above changes could result in basic blocks after the first 4117 modified one, up to and including last_bb, to be executed even if 4118 they would not be in the original program. If the value ranges of 4119 assignment lhs' in those bbs were dependent on the conditions 4120 guarding those basic blocks which now can change, the VRs might 4121 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs 4122 are only used within the same bb, it should be not a big deal if 4123 we just reset all the VRs in those bbs. See PR68671. */ 4124 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++) 4125 reset_flow_sensitive_info_in_bb (bb); 4126 } 4127 return cfg_cleanup_needed; 4128 } 4129 4130 /* Return true if OPERAND is defined by a PHI node which uses the LHS 4131 of STMT in it's operands. This is also known as a "destructive 4132 update" operation. */ 4133 4134 static bool 4135 is_phi_for_stmt (gimple *stmt, tree operand) 4136 { 4137 gimple *def_stmt; 4138 gphi *def_phi; 4139 tree lhs; 4140 use_operand_p arg_p; 4141 ssa_op_iter i; 4142 4143 if (TREE_CODE (operand) != SSA_NAME) 4144 return false; 4145 4146 lhs = gimple_assign_lhs (stmt); 4147 4148 def_stmt = SSA_NAME_DEF_STMT (operand); 4149 def_phi = dyn_cast <gphi *> (def_stmt); 4150 if (!def_phi) 4151 return false; 4152 4153 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE) 4154 if (lhs == USE_FROM_PTR (arg_p)) 4155 return true; 4156 return false; 4157 } 4158 4159 /* Remove def stmt of VAR if VAR has zero uses and recurse 4160 on rhs1 operand if so. 
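   For instance (hypothetical SSA names), when called on x_3 where x_3 = t_2 * c_1 and t_2 = a_1 * b_1 both became dead after rewriting, the loop removes the definition of x_3 and then follows its rhs1 to remove the definition of t_2 as well.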
*/ 4161 4162 static void 4163 remove_visited_stmt_chain (tree var) 4164 { 4165 gimple *stmt; 4166 gimple_stmt_iterator gsi; 4167 4168 while (1) 4169 { 4170 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) 4171 return; 4172 stmt = SSA_NAME_DEF_STMT (var); 4173 if (is_gimple_assign (stmt) && gimple_visited_p (stmt)) 4174 { 4175 var = gimple_assign_rhs1 (stmt); 4176 gsi = gsi_for_stmt (stmt); 4177 reassoc_remove_stmt (&gsi); 4178 release_defs (stmt); 4179 } 4180 else 4181 return; 4182 } 4183 } 4184 4185 /* This function checks three consecutive operands in 4186 passed operands vector OPS starting from OPINDEX and 4187 swaps two operands if it is profitable for the binary operation 4188 consuming the OPINDEX + 1 and OPINDEX + 2 operands. 4189 4190 We pair ops with the same rank if possible. 4191 4192 The alternative we try is to see if STMT is a destructive 4193 update style statement, which is like: 4194 b = phi (a, ...) 4195 a = c + b; 4196 In that case, we want to use the destructive update form to 4197 expose the possible vectorizer sum reduction opportunity. 4198 In that case, the third operand will be the phi node. This 4199 check is not performed if STMT is null. 4200 4201 We could, of course, try to be better as noted above, and do a 4202 lot of work to try to find these opportunities in >3 operand 4203 cases, but it is unlikely to be worth it. */ 4204 4205 static void 4206 swap_ops_for_binary_stmt (vec<operand_entry *> ops, 4207 unsigned int opindex, gimple *stmt) 4208 { 4209 operand_entry *oe1, *oe2, *oe3; 4210 4211 oe1 = ops[opindex]; 4212 oe2 = ops[opindex + 1]; 4213 oe3 = ops[opindex + 2]; 4214 4215 if ((oe1->rank == oe2->rank 4216 && oe2->rank != oe3->rank) 4217 || (stmt && is_phi_for_stmt (stmt, oe3->op) 4218 && !is_phi_for_stmt (stmt, oe1->op) 4219 && !is_phi_for_stmt (stmt, oe2->op))) 4220 std::swap (*oe1, *oe3); 4221 else if ((oe1->rank == oe3->rank 4222 && oe2->rank != oe3->rank) 4223 || (stmt && is_phi_for_stmt (stmt, oe2->op) 4224 && !is_phi_for_stmt (stmt, oe1->op) 4225 && !is_phi_for_stmt (stmt, oe3->op))) 4226 std::swap (*oe1, *oe2); 4227 } 4228 4229 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those 4230 two definitions, otherwise return STMT. */ 4231 4232 static inline gimple * 4233 find_insert_point (gimple *stmt, tree rhs1, tree rhs2) 4234 { 4235 if (TREE_CODE (rhs1) == SSA_NAME 4236 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1))) 4237 stmt = SSA_NAME_DEF_STMT (rhs1); 4238 if (TREE_CODE (rhs2) == SSA_NAME 4239 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2))) 4240 stmt = SSA_NAME_DEF_STMT (rhs2); 4241 return stmt; 4242 } 4243 4244 /* If the stmt that defines operand has to be inserted, insert it 4245 before the use. */ 4246 static void 4247 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert) 4248 { 4249 gcc_assert (is_gimple_assign (stmt_to_insert)); 4250 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert); 4251 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert); 4252 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2); 4253 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point); 4254 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point)); 4255 4256 /* If the insert point is not stmt, then insert_point would be 4257 the point where operand rhs1 or rhs2 is defined. In this case, 4258 stmt_to_insert has to be inserted afterwards. This would 4259 only happen when the stmt insertion point is flexible.
*/ 4260 if (stmt == insert_point) 4261 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT); 4262 else 4263 insert_stmt_after (stmt_to_insert, insert_point); 4264 } 4265 4266 4267 /* Recursively rewrite our linearized statements so that the operators 4268 match those in OPS[OPINDEX], putting the computation in rank 4269 order. Return new lhs. 4270 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in 4271 the current stmt and during recursive invocations. 4272 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in 4273 recursive invocations. */ 4274 4275 static tree 4276 rewrite_expr_tree (gimple *stmt, unsigned int opindex, 4277 vec<operand_entry *> ops, bool changed, bool next_changed) 4278 { 4279 tree rhs1 = gimple_assign_rhs1 (stmt); 4280 tree rhs2 = gimple_assign_rhs2 (stmt); 4281 tree lhs = gimple_assign_lhs (stmt); 4282 operand_entry *oe; 4283 4284 /* The final recursion case for this function is that you have 4285 exactly two operations left. 4286 If we had exactly one op in the entire list to start with, we 4287 would have never called this function, and the tail recursion 4288 rewrites them one at a time. */ 4289 if (opindex + 2 == ops.length ()) 4290 { 4291 operand_entry *oe1, *oe2; 4292 4293 oe1 = ops[opindex]; 4294 oe2 = ops[opindex + 1]; 4295 4296 if (rhs1 != oe1->op || rhs2 != oe2->op) 4297 { 4298 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 4299 unsigned int uid = gimple_uid (stmt); 4300 4301 if (dump_file && (dump_flags & TDF_DETAILS)) 4302 { 4303 fprintf (dump_file, "Transforming "); 4304 print_gimple_stmt (dump_file, stmt, 0, 0); 4305 } 4306 4307 /* If the stmt that defines operand has to be inserted, insert it 4308 before the use. */ 4309 if (oe1->stmt_to_insert) 4310 insert_stmt_before_use (stmt, oe1->stmt_to_insert); 4311 if (oe2->stmt_to_insert) 4312 insert_stmt_before_use (stmt, oe2->stmt_to_insert); 4313 /* Even when changed is false, reassociation could have e.g. removed 4314 some redundant operations, so unless we are just swapping the 4315 arguments or unless there is no change at all (then we just 4316 return lhs), force creation of a new SSA_NAME. */ 4317 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex)) 4318 { 4319 gimple *insert_point 4320 = find_insert_point (stmt, oe1->op, oe2->op); 4321 lhs = make_ssa_name (TREE_TYPE (lhs)); 4322 stmt 4323 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt), 4324 oe1->op, oe2->op); 4325 gimple_set_uid (stmt, uid); 4326 gimple_set_visited (stmt, true); 4327 if (insert_point == gsi_stmt (gsi)) 4328 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 4329 else 4330 insert_stmt_after (stmt, insert_point); 4331 } 4332 else 4333 { 4334 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op) 4335 == stmt); 4336 gimple_assign_set_rhs1 (stmt, oe1->op); 4337 gimple_assign_set_rhs2 (stmt, oe2->op); 4338 update_stmt (stmt); 4339 } 4340 4341 if (rhs1 != oe1->op && rhs1 != oe2->op) 4342 remove_visited_stmt_chain (rhs1); 4343 4344 if (dump_file && (dump_flags & TDF_DETAILS)) 4345 { 4346 fprintf (dump_file, " into "); 4347 print_gimple_stmt (dump_file, stmt, 0, 0); 4348 } 4349 } 4350 return lhs; 4351 } 4352 4353 /* If we hit here, we should have 3 or more ops left. */ 4354 gcc_assert (opindex + 2 < ops.length ()); 4355 4356 /* Rewrite the next operator. */ 4357 oe = ops[opindex]; 4358 4359 /* If the stmt that defines operand has to be inserted, insert it 4360 before the use. 
*/ 4361 if (oe->stmt_to_insert) 4362 insert_stmt_before_use (stmt, oe->stmt_to_insert); 4363 4364 /* Recurse on the LHS of the binary operator, which is guaranteed to 4365 be the non-leaf side. */ 4366 tree new_rhs1 4367 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, 4368 changed || oe->op != rhs2 || next_changed, 4369 false); 4370 4371 if (oe->op != rhs2 || new_rhs1 != rhs1) 4372 { 4373 if (dump_file && (dump_flags & TDF_DETAILS)) 4374 { 4375 fprintf (dump_file, "Transforming "); 4376 print_gimple_stmt (dump_file, stmt, 0, 0); 4377 } 4378 4379 /* If changed is false, this is either opindex == 0 4380 or all outer rhs2's were equal to corresponding oe->op, 4381 and powi_result is NULL. 4382 That means lhs is equivalent before and after reassociation. 4383 Otherwise ensure the old lhs SSA_NAME is not reused and 4384 create a new stmt as well, so that any debug stmts will be 4385 properly adjusted. */ 4386 if (changed) 4387 { 4388 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 4389 unsigned int uid = gimple_uid (stmt); 4390 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op); 4391 4392 lhs = make_ssa_name (TREE_TYPE (lhs)); 4393 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt), 4394 new_rhs1, oe->op); 4395 gimple_set_uid (stmt, uid); 4396 gimple_set_visited (stmt, true); 4397 if (insert_point == gsi_stmt (gsi)) 4398 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 4399 else 4400 insert_stmt_after (stmt, insert_point); 4401 } 4402 else 4403 { 4404 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op) 4405 == stmt); 4406 gimple_assign_set_rhs1 (stmt, new_rhs1); 4407 gimple_assign_set_rhs2 (stmt, oe->op); 4408 update_stmt (stmt); 4409 } 4410 4411 if (dump_file && (dump_flags & TDF_DETAILS)) 4412 { 4413 fprintf (dump_file, " into "); 4414 print_gimple_stmt (dump_file, stmt, 0, 0); 4415 } 4416 } 4417 return lhs; 4418 } 4419 4420 /* Find out how many cycles we need to compute a chain of statements. 4421 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the 4422 maximum number of independent statements we may execute per cycle. */ 4423 4424 static int 4425 get_required_cycles (int ops_num, int cpu_width) 4426 { 4427 int res; 4428 int elog; 4429 unsigned int rest; 4430 4431 /* While we have more than 2 * cpu_width operands 4432 we may reduce the number of operands by cpu_width 4433 per cycle. */ 4434 res = ops_num / (2 * cpu_width); 4435 4436 /* The remaining operand count may then be reduced by half each cycle 4437 until we have only one operand. */ 4438 rest = (unsigned)(ops_num - res * cpu_width); 4439 elog = exact_log2 (rest); 4440 if (elog >= 0) 4441 res += elog; 4442 else 4443 res += floor_log2 (rest) + 1; 4444 4445 return res; 4446 } 4447 4448 /* Returns an optimal number of registers to use for the computation of 4449 the given statements. */ 4450 4451 static int 4452 get_reassociation_width (int ops_num, enum tree_code opc, 4453 machine_mode mode) 4454 { 4455 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH); 4456 int width; 4457 int width_min; 4458 int cycles_best; 4459 4460 if (param_width > 0) 4461 width = param_width; 4462 else 4463 width = targetm.sched.reassociation_width (opc, mode); 4464 4465 if (width == 1) 4466 return width; 4467 4468 /* Get the minimal time required for sequence computation. */ 4469 cycles_best = get_required_cycles (ops_num, width); 4470 4471 /* Check if we may use a smaller width and still compute the sequence in 4472 the same time. That allows us to reduce register usage.
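     As a purely illustrative example, for ops_num == 6 and a target width of 4, get_required_cycles returns 3 for widths 4, 3 and 2 but 5 for width 1, so a width of 2 is as fast as 4 while tying up fewer registers.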
4473 get_required_cycles is monotonically increasing with lower width 4474 so we can perform a binary search for the minimal width that still 4475 results in the optimal cycle count. */ 4476 width_min = 1; 4477 while (width > width_min) 4478 { 4479 int width_mid = (width + width_min) / 2; 4480 4481 if (get_required_cycles (ops_num, width_mid) == cycles_best) 4482 width = width_mid; 4483 else if (width_min < width_mid) 4484 width_min = width_mid; 4485 else 4486 break; 4487 } 4488 4489 return width; 4490 } 4491 4492 /* Recursively rewrite our linearized statements so that the operators 4493 match those in OPS[OPINDEX], putting the computation in rank 4494 order and trying to allow operations to be executed in 4495 parallel. */ 4496 4497 static void 4498 rewrite_expr_tree_parallel (gassign *stmt, int width, 4499 vec<operand_entry *> ops) 4500 { 4501 enum tree_code opcode = gimple_assign_rhs_code (stmt); 4502 int op_num = ops.length (); 4503 gcc_assert (op_num > 0); 4504 int stmt_num = op_num - 1; 4505 gimple **stmts = XALLOCAVEC (gimple *, stmt_num); 4506 int op_index = op_num - 1; 4507 int stmt_index = 0; 4508 int ready_stmts_end = 0; 4509 int i = 0; 4510 gimple *stmt1 = NULL, *stmt2 = NULL; 4511 tree last_rhs1 = gimple_assign_rhs1 (stmt); 4512 4513 /* We start expression rewriting from the top statements. 4514 So, in this loop we create a full list of statements 4515 we will work with. */ 4516 stmts[stmt_num - 1] = stmt; 4517 for (i = stmt_num - 2; i >= 0; i--) 4518 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1])); 4519 4520 for (i = 0; i < stmt_num; i++) 4521 { 4522 tree op1, op2; 4523 4524 /* Determine whether we should use results of 4525 already handled statements or not. */ 4526 if (ready_stmts_end == 0 4527 && (i - stmt_index >= width || op_index < 1)) 4528 ready_stmts_end = i; 4529 4530 /* Now we choose operands for the next statement. Non zero 4531 value in ready_stmts_end means here that we should use 4532 the result of already generated statements as new operand. */ 4533 if (ready_stmts_end > 0) 4534 { 4535 op1 = gimple_assign_lhs (stmts[stmt_index++]); 4536 if (ready_stmts_end > stmt_index) 4537 op2 = gimple_assign_lhs (stmts[stmt_index++]); 4538 else if (op_index >= 0) 4539 { 4540 operand_entry *oe = ops[op_index--]; 4541 stmt2 = oe->stmt_to_insert; 4542 op2 = oe->op; 4543 } 4544 else 4545 { 4546 gcc_assert (stmt_index < i); 4547 op2 = gimple_assign_lhs (stmts[stmt_index++]); 4548 } 4549 4550 if (stmt_index >= ready_stmts_end) 4551 ready_stmts_end = 0; 4552 } 4553 else 4554 { 4555 if (op_index > 1) 4556 swap_ops_for_binary_stmt (ops, op_index - 2, NULL); 4557 operand_entry *oe2 = ops[op_index--]; 4558 operand_entry *oe1 = ops[op_index--]; 4559 op2 = oe2->op; 4560 stmt2 = oe2->stmt_to_insert; 4561 op1 = oe1->op; 4562 stmt1 = oe1->stmt_to_insert; 4563 } 4564 4565 /* If we emit the last statement then we should put 4566 operands into the last statement. It will also 4567 break the loop. */ 4568 if (op_index < 0 && stmt_index == i) 4569 i = stmt_num - 1; 4570 4571 if (dump_file && (dump_flags & TDF_DETAILS)) 4572 { 4573 fprintf (dump_file, "Transforming "); 4574 print_gimple_stmt (dump_file, stmts[i], 0, 0); 4575 } 4576 4577 /* If the stmt that defines operand has to be inserted, insert it 4578 before the use. */ 4579 if (stmt1) 4580 insert_stmt_before_use (stmts[i], stmt1); 4581 if (stmt2) 4582 insert_stmt_before_use (stmts[i], stmt2); 4583 stmt1 = stmt2 = NULL; 4584 4585 /* We keep original statement only for the last one. All 4586 others are recreated. 
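     E.g. (purely illustrative) for lhs_7 = a_1 + b_2 + c_3 + d_4 with width 2, two independent additions t_5 and t_6 over the four operands are built first, and the original statement is then reused as lhs_7 = t_5 + t_6, so the two new additions can issue in parallel.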
*/ 4587 if (i == stmt_num - 1) 4588 { 4589 gimple_assign_set_rhs1 (stmts[i], op1); 4590 gimple_assign_set_rhs2 (stmts[i], op2); 4591 update_stmt (stmts[i]); 4592 } 4593 else 4594 { 4595 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); 4596 } 4597 if (dump_file && (dump_flags & TDF_DETAILS)) 4598 { 4599 fprintf (dump_file, " into "); 4600 print_gimple_stmt (dump_file, stmts[i], 0, 0); 4601 } 4602 } 4603 4604 remove_visited_stmt_chain (last_rhs1); 4605 } 4606 4607 /* Transform STMT, which is really (A +B) + (C + D) into the left 4608 linear form, ((A+B)+C)+D. 4609 Recurse on D if necessary. */ 4610 4611 static void 4612 linearize_expr (gimple *stmt) 4613 { 4614 gimple_stmt_iterator gsi; 4615 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); 4616 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 4617 gimple *oldbinrhs = binrhs; 4618 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 4619 gimple *newbinrhs = NULL; 4620 struct loop *loop = loop_containing_stmt (stmt); 4621 tree lhs = gimple_assign_lhs (stmt); 4622 4623 gcc_assert (is_reassociable_op (binlhs, rhscode, loop) 4624 && is_reassociable_op (binrhs, rhscode, loop)); 4625 4626 gsi = gsi_for_stmt (stmt); 4627 4628 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs)); 4629 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)), 4630 gimple_assign_rhs_code (binrhs), 4631 gimple_assign_lhs (binlhs), 4632 gimple_assign_rhs2 (binrhs)); 4633 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs)); 4634 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT); 4635 gimple_set_uid (binrhs, gimple_uid (stmt)); 4636 4637 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) 4638 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 4639 4640 if (dump_file && (dump_flags & TDF_DETAILS)) 4641 { 4642 fprintf (dump_file, "Linearized: "); 4643 print_gimple_stmt (dump_file, stmt, 0, 0); 4644 } 4645 4646 reassociate_stats.linearized++; 4647 update_stmt (stmt); 4648 4649 gsi = gsi_for_stmt (oldbinrhs); 4650 reassoc_remove_stmt (&gsi); 4651 release_defs (oldbinrhs); 4652 4653 gimple_set_visited (stmt, true); 4654 gimple_set_visited (binlhs, true); 4655 gimple_set_visited (binrhs, true); 4656 4657 /* Tail recurse on the new rhs if it still needs reassociation. */ 4658 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop)) 4659 /* ??? This should probably be linearize_expr (newbinrhs) but I don't 4660 want to change the algorithm while converting to tuples. */ 4661 linearize_expr (stmt); 4662 } 4663 4664 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return 4665 it. Otherwise, return NULL. */ 4666 4667 static gimple * 4668 get_single_immediate_use (tree lhs) 4669 { 4670 use_operand_p immuse; 4671 gimple *immusestmt; 4672 4673 if (TREE_CODE (lhs) == SSA_NAME 4674 && single_imm_use (lhs, &immuse, &immusestmt) 4675 && is_gimple_assign (immusestmt)) 4676 return immusestmt; 4677 4678 return NULL; 4679 } 4680 4681 /* Recursively negate the value of TONEGATE, and return the SSA_NAME 4682 representing the negated value. Insertions of any necessary 4683 instructions go before GSI. 4684 This function is recursive in that, if you hand it "a_5" as the 4685 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will 4686 transform b_3 + b_4 into a_5 = -b_3 + -b_4. 
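   (In the implementation below the negated sum actually receives a fresh SSA_NAME, which is returned to the caller; the old definition of a_5 simply becomes dead once its single use has been replaced.)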
*/ 4687 4688 static tree 4689 negate_value (tree tonegate, gimple_stmt_iterator *gsip) 4690 { 4691 gimple *negatedefstmt = NULL; 4692 tree resultofnegate; 4693 gimple_stmt_iterator gsi; 4694 unsigned int uid; 4695 4696 /* If we are trying to negate a name, defined by an add, negate the 4697 add operands instead. */ 4698 if (TREE_CODE (tonegate) == SSA_NAME) 4699 negatedefstmt = SSA_NAME_DEF_STMT (tonegate); 4700 if (TREE_CODE (tonegate) == SSA_NAME 4701 && is_gimple_assign (negatedefstmt) 4702 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME 4703 && has_single_use (gimple_assign_lhs (negatedefstmt)) 4704 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR) 4705 { 4706 tree rhs1 = gimple_assign_rhs1 (negatedefstmt); 4707 tree rhs2 = gimple_assign_rhs2 (negatedefstmt); 4708 tree lhs = gimple_assign_lhs (negatedefstmt); 4709 gimple *g; 4710 4711 gsi = gsi_for_stmt (negatedefstmt); 4712 rhs1 = negate_value (rhs1, &gsi); 4713 4714 gsi = gsi_for_stmt (negatedefstmt); 4715 rhs2 = negate_value (rhs2, &gsi); 4716 4717 gsi = gsi_for_stmt (negatedefstmt); 4718 lhs = make_ssa_name (TREE_TYPE (lhs)); 4719 gimple_set_visited (negatedefstmt, true); 4720 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2); 4721 gimple_set_uid (g, gimple_uid (negatedefstmt)); 4722 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4723 return lhs; 4724 } 4725 4726 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate); 4727 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true, 4728 NULL_TREE, true, GSI_SAME_STMT); 4729 gsi = *gsip; 4730 uid = gimple_uid (gsi_stmt (gsi)); 4731 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi)) 4732 { 4733 gimple *stmt = gsi_stmt (gsi); 4734 if (gimple_uid (stmt) != 0) 4735 break; 4736 gimple_set_uid (stmt, uid); 4737 } 4738 return resultofnegate; 4739 } 4740 4741 /* Return true if we should break up the subtract in STMT into an add 4742 with negate. This is true when the subtract operands are really 4743 adds, or the subtract itself is used in an add or multiply expression. In 4744 either case, breaking up the subtract into an add with negate 4745 exposes the adds to reassociation. */ 4746 4747 static bool 4748 should_break_up_subtract (gimple *stmt) 4749 { 4750 tree lhs = gimple_assign_lhs (stmt); 4751 tree binlhs = gimple_assign_rhs1 (stmt); 4752 tree binrhs = gimple_assign_rhs2 (stmt); 4753 gimple *immusestmt; 4754 struct loop *loop = loop_containing_stmt (stmt); 4755 4756 if (TREE_CODE (binlhs) == SSA_NAME 4757 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) 4758 return true; 4759 4760 if (TREE_CODE (binrhs) == SSA_NAME 4761 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop)) 4762 return true; 4763 4764 if (TREE_CODE (lhs) == SSA_NAME 4765 && (immusestmt = get_single_immediate_use (lhs)) 4766 && is_gimple_assign (immusestmt) 4767 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR 4768 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR)) 4769 return true; 4770 return false; 4771 } 4772 4773 /* Transform STMT from A - B into A + -B.
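   E.g. x_3 = a_1 - b_2 becomes x_3 = a_1 + t_4 once negate_value has produced t_4 = -b_2 (SSA names purely illustrative).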
*/ 4774 4775 static void 4776 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip) 4777 { 4778 tree rhs1 = gimple_assign_rhs1 (stmt); 4779 tree rhs2 = gimple_assign_rhs2 (stmt); 4780 4781 if (dump_file && (dump_flags & TDF_DETAILS)) 4782 { 4783 fprintf (dump_file, "Breaking up subtract "); 4784 print_gimple_stmt (dump_file, stmt, 0, 0); 4785 } 4786 4787 rhs2 = negate_value (rhs2, gsip); 4788 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2); 4789 update_stmt (stmt); 4790 } 4791 4792 /* Determine whether STMT is a builtin call that raises an SSA name 4793 to an integer power and has only one use. If so, and this is early 4794 reassociation and unsafe math optimizations are permitted, place 4795 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE. 4796 If any of these conditions does not hold, return FALSE. */ 4797 4798 static bool 4799 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent) 4800 { 4801 tree arg1; 4802 REAL_VALUE_TYPE c, cint; 4803 4804 switch (gimple_call_combined_fn (stmt)) 4805 { 4806 CASE_CFN_POW: 4807 if (flag_errno_math) 4808 return false; 4809 4810 *base = gimple_call_arg (stmt, 0); 4811 arg1 = gimple_call_arg (stmt, 1); 4812 4813 if (TREE_CODE (arg1) != REAL_CST) 4814 return false; 4815 4816 c = TREE_REAL_CST (arg1); 4817 4818 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT) 4819 return false; 4820 4821 *exponent = real_to_integer (&c); 4822 real_from_integer (&cint, VOIDmode, *exponent, SIGNED); 4823 if (!real_identical (&c, &cint)) 4824 return false; 4825 4826 break; 4827 4828 CASE_CFN_POWI: 4829 *base = gimple_call_arg (stmt, 0); 4830 arg1 = gimple_call_arg (stmt, 1); 4831 4832 if (!tree_fits_shwi_p (arg1)) 4833 return false; 4834 4835 *exponent = tree_to_shwi (arg1); 4836 break; 4837 4838 default: 4839 return false; 4840 } 4841 4842 /* Expanding negative exponents is generally unproductive, so we don't 4843 complicate matters with those. Exponents of zero and one should 4844 have been handled by expression folding. */ 4845 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME) 4846 return false; 4847 4848 return true; 4849 } 4850 4851 /* Try to derive and add operand entry for OP to *OPS. Return false if 4852 unsuccessful. */ 4853 4854 static bool 4855 try_special_add_to_ops (vec<operand_entry *> *ops, 4856 enum tree_code code, 4857 tree op, gimple* def_stmt) 4858 { 4859 tree base = NULL_TREE; 4860 HOST_WIDE_INT exponent = 0; 4861 4862 if (TREE_CODE (op) != SSA_NAME 4863 || ! has_single_use (op)) 4864 return false; 4865 4866 if (code == MULT_EXPR 4867 && reassoc_insert_powi_p 4868 && flag_unsafe_math_optimizations 4869 && is_gimple_call (def_stmt) 4870 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent)) 4871 { 4872 add_repeat_to_ops_vec (ops, base, exponent); 4873 gimple_set_visited (def_stmt, true); 4874 return true; 4875 } 4876 else if (code == MULT_EXPR 4877 && is_gimple_assign (def_stmt) 4878 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR 4879 && !HONOR_SNANS (TREE_TYPE (op)) 4880 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op)) 4881 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))) 4882 { 4883 tree rhs1 = gimple_assign_rhs1 (def_stmt); 4884 tree cst = build_minus_one_cst (TREE_TYPE (op)); 4885 add_to_ops_vec (ops, rhs1); 4886 add_to_ops_vec (ops, cst); 4887 gimple_set_visited (def_stmt, true); 4888 return true; 4889 } 4890 4891 return false; 4892 } 4893 4894 /* Recursively linearize a binary expression that is the RHS of STMT. 4895 Place the operands of the expression tree in the vector named OPS. 
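   E.g. for x_5 = (a_1 + b_2) + (c_3 + d_4) (names illustrative, and assuming the intermediate sums have single uses), linearize_expr first rewrites the tree into ((a_1 + b_2) + c_3) + d_4, and OPS ends up holding a_1, b_2, c_3 and d_4.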
*/ 4896 4897 static void 4898 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt, 4899 bool is_associative, bool set_visited) 4900 { 4901 tree binlhs = gimple_assign_rhs1 (stmt); 4902 tree binrhs = gimple_assign_rhs2 (stmt); 4903 gimple *binlhsdef = NULL, *binrhsdef = NULL; 4904 bool binlhsisreassoc = false; 4905 bool binrhsisreassoc = false; 4906 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 4907 struct loop *loop = loop_containing_stmt (stmt); 4908 4909 if (set_visited) 4910 gimple_set_visited (stmt, true); 4911 4912 if (TREE_CODE (binlhs) == SSA_NAME) 4913 { 4914 binlhsdef = SSA_NAME_DEF_STMT (binlhs); 4915 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop) 4916 && !stmt_could_throw_p (binlhsdef)); 4917 } 4918 4919 if (TREE_CODE (binrhs) == SSA_NAME) 4920 { 4921 binrhsdef = SSA_NAME_DEF_STMT (binrhs); 4922 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop) 4923 && !stmt_could_throw_p (binrhsdef)); 4924 } 4925 4926 /* If the LHS is not reassociable, but the RHS is, we need to swap 4927 them. If neither is reassociable, there is nothing we can do, so 4928 just put them in the ops vector. If the LHS is reassociable, 4929 linearize it. If both are reassociable, then linearize the RHS 4930 and the LHS. */ 4931 4932 if (!binlhsisreassoc) 4933 { 4934 /* If this is not a associative operation like division, give up. */ 4935 if (!is_associative) 4936 { 4937 add_to_ops_vec (ops, binrhs); 4938 return; 4939 } 4940 4941 if (!binrhsisreassoc) 4942 { 4943 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef)) 4944 add_to_ops_vec (ops, binrhs); 4945 4946 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef)) 4947 add_to_ops_vec (ops, binlhs); 4948 4949 return; 4950 } 4951 4952 if (dump_file && (dump_flags & TDF_DETAILS)) 4953 { 4954 fprintf (dump_file, "swapping operands of "); 4955 print_gimple_stmt (dump_file, stmt, 0, 0); 4956 } 4957 4958 swap_ssa_operands (stmt, 4959 gimple_assign_rhs1_ptr (stmt), 4960 gimple_assign_rhs2_ptr (stmt)); 4961 update_stmt (stmt); 4962 4963 if (dump_file && (dump_flags & TDF_DETAILS)) 4964 { 4965 fprintf (dump_file, " is now "); 4966 print_gimple_stmt (dump_file, stmt, 0, 0); 4967 } 4968 4969 /* We want to make it so the lhs is always the reassociative op, 4970 so swap. */ 4971 std::swap (binlhs, binrhs); 4972 } 4973 else if (binrhsisreassoc) 4974 { 4975 linearize_expr (stmt); 4976 binlhs = gimple_assign_rhs1 (stmt); 4977 binrhs = gimple_assign_rhs2 (stmt); 4978 } 4979 4980 gcc_assert (TREE_CODE (binrhs) != SSA_NAME 4981 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), 4982 rhscode, loop)); 4983 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs), 4984 is_associative, set_visited); 4985 4986 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef)) 4987 add_to_ops_vec (ops, binrhs); 4988 } 4989 4990 /* Repropagate the negates back into subtracts, since no other pass 4991 currently does it. */ 4992 4993 static void 4994 repropagate_negates (void) 4995 { 4996 unsigned int i = 0; 4997 tree negate; 4998 4999 FOR_EACH_VEC_ELT (plus_negates, i, negate) 5000 { 5001 gimple *user = get_single_immediate_use (negate); 5002 5003 if (!user || !is_gimple_assign (user)) 5004 continue; 5005 5006 /* The negate operand can be either operand of a PLUS_EXPR 5007 (it can be the LHS if the RHS is a constant for example). 5008 5009 Force the negate operand to the RHS of the PLUS_EXPR, then 5010 transform the PLUS_EXPR into a MINUS_EXPR. 
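     E.g. with t_2 = -a_1 and x_3 = t_2 + b_4 (names illustrative), the operands are first swapped to give x_3 = b_4 + t_2, and the statement is then rewritten as x_3 = b_4 - a_1.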
*/ 5011 if (gimple_assign_rhs_code (user) == PLUS_EXPR) 5012 { 5013 /* If the negated operand appears on the LHS of the 5014 PLUS_EXPR, exchange the operands of the PLUS_EXPR 5015 to force the negated operand to the RHS of the PLUS_EXPR. */ 5016 if (gimple_assign_rhs1 (user) == negate) 5017 { 5018 swap_ssa_operands (user, 5019 gimple_assign_rhs1_ptr (user), 5020 gimple_assign_rhs2_ptr (user)); 5021 } 5022 5023 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace 5024 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */ 5025 if (gimple_assign_rhs2 (user) == negate) 5026 { 5027 tree rhs1 = gimple_assign_rhs1 (user); 5028 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate)); 5029 gimple_stmt_iterator gsi = gsi_for_stmt (user); 5030 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2); 5031 update_stmt (user); 5032 } 5033 } 5034 else if (gimple_assign_rhs_code (user) == MINUS_EXPR) 5035 { 5036 if (gimple_assign_rhs1 (user) == negate) 5037 { 5038 /* We have 5039 x = -a 5040 y = x - b 5041 which we transform into 5042 x = a + b 5043 y = -x . 5044 This pushes down the negate which we possibly can merge 5045 into some other operation, hence insert it into the 5046 plus_negates vector. */ 5047 gimple *feed = SSA_NAME_DEF_STMT (negate); 5048 tree a = gimple_assign_rhs1 (feed); 5049 tree b = gimple_assign_rhs2 (user); 5050 gimple_stmt_iterator gsi = gsi_for_stmt (feed); 5051 gimple_stmt_iterator gsi2 = gsi_for_stmt (user); 5052 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed))); 5053 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b); 5054 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); 5055 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x); 5056 user = gsi_stmt (gsi2); 5057 update_stmt (user); 5058 reassoc_remove_stmt (&gsi); 5059 release_defs (feed); 5060 plus_negates.safe_push (gimple_assign_lhs (user)); 5061 } 5062 else 5063 { 5064 /* Transform "x = -a; y = b - x" into "y = b + a", getting 5065 rid of one operation. */ 5066 gimple *feed = SSA_NAME_DEF_STMT (negate); 5067 tree a = gimple_assign_rhs1 (feed); 5068 tree rhs1 = gimple_assign_rhs1 (user); 5069 gimple_stmt_iterator gsi = gsi_for_stmt (user); 5070 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a); 5071 update_stmt (gsi_stmt (gsi)); 5072 } 5073 } 5074 } 5075 } 5076 5077 /* Returns true if OP is of a type for which we can do reassociation. 5078 That is for integral or non-saturating fixed-point types, and for 5079 floating point type when associative-math is enabled. */ 5080 5081 static bool 5082 can_reassociate_p (tree op) 5083 { 5084 tree type = TREE_TYPE (op); 5085 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op)) 5086 return false; 5087 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)) 5088 || NON_SAT_FIXED_POINT_TYPE_P (type) 5089 || (flag_associative_math && FLOAT_TYPE_P (type))) 5090 return true; 5091 return false; 5092 } 5093 5094 /* Break up subtract operations in block BB. 5095 5096 We do this top down because we don't know whether the subtract is 5097 part of a possible chain of reassociation except at the top. 5098 5099 IE given 5100 d = f + g 5101 c = a + e 5102 b = c - d 5103 q = b - r 5104 k = t - q 5105 5106 we want to break up k = t - q, but we won't until we've transformed q 5107 = b - r, which won't be broken up until we transform b = c - d. 5108 5109 En passant, clear the GIMPLE visited flag on every statement 5110 and set UIDs within each basic block. 
*/ 5111 5112 static void 5113 break_up_subtract_bb (basic_block bb) 5114 { 5115 gimple_stmt_iterator gsi; 5116 basic_block son; 5117 unsigned int uid = 1; 5118 5119 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 5120 { 5121 gimple *stmt = gsi_stmt (gsi); 5122 gimple_set_visited (stmt, false); 5123 gimple_set_uid (stmt, uid++); 5124 5125 if (!is_gimple_assign (stmt) 5126 || !can_reassociate_p (gimple_assign_lhs (stmt))) 5127 continue; 5128 5129 /* Look for simple gimple subtract operations. */ 5130 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR) 5131 { 5132 if (!can_reassociate_p (gimple_assign_rhs1 (stmt)) 5133 || !can_reassociate_p (gimple_assign_rhs2 (stmt))) 5134 continue; 5135 5136 /* Check for a subtract used only in an addition. If this 5137 is the case, transform it into add of a negate for better 5138 reassociation. IE transform C = A-B into C = A + -B if C 5139 is only used in an addition. */ 5140 if (should_break_up_subtract (stmt)) 5141 break_up_subtract (stmt, &gsi); 5142 } 5143 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR 5144 && can_reassociate_p (gimple_assign_rhs1 (stmt))) 5145 plus_negates.safe_push (gimple_assign_lhs (stmt)); 5146 } 5147 for (son = first_dom_son (CDI_DOMINATORS, bb); 5148 son; 5149 son = next_dom_son (CDI_DOMINATORS, son)) 5150 break_up_subtract_bb (son); 5151 } 5152 5153 /* Used for repeated factor analysis. */ 5154 struct repeat_factor 5155 { 5156 /* An SSA name that occurs in a multiply chain. */ 5157 tree factor; 5158 5159 /* Cached rank of the factor. */ 5160 unsigned rank; 5161 5162 /* Number of occurrences of the factor in the chain. */ 5163 HOST_WIDE_INT count; 5164 5165 /* An SSA name representing the product of this factor and 5166 all factors appearing later in the repeated factor vector. */ 5167 tree repr; 5168 }; 5169 5170 5171 static vec<repeat_factor> repeat_factor_vec; 5172 5173 /* Used for sorting the repeat factor vector. Sort primarily by 5174 ascending occurrence count, secondarily by descending rank. */ 5175 5176 static int 5177 compare_repeat_factors (const void *x1, const void *x2) 5178 { 5179 const repeat_factor *rf1 = (const repeat_factor *) x1; 5180 const repeat_factor *rf2 = (const repeat_factor *) x2; 5181 5182 if (rf1->count != rf2->count) 5183 return rf1->count - rf2->count; 5184 5185 return rf2->rank - rf1->rank; 5186 } 5187 5188 /* Look for repeated operands in OPS in the multiply tree rooted at 5189 STMT. Replace them with an optimal sequence of multiplies and powi 5190 builtin calls, and remove the used operands from OPS. Return an 5191 SSA name representing the value of the replacement sequence. */ 5192 5193 static tree 5194 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops) 5195 { 5196 unsigned i, j, vec_len; 5197 int ii; 5198 operand_entry *oe; 5199 repeat_factor *rf1, *rf2; 5200 repeat_factor rfnew; 5201 tree result = NULL_TREE; 5202 tree target_ssa, iter_result; 5203 tree type = TREE_TYPE (gimple_get_lhs (stmt)); 5204 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI); 5205 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 5206 gimple *mul_stmt, *pow_stmt; 5207 5208 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and 5209 target. */ 5210 if (!powi_fndecl) 5211 return NULL_TREE; 5212 5213 /* Allocate the repeated factor vector. */ 5214 repeat_factor_vec.create (10); 5215 5216 /* Scan the OPS vector for all SSA names in the product and build 5217 up a vector of occurrence counts for each factor. 
*/ 5218 FOR_EACH_VEC_ELT (*ops, i, oe) 5219 { 5220 if (TREE_CODE (oe->op) == SSA_NAME) 5221 { 5222 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 5223 { 5224 if (rf1->factor == oe->op) 5225 { 5226 rf1->count += oe->count; 5227 break; 5228 } 5229 } 5230 5231 if (j >= repeat_factor_vec.length ()) 5232 { 5233 rfnew.factor = oe->op; 5234 rfnew.rank = oe->rank; 5235 rfnew.count = oe->count; 5236 rfnew.repr = NULL_TREE; 5237 repeat_factor_vec.safe_push (rfnew); 5238 } 5239 } 5240 } 5241 5242 /* Sort the repeated factor vector by (a) increasing occurrence count, 5243 and (b) decreasing rank. */ 5244 repeat_factor_vec.qsort (compare_repeat_factors); 5245 5246 /* It is generally best to combine as many base factors as possible 5247 into a product before applying __builtin_powi to the result. 5248 However, the sort order chosen for the repeated factor vector 5249 allows us to cache partial results for the product of the base 5250 factors for subsequent use. When we already have a cached partial 5251 result from a previous iteration, it is best to make use of it 5252 before looking for another __builtin_pow opportunity. 5253 5254 As an example, consider x * x * y * y * y * z * z * z * z. 5255 We want to first compose the product x * y * z, raise it to the 5256 second power, then multiply this by y * z, and finally multiply 5257 by z. This can be done in 5 multiplies provided we cache y * z 5258 for use in both expressions: 5259 5260 t1 = y * z 5261 t2 = t1 * x 5262 t3 = t2 * t2 5263 t4 = t1 * t3 5264 result = t4 * z 5265 5266 If we instead ignored the cached y * z and first multiplied by 5267 the __builtin_pow opportunity z * z, we would get the inferior: 5268 5269 t1 = y * z 5270 t2 = t1 * x 5271 t3 = t2 * t2 5272 t4 = z * z 5273 t5 = t3 * t4 5274 result = t5 * y */ 5275 5276 vec_len = repeat_factor_vec.length (); 5277 5278 /* Repeatedly look for opportunities to create a builtin_powi call. */ 5279 while (true) 5280 { 5281 HOST_WIDE_INT power; 5282 5283 /* First look for the largest cached product of factors from 5284 preceding iterations. If found, create a builtin_powi for 5285 it if the minimum occurrence count for its factors is at 5286 least 2, or just use this cached product as our next 5287 multiplicand if the minimum occurrence count is 1. 
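     In the x * x * y * y * y * z * z * z * z example above, the second pass through this loop finds the cached product t1 = y * z with a remaining count of 1 and simply multiplies the running result by it; the final pass is then left with just z.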
*/ 5288 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 5289 { 5290 if (rf1->repr && rf1->count > 0) 5291 break; 5292 } 5293 5294 if (j < vec_len) 5295 { 5296 power = rf1->count; 5297 5298 if (power == 1) 5299 { 5300 iter_result = rf1->repr; 5301 5302 if (dump_file && (dump_flags & TDF_DETAILS)) 5303 { 5304 unsigned elt; 5305 repeat_factor *rf; 5306 fputs ("Multiplying by cached product ", dump_file); 5307 for (elt = j; elt < vec_len; elt++) 5308 { 5309 rf = &repeat_factor_vec[elt]; 5310 print_generic_expr (dump_file, rf->factor, 0); 5311 if (elt < vec_len - 1) 5312 fputs (" * ", dump_file); 5313 } 5314 fputs ("\n", dump_file); 5315 } 5316 } 5317 else 5318 { 5319 iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); 5320 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 5321 build_int_cst (integer_type_node, 5322 power)); 5323 gimple_call_set_lhs (pow_stmt, iter_result); 5324 gimple_set_location (pow_stmt, gimple_location (stmt)); 5325 gimple_set_uid (pow_stmt, gimple_uid (stmt)); 5326 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); 5327 5328 if (dump_file && (dump_flags & TDF_DETAILS)) 5329 { 5330 unsigned elt; 5331 repeat_factor *rf; 5332 fputs ("Building __builtin_pow call for cached product (", 5333 dump_file); 5334 for (elt = j; elt < vec_len; elt++) 5335 { 5336 rf = &repeat_factor_vec[elt]; 5337 print_generic_expr (dump_file, rf->factor, 0); 5338 if (elt < vec_len - 1) 5339 fputs (" * ", dump_file); 5340 } 5341 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", 5342 power); 5343 } 5344 } 5345 } 5346 else 5347 { 5348 /* Otherwise, find the first factor in the repeated factor 5349 vector whose occurrence count is at least 2. If no such 5350 factor exists, there are no builtin_powi opportunities 5351 remaining. */ 5352 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 5353 { 5354 if (rf1->count >= 2) 5355 break; 5356 } 5357 5358 if (j >= vec_len) 5359 break; 5360 5361 power = rf1->count; 5362 5363 if (dump_file && (dump_flags & TDF_DETAILS)) 5364 { 5365 unsigned elt; 5366 repeat_factor *rf; 5367 fputs ("Building __builtin_pow call for (", dump_file); 5368 for (elt = j; elt < vec_len; elt++) 5369 { 5370 rf = &repeat_factor_vec[elt]; 5371 print_generic_expr (dump_file, rf->factor, 0); 5372 if (elt < vec_len - 1) 5373 fputs (" * ", dump_file); 5374 } 5375 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power); 5376 } 5377 5378 reassociate_stats.pows_created++; 5379 5380 /* Visit each element of the vector in reverse order (so that 5381 high-occurrence elements are visited first, and within the 5382 same occurrence count, lower-ranked elements are visited 5383 first). Form a linear product of all elements in this order 5384 whose occurrencce count is at least that of element J. 5385 Record the SSA name representing the product of each element 5386 with all subsequent elements in the vector. */ 5387 if (j == vec_len - 1) 5388 rf1->repr = rf1->factor; 5389 else 5390 { 5391 for (ii = vec_len - 2; ii >= (int)j; ii--) 5392 { 5393 tree op1, op2; 5394 5395 rf1 = &repeat_factor_vec[ii]; 5396 rf2 = &repeat_factor_vec[ii + 1]; 5397 5398 /* Init the last factor's representative to be itself. 
*/ 5399 if (!rf2->repr) 5400 rf2->repr = rf2->factor; 5401 5402 op1 = rf1->factor; 5403 op2 = rf2->repr; 5404 5405 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow"); 5406 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR, 5407 op1, op2); 5408 gimple_set_location (mul_stmt, gimple_location (stmt)); 5409 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 5410 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); 5411 rf1->repr = target_ssa; 5412 5413 /* Don't reprocess the multiply we just introduced. */ 5414 gimple_set_visited (mul_stmt, true); 5415 } 5416 } 5417 5418 /* Form a call to __builtin_powi for the maximum product 5419 just formed, raised to the power obtained earlier. */ 5420 rf1 = &repeat_factor_vec[j]; 5421 iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); 5422 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 5423 build_int_cst (integer_type_node, 5424 power)); 5425 gimple_call_set_lhs (pow_stmt, iter_result); 5426 gimple_set_location (pow_stmt, gimple_location (stmt)); 5427 gimple_set_uid (pow_stmt, gimple_uid (stmt)); 5428 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); 5429 } 5430 5431 /* If we previously formed at least one other builtin_powi call, 5432 form the product of this one and those others. */ 5433 if (result) 5434 { 5435 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow"); 5436 mul_stmt = gimple_build_assign (new_result, MULT_EXPR, 5437 result, iter_result); 5438 gimple_set_location (mul_stmt, gimple_location (stmt)); 5439 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 5440 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); 5441 gimple_set_visited (mul_stmt, true); 5442 result = new_result; 5443 } 5444 else 5445 result = iter_result; 5446 5447 /* Decrement the occurrence count of each element in the product 5448 by the count found above, and remove this many copies of each 5449 factor from OPS. */ 5450 for (i = j; i < vec_len; i++) 5451 { 5452 unsigned k = power; 5453 unsigned n; 5454 5455 rf1 = &repeat_factor_vec[i]; 5456 rf1->count -= power; 5457 5458 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe) 5459 { 5460 if (oe->op == rf1->factor) 5461 { 5462 if (oe->count <= k) 5463 { 5464 ops->ordered_remove (n); 5465 k -= oe->count; 5466 5467 if (k == 0) 5468 break; 5469 } 5470 else 5471 { 5472 oe->count -= k; 5473 break; 5474 } 5475 } 5476 } 5477 } 5478 } 5479 5480 /* At this point all elements in the repeated factor vector have a 5481 remaining occurrence count of 0 or 1, and those with a count of 1 5482 don't have cached representatives. Re-sort the ops vector and 5483 clean up. */ 5484 ops->qsort (sort_by_operand_rank); 5485 repeat_factor_vec.release (); 5486 5487 /* Return the final product computed herein. Note that there may 5488 still be some elements with single occurrence count left in OPS; 5489 those will be handled by the normal reassociation logic. */ 5490 return result; 5491 } 5492 5493 /* Attempt to optimize 5494 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or 5495 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. 
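   E.g. (illustrative constants) 2.0 * copysign (3.0, y_1) is folded to copysign (6.0, y_1); with -2.0 instead, a new call copysign (-6.0, y_1) is built and its result is then negated.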
*/ 5496 5497 static void 5498 attempt_builtin_copysign (vec<operand_entry *> *ops) 5499 { 5500 operand_entry *oe; 5501 unsigned int i; 5502 unsigned int length = ops->length (); 5503 tree cst = ops->last ()->op; 5504 5505 if (length == 1 || TREE_CODE (cst) != REAL_CST) 5506 return; 5507 5508 FOR_EACH_VEC_ELT (*ops, i, oe) 5509 { 5510 if (TREE_CODE (oe->op) == SSA_NAME 5511 && has_single_use (oe->op)) 5512 { 5513 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op); 5514 if (gcall *old_call = dyn_cast <gcall *> (def_stmt)) 5515 { 5516 tree arg0, arg1; 5517 switch (gimple_call_combined_fn (old_call)) 5518 { 5519 CASE_CFN_COPYSIGN: 5520 arg0 = gimple_call_arg (old_call, 0); 5521 arg1 = gimple_call_arg (old_call, 1); 5522 /* The first argument of copysign must be a constant, 5523 otherwise there's nothing to do. */ 5524 if (TREE_CODE (arg0) == REAL_CST) 5525 { 5526 tree type = TREE_TYPE (arg0); 5527 tree mul = const_binop (MULT_EXPR, type, cst, arg0); 5528 /* If we couldn't fold to a single constant, skip it. 5529 That happens e.g. for inexact multiplication when 5530 -frounding-math. */ 5531 if (mul == NULL_TREE) 5532 break; 5533 /* Instead of adjusting OLD_CALL, let's build a new 5534 call to not leak the LHS and prevent keeping bogus 5535 debug statements. DCE will clean up the old call. */ 5536 gcall *new_call; 5537 if (gimple_call_internal_p (old_call)) 5538 new_call = gimple_build_call_internal 5539 (IFN_COPYSIGN, 2, mul, arg1); 5540 else 5541 new_call = gimple_build_call 5542 (gimple_call_fndecl (old_call), 2, mul, arg1); 5543 tree lhs = make_ssa_name (type); 5544 gimple_call_set_lhs (new_call, lhs); 5545 gimple_set_location (new_call, 5546 gimple_location (old_call)); 5547 insert_stmt_after (new_call, old_call); 5548 /* We've used the constant, get rid of it. */ 5549 ops->pop (); 5550 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst)); 5551 /* Handle the CST1 < 0 case by negating the result. */ 5552 if (cst1_neg) 5553 { 5554 tree negrhs = make_ssa_name (TREE_TYPE (lhs)); 5555 gimple *negate_stmt 5556 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs); 5557 insert_stmt_after (negate_stmt, new_call); 5558 oe->op = negrhs; 5559 } 5560 else 5561 oe->op = lhs; 5562 if (dump_file && (dump_flags & TDF_DETAILS)) 5563 { 5564 fprintf (dump_file, "Optimizing copysign: "); 5565 print_generic_expr (dump_file, cst, 0); 5566 fprintf (dump_file, " * COPYSIGN ("); 5567 print_generic_expr (dump_file, arg0, 0); 5568 fprintf (dump_file, ", "); 5569 print_generic_expr (dump_file, arg1, 0); 5570 fprintf (dump_file, ") into %sCOPYSIGN (", 5571 cst1_neg ? "-" : ""); 5572 print_generic_expr (dump_file, mul, 0); 5573 fprintf (dump_file, ", "); 5574 print_generic_expr (dump_file, arg1, 0); 5575 fprintf (dump_file, "\n"); 5576 } 5577 return; 5578 } 5579 break; 5580 default: 5581 break; 5582 } 5583 } 5584 } 5585 } 5586 } 5587 5588 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. 
*/ 5589 5590 static void 5591 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs) 5592 { 5593 tree rhs1; 5594 5595 if (dump_file && (dump_flags & TDF_DETAILS)) 5596 { 5597 fprintf (dump_file, "Transforming "); 5598 print_gimple_stmt (dump_file, stmt, 0, 0); 5599 } 5600 5601 rhs1 = gimple_assign_rhs1 (stmt); 5602 gimple_assign_set_rhs_from_tree (gsi, new_rhs); 5603 update_stmt (stmt); 5604 remove_visited_stmt_chain (rhs1); 5605 5606 if (dump_file && (dump_flags & TDF_DETAILS)) 5607 { 5608 fprintf (dump_file, " into "); 5609 print_gimple_stmt (dump_file, stmt, 0, 0); 5610 } 5611 } 5612 5613 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */ 5614 5615 static void 5616 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt, 5617 tree rhs1, tree rhs2) 5618 { 5619 if (dump_file && (dump_flags & TDF_DETAILS)) 5620 { 5621 fprintf (dump_file, "Transforming "); 5622 print_gimple_stmt (dump_file, stmt, 0, 0); 5623 } 5624 5625 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2); 5626 update_stmt (gsi_stmt (*gsi)); 5627 remove_visited_stmt_chain (rhs1); 5628 5629 if (dump_file && (dump_flags & TDF_DETAILS)) 5630 { 5631 fprintf (dump_file, " into "); 5632 print_gimple_stmt (dump_file, stmt, 0, 0); 5633 } 5634 } 5635 5636 /* Reassociate expressions in basic block BB and its post-dominator as 5637 children. 5638 5639 Bubble up return status from maybe_optimize_range_tests. */ 5640 5641 static bool 5642 reassociate_bb (basic_block bb) 5643 { 5644 gimple_stmt_iterator gsi; 5645 basic_block son; 5646 gimple *stmt = last_stmt (bb); 5647 bool cfg_cleanup_needed = false; 5648 5649 if (stmt && !gimple_visited_p (stmt)) 5650 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt); 5651 5652 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi)) 5653 { 5654 stmt = gsi_stmt (gsi); 5655 5656 if (is_gimple_assign (stmt) 5657 && !stmt_could_throw_p (stmt)) 5658 { 5659 tree lhs, rhs1, rhs2; 5660 enum tree_code rhs_code = gimple_assign_rhs_code (stmt); 5661 5662 /* If this is not a gimple binary expression, there is 5663 nothing for us to do with it. */ 5664 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) 5665 continue; 5666 5667 /* If this was part of an already processed statement, 5668 we don't need to touch it again. */ 5669 if (gimple_visited_p (stmt)) 5670 { 5671 /* This statement might have become dead because of previous 5672 reassociations. */ 5673 if (has_zero_uses (gimple_get_lhs (stmt))) 5674 { 5675 reassoc_remove_stmt (&gsi); 5676 release_defs (stmt); 5677 /* We might end up removing the last stmt above which 5678 places the iterator to the end of the sequence. 5679 Reset it to the last stmt in this case which might 5680 be the end of the sequence as well if we removed 5681 the last statement of the sequence. In which case 5682 we need to bail out. */ 5683 if (gsi_end_p (gsi)) 5684 { 5685 gsi = gsi_last_bb (bb); 5686 if (gsi_end_p (gsi)) 5687 break; 5688 } 5689 } 5690 continue; 5691 } 5692 5693 lhs = gimple_assign_lhs (stmt); 5694 rhs1 = gimple_assign_rhs1 (stmt); 5695 rhs2 = gimple_assign_rhs2 (stmt); 5696 5697 /* For non-bit or min/max operations we can't associate 5698 all types. Verify that here. 
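     (E.g. signed integer arithmetic with undefined overflow, or floating point without -fassociative-math, is rejected by can_reassociate_p below; the bitwise and MIN/MAX codes are exempt from that check.)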

/* Reassociate expressions in basic block BB and, recursively, in its
   children in the post-dominator tree.

   Bubble up the return status from maybe_optimize_range_tests.  */

static bool
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple *stmt = last_stmt (bb);
  bool cfg_cleanup_needed = false;

  if (stmt && !gimple_visited_p (stmt))
    cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  reassoc_remove_stmt (&gsi);
                  release_defs (stmt);
                  /* Removing the statement above may leave the iterator
                     at the end of the sequence.  In that case reset it
                     to the last statement of the block; if the block is
                     now empty, that is still the end and we need to
                     bail out.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      if (gsi_end_p (gsi))
                        break;
                    }
                }
              continue;
            }

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);

          /* For non-bit or min/max operations we can't associate
             all types.  Verify that here.  */
          if (rhs_code != BIT_IOR_EXPR
              && rhs_code != BIT_AND_EXPR
              && rhs_code != BIT_XOR_EXPR
              && rhs_code != MIN_EXPR
              && rhs_code != MAX_EXPR
              && (!can_reassociate_p (lhs)
                  || !can_reassociate_p (rhs1)
                  || !can_reassociate_p (rhs2)))
            continue;

          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry *> ops;
              tree powi_result = NULL_TREE;
              bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              int orig_len = ops.length ();
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }

              if (rhs_code == PLUS_EXPR
                  && transform_add_to_multiply (&ops))
                ops.qsort (sort_by_operand_rank);

              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                {
                  if (is_vector)
                    optimize_vec_cond_expr (rhs_code, &ops);
                  else
                    optimize_range_tests (rhs_code, &ops, NULL);
                }

              if (rhs_code == MULT_EXPR && !is_vector)
                {
                  attempt_builtin_copysign (&ops);

                  if (reassoc_insert_powi_p
                      && flag_unsafe_math_optimizations)
                    powi_result = attempt_builtin_powi (stmt, &ops);
                }

              operand_entry *last;
              bool negate_result = false;
              if (ops.length () > 1
                  && rhs_code == MULT_EXPR)
                {
                  last = ops.last ();
                  if ((integer_minus_onep (last->op)
                       || real_minus_onep (last->op))
                      && !HONOR_SNANS (TREE_TYPE (lhs))
                      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
                          || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
                    {
                      ops.pop ();
                      negate_result = true;
                    }
                }
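
              /* For example (made-up operands): for "x = a * b * -1.0"
                 the -1.0 has just been popped above; the remaining
                 a * b chain is rewritten below, and the negate_result
                 code at the end of this block reapplies the sign by
                 emitting "x = -tmp".  */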
              tree new_lhs = lhs;
              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  /* If the statement defining the operand has to be
                     inserted, insert it before the use.  */
                  if (ops.last ()->stmt_to_insert)
                    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width = get_reassociation_width (ops_num, rhs_code,
                                                       mode);

                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file,
                             "Width = %d was chosen for reassociation\n",
                             width);
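
                  /* With a width greater than one,
                     rewrite_expr_tree_parallel builds several independent
                     partial chains instead of one serial chain; e.g. with
                     width 2 a four-operand sum is emitted roughly as
                     (a + b) + (c + d) rather than ((a + b) + c) + d,
                     exposing more instruction-level parallelism.  */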
                  if (width > 1
                      && ops.length () > 3)
                    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
                                                width, ops);
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3)
                        swap_ops_for_binary_stmt (ops, len - 3, stmt);

                      new_lhs = rewrite_expr_tree (stmt, 0, ops,
                                                   powi_result != NULL
                                                   || negate_result,
                                                   len != orig_len);
                    }

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  */
                  if (powi_result)
                    {
                      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocpow");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        {
                          target_ssa = new_lhs;
                          new_lhs = lhs;
                        }
                      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
                                                      powi_result, target_ssa);
                      gimple_set_location (mul_stmt, gimple_location (stmt));
                      gimple_set_uid (mul_stmt, gimple_uid (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }

              if (negate_result)
                {
                  stmt = SSA_NAME_DEF_STMT (lhs);
                  tree tmp = make_ssa_name (TREE_TYPE (lhs));
                  gimple_set_lhs (stmt, tmp);
                  if (lhs != new_lhs)
                    tmp = new_lhs;
                  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
                                                           tmp);
                  gimple_set_uid (neg_stmt, gimple_uid (stmt));
                  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
                  update_stmt (stmt);
                }
            }
        }
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    cfg_cleanup_needed |= reassociate_bb (son);

  return cfg_cleanup_needed;
}

/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...;  // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...;  // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
                  && is_gimple_assign (use_stmt)
                  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
                  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
                                     build_zero_cst (TREE_TYPE (var)),
                                     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = REG_BR_PROB_BASE / 2;
      etrue->count = cond_bb->count / 2;
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      efalse->count -= etrue->count;
      then_bb->count -= etrue->count;

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
        othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
        othervar = gimple_assign_rhs1 (use_stmt);
      else
        gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}

void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}
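
/* The two dump routines above are primarily meant for use from the
   debugger, e.g.

     (gdb) call debug_ops_vector (ops)

   while stopped inside one of the reassociation routines.  */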

/* Bubble up return status from reassociate_bb.  */

static bool
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}

/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}

/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}

/* Gate and execute functions for Reassociation.  If INSERT_POWI_P,
   enable insertion of __builtin_powi calls.

   Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p)
{
  reassoc_insert_powi_p = insert_powi_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
    }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *)
    { return execute_reassoc (insert_powi_p); }

 private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}
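
/* Note: the pass pipeline creates more than one instance of pass_reassoc
   (see passes.def); the boolean parameter passed to set_pass_param above
   determines, per instance, whether that run is allowed to introduce
   __builtin_powi calls.  */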