/* Reassociation for trees.
   Copyright (C) 2005-2017 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop.h"
#include "flags.h"
#include "tree-ssa.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "params.h"
#include "builtins.h"
#include "gimplify.h"
#include "case-cfn-macros.h"

/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (They do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_*

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   3a. Combine repeated factors with the same occurrence counts
   into a __builtin_powi call that will later be optimized into
   an optimal number of multiplies.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   I.e., if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1), b (1), c (1), d (2), e (2)


   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op.  So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or fewer ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */

/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
   point 3a in the pass header comment.  */
static bool reassoc_insert_powi_p;

/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;

/* Operator, rank pair.  */
struct operand_entry
{
  unsigned int rank;
  unsigned int id;
  tree op;
  unsigned int count;
  gimple *stmt_to_insert;
};

static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.
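   (qsort is not required to be stable, so whenever two entries compare
   equal on everything else - e.g. two rank-0 constants of the same
   constant_type - sort_by_operand_rank falls back to comparing these
   monotonically assigned IDs, keeping the result host-independent.)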
*/ 207 static unsigned int next_operand_entry_id; 208 209 /* Starting rank number for a given basic block, so that we can rank 210 operations using unmovable instructions in that BB based on the bb 211 depth. */ 212 static long *bb_rank; 213 214 /* Operand->rank hashtable. */ 215 static hash_map<tree, long> *operand_rank; 216 217 /* Vector of SSA_NAMEs on which after reassociate_bb is done with 218 all basic blocks the CFG should be adjusted - basic blocks 219 split right after that SSA_NAME's definition statement and before 220 the only use, which must be a bit ior. */ 221 static vec<tree> reassoc_branch_fixups; 222 223 /* Forward decls. */ 224 static long get_rank (tree); 225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *); 226 227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts 228 possibly added by gsi_remove. */ 229 230 bool 231 reassoc_remove_stmt (gimple_stmt_iterator *gsi) 232 { 233 gimple *stmt = gsi_stmt (*gsi); 234 235 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI) 236 return gsi_remove (gsi, true); 237 238 gimple_stmt_iterator prev = *gsi; 239 gsi_prev (&prev); 240 unsigned uid = gimple_uid (stmt); 241 basic_block bb = gimple_bb (stmt); 242 bool ret = gsi_remove (gsi, true); 243 if (!gsi_end_p (prev)) 244 gsi_next (&prev); 245 else 246 prev = gsi_start_bb (bb); 247 gimple *end_stmt = gsi_stmt (*gsi); 248 while ((stmt = gsi_stmt (prev)) != end_stmt) 249 { 250 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0); 251 gimple_set_uid (stmt, uid); 252 gsi_next (&prev); 253 } 254 return ret; 255 } 256 257 /* Bias amount for loop-carried phis. We want this to be larger than 258 the depth of any reassociation tree we can see, but not larger than 259 the rank difference between two blocks. */ 260 #define PHI_LOOP_BIAS (1 << 15) 261 262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of 263 an innermost loop, and the phi has only a single use which is inside 264 the loop, then the rank is the block rank of the loop latch plus an 265 extra bias for the loop-carried dependence. This causes expressions 266 calculated into an accumulator variable to be independent for each 267 iteration of the loop. If STMT is some other phi, the rank is the 268 block rank of its containing block. */ 269 static long 270 phi_rank (gimple *stmt) 271 { 272 basic_block bb = gimple_bb (stmt); 273 struct loop *father = bb->loop_father; 274 tree res; 275 unsigned i; 276 use_operand_p use; 277 gimple *use_stmt; 278 279 /* We only care about real loops (those with a latch). */ 280 if (!father->latch) 281 return bb_rank[bb->index]; 282 283 /* Interesting phis must be in headers of innermost loops. */ 284 if (bb != father->header 285 || father->inner) 286 return bb_rank[bb->index]; 287 288 /* Ignore virtual SSA_NAMEs. */ 289 res = gimple_phi_result (stmt); 290 if (virtual_operand_p (res)) 291 return bb_rank[bb->index]; 292 293 /* The phi definition must have a single use, and that use must be 294 within the loop. Otherwise this isn't an accumulator pattern. */ 295 if (!single_imm_use (res, &use, &use_stmt) 296 || gimple_bb (use_stmt)->loop_father != father) 297 return bb_rank[bb->index]; 298 299 /* Look for phi arguments from within the loop. If found, bias this phi. 
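     For example, for a sum reduction in an innermost loop

       <header>:
       sum_1 = PHI <sum_0 (preheader), sum_2 (latch)>
       ...
       sum_2 = sum_1 + a_5;

     the argument sum_2 is defined inside the loop, so sum_1 is given
     bb_rank[latch->index] + PHI_LOOP_BIAS instead of its block rank
     (the SSA names here are illustrative only).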
*/ 300 for (i = 0; i < gimple_phi_num_args (stmt); i++) 301 { 302 tree arg = gimple_phi_arg_def (stmt, i); 303 if (TREE_CODE (arg) == SSA_NAME 304 && !SSA_NAME_IS_DEFAULT_DEF (arg)) 305 { 306 gimple *def_stmt = SSA_NAME_DEF_STMT (arg); 307 if (gimple_bb (def_stmt)->loop_father == father) 308 return bb_rank[father->latch->index] + PHI_LOOP_BIAS; 309 } 310 } 311 312 /* Must be an uninteresting phi. */ 313 return bb_rank[bb->index]; 314 } 315 316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a 317 loop-carried dependence of an innermost loop, return TRUE; else 318 return FALSE. */ 319 static bool 320 loop_carried_phi (tree exp) 321 { 322 gimple *phi_stmt; 323 long block_rank; 324 325 if (TREE_CODE (exp) != SSA_NAME 326 || SSA_NAME_IS_DEFAULT_DEF (exp)) 327 return false; 328 329 phi_stmt = SSA_NAME_DEF_STMT (exp); 330 331 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI) 332 return false; 333 334 /* Non-loop-carried phis have block rank. Loop-carried phis have 335 an additional bias added in. If this phi doesn't have block rank, 336 it's biased and should not be propagated. */ 337 block_rank = bb_rank[gimple_bb (phi_stmt)->index]; 338 339 if (phi_rank (phi_stmt) != block_rank) 340 return true; 341 342 return false; 343 } 344 345 /* Return the maximum of RANK and the rank that should be propagated 346 from expression OP. For most operands, this is just the rank of OP. 347 For loop-carried phis, the value is zero to avoid undoing the bias 348 in favor of the phi. */ 349 static long 350 propagate_rank (long rank, tree op) 351 { 352 long op_rank; 353 354 if (loop_carried_phi (op)) 355 return rank; 356 357 op_rank = get_rank (op); 358 359 return MAX (rank, op_rank); 360 } 361 362 /* Look up the operand rank structure for expression E. */ 363 364 static inline long 365 find_operand_rank (tree e) 366 { 367 long *slot = operand_rank->get (e); 368 return slot ? *slot : -1; 369 } 370 371 /* Insert {E,RANK} into the operand rank hashtable. */ 372 373 static inline void 374 insert_operand_rank (tree e, long rank) 375 { 376 gcc_assert (rank > 0); 377 gcc_assert (!operand_rank->put (e, rank)); 378 } 379 380 /* Given an expression E, return the rank of the expression. */ 381 382 static long 383 get_rank (tree e) 384 { 385 /* SSA_NAME's have the rank of the expression they are the result 386 of. 387 For globals and uninitialized values, the rank is 0. 388 For function arguments, use the pre-setup rank. 389 For PHI nodes, stores, asm statements, etc, we use the rank of 390 the BB. 391 For simple operations, the rank is the maximum rank of any of 392 its operands, or the bb_rank, whichever is less. 393 I make no claims that this is optimal, however, it gives good 394 results. */ 395 396 /* We make an exception to the normal ranking system to break 397 dependences of accumulator variables in loops. Suppose we 398 have a simple one-block loop containing: 399 400 x_1 = phi(x_0, x_2) 401 b = a + x_1 402 c = b + d 403 x_2 = c + e 404 405 As shown, each iteration of the calculation into x is fully 406 dependent upon the iteration before it. We would prefer to 407 see this in the form: 408 409 x_1 = phi(x_0, x_2) 410 b = a + d 411 c = b + e 412 x_2 = c + x_1 413 414 If the loop is unrolled, the calculations of b and c from 415 different iterations can be interleaved. 416 417 To obtain this result during reassociation, we bias the rank 418 of the phi definition x_1 upward, when it is recognized as an 419 accumulator pattern. 
     The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      long rank;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
        return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
        return phi_rank (stmt);

      if (!is_gimple_assign (stmt))
        return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
         exception, remove the bias from loop-carried phis when propagating
         the rank so that dependent operations are not also biased.  */
      /* Simply walk over all SSA uses - this takes advantage of the
         fact that non-SSA operands are is_gimple_min_invariant and
         thus have rank 0.  */
      rank = 0;
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
        rank = propagate_rank (rank, op);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e, 0);
          fprintf (dump_file, " is %ld\n", (rank + 1));
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}


/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 4
#define FLOAT_ONE_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    {
      /* Sort -1.0 and 1.0 constants last, while in some cases
         const_binop can't optimize some inexact operations, multiplication
         by -1.0 or 1.0 can always be merged with others.  */
      if (real_onep (t) || real_minus_onep (t))
        return FLOAT_ONE_CONST_TYPE;
      return FLOAT_CONST_TYPE;
    }
  else
    return OTHER_CONST_TYPE;
}

/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
        return constant_type (oea->op) - constant_type (oeb->op);
      else
        /* To make sorting result stable, we use unique IDs to determine
           order.  */
        return oeb->id > oea->id ? 1 : -1;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.
     */
  if (oeb->rank == oea->rank
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      /* SSA_NAME_VERSION is assigned pretty randomly, as we reuse
         versions of removed SSA_NAMEs, so if possible, prefer to sort
         based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
         See PR60418.  */
      if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
          && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
          && !oea->stmt_to_insert
          && !oeb->stmt_to_insert
          && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        {
          gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
          gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
          basic_block bba = gimple_bb (stmta);
          basic_block bbb = gimple_bb (stmtb);
          if (bbb != bba)
            {
              if (bb_rank[bbb->index] != bb_rank[bba->index])
                return bb_rank[bbb->index] - bb_rank[bba->index];
            }
          else
            {
              bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
              bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
              if (da != db)
                return da ? 1 : -1;
            }
        }

      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
      else
        return oeb->id > oea->id ? 1 : -1;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank > oea->rank ? 1 : -1;
  else
    return oeb->id > oea->id ? 1 : -1;
}

/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  oe->stmt_to_insert = stmt_to_insert;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
                       HOST_WIDE_INT repeat)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  oe->stmt_to_insert = NULL;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}

/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      tree rhs2 = gimple_assign_rhs2 (stmt);
      if (TREE_CODE (rhs1) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
        return false;
      if (rhs2
          && TREE_CODE (rhs2) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
        return false;
      return true;
    }

  return false;
}


/* Return true if STMT is a nop-conversion.
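   For instance, an assignment like _2 = (unsigned int) _1 with _1 of type
   int is a nop-conversion: both types have the same precision, so
   tree_nop_conversion_p holds and only the sign interpretation changes.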
*/ 634 635 static bool 636 gimple_nop_conversion_p (gimple *stmt) 637 { 638 if (gassign *ass = dyn_cast <gassign *> (stmt)) 639 { 640 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass)) 641 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)), 642 TREE_TYPE (gimple_assign_rhs1 (ass)))) 643 return true; 644 } 645 return false; 646 } 647 648 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the 649 operand of the negate operation. Otherwise, return NULL. */ 650 651 static tree 652 get_unary_op (tree name, enum tree_code opcode) 653 { 654 gimple *stmt = SSA_NAME_DEF_STMT (name); 655 656 /* Look through nop conversions (sign changes). */ 657 if (gimple_nop_conversion_p (stmt) 658 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME) 659 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); 660 661 if (!is_gimple_assign (stmt)) 662 return NULL_TREE; 663 664 if (gimple_assign_rhs_code (stmt) == opcode) 665 return gimple_assign_rhs1 (stmt); 666 return NULL_TREE; 667 } 668 669 /* Return true if OP1 and OP2 have the same value if casted to either type. */ 670 671 static bool 672 ops_equal_values_p (tree op1, tree op2) 673 { 674 if (op1 == op2) 675 return true; 676 677 tree orig_op1 = op1; 678 if (TREE_CODE (op1) == SSA_NAME) 679 { 680 gimple *stmt = SSA_NAME_DEF_STMT (op1); 681 if (gimple_nop_conversion_p (stmt)) 682 { 683 op1 = gimple_assign_rhs1 (stmt); 684 if (op1 == op2) 685 return true; 686 } 687 } 688 689 if (TREE_CODE (op2) == SSA_NAME) 690 { 691 gimple *stmt = SSA_NAME_DEF_STMT (op2); 692 if (gimple_nop_conversion_p (stmt)) 693 { 694 op2 = gimple_assign_rhs1 (stmt); 695 if (op1 == op2 696 || orig_op1 == op2) 697 return true; 698 } 699 } 700 701 return false; 702 } 703 704 705 /* If CURR and LAST are a pair of ops that OPCODE allows us to 706 eliminate through equivalences, do so, remove them from OPS, and 707 return true. Otherwise, return false. */ 708 709 static bool 710 eliminate_duplicate_pair (enum tree_code opcode, 711 vec<operand_entry *> *ops, 712 bool *all_done, 713 unsigned int i, 714 operand_entry *curr, 715 operand_entry *last) 716 { 717 718 /* If we have two of the same op, and the opcode is & |, min, or max, 719 we can eliminate one of them. 720 If we have two of the same op, and the opcode is ^, we can 721 eliminate both of them. 
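     For example, a & b & a drops one copy of a, while a ^ b ^ a removes
     both copies and leaves just b; if the two identical operands were
     the only ones left, the whole list collapses to a zero constant.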
*/ 722 723 if (last && last->op == curr->op) 724 { 725 switch (opcode) 726 { 727 case MAX_EXPR: 728 case MIN_EXPR: 729 case BIT_IOR_EXPR: 730 case BIT_AND_EXPR: 731 if (dump_file && (dump_flags & TDF_DETAILS)) 732 { 733 fprintf (dump_file, "Equivalence: "); 734 print_generic_expr (dump_file, curr->op, 0); 735 fprintf (dump_file, " [&|minmax] "); 736 print_generic_expr (dump_file, last->op, 0); 737 fprintf (dump_file, " -> "); 738 print_generic_stmt (dump_file, last->op, 0); 739 } 740 741 ops->ordered_remove (i); 742 reassociate_stats.ops_eliminated ++; 743 744 return true; 745 746 case BIT_XOR_EXPR: 747 if (dump_file && (dump_flags & TDF_DETAILS)) 748 { 749 fprintf (dump_file, "Equivalence: "); 750 print_generic_expr (dump_file, curr->op, 0); 751 fprintf (dump_file, " ^ "); 752 print_generic_expr (dump_file, last->op, 0); 753 fprintf (dump_file, " -> nothing\n"); 754 } 755 756 reassociate_stats.ops_eliminated += 2; 757 758 if (ops->length () == 2) 759 { 760 ops->truncate (0); 761 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op))); 762 *all_done = true; 763 } 764 else 765 { 766 ops->ordered_remove (i-1); 767 ops->ordered_remove (i-1); 768 } 769 770 return true; 771 772 default: 773 break; 774 } 775 } 776 return false; 777 } 778 779 static vec<tree> plus_negates; 780 781 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not 782 expression, look in OPS for a corresponding positive operation to cancel 783 it out. If we find one, remove the other from OPS, replace 784 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise, 785 return false. */ 786 787 static bool 788 eliminate_plus_minus_pair (enum tree_code opcode, 789 vec<operand_entry *> *ops, 790 unsigned int currindex, 791 operand_entry *curr) 792 { 793 tree negateop; 794 tree notop; 795 unsigned int i; 796 operand_entry *oe; 797 798 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME) 799 return false; 800 801 negateop = get_unary_op (curr->op, NEGATE_EXPR); 802 notop = get_unary_op (curr->op, BIT_NOT_EXPR); 803 if (negateop == NULL_TREE && notop == NULL_TREE) 804 return false; 805 806 /* Any non-negated version will have a rank that is one less than 807 the current rank. So once we hit those ranks, if we don't find 808 one, we can stop. 
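     For example, if OPS for a PLUS_EXPR chain contains both a_1 and a
     negate t_2 = -a_1, the pair cancels: both entries are removed and a
     zero constant is pushed, so a_1 + b_3 + t_2 degenerates to b_3 + 0.
     A bitwise-not ~a_1 combines with a_1 to -1 in the same way.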
*/ 809 810 for (i = currindex + 1; 811 ops->iterate (i, &oe) 812 && oe->rank >= curr->rank - 1 ; 813 i++) 814 { 815 if (negateop 816 && ops_equal_values_p (oe->op, negateop)) 817 { 818 if (dump_file && (dump_flags & TDF_DETAILS)) 819 { 820 fprintf (dump_file, "Equivalence: "); 821 print_generic_expr (dump_file, negateop, 0); 822 fprintf (dump_file, " + -"); 823 print_generic_expr (dump_file, oe->op, 0); 824 fprintf (dump_file, " -> 0\n"); 825 } 826 827 ops->ordered_remove (i); 828 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op))); 829 ops->ordered_remove (currindex); 830 reassociate_stats.ops_eliminated ++; 831 832 return true; 833 } 834 else if (notop 835 && ops_equal_values_p (oe->op, notop)) 836 { 837 tree op_type = TREE_TYPE (oe->op); 838 839 if (dump_file && (dump_flags & TDF_DETAILS)) 840 { 841 fprintf (dump_file, "Equivalence: "); 842 print_generic_expr (dump_file, notop, 0); 843 fprintf (dump_file, " + ~"); 844 print_generic_expr (dump_file, oe->op, 0); 845 fprintf (dump_file, " -> -1\n"); 846 } 847 848 ops->ordered_remove (i); 849 add_to_ops_vec (ops, build_all_ones_cst (op_type)); 850 ops->ordered_remove (currindex); 851 reassociate_stats.ops_eliminated ++; 852 853 return true; 854 } 855 } 856 857 /* If CURR->OP is a negate expr without nop conversion in a plus expr: 858 save it for later inspection in repropagate_negates(). */ 859 if (negateop != NULL_TREE 860 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR) 861 plus_negates.safe_push (curr->op); 862 863 return false; 864 } 865 866 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a 867 bitwise not expression, look in OPS for a corresponding operand to 868 cancel it out. If we find one, remove the other from OPS, replace 869 OPS[CURRINDEX] with 0, and return true. Otherwise, return 870 false. */ 871 872 static bool 873 eliminate_not_pairs (enum tree_code opcode, 874 vec<operand_entry *> *ops, 875 unsigned int currindex, 876 operand_entry *curr) 877 { 878 tree notop; 879 unsigned int i; 880 operand_entry *oe; 881 882 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) 883 || TREE_CODE (curr->op) != SSA_NAME) 884 return false; 885 886 notop = get_unary_op (curr->op, BIT_NOT_EXPR); 887 if (notop == NULL_TREE) 888 return false; 889 890 /* Any non-not version will have a rank that is one less than 891 the current rank. So once we hit those ranks, if we don't find 892 one, we can stop. */ 893 894 for (i = currindex + 1; 895 ops->iterate (i, &oe) 896 && oe->rank >= curr->rank - 1; 897 i++) 898 { 899 if (oe->op == notop) 900 { 901 if (dump_file && (dump_flags & TDF_DETAILS)) 902 { 903 fprintf (dump_file, "Equivalence: "); 904 print_generic_expr (dump_file, notop, 0); 905 if (opcode == BIT_AND_EXPR) 906 fprintf (dump_file, " & ~"); 907 else if (opcode == BIT_IOR_EXPR) 908 fprintf (dump_file, " | ~"); 909 print_generic_expr (dump_file, oe->op, 0); 910 if (opcode == BIT_AND_EXPR) 911 fprintf (dump_file, " -> 0\n"); 912 else if (opcode == BIT_IOR_EXPR) 913 fprintf (dump_file, " -> -1\n"); 914 } 915 916 if (opcode == BIT_AND_EXPR) 917 oe->op = build_zero_cst (TREE_TYPE (oe->op)); 918 else if (opcode == BIT_IOR_EXPR) 919 oe->op = build_all_ones_cst (TREE_TYPE (oe->op)); 920 921 reassociate_stats.ops_eliminated += ops->length () - 1; 922 ops->truncate (0); 923 ops->quick_push (oe); 924 return true; 925 } 926 } 927 928 return false; 929 } 930 931 /* Use constant value that may be present in OPS to try to eliminate 932 operands. 
Note that this function is only really used when we've 933 eliminated ops for other reasons, or merged constants. Across 934 single statements, fold already does all of this, plus more. There 935 is little point in duplicating logic, so I've only included the 936 identities that I could ever construct testcases to trigger. */ 937 938 static void 939 eliminate_using_constants (enum tree_code opcode, 940 vec<operand_entry *> *ops) 941 { 942 operand_entry *oelast = ops->last (); 943 tree type = TREE_TYPE (oelast->op); 944 945 if (oelast->rank == 0 946 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type))) 947 { 948 switch (opcode) 949 { 950 case BIT_AND_EXPR: 951 if (integer_zerop (oelast->op)) 952 { 953 if (ops->length () != 1) 954 { 955 if (dump_file && (dump_flags & TDF_DETAILS)) 956 fprintf (dump_file, "Found & 0, removing all other ops\n"); 957 958 reassociate_stats.ops_eliminated += ops->length () - 1; 959 960 ops->truncate (0); 961 ops->quick_push (oelast); 962 return; 963 } 964 } 965 else if (integer_all_onesp (oelast->op)) 966 { 967 if (ops->length () != 1) 968 { 969 if (dump_file && (dump_flags & TDF_DETAILS)) 970 fprintf (dump_file, "Found & -1, removing\n"); 971 ops->pop (); 972 reassociate_stats.ops_eliminated++; 973 } 974 } 975 break; 976 case BIT_IOR_EXPR: 977 if (integer_all_onesp (oelast->op)) 978 { 979 if (ops->length () != 1) 980 { 981 if (dump_file && (dump_flags & TDF_DETAILS)) 982 fprintf (dump_file, "Found | -1, removing all other ops\n"); 983 984 reassociate_stats.ops_eliminated += ops->length () - 1; 985 986 ops->truncate (0); 987 ops->quick_push (oelast); 988 return; 989 } 990 } 991 else if (integer_zerop (oelast->op)) 992 { 993 if (ops->length () != 1) 994 { 995 if (dump_file && (dump_flags & TDF_DETAILS)) 996 fprintf (dump_file, "Found | 0, removing\n"); 997 ops->pop (); 998 reassociate_stats.ops_eliminated++; 999 } 1000 } 1001 break; 1002 case MULT_EXPR: 1003 if (integer_zerop (oelast->op) 1004 || (FLOAT_TYPE_P (type) 1005 && !HONOR_NANS (type) 1006 && !HONOR_SIGNED_ZEROS (type) 1007 && real_zerop (oelast->op))) 1008 { 1009 if (ops->length () != 1) 1010 { 1011 if (dump_file && (dump_flags & TDF_DETAILS)) 1012 fprintf (dump_file, "Found * 0, removing all other ops\n"); 1013 1014 reassociate_stats.ops_eliminated += ops->length () - 1; 1015 ops->truncate (0); 1016 ops->quick_push (oelast); 1017 return; 1018 } 1019 } 1020 else if (integer_onep (oelast->op) 1021 || (FLOAT_TYPE_P (type) 1022 && !HONOR_SNANS (type) 1023 && real_onep (oelast->op))) 1024 { 1025 if (ops->length () != 1) 1026 { 1027 if (dump_file && (dump_flags & TDF_DETAILS)) 1028 fprintf (dump_file, "Found * 1, removing\n"); 1029 ops->pop (); 1030 reassociate_stats.ops_eliminated++; 1031 return; 1032 } 1033 } 1034 break; 1035 case BIT_XOR_EXPR: 1036 case PLUS_EXPR: 1037 case MINUS_EXPR: 1038 if (integer_zerop (oelast->op) 1039 || (FLOAT_TYPE_P (type) 1040 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR) 1041 && fold_real_zero_addition_p (type, oelast->op, 1042 opcode == MINUS_EXPR))) 1043 { 1044 if (ops->length () != 1) 1045 { 1046 if (dump_file && (dump_flags & TDF_DETAILS)) 1047 fprintf (dump_file, "Found [|^+] 0, removing\n"); 1048 ops->pop (); 1049 reassociate_stats.ops_eliminated++; 1050 return; 1051 } 1052 } 1053 break; 1054 default: 1055 break; 1056 } 1057 } 1058 } 1059 1060 1061 static void linearize_expr_tree (vec<operand_entry *> *, gimple *, 1062 bool, bool); 1063 1064 /* Structure for tracking and counting operands. 
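   One oecount entry is created per (operand, code) pair seen while
   walking the inner MULT_EXPR/RDIV_EXPR chains in undistribute_ops_list
   below; CNT counts its occurrences and ID keeps the later qsort stable.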
*/ 1065 struct oecount { 1066 unsigned int cnt; 1067 unsigned int id; 1068 enum tree_code oecode; 1069 tree op; 1070 }; 1071 1072 1073 /* The heap for the oecount hashtable and the sorted list of operands. */ 1074 static vec<oecount> cvec; 1075 1076 1077 /* Oecount hashtable helpers. */ 1078 1079 struct oecount_hasher : int_hash <int, 0, 1> 1080 { 1081 static inline hashval_t hash (int); 1082 static inline bool equal (int, int); 1083 }; 1084 1085 /* Hash function for oecount. */ 1086 1087 inline hashval_t 1088 oecount_hasher::hash (int p) 1089 { 1090 const oecount *c = &cvec[p - 42]; 1091 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode; 1092 } 1093 1094 /* Comparison function for oecount. */ 1095 1096 inline bool 1097 oecount_hasher::equal (int p1, int p2) 1098 { 1099 const oecount *c1 = &cvec[p1 - 42]; 1100 const oecount *c2 = &cvec[p2 - 42]; 1101 return c1->oecode == c2->oecode && c1->op == c2->op; 1102 } 1103 1104 /* Comparison function for qsort sorting oecount elements by count. */ 1105 1106 static int 1107 oecount_cmp (const void *p1, const void *p2) 1108 { 1109 const oecount *c1 = (const oecount *)p1; 1110 const oecount *c2 = (const oecount *)p2; 1111 if (c1->cnt != c2->cnt) 1112 return c1->cnt > c2->cnt ? 1 : -1; 1113 else 1114 /* If counts are identical, use unique IDs to stabilize qsort. */ 1115 return c1->id > c2->id ? 1 : -1; 1116 } 1117 1118 /* Return TRUE iff STMT represents a builtin call that raises OP 1119 to some exponent. */ 1120 1121 static bool 1122 stmt_is_power_of_op (gimple *stmt, tree op) 1123 { 1124 if (!is_gimple_call (stmt)) 1125 return false; 1126 1127 switch (gimple_call_combined_fn (stmt)) 1128 { 1129 CASE_CFN_POW: 1130 CASE_CFN_POWI: 1131 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0)); 1132 1133 default: 1134 return false; 1135 } 1136 } 1137 1138 /* Given STMT which is a __builtin_pow* call, decrement its exponent 1139 in place and return the result. Assumes that stmt_is_power_of_op 1140 was previously called for STMT and returned TRUE. */ 1141 1142 static HOST_WIDE_INT 1143 decrement_power (gimple *stmt) 1144 { 1145 REAL_VALUE_TYPE c, cint; 1146 HOST_WIDE_INT power; 1147 tree arg1; 1148 1149 switch (gimple_call_combined_fn (stmt)) 1150 { 1151 CASE_CFN_POW: 1152 arg1 = gimple_call_arg (stmt, 1); 1153 c = TREE_REAL_CST (arg1); 1154 power = real_to_integer (&c) - 1; 1155 real_from_integer (&cint, VOIDmode, power, SIGNED); 1156 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint)); 1157 return power; 1158 1159 CASE_CFN_POWI: 1160 arg1 = gimple_call_arg (stmt, 1); 1161 power = TREE_INT_CST_LOW (arg1) - 1; 1162 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power)); 1163 return power; 1164 1165 default: 1166 gcc_unreachable (); 1167 } 1168 } 1169 1170 /* Replace SSA defined by STMT and replace all its uses with new 1171 SSA. Also return the new SSA. */ 1172 1173 static tree 1174 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op) 1175 { 1176 gimple *use_stmt; 1177 use_operand_p use; 1178 imm_use_iterator iter; 1179 tree new_lhs, new_debug_lhs = NULL_TREE; 1180 tree lhs = gimple_get_lhs (stmt); 1181 1182 new_lhs = make_ssa_name (TREE_TYPE (lhs)); 1183 gimple_set_lhs (stmt, new_lhs); 1184 1185 /* Also need to update GIMPLE_DEBUGs. 
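     The old LHS value can still be described as new_lhs OPCODE op, so
     debug uses are redirected to a fresh DEBUG_EXPR_DECL bound to that
     expression instead of to new_lhs itself.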
     */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      tree repl = new_lhs;
      if (is_gimple_debug (use_stmt))
        {
          if (new_debug_lhs == NULL_TREE)
            {
              new_debug_lhs = make_node (DEBUG_EXPR_DECL);
              gdebug *def_temp
                = gimple_build_debug_bind (new_debug_lhs,
                                           build2 (opcode, TREE_TYPE (lhs),
                                                   new_lhs, op),
                                           stmt);
              DECL_ARTIFICIAL (new_debug_lhs) = 1;
              TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
              SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
              gimple_set_uid (def_temp, gimple_uid (stmt));
              gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
              gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
            }
          repl = new_debug_lhs;
        }
      FOR_EACH_IMM_USE_ON_STMT (use, iter)
        SET_USE (use, repl);
      update_stmt (use_stmt);
    }
  return new_lhs;
}

/* Replace all SSAs defined in STMTS_TO_FIX and replace their
   uses with new SSAs.  Also do this for the stmt that defines DEF
   if *DEF is not OP.  */

static void
make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
                           vec<gimple *> &stmts_to_fix)
{
  unsigned i;
  gimple *stmt;

  if (*def != op
      && TREE_CODE (*def) == SSA_NAME
      && (stmt = SSA_NAME_DEF_STMT (*def))
      && gimple_code (stmt) != GIMPLE_NOP)
    *def = make_new_ssa_for_def (stmt, opcode, op);

  FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
    make_new_ssa_for_def (stmt, opcode, op);
}

/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}

/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  tree orig_def = *def;
  gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.
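     For example, when un-distributing a*x + b*x, zero_one_operation
     strips one factor x from the chain that computed a*x; every
     statement recorded in stmts_to_fix then computes a different value
     than before, so it is given a fresh LHS (and a debug bind for the
     old one) by make_new_ssa_for_all_defs.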
*/ 1277 auto_vec<gimple *, 64> stmts_to_fix; 1278 1279 do 1280 { 1281 tree name; 1282 1283 if (opcode == MULT_EXPR) 1284 { 1285 if (stmt_is_power_of_op (stmt, op)) 1286 { 1287 if (decrement_power (stmt) == 1) 1288 { 1289 if (stmts_to_fix.length () > 0) 1290 stmts_to_fix.pop (); 1291 propagate_op_to_single_use (op, stmt, def); 1292 } 1293 break; 1294 } 1295 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR) 1296 { 1297 if (gimple_assign_rhs1 (stmt) == op) 1298 { 1299 tree cst = build_minus_one_cst (TREE_TYPE (op)); 1300 if (stmts_to_fix.length () > 0) 1301 stmts_to_fix.pop (); 1302 propagate_op_to_single_use (cst, stmt, def); 1303 break; 1304 } 1305 else if (integer_minus_onep (op) 1306 || real_minus_onep (op)) 1307 { 1308 gimple_assign_set_rhs_code 1309 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt))); 1310 break; 1311 } 1312 } 1313 } 1314 1315 name = gimple_assign_rhs1 (stmt); 1316 1317 /* If this is the operation we look for and one of the operands 1318 is ours simply propagate the other operand into the stmts 1319 single use. */ 1320 if (gimple_assign_rhs_code (stmt) == opcode 1321 && (name == op 1322 || gimple_assign_rhs2 (stmt) == op)) 1323 { 1324 if (name == op) 1325 name = gimple_assign_rhs2 (stmt); 1326 if (stmts_to_fix.length () > 0) 1327 stmts_to_fix.pop (); 1328 propagate_op_to_single_use (name, stmt, def); 1329 break; 1330 } 1331 1332 /* We might have a multiply of two __builtin_pow* calls, and 1333 the operand might be hiding in the rightmost one. Likewise 1334 this can happen for a negate. */ 1335 if (opcode == MULT_EXPR 1336 && gimple_assign_rhs_code (stmt) == opcode 1337 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME 1338 && has_single_use (gimple_assign_rhs2 (stmt))) 1339 { 1340 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 1341 if (stmt_is_power_of_op (stmt2, op)) 1342 { 1343 if (decrement_power (stmt2) == 1) 1344 propagate_op_to_single_use (op, stmt2, def); 1345 else 1346 stmts_to_fix.safe_push (stmt2); 1347 break; 1348 } 1349 else if (is_gimple_assign (stmt2) 1350 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR) 1351 { 1352 if (gimple_assign_rhs1 (stmt2) == op) 1353 { 1354 tree cst = build_minus_one_cst (TREE_TYPE (op)); 1355 propagate_op_to_single_use (cst, stmt2, def); 1356 break; 1357 } 1358 else if (integer_minus_onep (op) 1359 || real_minus_onep (op)) 1360 { 1361 stmts_to_fix.safe_push (stmt2); 1362 gimple_assign_set_rhs_code 1363 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2))); 1364 break; 1365 } 1366 } 1367 } 1368 1369 /* Continue walking the chain. */ 1370 gcc_assert (name != op 1371 && TREE_CODE (name) == SSA_NAME); 1372 stmt = SSA_NAME_DEF_STMT (name); 1373 stmts_to_fix.safe_push (stmt); 1374 } 1375 while (1); 1376 1377 if (stmts_to_fix.length () > 0 || *def == orig_def) 1378 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix); 1379 } 1380 1381 /* Returns true if statement S1 dominates statement S2. Like 1382 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */ 1383 1384 static bool 1385 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2) 1386 { 1387 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2); 1388 1389 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D) 1390 SSA_NAME. Assume it lives at the beginning of function and 1391 thus dominates everything. */ 1392 if (!bb1 || s1 == s2) 1393 return true; 1394 1395 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. 
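     (For instance, the GIMPLE_NOP definition of a default-def SSA_NAME
     has no containing block, so nothing is dominated by it.)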
*/ 1396 if (!bb2) 1397 return false; 1398 1399 if (bb1 == bb2) 1400 { 1401 /* PHIs in the same basic block are assumed to be 1402 executed all in parallel, if only one stmt is a PHI, 1403 it dominates the other stmt in the same basic block. */ 1404 if (gimple_code (s1) == GIMPLE_PHI) 1405 return true; 1406 1407 if (gimple_code (s2) == GIMPLE_PHI) 1408 return false; 1409 1410 gcc_assert (gimple_uid (s1) && gimple_uid (s2)); 1411 1412 if (gimple_uid (s1) < gimple_uid (s2)) 1413 return true; 1414 1415 if (gimple_uid (s1) > gimple_uid (s2)) 1416 return false; 1417 1418 gimple_stmt_iterator gsi = gsi_for_stmt (s1); 1419 unsigned int uid = gimple_uid (s1); 1420 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi)) 1421 { 1422 gimple *s = gsi_stmt (gsi); 1423 if (gimple_uid (s) != uid) 1424 break; 1425 if (s == s2) 1426 return true; 1427 } 1428 1429 return false; 1430 } 1431 1432 return dominated_by_p (CDI_DOMINATORS, bb2, bb1); 1433 } 1434 1435 /* Insert STMT after INSERT_POINT. */ 1436 1437 static void 1438 insert_stmt_after (gimple *stmt, gimple *insert_point) 1439 { 1440 gimple_stmt_iterator gsi; 1441 basic_block bb; 1442 1443 if (gimple_code (insert_point) == GIMPLE_PHI) 1444 bb = gimple_bb (insert_point); 1445 else if (!stmt_ends_bb_p (insert_point)) 1446 { 1447 gsi = gsi_for_stmt (insert_point); 1448 gimple_set_uid (stmt, gimple_uid (insert_point)); 1449 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); 1450 return; 1451 } 1452 else 1453 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME, 1454 thus if it must end a basic block, it should be a call that can 1455 throw, or some assignment that can throw. If it throws, the LHS 1456 of it will not be initialized though, so only valid places using 1457 the SSA_NAME should be dominated by the fallthru edge. */ 1458 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest; 1459 gsi = gsi_after_labels (bb); 1460 if (gsi_end_p (gsi)) 1461 { 1462 gimple_stmt_iterator gsi2 = gsi_last_bb (bb); 1463 gimple_set_uid (stmt, 1464 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2))); 1465 } 1466 else 1467 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi))); 1468 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 1469 } 1470 1471 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for 1472 the result. Places the statement after the definition of either 1473 OP1 or OP2. Returns the new statement. */ 1474 1475 static gimple * 1476 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode) 1477 { 1478 gimple *op1def = NULL, *op2def = NULL; 1479 gimple_stmt_iterator gsi; 1480 tree op; 1481 gassign *sum; 1482 1483 /* Create the addition statement. */ 1484 op = make_ssa_name (type); 1485 sum = gimple_build_assign (op, opcode, op1, op2); 1486 1487 /* Find an insertion place and insert. */ 1488 if (TREE_CODE (op1) == SSA_NAME) 1489 op1def = SSA_NAME_DEF_STMT (op1); 1490 if (TREE_CODE (op2) == SSA_NAME) 1491 op2def = SSA_NAME_DEF_STMT (op2); 1492 if ((!op1def || gimple_nop_p (op1def)) 1493 && (!op2def || gimple_nop_p (op2def))) 1494 { 1495 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); 1496 if (gsi_end_p (gsi)) 1497 { 1498 gimple_stmt_iterator gsi2 1499 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); 1500 gimple_set_uid (sum, 1501 gsi_end_p (gsi2) ? 
1 : gimple_uid (gsi_stmt (gsi2))); 1502 } 1503 else 1504 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi))); 1505 gsi_insert_before (&gsi, sum, GSI_NEW_STMT); 1506 } 1507 else 1508 { 1509 gimple *insert_point; 1510 if ((!op1def || gimple_nop_p (op1def)) 1511 || (op2def && !gimple_nop_p (op2def) 1512 && reassoc_stmt_dominates_stmt_p (op1def, op2def))) 1513 insert_point = op2def; 1514 else 1515 insert_point = op1def; 1516 insert_stmt_after (sum, insert_point); 1517 } 1518 update_stmt (sum); 1519 1520 return sum; 1521 } 1522 1523 /* Perform un-distribution of divisions and multiplications. 1524 A * X + B * X is transformed into (A + B) * X and A / X + B / X 1525 to (A + B) / X for real X. 1526 1527 The algorithm is organized as follows. 1528 1529 - First we walk the addition chain *OPS looking for summands that 1530 are defined by a multiplication or a real division. This results 1531 in the candidates bitmap with relevant indices into *OPS. 1532 1533 - Second we build the chains of multiplications or divisions for 1534 these candidates, counting the number of occurrences of (operand, code) 1535 pairs in all of the candidates chains. 1536 1537 - Third we sort the (operand, code) pairs by number of occurrence and 1538 process them starting with the pair with the most uses. 1539 1540 * For each such pair we walk the candidates again to build a 1541 second candidate bitmap noting all multiplication/division chains 1542 that have at least one occurrence of (operand, code). 1543 1544 * We build an alternate addition chain only covering these 1545 candidates with one (operand, code) operation removed from their 1546 multiplication/division chain. 1547 1548 * The first candidate gets replaced by the alternate addition chain 1549 multiplied/divided by the operand. 1550 1551 * All candidate chains get disabled for further processing and 1552 processing of (operand, code) pairs continues. 1553 1554 The alternate addition chains built are re-processed by the main 1555 reassociation algorithm which allows optimizing a * x * y + b * y * x 1556 to (a + b ) * x * y in one invocation of the reassociation pass. */ 1557 1558 static bool 1559 undistribute_ops_list (enum tree_code opcode, 1560 vec<operand_entry *> *ops, struct loop *loop) 1561 { 1562 unsigned int length = ops->length (); 1563 operand_entry *oe1; 1564 unsigned i, j; 1565 unsigned nr_candidates, nr_candidates2; 1566 sbitmap_iterator sbi0; 1567 vec<operand_entry *> *subops; 1568 bool changed = false; 1569 unsigned int next_oecount_id = 0; 1570 1571 if (length <= 1 1572 || opcode != PLUS_EXPR) 1573 return false; 1574 1575 /* Build a list of candidates to process. 
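     A candidate is a summand whose definition is a single-use MULT_EXPR
     or RDIV_EXPR inside LOOP (see is_reassociable_op), e.g. the a*x and
     b*x terms of a*x + b*x + c.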
*/ 1576 auto_sbitmap candidates (length); 1577 bitmap_clear (candidates); 1578 nr_candidates = 0; 1579 FOR_EACH_VEC_ELT (*ops, i, oe1) 1580 { 1581 enum tree_code dcode; 1582 gimple *oe1def; 1583 1584 if (TREE_CODE (oe1->op) != SSA_NAME) 1585 continue; 1586 oe1def = SSA_NAME_DEF_STMT (oe1->op); 1587 if (!is_gimple_assign (oe1def)) 1588 continue; 1589 dcode = gimple_assign_rhs_code (oe1def); 1590 if ((dcode != MULT_EXPR 1591 && dcode != RDIV_EXPR) 1592 || !is_reassociable_op (oe1def, dcode, loop)) 1593 continue; 1594 1595 bitmap_set_bit (candidates, i); 1596 nr_candidates++; 1597 } 1598 1599 if (nr_candidates < 2) 1600 return false; 1601 1602 if (dump_file && (dump_flags & TDF_DETAILS)) 1603 { 1604 fprintf (dump_file, "searching for un-distribute opportunities "); 1605 print_generic_expr (dump_file, 1606 (*ops)[bitmap_first_set_bit (candidates)]->op, 0); 1607 fprintf (dump_file, " %d\n", nr_candidates); 1608 } 1609 1610 /* Build linearized sub-operand lists and the counting table. */ 1611 cvec.create (0); 1612 1613 hash_table<oecount_hasher> ctable (15); 1614 1615 /* ??? Macro arguments cannot have multi-argument template types in 1616 them. This typedef is needed to workaround that limitation. */ 1617 typedef vec<operand_entry *> vec_operand_entry_t_heap; 1618 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ()); 1619 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) 1620 { 1621 gimple *oedef; 1622 enum tree_code oecode; 1623 unsigned j; 1624 1625 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op); 1626 oecode = gimple_assign_rhs_code (oedef); 1627 linearize_expr_tree (&subops[i], oedef, 1628 associative_tree_code (oecode), false); 1629 1630 FOR_EACH_VEC_ELT (subops[i], j, oe1) 1631 { 1632 oecount c; 1633 int *slot; 1634 int idx; 1635 c.oecode = oecode; 1636 c.cnt = 1; 1637 c.id = next_oecount_id++; 1638 c.op = oe1->op; 1639 cvec.safe_push (c); 1640 idx = cvec.length () + 41; 1641 slot = ctable.find_slot (idx, INSERT); 1642 if (!*slot) 1643 { 1644 *slot = idx; 1645 } 1646 else 1647 { 1648 cvec.pop (); 1649 cvec[*slot - 42].cnt++; 1650 } 1651 } 1652 } 1653 1654 /* Sort the counting table. */ 1655 cvec.qsort (oecount_cmp); 1656 1657 if (dump_file && (dump_flags & TDF_DETAILS)) 1658 { 1659 oecount *c; 1660 fprintf (dump_file, "Candidates:\n"); 1661 FOR_EACH_VEC_ELT (cvec, j, c) 1662 { 1663 fprintf (dump_file, " %u %s: ", c->cnt, 1664 c->oecode == MULT_EXPR 1665 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?"); 1666 print_generic_expr (dump_file, c->op, 0); 1667 fprintf (dump_file, "\n"); 1668 } 1669 } 1670 1671 /* Process the (operand, code) pairs in order of most occurrence. */ 1672 auto_sbitmap candidates2 (length); 1673 while (!cvec.is_empty ()) 1674 { 1675 oecount *c = &cvec.last (); 1676 if (c->cnt < 2) 1677 break; 1678 1679 /* Now collect the operands in the outer chain that contain 1680 the common operand in their inner chain. */ 1681 bitmap_clear (candidates2); 1682 nr_candidates2 = 0; 1683 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) 1684 { 1685 gimple *oedef; 1686 enum tree_code oecode; 1687 unsigned j; 1688 tree op = (*ops)[i]->op; 1689 1690 /* If we undistributed in this chain already this may be 1691 a constant. 
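	     (Earlier iterations rewrite every merged chain except the
	     first one into a zero constant, and those entries are
	     skipped here.)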
*/ 1692 if (TREE_CODE (op) != SSA_NAME) 1693 continue; 1694 1695 oedef = SSA_NAME_DEF_STMT (op); 1696 oecode = gimple_assign_rhs_code (oedef); 1697 if (oecode != c->oecode) 1698 continue; 1699 1700 FOR_EACH_VEC_ELT (subops[i], j, oe1) 1701 { 1702 if (oe1->op == c->op) 1703 { 1704 bitmap_set_bit (candidates2, i); 1705 ++nr_candidates2; 1706 break; 1707 } 1708 } 1709 } 1710 1711 if (nr_candidates2 >= 2) 1712 { 1713 operand_entry *oe1, *oe2; 1714 gimple *prod; 1715 int first = bitmap_first_set_bit (candidates2); 1716 1717 /* Build the new addition chain. */ 1718 oe1 = (*ops)[first]; 1719 if (dump_file && (dump_flags & TDF_DETAILS)) 1720 { 1721 fprintf (dump_file, "Building ("); 1722 print_generic_expr (dump_file, oe1->op, 0); 1723 } 1724 zero_one_operation (&oe1->op, c->oecode, c->op); 1725 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0) 1726 { 1727 gimple *sum; 1728 oe2 = (*ops)[i]; 1729 if (dump_file && (dump_flags & TDF_DETAILS)) 1730 { 1731 fprintf (dump_file, " + "); 1732 print_generic_expr (dump_file, oe2->op, 0); 1733 } 1734 zero_one_operation (&oe2->op, c->oecode, c->op); 1735 sum = build_and_add_sum (TREE_TYPE (oe1->op), 1736 oe1->op, oe2->op, opcode); 1737 oe2->op = build_zero_cst (TREE_TYPE (oe2->op)); 1738 oe2->rank = 0; 1739 oe1->op = gimple_get_lhs (sum); 1740 } 1741 1742 /* Apply the multiplication/division. */ 1743 prod = build_and_add_sum (TREE_TYPE (oe1->op), 1744 oe1->op, c->op, c->oecode); 1745 if (dump_file && (dump_flags & TDF_DETAILS)) 1746 { 1747 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/"); 1748 print_generic_expr (dump_file, c->op, 0); 1749 fprintf (dump_file, "\n"); 1750 } 1751 1752 /* Record it in the addition chain and disable further 1753 undistribution with this op. */ 1754 oe1->op = gimple_assign_lhs (prod); 1755 oe1->rank = get_rank (oe1->op); 1756 subops[first].release (); 1757 1758 changed = true; 1759 } 1760 1761 cvec.pop (); 1762 } 1763 1764 for (i = 0; i < ops->length (); ++i) 1765 subops[i].release (); 1766 free (subops); 1767 cvec.release (); 1768 1769 return changed; 1770 } 1771 1772 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison 1773 expression, examine the other OPS to see if any of them are comparisons 1774 of the same values, which we may be able to combine or eliminate. 1775 For example, we can rewrite (a < b) | (a == b) as (a <= b). */ 1776 1777 static bool 1778 eliminate_redundant_comparison (enum tree_code opcode, 1779 vec<operand_entry *> *ops, 1780 unsigned int currindex, 1781 operand_entry *curr) 1782 { 1783 tree op1, op2; 1784 enum tree_code lcode, rcode; 1785 gimple *def1, *def2; 1786 int i; 1787 operand_entry *oe; 1788 1789 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) 1790 return false; 1791 1792 /* Check that CURR is a comparison. */ 1793 if (TREE_CODE (curr->op) != SSA_NAME) 1794 return false; 1795 def1 = SSA_NAME_DEF_STMT (curr->op); 1796 if (!is_gimple_assign (def1)) 1797 return false; 1798 lcode = gimple_assign_rhs_code (def1); 1799 if (TREE_CODE_CLASS (lcode) != tcc_comparison) 1800 return false; 1801 op1 = gimple_assign_rhs1 (def1); 1802 op2 = gimple_assign_rhs2 (def1); 1803 1804 /* Now look for a similar comparison in the remaining OPS. 
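     For example, if curr->op is a < b and a later entry computes a == b,
     then under BIT_IOR_EXPR maybe_fold_or_comparisons returns a <= b; the
     later entry is removed and the combined comparison replaces this one.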
*/ 1805 for (i = currindex + 1; ops->iterate (i, &oe); i++) 1806 { 1807 tree t; 1808 1809 if (TREE_CODE (oe->op) != SSA_NAME) 1810 continue; 1811 def2 = SSA_NAME_DEF_STMT (oe->op); 1812 if (!is_gimple_assign (def2)) 1813 continue; 1814 rcode = gimple_assign_rhs_code (def2); 1815 if (TREE_CODE_CLASS (rcode) != tcc_comparison) 1816 continue; 1817 1818 /* If we got here, we have a match. See if we can combine the 1819 two comparisons. */ 1820 if (opcode == BIT_IOR_EXPR) 1821 t = maybe_fold_or_comparisons (lcode, op1, op2, 1822 rcode, gimple_assign_rhs1 (def2), 1823 gimple_assign_rhs2 (def2)); 1824 else 1825 t = maybe_fold_and_comparisons (lcode, op1, op2, 1826 rcode, gimple_assign_rhs1 (def2), 1827 gimple_assign_rhs2 (def2)); 1828 if (!t) 1829 continue; 1830 1831 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons 1832 always give us a boolean_type_node value back. If the original 1833 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type, 1834 we need to convert. */ 1835 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t))) 1836 t = fold_convert (TREE_TYPE (curr->op), t); 1837 1838 if (TREE_CODE (t) != INTEGER_CST 1839 && !operand_equal_p (t, curr->op, 0)) 1840 { 1841 enum tree_code subcode; 1842 tree newop1, newop2; 1843 if (!COMPARISON_CLASS_P (t)) 1844 continue; 1845 extract_ops_from_tree (t, &subcode, &newop1, &newop2); 1846 STRIP_USELESS_TYPE_CONVERSION (newop1); 1847 STRIP_USELESS_TYPE_CONVERSION (newop2); 1848 if (!is_gimple_val (newop1) || !is_gimple_val (newop2)) 1849 continue; 1850 } 1851 1852 if (dump_file && (dump_flags & TDF_DETAILS)) 1853 { 1854 fprintf (dump_file, "Equivalence: "); 1855 print_generic_expr (dump_file, curr->op, 0); 1856 fprintf (dump_file, " %s ", op_symbol_code (opcode)); 1857 print_generic_expr (dump_file, oe->op, 0); 1858 fprintf (dump_file, " -> "); 1859 print_generic_expr (dump_file, t, 0); 1860 fprintf (dump_file, "\n"); 1861 } 1862 1863 /* Now we can delete oe, as it has been subsumed by the new combined 1864 expression t. */ 1865 ops->ordered_remove (i); 1866 reassociate_stats.ops_eliminated ++; 1867 1868 /* If t is the same as curr->op, we're done. Otherwise we must 1869 replace curr->op with t. Special case is if we got a constant 1870 back, in which case we add it to the end instead of in place of 1871 the current entry. */ 1872 if (TREE_CODE (t) == INTEGER_CST) 1873 { 1874 ops->ordered_remove (currindex); 1875 add_to_ops_vec (ops, t); 1876 } 1877 else if (!operand_equal_p (t, curr->op, 0)) 1878 { 1879 gimple *sum; 1880 enum tree_code subcode; 1881 tree newop1; 1882 tree newop2; 1883 gcc_assert (COMPARISON_CLASS_P (t)); 1884 extract_ops_from_tree (t, &subcode, &newop1, &newop2); 1885 STRIP_USELESS_TYPE_CONVERSION (newop1); 1886 STRIP_USELESS_TYPE_CONVERSION (newop2); 1887 gcc_checking_assert (is_gimple_val (newop1) 1888 && is_gimple_val (newop2)); 1889 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode); 1890 curr->op = gimple_get_lhs (sum); 1891 } 1892 return true; 1893 } 1894 1895 return false; 1896 } 1897 1898 1899 /* Transform repeated addition of same values into multiply with 1900 constant. 
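   For example, the PLUS_EXPR operand list {x, x, x, y} becomes {y, t}
   together with a new statement t = x * 3.  For floating-point types
   this is only done under -funsafe-math-optimizations.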
*/ 1901 static bool 1902 transform_add_to_multiply (vec<operand_entry *> *ops) 1903 { 1904 operand_entry *oe; 1905 tree op = NULL_TREE; 1906 int j; 1907 int i, start = -1, end = 0, count = 0; 1908 auto_vec<std::pair <int, int> > indxs; 1909 bool changed = false; 1910 1911 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op)) 1912 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op)) 1913 || !flag_unsafe_math_optimizations)) 1914 return false; 1915 1916 /* Look for repeated operands. */ 1917 FOR_EACH_VEC_ELT (*ops, i, oe) 1918 { 1919 if (start == -1) 1920 { 1921 count = 1; 1922 op = oe->op; 1923 start = i; 1924 } 1925 else if (operand_equal_p (oe->op, op, 0)) 1926 { 1927 count++; 1928 end = i; 1929 } 1930 else 1931 { 1932 if (count > 1) 1933 indxs.safe_push (std::make_pair (start, end)); 1934 count = 1; 1935 op = oe->op; 1936 start = i; 1937 } 1938 } 1939 1940 if (count > 1) 1941 indxs.safe_push (std::make_pair (start, end)); 1942 1943 for (j = indxs.length () - 1; j >= 0; --j) 1944 { 1945 /* Convert repeated operand addition to multiplication. */ 1946 start = indxs[j].first; 1947 end = indxs[j].second; 1948 op = (*ops)[start]->op; 1949 count = end - start + 1; 1950 for (i = end; i >= start; --i) 1951 ops->unordered_remove (i); 1952 tree tmp = make_ssa_name (TREE_TYPE (op)); 1953 tree cst = build_int_cst (integer_type_node, count); 1954 gassign *mul_stmt 1955 = gimple_build_assign (tmp, MULT_EXPR, 1956 op, fold_convert (TREE_TYPE (op), cst)); 1957 gimple_set_visited (mul_stmt, true); 1958 add_to_ops_vec (ops, tmp, mul_stmt); 1959 changed = true; 1960 } 1961 1962 return changed; 1963 } 1964 1965 1966 /* Perform various identities and other optimizations on the list of 1967 operand entries, stored in OPS. The tree code for the binary 1968 operation between all the operands is OPCODE. */ 1969 1970 static void 1971 optimize_ops_list (enum tree_code opcode, 1972 vec<operand_entry *> *ops) 1973 { 1974 unsigned int length = ops->length (); 1975 unsigned int i; 1976 operand_entry *oe; 1977 operand_entry *oelast = NULL; 1978 bool iterate = false; 1979 1980 if (length == 1) 1981 return; 1982 1983 oelast = ops->last (); 1984 1985 /* If the last two are constants, pop the constants off, merge them 1986 and try the next two. 
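   A hypothetical example: for PLUS_EXPR with OPS = { a, b, 3, 5 }, the
   two trailing constants are popped, fold_binary computes 3 + 5 = 8, the
   folded constant is pushed back so OPS becomes { a, b, 8 }, and the
   routine recurses in case yet another constant can now be merged.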
*/ 1987 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) 1988 { 1989 operand_entry *oelm1 = (*ops)[length - 2]; 1990 1991 if (oelm1->rank == 0 1992 && is_gimple_min_invariant (oelm1->op) 1993 && useless_type_conversion_p (TREE_TYPE (oelm1->op), 1994 TREE_TYPE (oelast->op))) 1995 { 1996 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op), 1997 oelm1->op, oelast->op); 1998 1999 if (folded && is_gimple_min_invariant (folded)) 2000 { 2001 if (dump_file && (dump_flags & TDF_DETAILS)) 2002 fprintf (dump_file, "Merging constants\n"); 2003 2004 ops->pop (); 2005 ops->pop (); 2006 2007 add_to_ops_vec (ops, folded); 2008 reassociate_stats.constants_eliminated++; 2009 2010 optimize_ops_list (opcode, ops); 2011 return; 2012 } 2013 } 2014 } 2015 2016 eliminate_using_constants (opcode, ops); 2017 oelast = NULL; 2018 2019 for (i = 0; ops->iterate (i, &oe);) 2020 { 2021 bool done = false; 2022 2023 if (eliminate_not_pairs (opcode, ops, i, oe)) 2024 return; 2025 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast) 2026 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)) 2027 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe))) 2028 { 2029 if (done) 2030 return; 2031 iterate = true; 2032 oelast = NULL; 2033 continue; 2034 } 2035 oelast = oe; 2036 i++; 2037 } 2038 2039 length = ops->length (); 2040 oelast = ops->last (); 2041 2042 if (iterate) 2043 optimize_ops_list (opcode, ops); 2044 } 2045 2046 /* The following functions are subroutines to optimize_range_tests and allow 2047 it to try to change a logical combination of comparisons into a range 2048 test. 2049 2050 For example, both 2051 X == 2 || X == 5 || X == 3 || X == 4 2052 and 2053 X >= 2 && X <= 5 2054 are converted to 2055 (unsigned) (X - 2) <= 3 2056 2057 For more information see comments above fold_test_range in fold-const.c, 2058 this implementation is for GIMPLE. */ 2059 2060 struct range_entry 2061 { 2062 tree exp; 2063 tree low; 2064 tree high; 2065 bool in_p; 2066 bool strict_overflow_p; 2067 unsigned int idx, next; 2068 }; 2069 2070 /* This is similar to make_range in fold-const.c, but on top of 2071 GIMPLE instead of trees. If EXP is non-NULL, it should be 2072 an SSA_NAME and STMT argument is ignored, otherwise STMT 2073 argument should be a GIMPLE_COND. */ 2074 2075 static void 2076 init_range_entry (struct range_entry *r, tree exp, gimple *stmt) 2077 { 2078 int in_p; 2079 tree low, high; 2080 bool is_bool, strict_overflow_p; 2081 2082 r->exp = NULL_TREE; 2083 r->in_p = false; 2084 r->strict_overflow_p = false; 2085 r->low = NULL_TREE; 2086 r->high = NULL_TREE; 2087 if (exp != NULL_TREE 2088 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))) 2089 return; 2090 2091 /* Start with simply saying "EXP != 0" and then look at the code of EXP 2092 and see if we can refine the range. Some of the cases below may not 2093 happen, but it doesn't seem worth worrying about this. We "continue" 2094 the outer loop when we've changed something; otherwise we "break" 2095 the switch, which will "break" the while. */ 2096 low = exp ? 
build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node; 2097 high = low; 2098 in_p = 0; 2099 strict_overflow_p = false; 2100 is_bool = false; 2101 if (exp == NULL_TREE) 2102 is_bool = true; 2103 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) 2104 { 2105 if (TYPE_UNSIGNED (TREE_TYPE (exp))) 2106 is_bool = true; 2107 else 2108 return; 2109 } 2110 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) 2111 is_bool = true; 2112 2113 while (1) 2114 { 2115 enum tree_code code; 2116 tree arg0, arg1, exp_type; 2117 tree nexp; 2118 location_t loc; 2119 2120 if (exp != NULL_TREE) 2121 { 2122 if (TREE_CODE (exp) != SSA_NAME 2123 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp)) 2124 break; 2125 2126 stmt = SSA_NAME_DEF_STMT (exp); 2127 if (!is_gimple_assign (stmt)) 2128 break; 2129 2130 code = gimple_assign_rhs_code (stmt); 2131 arg0 = gimple_assign_rhs1 (stmt); 2132 arg1 = gimple_assign_rhs2 (stmt); 2133 exp_type = TREE_TYPE (exp); 2134 } 2135 else 2136 { 2137 code = gimple_cond_code (stmt); 2138 arg0 = gimple_cond_lhs (stmt); 2139 arg1 = gimple_cond_rhs (stmt); 2140 exp_type = boolean_type_node; 2141 } 2142 2143 if (TREE_CODE (arg0) != SSA_NAME 2144 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0)) 2145 break; 2146 loc = gimple_location (stmt); 2147 switch (code) 2148 { 2149 case BIT_NOT_EXPR: 2150 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE 2151 /* Ensure the range is either +[-,0], +[0,0], 2152 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or 2153 -[1,1]. If it is e.g. +[-,-] or -[-,-] 2154 or similar expression of unconditional true or 2155 false, it should not be negated. */ 2156 && ((high && integer_zerop (high)) 2157 || (low && integer_onep (low)))) 2158 { 2159 in_p = !in_p; 2160 exp = arg0; 2161 continue; 2162 } 2163 break; 2164 case SSA_NAME: 2165 exp = arg0; 2166 continue; 2167 CASE_CONVERT: 2168 if (is_bool) 2169 goto do_default; 2170 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) 2171 { 2172 if (TYPE_UNSIGNED (TREE_TYPE (arg0))) 2173 is_bool = true; 2174 else 2175 return; 2176 } 2177 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) 2178 is_bool = true; 2179 goto do_default; 2180 case EQ_EXPR: 2181 case NE_EXPR: 2182 case LT_EXPR: 2183 case LE_EXPR: 2184 case GE_EXPR: 2185 case GT_EXPR: 2186 is_bool = true; 2187 /* FALLTHRU */ 2188 default: 2189 if (!is_bool) 2190 return; 2191 do_default: 2192 nexp = make_range_step (loc, code, arg0, arg1, exp_type, 2193 &low, &high, &in_p, 2194 &strict_overflow_p); 2195 if (nexp != NULL_TREE) 2196 { 2197 exp = nexp; 2198 gcc_assert (TREE_CODE (exp) == SSA_NAME); 2199 continue; 2200 } 2201 break; 2202 } 2203 break; 2204 } 2205 if (is_bool) 2206 { 2207 r->exp = exp; 2208 r->in_p = in_p; 2209 r->low = low; 2210 r->high = high; 2211 r->strict_overflow_p = strict_overflow_p; 2212 } 2213 } 2214 2215 /* Comparison function for qsort. Sort entries 2216 without SSA_NAME exp first, then with SSA_NAMEs sorted 2217 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs 2218 by increasing ->low and if ->low is the same, by increasing 2219 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE 2220 maximum. */ 2221 2222 static int 2223 range_entry_cmp (const void *a, const void *b) 2224 { 2225 const struct range_entry *p = (const struct range_entry *) a; 2226 const struct range_entry *q = (const struct range_entry *) b; 2227 2228 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) 2229 { 2230 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 2231 { 2232 /* Group range_entries for the same SSA_NAME together. 
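   E.g. (made-up entries, for illustration) the records
     { exp = _7, low = 0,    high = 9 }
     { exp = _5, low = NULL, high = 3 }
     { exp = _5, low = 2,    high = 2 }
   sort as _5 [NULL, 3], _5 [2, 2], _7 [0, 9]: grouped by ascending
   SSA_NAME_VERSION, and within a group a NULL low (minimum) precedes
   ascending lows.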
*/ 2233 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) 2234 return -1; 2235 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) 2236 return 1; 2237 /* If ->low is different, NULL low goes first, then by 2238 ascending low. */ 2239 if (p->low != NULL_TREE) 2240 { 2241 if (q->low != NULL_TREE) 2242 { 2243 tree tem = fold_binary (LT_EXPR, boolean_type_node, 2244 p->low, q->low); 2245 if (tem && integer_onep (tem)) 2246 return -1; 2247 tem = fold_binary (GT_EXPR, boolean_type_node, 2248 p->low, q->low); 2249 if (tem && integer_onep (tem)) 2250 return 1; 2251 } 2252 else 2253 return 1; 2254 } 2255 else if (q->low != NULL_TREE) 2256 return -1; 2257 /* If ->high is different, NULL high goes last, before that by 2258 ascending high. */ 2259 if (p->high != NULL_TREE) 2260 { 2261 if (q->high != NULL_TREE) 2262 { 2263 tree tem = fold_binary (LT_EXPR, boolean_type_node, 2264 p->high, q->high); 2265 if (tem && integer_onep (tem)) 2266 return -1; 2267 tem = fold_binary (GT_EXPR, boolean_type_node, 2268 p->high, q->high); 2269 if (tem && integer_onep (tem)) 2270 return 1; 2271 } 2272 else 2273 return -1; 2274 } 2275 else if (q->high != NULL_TREE) 2276 return 1; 2277 /* If both ranges are the same, sort below by ascending idx. */ 2278 } 2279 else 2280 return 1; 2281 } 2282 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 2283 return -1; 2284 2285 if (p->idx < q->idx) 2286 return -1; 2287 else 2288 { 2289 gcc_checking_assert (p->idx > q->idx); 2290 return 1; 2291 } 2292 } 2293 2294 /* Helper routine of optimize_range_test. 2295 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for 2296 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, 2297 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE 2298 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to 2299 an array of COUNT pointers to other ranges. Return 2300 true if the range merge has been successful. 2301 If OPCODE is ERROR_MARK, this is called from within 2302 maybe_optimize_range_tests and is performing inter-bb range optimization. 2303 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in 2304 oe->rank. */ 2305 2306 static bool 2307 update_range_test (struct range_entry *range, struct range_entry *otherrange, 2308 struct range_entry **otherrangep, 2309 unsigned int count, enum tree_code opcode, 2310 vec<operand_entry *> *ops, tree exp, gimple_seq seq, 2311 bool in_p, tree low, tree high, bool strict_overflow_p) 2312 { 2313 operand_entry *oe = (*ops)[range->idx]; 2314 tree op = oe->op; 2315 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) 2316 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 2317 location_t loc = gimple_location (stmt); 2318 tree optype = op ? TREE_TYPE (op) : boolean_type_node; 2319 tree tem = build_range_check (loc, optype, unshare_expr (exp), 2320 in_p, low, high); 2321 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 2322 gimple_stmt_iterator gsi; 2323 unsigned int i, uid; 2324 2325 if (tem == NULL_TREE) 2326 return false; 2327 2328 /* If op is default def SSA_NAME, there is no place to insert the 2329 new comparison. Give up, unless we can use OP itself as the 2330 range test. 
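   For instance (hypothetical), if OP is the default definition of a
   boolean parameter flag_2(D) and the combined range degenerates to
   flag_2 == 1 inside an AND chain, flag_2(D) itself already is the
   desired range test and nothing new needs to be inserted.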
*/
  if (op && SSA_NAME_IS_DEFAULT_DEF (op))
    {
      if (op == range->exp
	  && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
	      || TREE_CODE (optype) == BOOLEAN_TYPE)
	  && (op == tem
	      || (TREE_CODE (tem) == EQ_EXPR
		  && TREE_OPERAND (tem, 0) == op
		  && integer_onep (TREE_OPERAND (tem, 1))))
	  && opcode != BIT_IOR_EXPR
	  && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
	{
	  stmt = NULL;
	  tem = op;
	}
      else
	return false;
    }

  if (strict_overflow_p && issue_strict_overflow_warning (wc))
    warning_at (loc, OPT_Wstrict_overflow,
		"assuming signed overflow does not occur "
		"when simplifying range test");

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct range_entry *r;
      fprintf (dump_file, "Optimizing range tests ");
      print_generic_expr (dump_file, range->exp, 0);
      fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
      print_generic_expr (dump_file, range->low, 0);
      fprintf (dump_file, ", ");
      print_generic_expr (dump_file, range->high, 0);
      fprintf (dump_file, "]");
      for (i = 0; i < count; i++)
	{
	  if (otherrange)
	    r = otherrange + i;
	  else
	    r = otherrangep[i];
	  fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
	  print_generic_expr (dump_file, r->low, 0);
	  fprintf (dump_file, ", ");
	  print_generic_expr (dump_file, r->high, 0);
	  fprintf (dump_file, "]");
	}
      fprintf (dump_file, "\n into ");
      print_generic_expr (dump_file, tem, 0);
      fprintf (dump_file, "\n");
    }

  if (opcode == BIT_IOR_EXPR
      || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
    tem = invert_truthvalue_loc (loc, tem);

  tem = fold_convert_loc (loc, optype, tem);
  if (stmt)
    {
      gsi = gsi_for_stmt (stmt);
      uid = gimple_uid (stmt);
    }
  else
    {
      gsi = gsi_none ();
      uid = 0;
    }
  if (stmt == NULL)
    gcc_checking_assert (tem == op);
  /* In rare cases range->exp can be equal to the lhs of stmt.
     In that case we have to insert after the stmt rather than before
     it.  If stmt is a PHI, insert it at the start of the basic block.
*/ 2402 else if (op != range->exp) 2403 { 2404 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 2405 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true, 2406 GSI_SAME_STMT); 2407 gsi_prev (&gsi); 2408 } 2409 else if (gimple_code (stmt) != GIMPLE_PHI) 2410 { 2411 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); 2412 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false, 2413 GSI_CONTINUE_LINKING); 2414 } 2415 else 2416 { 2417 gsi = gsi_after_labels (gimple_bb (stmt)); 2418 if (!gsi_end_p (gsi)) 2419 uid = gimple_uid (gsi_stmt (gsi)); 2420 else 2421 { 2422 gsi = gsi_start_bb (gimple_bb (stmt)); 2423 uid = 1; 2424 while (!gsi_end_p (gsi)) 2425 { 2426 uid = gimple_uid (gsi_stmt (gsi)); 2427 gsi_next (&gsi); 2428 } 2429 } 2430 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 2431 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true, 2432 GSI_SAME_STMT); 2433 if (gsi_end_p (gsi)) 2434 gsi = gsi_last_bb (gimple_bb (stmt)); 2435 else 2436 gsi_prev (&gsi); 2437 } 2438 for (; !gsi_end_p (gsi); gsi_prev (&gsi)) 2439 if (gimple_uid (gsi_stmt (gsi))) 2440 break; 2441 else 2442 gimple_set_uid (gsi_stmt (gsi), uid); 2443 2444 oe->op = tem; 2445 range->exp = exp; 2446 range->low = low; 2447 range->high = high; 2448 range->in_p = in_p; 2449 range->strict_overflow_p = false; 2450 2451 for (i = 0; i < count; i++) 2452 { 2453 if (otherrange) 2454 range = otherrange + i; 2455 else 2456 range = otherrangep[i]; 2457 oe = (*ops)[range->idx]; 2458 /* Now change all the other range test immediate uses, so that 2459 those tests will be optimized away. */ 2460 if (opcode == ERROR_MARK) 2461 { 2462 if (oe->op) 2463 oe->op = build_int_cst (TREE_TYPE (oe->op), 2464 oe->rank == BIT_IOR_EXPR ? 0 : 1); 2465 else 2466 oe->op = (oe->rank == BIT_IOR_EXPR 2467 ? boolean_false_node : boolean_true_node); 2468 } 2469 else 2470 oe->op = error_mark_node; 2471 range->exp = NULL_TREE; 2472 range->low = NULL_TREE; 2473 range->high = NULL_TREE; 2474 } 2475 return true; 2476 } 2477 2478 /* Optimize X == CST1 || X == CST2 2479 if popcount (CST1 ^ CST2) == 1 into 2480 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). 2481 Similarly for ranges. E.g. 2482 X != 2 && X != 3 && X != 10 && X != 11 2483 will be transformed by the previous optimization into 2484 !((X - 2U) <= 1U || (X - 10U) <= 1U) 2485 and this loop can transform that into 2486 !(((X & ~8) - 2U) <= 1U). */ 2487 2488 static bool 2489 optimize_range_tests_xor (enum tree_code opcode, tree type, 2490 tree lowi, tree lowj, tree highi, tree highj, 2491 vec<operand_entry *> *ops, 2492 struct range_entry *rangei, 2493 struct range_entry *rangej) 2494 { 2495 tree lowxor, highxor, tem, exp; 2496 /* Check lowi ^ lowj == highi ^ highj and 2497 popcount (lowi ^ lowj) == 1. 
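     A worked example with made-up constants: for X == 43 || X == 47 the
     two ranges are [43, 43] and [47, 47], so lowi ^ lowj == highi ^ highj
     == 4, a single bit.  The test then becomes (X & ~4) == 43, which
     accepts exactly 43 and 47.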
*/ 2498 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); 2499 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) 2500 return false; 2501 if (!integer_pow2p (lowxor)) 2502 return false; 2503 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); 2504 if (!tree_int_cst_equal (lowxor, highxor)) 2505 return false; 2506 2507 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); 2508 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem); 2509 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); 2510 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); 2511 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp, 2512 NULL, rangei->in_p, lowj, highj, 2513 rangei->strict_overflow_p 2514 || rangej->strict_overflow_p)) 2515 return true; 2516 return false; 2517 } 2518 2519 /* Optimize X == CST1 || X == CST2 2520 if popcount (CST2 - CST1) == 1 into 2521 ((X - CST1) & ~(CST2 - CST1)) == 0. 2522 Similarly for ranges. E.g. 2523 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46 2524 || X == 75 || X == 45 2525 will be transformed by the previous optimization into 2526 (X - 43U) <= 3U || (X - 75U) <= 3U 2527 and this loop can transform that into 2528 ((X - 43U) & ~(75U - 43U)) <= 3U. */ 2529 static bool 2530 optimize_range_tests_diff (enum tree_code opcode, tree type, 2531 tree lowi, tree lowj, tree highi, tree highj, 2532 vec<operand_entry *> *ops, 2533 struct range_entry *rangei, 2534 struct range_entry *rangej) 2535 { 2536 tree tem1, tem2, mask; 2537 /* Check highi - lowi == highj - lowj. */ 2538 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi); 2539 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 2540 return false; 2541 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj); 2542 if (!tree_int_cst_equal (tem1, tem2)) 2543 return false; 2544 /* Check popcount (lowj - lowi) == 1. */ 2545 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi); 2546 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 2547 return false; 2548 if (!integer_pow2p (tem1)) 2549 return false; 2550 2551 type = unsigned_type_for (type); 2552 tem1 = fold_convert (type, tem1); 2553 tem2 = fold_convert (type, tem2); 2554 lowi = fold_convert (type, lowi); 2555 mask = fold_build1 (BIT_NOT_EXPR, type, tem1); 2556 tem1 = fold_binary (MINUS_EXPR, type, 2557 fold_convert (type, rangei->exp), lowi); 2558 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask); 2559 lowj = build_int_cst (type, 0); 2560 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1, 2561 NULL, rangei->in_p, lowj, tem2, 2562 rangei->strict_overflow_p 2563 || rangej->strict_overflow_p)) 2564 return true; 2565 return false; 2566 } 2567 2568 /* It does some common checks for function optimize_range_tests_xor and 2569 optimize_range_tests_diff. 2570 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor. 2571 Else it calls optimize_range_tests_diff. 
*/ 2572 2573 static bool 2574 optimize_range_tests_1 (enum tree_code opcode, int first, int length, 2575 bool optimize_xor, vec<operand_entry *> *ops, 2576 struct range_entry *ranges) 2577 { 2578 int i, j; 2579 bool any_changes = false; 2580 for (i = first; i < length; i++) 2581 { 2582 tree lowi, highi, lowj, highj, type, tem; 2583 2584 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 2585 continue; 2586 type = TREE_TYPE (ranges[i].exp); 2587 if (!INTEGRAL_TYPE_P (type)) 2588 continue; 2589 lowi = ranges[i].low; 2590 if (lowi == NULL_TREE) 2591 lowi = TYPE_MIN_VALUE (type); 2592 highi = ranges[i].high; 2593 if (highi == NULL_TREE) 2594 continue; 2595 for (j = i + 1; j < length && j < i + 64; j++) 2596 { 2597 bool changes; 2598 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p) 2599 continue; 2600 lowj = ranges[j].low; 2601 if (lowj == NULL_TREE) 2602 continue; 2603 highj = ranges[j].high; 2604 if (highj == NULL_TREE) 2605 highj = TYPE_MAX_VALUE (type); 2606 /* Check lowj > highi. */ 2607 tem = fold_binary (GT_EXPR, boolean_type_node, 2608 lowj, highi); 2609 if (tem == NULL_TREE || !integer_onep (tem)) 2610 continue; 2611 if (optimize_xor) 2612 changes = optimize_range_tests_xor (opcode, type, lowi, lowj, 2613 highi, highj, ops, 2614 ranges + i, ranges + j); 2615 else 2616 changes = optimize_range_tests_diff (opcode, type, lowi, lowj, 2617 highi, highj, ops, 2618 ranges + i, ranges + j); 2619 if (changes) 2620 { 2621 any_changes = true; 2622 break; 2623 } 2624 } 2625 } 2626 return any_changes; 2627 } 2628 2629 /* Helper function of optimize_range_tests_to_bit_test. Handle a single 2630 range, EXP, LOW, HIGH, compute bit mask of bits to test and return 2631 EXP on success, NULL otherwise. */ 2632 2633 static tree 2634 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high, 2635 wide_int *mask, tree *totallowp) 2636 { 2637 tree tem = int_const_binop (MINUS_EXPR, high, low); 2638 if (tem == NULL_TREE 2639 || TREE_CODE (tem) != INTEGER_CST 2640 || TREE_OVERFLOW (tem) 2641 || tree_int_cst_sgn (tem) == -1 2642 || compare_tree_int (tem, prec) != -1) 2643 return NULL_TREE; 2644 2645 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1; 2646 *mask = wi::shifted_mask (0, max, false, prec); 2647 if (TREE_CODE (exp) == BIT_AND_EXPR 2648 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) 2649 { 2650 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1)); 2651 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp))); 2652 if (wi::popcount (msk) == 1 2653 && wi::ltu_p (msk, prec - max)) 2654 { 2655 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec); 2656 max += msk.to_uhwi (); 2657 exp = TREE_OPERAND (exp, 0); 2658 if (integer_zerop (low) 2659 && TREE_CODE (exp) == PLUS_EXPR 2660 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) 2661 { 2662 tree ret = TREE_OPERAND (exp, 0); 2663 STRIP_NOPS (ret); 2664 widest_int bias 2665 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)), 2666 TYPE_PRECISION (TREE_TYPE (low)))); 2667 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias); 2668 if (totallowp) 2669 { 2670 *totallowp = tbias; 2671 return ret; 2672 } 2673 else if (!tree_int_cst_lt (totallow, tbias)) 2674 return NULL_TREE; 2675 bias = wi::to_widest (tbias); 2676 bias -= wi::to_widest (totallow); 2677 if (bias >= 0 && bias < prec - max) 2678 { 2679 *mask = wi::lshift (*mask, bias); 2680 return ret; 2681 } 2682 } 2683 } 2684 } 2685 if (totallowp) 2686 return exp; 2687 if (!tree_int_cst_lt (totallow, low)) 2688 return exp; 2689 tem = int_const_binop (MINUS_EXPR, 
low, totallow); 2690 if (tem == NULL_TREE 2691 || TREE_CODE (tem) != INTEGER_CST 2692 || TREE_OVERFLOW (tem) 2693 || compare_tree_int (tem, prec - max) == 1) 2694 return NULL_TREE; 2695 2696 *mask = wi::lshift (*mask, wi::to_widest (tem)); 2697 return exp; 2698 } 2699 2700 /* Attempt to optimize small range tests using bit test. 2701 E.g. 2702 X != 43 && X != 76 && X != 44 && X != 78 && X != 49 2703 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82 2704 has been by earlier optimizations optimized into: 2705 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82 2706 As all the 43 through 82 range is less than 64 numbers, 2707 for 64-bit word targets optimize that into: 2708 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */ 2709 2710 static bool 2711 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length, 2712 vec<operand_entry *> *ops, 2713 struct range_entry *ranges) 2714 { 2715 int i, j; 2716 bool any_changes = false; 2717 int prec = GET_MODE_BITSIZE (word_mode); 2718 auto_vec<struct range_entry *, 64> candidates; 2719 2720 for (i = first; i < length - 2; i++) 2721 { 2722 tree lowi, highi, lowj, highj, type; 2723 2724 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 2725 continue; 2726 type = TREE_TYPE (ranges[i].exp); 2727 if (!INTEGRAL_TYPE_P (type)) 2728 continue; 2729 lowi = ranges[i].low; 2730 if (lowi == NULL_TREE) 2731 lowi = TYPE_MIN_VALUE (type); 2732 highi = ranges[i].high; 2733 if (highi == NULL_TREE) 2734 continue; 2735 wide_int mask; 2736 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi, 2737 highi, &mask, &lowi); 2738 if (exp == NULL_TREE) 2739 continue; 2740 bool strict_overflow_p = ranges[i].strict_overflow_p; 2741 candidates.truncate (0); 2742 int end = MIN (i + 64, length); 2743 for (j = i + 1; j < end; j++) 2744 { 2745 tree exp2; 2746 if (ranges[j].exp == NULL_TREE || ranges[j].in_p) 2747 continue; 2748 if (ranges[j].exp == exp) 2749 ; 2750 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR) 2751 { 2752 exp2 = TREE_OPERAND (ranges[j].exp, 0); 2753 if (exp2 == exp) 2754 ; 2755 else if (TREE_CODE (exp2) == PLUS_EXPR) 2756 { 2757 exp2 = TREE_OPERAND (exp2, 0); 2758 STRIP_NOPS (exp2); 2759 if (exp2 != exp) 2760 continue; 2761 } 2762 else 2763 continue; 2764 } 2765 else 2766 continue; 2767 lowj = ranges[j].low; 2768 if (lowj == NULL_TREE) 2769 continue; 2770 highj = ranges[j].high; 2771 if (highj == NULL_TREE) 2772 highj = TYPE_MAX_VALUE (type); 2773 wide_int mask2; 2774 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj, 2775 highj, &mask2, NULL); 2776 if (exp2 != exp) 2777 continue; 2778 mask |= mask2; 2779 strict_overflow_p |= ranges[j].strict_overflow_p; 2780 candidates.safe_push (&ranges[j]); 2781 } 2782 2783 /* If we need otherwise 3 or more comparisons, use a bit test. */ 2784 if (candidates.length () >= 2) 2785 { 2786 tree high = wide_int_to_tree (TREE_TYPE (lowi), 2787 wi::to_widest (lowi) 2788 + prec - 1 - wi::clz (mask)); 2789 operand_entry *oe = (*ops)[ranges[i].idx]; 2790 tree op = oe->op; 2791 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) 2792 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 2793 location_t loc = gimple_location (stmt); 2794 tree optype = op ? TREE_TYPE (op) : boolean_type_node; 2795 2796 /* See if it isn't cheaper to pretend the minimum value of the 2797 range is 0, if maximum value is small enough. 2798 We can avoid then subtraction of the minimum value, but the 2799 mask constant could be perhaps more expensive. 
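	 Purely as an illustration (constants borrowed from the example
	 above this function): with lowi == 43 the bit to test is selected
	 by 1 << (X - 43); when 43 plus the highest mask bit still fits in
	 a word, the subtraction can be dropped by shifting the mask left
	 by 43 and selecting the bit with 1 << X instead.  The RTX cost
	 queries below compare the two AND constants to decide whether
	 that is actually a win.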
*/
	  if (compare_tree_int (lowi, 0) > 0
	      && compare_tree_int (high, prec) < 0)
	    {
	      int cost_diff;
	      HOST_WIDE_INT m = tree_to_uhwi (lowi);
	      rtx reg = gen_raw_REG (word_mode, 10000);
	      bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
	      cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
						      GEN_INT (-m)), speed_p);
	      rtx r = immed_wide_int_const (mask, word_mode);
	      cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
					 word_mode, speed_p);
	      r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
	      cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
					 word_mode, speed_p);
	      if (cost_diff > 0)
		{
		  mask = wi::lshift (mask, m);
		  lowi = build_zero_cst (TREE_TYPE (lowi));
		}
	    }

	  tree tem = build_range_check (loc, optype, unshare_expr (exp),
					false, lowi, high);
	  if (tem == NULL_TREE || is_gimple_val (tem))
	    continue;
	  tree etype = unsigned_type_for (TREE_TYPE (exp));
	  exp = fold_build2_loc (loc, MINUS_EXPR, etype,
				 fold_convert_loc (loc, etype, exp),
				 fold_convert_loc (loc, etype, lowi));
	  exp = fold_convert_loc (loc, integer_type_node, exp);
	  tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
	  exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
				 build_int_cst (word_type, 1), exp);
	  exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
				 wide_int_to_tree (word_type, mask));
	  exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
				 build_zero_cst (word_type));
	  if (is_gimple_val (exp))
	    continue;

	  /* The shift might have undefined behavior if TEM is true,
	     but reassociate_bb isn't prepared to have basic blocks
	     split when it is running.  So, temporarily emit code
	     with BIT_IOR_EXPR instead of &&, and fix it up in
	     branch_fixup.
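	     Roughly, with the constants from the example above this
	     function and invented SSA names, the sequence emitted here
	     looks like
	       tem_1 = <X outside [43, 82] range check>;
	       _2 = (1 << (X - 43)) & 0x8F0000004F;
	       _3 = _2 == 0;
	       _4 = tem_1 | _3;
	     and branch_fixup later turns the | back into control flow so
	     that the shift is never evaluated when tem_1 is already true.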
*/ 2846 gimple_seq seq; 2847 tem = force_gimple_operand (tem, &seq, true, NULL_TREE); 2848 gcc_assert (TREE_CODE (tem) == SSA_NAME); 2849 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true); 2850 gimple_seq seq2; 2851 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE); 2852 gimple_seq_add_seq_without_update (&seq, seq2); 2853 gcc_assert (TREE_CODE (exp) == SSA_NAME); 2854 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true); 2855 gimple *g = gimple_build_assign (make_ssa_name (optype), 2856 BIT_IOR_EXPR, tem, exp); 2857 gimple_set_location (g, loc); 2858 gimple_seq_add_stmt_without_update (&seq, g); 2859 exp = gimple_assign_lhs (g); 2860 tree val = build_zero_cst (optype); 2861 if (update_range_test (&ranges[i], NULL, candidates.address (), 2862 candidates.length (), opcode, ops, exp, 2863 seq, false, val, val, strict_overflow_p)) 2864 { 2865 any_changes = true; 2866 reassoc_branch_fixups.safe_push (tem); 2867 } 2868 else 2869 gimple_seq_discard (seq); 2870 } 2871 } 2872 return any_changes; 2873 } 2874 2875 /* Attempt to optimize for signed a and b where b is known to be >= 0: 2876 a >= 0 && a < b into (unsigned) a < (unsigned) b 2877 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */ 2878 2879 static bool 2880 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length, 2881 vec<operand_entry *> *ops, 2882 struct range_entry *ranges, 2883 basic_block first_bb) 2884 { 2885 int i; 2886 bool any_changes = false; 2887 hash_map<tree, int> *map = NULL; 2888 2889 for (i = first; i < length; i++) 2890 { 2891 if (ranges[i].exp == NULL_TREE 2892 || TREE_CODE (ranges[i].exp) != SSA_NAME 2893 || !ranges[i].in_p) 2894 continue; 2895 2896 tree type = TREE_TYPE (ranges[i].exp); 2897 if (!INTEGRAL_TYPE_P (type) 2898 || TYPE_UNSIGNED (type) 2899 || ranges[i].low == NULL_TREE 2900 || !integer_zerop (ranges[i].low) 2901 || ranges[i].high != NULL_TREE) 2902 continue; 2903 /* EXP >= 0 here. 
*/ 2904 if (map == NULL) 2905 map = new hash_map <tree, int>; 2906 map->put (ranges[i].exp, i); 2907 } 2908 2909 if (map == NULL) 2910 return false; 2911 2912 for (i = 0; i < length; i++) 2913 { 2914 if (ranges[i].low == NULL_TREE 2915 || ranges[i].high == NULL_TREE 2916 || !integer_zerop (ranges[i].low) 2917 || !integer_zerop (ranges[i].high)) 2918 continue; 2919 2920 gimple *stmt; 2921 tree_code ccode; 2922 tree rhs1, rhs2; 2923 if (ranges[i].exp) 2924 { 2925 if (TREE_CODE (ranges[i].exp) != SSA_NAME) 2926 continue; 2927 stmt = SSA_NAME_DEF_STMT (ranges[i].exp); 2928 if (!is_gimple_assign (stmt)) 2929 continue; 2930 ccode = gimple_assign_rhs_code (stmt); 2931 rhs1 = gimple_assign_rhs1 (stmt); 2932 rhs2 = gimple_assign_rhs2 (stmt); 2933 } 2934 else 2935 { 2936 operand_entry *oe = (*ops)[ranges[i].idx]; 2937 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 2938 if (gimple_code (stmt) != GIMPLE_COND) 2939 continue; 2940 ccode = gimple_cond_code (stmt); 2941 rhs1 = gimple_cond_lhs (stmt); 2942 rhs2 = gimple_cond_rhs (stmt); 2943 } 2944 2945 if (TREE_CODE (rhs1) != SSA_NAME 2946 || rhs2 == NULL_TREE 2947 || TREE_CODE (rhs2) != SSA_NAME) 2948 continue; 2949 2950 switch (ccode) 2951 { 2952 case GT_EXPR: 2953 case GE_EXPR: 2954 case LT_EXPR: 2955 case LE_EXPR: 2956 break; 2957 default: 2958 continue; 2959 } 2960 if (ranges[i].in_p) 2961 ccode = invert_tree_comparison (ccode, false); 2962 switch (ccode) 2963 { 2964 case GT_EXPR: 2965 case GE_EXPR: 2966 std::swap (rhs1, rhs2); 2967 ccode = swap_tree_comparison (ccode); 2968 break; 2969 case LT_EXPR: 2970 case LE_EXPR: 2971 break; 2972 default: 2973 gcc_unreachable (); 2974 } 2975 2976 int *idx = map->get (rhs1); 2977 if (idx == NULL) 2978 continue; 2979 2980 /* maybe_optimize_range_tests allows statements without side-effects 2981 in the basic blocks as long as they are consumed in the same bb. 2982 Make sure rhs2's def stmt is not among them, otherwise we can't 2983 use safely get_nonzero_bits on it. E.g. in: 2984 # RANGE [-83, 1] NONZERO 173 2985 # k_32 = PHI <k_47(13), k_12(9)> 2986 ... 2987 if (k_32 >= 0) 2988 goto <bb 5>; [26.46%] 2989 else 2990 goto <bb 9>; [73.54%] 2991 2992 <bb 5> [local count: 140323371]: 2993 # RANGE [0, 1] NONZERO 1 2994 _5 = (int) k_32; 2995 # RANGE [0, 4] NONZERO 4 2996 _21 = _5 << 2; 2997 # RANGE [0, 4] NONZERO 4 2998 iftmp.0_44 = (char) _21; 2999 if (k_32 < iftmp.0_44) 3000 goto <bb 6>; [84.48%] 3001 else 3002 goto <bb 9>; [15.52%] 3003 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that 3004 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44 3005 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute 3006 those stmts even for negative k_32 and the value ranges would be no 3007 longer guaranteed and so the optimization would be invalid. */ 3008 if (opcode == ERROR_MARK) 3009 { 3010 gimple *g = SSA_NAME_DEF_STMT (rhs2); 3011 basic_block bb2 = gimple_bb (g); 3012 if (bb2 3013 && bb2 != first_bb 3014 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb)) 3015 { 3016 /* As an exception, handle a few common cases. */ 3017 if (gimple_assign_cast_p (g) 3018 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))) 3019 && TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g))) 3020 && (TYPE_PRECISION (TREE_TYPE (rhs2)) 3021 > TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (g))))) 3022 /* Zero-extension is always ok. 
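	       E.g. (hypothetical) if RHS2 is _5 defined by
	       _5 = (int) c_3(D) with c_3 of unsigned char type, then _5
	       stays in [0, 255] on every path, so the nonzero-bits based
	       reasoning below remains valid even when the stmt is
	       evaluated unconditionally.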
*/ ; 3023 else if (is_gimple_assign (g) 3024 && gimple_assign_rhs_code (g) == BIT_AND_EXPR 3025 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST 3026 && !wi::neg_p (gimple_assign_rhs2 (g))) 3027 /* Masking with INTEGER_CST with MSB clear is always ok 3028 too. */ ; 3029 else 3030 continue; 3031 } 3032 } 3033 3034 wide_int nz = get_nonzero_bits (rhs2); 3035 if (wi::neg_p (nz)) 3036 continue; 3037 3038 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0 3039 and RHS2 is known to be RHS2 >= 0. */ 3040 tree utype = unsigned_type_for (TREE_TYPE (rhs1)); 3041 3042 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 3043 if ((ranges[*idx].strict_overflow_p 3044 || ranges[i].strict_overflow_p) 3045 && issue_strict_overflow_warning (wc)) 3046 warning_at (gimple_location (stmt), OPT_Wstrict_overflow, 3047 "assuming signed overflow does not occur " 3048 "when simplifying range test"); 3049 3050 if (dump_file && (dump_flags & TDF_DETAILS)) 3051 { 3052 struct range_entry *r = &ranges[*idx]; 3053 fprintf (dump_file, "Optimizing range test "); 3054 print_generic_expr (dump_file, r->exp, 0); 3055 fprintf (dump_file, " +["); 3056 print_generic_expr (dump_file, r->low, 0); 3057 fprintf (dump_file, ", "); 3058 print_generic_expr (dump_file, r->high, 0); 3059 fprintf (dump_file, "] and comparison "); 3060 print_generic_expr (dump_file, rhs1, 0); 3061 fprintf (dump_file, " %s ", op_symbol_code (ccode)); 3062 print_generic_expr (dump_file, rhs2, 0); 3063 fprintf (dump_file, "\n into ("); 3064 print_generic_expr (dump_file, utype, 0); 3065 fprintf (dump_file, ") "); 3066 print_generic_expr (dump_file, rhs1, 0); 3067 fprintf (dump_file, " %s (", op_symbol_code (ccode)); 3068 print_generic_expr (dump_file, utype, 0); 3069 fprintf (dump_file, ") "); 3070 print_generic_expr (dump_file, rhs2, 0); 3071 fprintf (dump_file, "\n"); 3072 } 3073 3074 operand_entry *oe = (*ops)[ranges[i].idx]; 3075 ranges[i].in_p = 0; 3076 if (opcode == BIT_IOR_EXPR 3077 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 3078 { 3079 ranges[i].in_p = 1; 3080 ccode = invert_tree_comparison (ccode, false); 3081 } 3082 3083 unsigned int uid = gimple_uid (stmt); 3084 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 3085 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1); 3086 gimple_set_uid (g, uid); 3087 rhs1 = gimple_assign_lhs (g); 3088 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3089 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2); 3090 gimple_set_uid (g, uid); 3091 rhs2 = gimple_assign_lhs (g); 3092 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3093 if (tree_swap_operands_p (rhs1, rhs2)) 3094 { 3095 std::swap (rhs1, rhs2); 3096 ccode = swap_tree_comparison (ccode); 3097 } 3098 if (gimple_code (stmt) == GIMPLE_COND) 3099 { 3100 gcond *c = as_a <gcond *> (stmt); 3101 gimple_cond_set_code (c, ccode); 3102 gimple_cond_set_lhs (c, rhs1); 3103 gimple_cond_set_rhs (c, rhs2); 3104 update_stmt (stmt); 3105 } 3106 else 3107 { 3108 tree ctype = oe->op ? 
TREE_TYPE (oe->op) : boolean_type_node; 3109 if (!INTEGRAL_TYPE_P (ctype) 3110 || (TREE_CODE (ctype) != BOOLEAN_TYPE 3111 && TYPE_PRECISION (ctype) != 1)) 3112 ctype = boolean_type_node; 3113 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2); 3114 gimple_set_uid (g, uid); 3115 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3116 if (oe->op && ctype != TREE_TYPE (oe->op)) 3117 { 3118 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)), 3119 NOP_EXPR, gimple_assign_lhs (g)); 3120 gimple_set_uid (g, uid); 3121 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3122 } 3123 ranges[i].exp = gimple_assign_lhs (g); 3124 oe->op = ranges[i].exp; 3125 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp)); 3126 ranges[i].high = ranges[i].low; 3127 } 3128 ranges[i].strict_overflow_p = false; 3129 oe = (*ops)[ranges[*idx].idx]; 3130 /* Now change all the other range test immediate uses, so that 3131 those tests will be optimized away. */ 3132 if (opcode == ERROR_MARK) 3133 { 3134 if (oe->op) 3135 oe->op = build_int_cst (TREE_TYPE (oe->op), 3136 oe->rank == BIT_IOR_EXPR ? 0 : 1); 3137 else 3138 oe->op = (oe->rank == BIT_IOR_EXPR 3139 ? boolean_false_node : boolean_true_node); 3140 } 3141 else 3142 oe->op = error_mark_node; 3143 ranges[*idx].exp = NULL_TREE; 3144 ranges[*idx].low = NULL_TREE; 3145 ranges[*idx].high = NULL_TREE; 3146 any_changes = true; 3147 } 3148 3149 delete map; 3150 return any_changes; 3151 } 3152 3153 /* Optimize range tests, similarly how fold_range_test optimizes 3154 it on trees. The tree code for the binary 3155 operation between all the operands is OPCODE. 3156 If OPCODE is ERROR_MARK, optimize_range_tests is called from within 3157 maybe_optimize_range_tests for inter-bb range optimization. 3158 In that case if oe->op is NULL, oe->id is bb->index whose 3159 GIMPLE_COND is && or ||ed into the test, and oe->rank says 3160 the actual opcode. 3161 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */ 3162 3163 static bool 3164 optimize_range_tests (enum tree_code opcode, 3165 vec<operand_entry *> *ops, basic_block first_bb) 3166 { 3167 unsigned int length = ops->length (), i, j, first; 3168 operand_entry *oe; 3169 struct range_entry *ranges; 3170 bool any_changes = false; 3171 3172 if (length == 1) 3173 return false; 3174 3175 ranges = XNEWVEC (struct range_entry, length); 3176 for (i = 0; i < length; i++) 3177 { 3178 oe = (*ops)[i]; 3179 ranges[i].idx = i; 3180 init_range_entry (ranges + i, oe->op, 3181 oe->op 3182 ? NULL 3183 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id))); 3184 /* For | invert it now, we will invert it again before emitting 3185 the optimized expression. */ 3186 if (opcode == BIT_IOR_EXPR 3187 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 3188 ranges[i].in_p = !ranges[i].in_p; 3189 } 3190 3191 qsort (ranges, length, sizeof (*ranges), range_entry_cmp); 3192 for (i = 0; i < length; i++) 3193 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) 3194 break; 3195 3196 /* Try to merge ranges. 
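     For orientation, with the introductory example
     X == 2 || X == 5 || X == 3 || X == 4: after sorting, the four
     single-value ranges for 2, 3, 4 and 5 sit next to each other,
     successive merge_ranges calls collapse them into one range covering
     [2, 5], and update_range_test then materializes the equivalent
     (unsigned) (X - 2) <= 3 test.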
*/ 3197 for (first = i; i < length; i++) 3198 { 3199 tree low = ranges[i].low; 3200 tree high = ranges[i].high; 3201 int in_p = ranges[i].in_p; 3202 bool strict_overflow_p = ranges[i].strict_overflow_p; 3203 int update_fail_count = 0; 3204 3205 for (j = i + 1; j < length; j++) 3206 { 3207 if (ranges[i].exp != ranges[j].exp) 3208 break; 3209 if (!merge_ranges (&in_p, &low, &high, in_p, low, high, 3210 ranges[j].in_p, ranges[j].low, ranges[j].high)) 3211 break; 3212 strict_overflow_p |= ranges[j].strict_overflow_p; 3213 } 3214 3215 if (j == i + 1) 3216 continue; 3217 3218 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1, 3219 opcode, ops, ranges[i].exp, NULL, in_p, 3220 low, high, strict_overflow_p)) 3221 { 3222 i = j - 1; 3223 any_changes = true; 3224 } 3225 /* Avoid quadratic complexity if all merge_ranges calls would succeed, 3226 while update_range_test would fail. */ 3227 else if (update_fail_count == 64) 3228 i = j - 1; 3229 else 3230 ++update_fail_count; 3231 } 3232 3233 any_changes |= optimize_range_tests_1 (opcode, first, length, true, 3234 ops, ranges); 3235 3236 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2) 3237 any_changes |= optimize_range_tests_1 (opcode, first, length, false, 3238 ops, ranges); 3239 if (lshift_cheap_p (optimize_function_for_speed_p (cfun))) 3240 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length, 3241 ops, ranges); 3242 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops, 3243 ranges, first_bb); 3244 3245 if (any_changes && opcode != ERROR_MARK) 3246 { 3247 j = 0; 3248 FOR_EACH_VEC_ELT (*ops, i, oe) 3249 { 3250 if (oe->op == error_mark_node) 3251 continue; 3252 else if (i != j) 3253 (*ops)[j] = oe; 3254 j++; 3255 } 3256 ops->truncate (j); 3257 } 3258 3259 XDELETEVEC (ranges); 3260 return any_changes; 3261 } 3262 3263 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize 3264 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure, 3265 otherwise the comparison code. */ 3266 3267 static tree_code 3268 ovce_extract_ops (tree var, gassign **rets, bool *reti) 3269 { 3270 if (TREE_CODE (var) != SSA_NAME) 3271 return ERROR_MARK; 3272 3273 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var)); 3274 if (stmt == NULL) 3275 return ERROR_MARK; 3276 3277 /* ??? If we start creating more COND_EXPR, we could perform 3278 this same optimization with them. For now, simplify. */ 3279 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR) 3280 return ERROR_MARK; 3281 3282 tree cond = gimple_assign_rhs1 (stmt); 3283 tree_code cmp = TREE_CODE (cond); 3284 if (TREE_CODE_CLASS (cmp) != tcc_comparison) 3285 return ERROR_MARK; 3286 3287 /* ??? For now, allow only canonical true and false result vectors. 3288 We could expand this to other constants should the need arise, 3289 but at the moment we don't create them. */ 3290 tree t = gimple_assign_rhs2 (stmt); 3291 tree f = gimple_assign_rhs3 (stmt); 3292 bool inv; 3293 if (integer_all_onesp (t)) 3294 inv = false; 3295 else if (integer_all_onesp (f)) 3296 { 3297 cmp = invert_tree_comparison (cmp, false); 3298 inv = true; 3299 } 3300 else 3301 return ERROR_MARK; 3302 if (!integer_zerop (f)) 3303 return ERROR_MARK; 3304 3305 /* Success! */ 3306 if (rets) 3307 *rets = stmt; 3308 if (reti) 3309 *reti = inv; 3310 return cmp; 3311 } 3312 3313 /* Optimize the condition of VEC_COND_EXPRs which have been combined 3314 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). 
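   A made-up example of the intended effect:
     c_1 = VEC_COND_EXPR <a_2 < b_3, { -1, ... }, { 0, ... }>;
     c_4 = VEC_COND_EXPR <a_2 != b_3, { -1, ... }, { 0, ... }>;
     x_5 = c_1 & c_4;
   maybe_fold_and_comparisons combines the two comparisons into a_2 < b_3,
   so the first VEC_COND_EXPR gets that condition and c_4 is dropped from
   the operand list.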
*/ 3315 3316 static bool 3317 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops) 3318 { 3319 unsigned int length = ops->length (), i, j; 3320 bool any_changes = false; 3321 3322 if (length == 1) 3323 return false; 3324 3325 for (i = 0; i < length; ++i) 3326 { 3327 tree elt0 = (*ops)[i]->op; 3328 3329 gassign *stmt0; 3330 bool invert; 3331 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert); 3332 if (cmp0 == ERROR_MARK) 3333 continue; 3334 3335 for (j = i + 1; j < length; ++j) 3336 { 3337 tree &elt1 = (*ops)[j]->op; 3338 3339 gassign *stmt1; 3340 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL); 3341 if (cmp1 == ERROR_MARK) 3342 continue; 3343 3344 tree cond0 = gimple_assign_rhs1 (stmt0); 3345 tree x0 = TREE_OPERAND (cond0, 0); 3346 tree y0 = TREE_OPERAND (cond0, 1); 3347 3348 tree cond1 = gimple_assign_rhs1 (stmt1); 3349 tree x1 = TREE_OPERAND (cond1, 0); 3350 tree y1 = TREE_OPERAND (cond1, 1); 3351 3352 tree comb; 3353 if (opcode == BIT_AND_EXPR) 3354 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1); 3355 else if (opcode == BIT_IOR_EXPR) 3356 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1); 3357 else 3358 gcc_unreachable (); 3359 if (comb == NULL) 3360 continue; 3361 3362 /* Success! */ 3363 if (dump_file && (dump_flags & TDF_DETAILS)) 3364 { 3365 fprintf (dump_file, "Transforming "); 3366 print_generic_expr (dump_file, cond0, 0); 3367 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|'); 3368 print_generic_expr (dump_file, cond1, 0); 3369 fprintf (dump_file, " into "); 3370 print_generic_expr (dump_file, comb, 0); 3371 fputc ('\n', dump_file); 3372 } 3373 3374 gimple_assign_set_rhs1 (stmt0, comb); 3375 if (invert) 3376 std::swap (*gimple_assign_rhs2_ptr (stmt0), 3377 *gimple_assign_rhs3_ptr (stmt0)); 3378 update_stmt (stmt0); 3379 3380 elt1 = error_mark_node; 3381 any_changes = true; 3382 } 3383 } 3384 3385 if (any_changes) 3386 { 3387 operand_entry *oe; 3388 j = 0; 3389 FOR_EACH_VEC_ELT (*ops, i, oe) 3390 { 3391 if (oe->op == error_mark_node) 3392 continue; 3393 else if (i != j) 3394 (*ops)[j] = oe; 3395 j++; 3396 } 3397 ops->truncate (j); 3398 } 3399 3400 return any_changes; 3401 } 3402 3403 /* Return true if STMT is a cast like: 3404 <bb N>: 3405 ... 3406 _123 = (int) _234; 3407 3408 <bb M>: 3409 # _345 = PHI <_123(N), 1(...), 1(...)> 3410 where _234 has bool type, _123 has single use and 3411 bb N has a single successor M. This is commonly used in 3412 the last block of a range test. 3413 3414 Also Return true if STMT is tcc_compare like: 3415 <bb N>: 3416 ... 3417 _234 = a_2(D) == 2; 3418 3419 <bb M>: 3420 # _345 = PHI <_234(N), 1(...), 1(...)> 3421 _346 = (int) _345; 3422 where _234 has booltype, single use and 3423 bb N has a single successor M. This is commonly used in 3424 the last block of a range test. 
*/ 3425 3426 static bool 3427 final_range_test_p (gimple *stmt) 3428 { 3429 basic_block bb, rhs_bb, lhs_bb; 3430 edge e; 3431 tree lhs, rhs; 3432 use_operand_p use_p; 3433 gimple *use_stmt; 3434 3435 if (!gimple_assign_cast_p (stmt) 3436 && (!is_gimple_assign (stmt) 3437 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 3438 != tcc_comparison))) 3439 return false; 3440 bb = gimple_bb (stmt); 3441 if (!single_succ_p (bb)) 3442 return false; 3443 e = single_succ_edge (bb); 3444 if (e->flags & EDGE_COMPLEX) 3445 return false; 3446 3447 lhs = gimple_assign_lhs (stmt); 3448 rhs = gimple_assign_rhs1 (stmt); 3449 if (gimple_assign_cast_p (stmt) 3450 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 3451 || TREE_CODE (rhs) != SSA_NAME 3452 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)) 3453 return false; 3454 3455 if (!gimple_assign_cast_p (stmt) 3456 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)) 3457 return false; 3458 3459 /* Test whether lhs is consumed only by a PHI in the only successor bb. */ 3460 if (!single_imm_use (lhs, &use_p, &use_stmt)) 3461 return false; 3462 3463 if (gimple_code (use_stmt) != GIMPLE_PHI 3464 || gimple_bb (use_stmt) != e->dest) 3465 return false; 3466 3467 /* And that the rhs is defined in the same loop. */ 3468 if (gimple_assign_cast_p (stmt)) 3469 { 3470 if (TREE_CODE (rhs) != SSA_NAME 3471 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs))) 3472 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb)) 3473 return false; 3474 } 3475 else 3476 { 3477 if (TREE_CODE (lhs) != SSA_NAME 3478 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs))) 3479 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb)) 3480 return false; 3481 } 3482 3483 return true; 3484 } 3485 3486 /* Return true if BB is suitable basic block for inter-bb range test 3487 optimization. If BACKWARD is true, BB should be the only predecessor 3488 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine, 3489 or compared with to find a common basic block to which all conditions 3490 branch to if true resp. false. If BACKWARD is false, TEST_BB should 3491 be the only predecessor of BB. */ 3492 3493 static bool 3494 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb, 3495 bool backward) 3496 { 3497 edge_iterator ei, ei2; 3498 edge e, e2; 3499 gimple *stmt; 3500 gphi_iterator gsi; 3501 bool other_edge_seen = false; 3502 bool is_cond; 3503 3504 if (test_bb == bb) 3505 return false; 3506 /* Check last stmt first. */ 3507 stmt = last_stmt (bb); 3508 if (stmt == NULL 3509 || (gimple_code (stmt) != GIMPLE_COND 3510 && (backward || !final_range_test_p (stmt))) 3511 || gimple_visited_p (stmt) 3512 || stmt_could_throw_p (stmt) 3513 || *other_bb == bb) 3514 return false; 3515 is_cond = gimple_code (stmt) == GIMPLE_COND; 3516 if (is_cond) 3517 { 3518 /* If last stmt is GIMPLE_COND, verify that one of the succ edges 3519 goes to the next bb (if BACKWARD, it is TEST_BB), and the other 3520 to *OTHER_BB (if not set yet, try to find it out). 
*/ 3521 if (EDGE_COUNT (bb->succs) != 2) 3522 return false; 3523 FOR_EACH_EDGE (e, ei, bb->succs) 3524 { 3525 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 3526 return false; 3527 if (e->dest == test_bb) 3528 { 3529 if (backward) 3530 continue; 3531 else 3532 return false; 3533 } 3534 if (e->dest == bb) 3535 return false; 3536 if (*other_bb == NULL) 3537 { 3538 FOR_EACH_EDGE (e2, ei2, test_bb->succs) 3539 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 3540 return false; 3541 else if (e->dest == e2->dest) 3542 *other_bb = e->dest; 3543 if (*other_bb == NULL) 3544 return false; 3545 } 3546 if (e->dest == *other_bb) 3547 other_edge_seen = true; 3548 else if (backward) 3549 return false; 3550 } 3551 if (*other_bb == NULL || !other_edge_seen) 3552 return false; 3553 } 3554 else if (single_succ (bb) != *other_bb) 3555 return false; 3556 3557 /* Now check all PHIs of *OTHER_BB. */ 3558 e = find_edge (bb, *other_bb); 3559 e2 = find_edge (test_bb, *other_bb); 3560 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) 3561 { 3562 gphi *phi = gsi.phi (); 3563 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments 3564 corresponding to BB and TEST_BB predecessor must be the same. */ 3565 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx), 3566 gimple_phi_arg_def (phi, e2->dest_idx), 0)) 3567 { 3568 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND, 3569 one of the PHIs should have the lhs of the last stmt in 3570 that block as PHI arg and that PHI should have 0 or 1 3571 corresponding to it in all other range test basic blocks 3572 considered. */ 3573 if (!is_cond) 3574 { 3575 if (gimple_phi_arg_def (phi, e->dest_idx) 3576 == gimple_assign_lhs (stmt) 3577 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx)) 3578 || integer_onep (gimple_phi_arg_def (phi, 3579 e2->dest_idx)))) 3580 continue; 3581 } 3582 else 3583 { 3584 gimple *test_last = last_stmt (test_bb); 3585 if (gimple_code (test_last) != GIMPLE_COND 3586 && gimple_phi_arg_def (phi, e2->dest_idx) 3587 == gimple_assign_lhs (test_last) 3588 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx)) 3589 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx)))) 3590 continue; 3591 } 3592 3593 return false; 3594 } 3595 } 3596 return true; 3597 } 3598 3599 /* Return true if BB doesn't have side-effects that would disallow 3600 range test optimization, all SSA_NAMEs set in the bb are consumed 3601 in the bb and there are no PHIs. 
*/ 3602 3603 static bool 3604 no_side_effect_bb (basic_block bb) 3605 { 3606 gimple_stmt_iterator gsi; 3607 gimple *last; 3608 3609 if (!gimple_seq_empty_p (phi_nodes (bb))) 3610 return false; 3611 last = last_stmt (bb); 3612 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 3613 { 3614 gimple *stmt = gsi_stmt (gsi); 3615 tree lhs; 3616 imm_use_iterator imm_iter; 3617 use_operand_p use_p; 3618 3619 if (is_gimple_debug (stmt)) 3620 continue; 3621 if (gimple_has_side_effects (stmt)) 3622 return false; 3623 if (stmt == last) 3624 return true; 3625 if (!is_gimple_assign (stmt)) 3626 return false; 3627 lhs = gimple_assign_lhs (stmt); 3628 if (TREE_CODE (lhs) != SSA_NAME) 3629 return false; 3630 if (gimple_assign_rhs_could_trap_p (stmt)) 3631 return false; 3632 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) 3633 { 3634 gimple *use_stmt = USE_STMT (use_p); 3635 if (is_gimple_debug (use_stmt)) 3636 continue; 3637 if (gimple_bb (use_stmt) != bb) 3638 return false; 3639 } 3640 } 3641 return false; 3642 } 3643 3644 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, 3645 return true and fill in *OPS recursively. */ 3646 3647 static bool 3648 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops, 3649 struct loop *loop) 3650 { 3651 gimple *stmt = SSA_NAME_DEF_STMT (var); 3652 tree rhs[2]; 3653 int i; 3654 3655 if (!is_reassociable_op (stmt, code, loop)) 3656 return false; 3657 3658 rhs[0] = gimple_assign_rhs1 (stmt); 3659 rhs[1] = gimple_assign_rhs2 (stmt); 3660 gimple_set_visited (stmt, true); 3661 for (i = 0; i < 2; i++) 3662 if (TREE_CODE (rhs[i]) == SSA_NAME 3663 && !get_ops (rhs[i], code, ops, loop) 3664 && has_single_use (rhs[i])) 3665 { 3666 operand_entry *oe = operand_entry_pool.allocate (); 3667 3668 oe->op = rhs[i]; 3669 oe->rank = code; 3670 oe->id = 0; 3671 oe->count = 1; 3672 oe->stmt_to_insert = NULL; 3673 ops->safe_push (oe); 3674 } 3675 return true; 3676 } 3677 3678 /* Find the ops that were added by get_ops starting from VAR, see if 3679 they were changed during update_range_test and if yes, create new 3680 stmts. */ 3681 3682 static tree 3683 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops, 3684 unsigned int *pidx, struct loop *loop) 3685 { 3686 gimple *stmt = SSA_NAME_DEF_STMT (var); 3687 tree rhs[4]; 3688 int i; 3689 3690 if (!is_reassociable_op (stmt, code, loop)) 3691 return NULL; 3692 3693 rhs[0] = gimple_assign_rhs1 (stmt); 3694 rhs[1] = gimple_assign_rhs2 (stmt); 3695 rhs[2] = rhs[0]; 3696 rhs[3] = rhs[1]; 3697 for (i = 0; i < 2; i++) 3698 if (TREE_CODE (rhs[i]) == SSA_NAME) 3699 { 3700 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop); 3701 if (rhs[2 + i] == NULL_TREE) 3702 { 3703 if (has_single_use (rhs[i])) 3704 rhs[2 + i] = ops[(*pidx)++]->op; 3705 else 3706 rhs[2 + i] = rhs[i]; 3707 } 3708 } 3709 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1]) 3710 && (rhs[2] != rhs[1] || rhs[3] != rhs[0])) 3711 { 3712 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 3713 var = make_ssa_name (TREE_TYPE (var)); 3714 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt), 3715 rhs[2], rhs[3]); 3716 gimple_set_uid (g, gimple_uid (stmt)); 3717 gimple_set_visited (g, true); 3718 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3719 } 3720 return var; 3721 } 3722 3723 /* Structure to track the initial value passed to get_ops and 3724 the range in the ops vector for each basic block. 
*/ 3725 3726 struct inter_bb_range_test_entry 3727 { 3728 tree op; 3729 unsigned int first_idx, last_idx; 3730 }; 3731 3732 /* Inter-bb range test optimization. 3733 3734 Returns TRUE if a gimple conditional is optimized to a true/false, 3735 otherwise return FALSE. 3736 3737 This indicates to the caller that it should run a CFG cleanup pass 3738 once reassociation is completed. */ 3739 3740 static bool 3741 maybe_optimize_range_tests (gimple *stmt) 3742 { 3743 basic_block first_bb = gimple_bb (stmt); 3744 basic_block last_bb = first_bb; 3745 basic_block other_bb = NULL; 3746 basic_block bb; 3747 edge_iterator ei; 3748 edge e; 3749 auto_vec<operand_entry *> ops; 3750 auto_vec<inter_bb_range_test_entry> bbinfo; 3751 bool any_changes = false; 3752 bool cfg_cleanup_needed = false; 3753 3754 /* Consider only basic blocks that end with GIMPLE_COND or 3755 a cast statement satisfying final_range_test_p. All 3756 but the last bb in the first_bb .. last_bb range 3757 should end with GIMPLE_COND. */ 3758 if (gimple_code (stmt) == GIMPLE_COND) 3759 { 3760 if (EDGE_COUNT (first_bb->succs) != 2) 3761 return cfg_cleanup_needed; 3762 } 3763 else if (final_range_test_p (stmt)) 3764 other_bb = single_succ (first_bb); 3765 else 3766 return cfg_cleanup_needed; 3767 3768 if (stmt_could_throw_p (stmt)) 3769 return cfg_cleanup_needed; 3770 3771 /* As relative ordering of post-dominator sons isn't fixed, 3772 maybe_optimize_range_tests can be called first on any 3773 bb in the range we want to optimize. So, start searching 3774 backwards, if first_bb can be set to a predecessor. */ 3775 while (single_pred_p (first_bb)) 3776 { 3777 basic_block pred_bb = single_pred (first_bb); 3778 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true)) 3779 break; 3780 if (!no_side_effect_bb (first_bb)) 3781 break; 3782 first_bb = pred_bb; 3783 } 3784 /* If first_bb is last_bb, other_bb hasn't been computed yet. 3785 Before starting forward search in last_bb successors, find 3786 out the other_bb. */ 3787 if (first_bb == last_bb) 3788 { 3789 other_bb = NULL; 3790 /* As non-GIMPLE_COND last stmt always terminates the range, 3791 if forward search didn't discover anything, just give up. */ 3792 if (gimple_code (stmt) != GIMPLE_COND) 3793 return cfg_cleanup_needed; 3794 /* Look at both successors. Either it ends with a GIMPLE_COND 3795 and satisfies suitable_cond_bb, or ends with a cast and 3796 other_bb is that cast's successor. */ 3797 FOR_EACH_EDGE (e, ei, first_bb->succs) 3798 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)) 3799 || e->dest == first_bb) 3800 return cfg_cleanup_needed; 3801 else if (single_pred_p (e->dest)) 3802 { 3803 stmt = last_stmt (e->dest); 3804 if (stmt 3805 && gimple_code (stmt) == GIMPLE_COND 3806 && EDGE_COUNT (e->dest->succs) == 2) 3807 { 3808 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true)) 3809 break; 3810 else 3811 other_bb = NULL; 3812 } 3813 else if (stmt 3814 && final_range_test_p (stmt) 3815 && find_edge (first_bb, single_succ (e->dest))) 3816 { 3817 other_bb = single_succ (e->dest); 3818 if (other_bb == first_bb) 3819 other_bb = NULL; 3820 } 3821 } 3822 if (other_bb == NULL) 3823 return cfg_cleanup_needed; 3824 } 3825 /* Now do the forward search, moving last_bb to successor bbs 3826 that aren't other_bb. 
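     A small made-up CFG for orientation:
       bb1:  if (a_5 < 0)  goto other_bb;  else goto bb2;
       bb2:  if (a_5 > 9)  goto other_bb;  else goto bb3;
       bb3:  _7 = b_6 != 0;   (feeds a PHI in other_bb)
     The backward walk above may have set first_bb to bb1; this forward
     walk then advances last_bb from bb1 over bb2 to bb3, as long as each
     block is free of side-effects and keeps branching to the same
     other_bb.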
*/ 3827 while (EDGE_COUNT (last_bb->succs) == 2) 3828 { 3829 FOR_EACH_EDGE (e, ei, last_bb->succs) 3830 if (e->dest != other_bb) 3831 break; 3832 if (e == NULL) 3833 break; 3834 if (!single_pred_p (e->dest)) 3835 break; 3836 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false)) 3837 break; 3838 if (!no_side_effect_bb (e->dest)) 3839 break; 3840 last_bb = e->dest; 3841 } 3842 if (first_bb == last_bb) 3843 return cfg_cleanup_needed; 3844 /* Here basic blocks first_bb through last_bb's predecessor 3845 end with GIMPLE_COND, all of them have one of the edges to 3846 other_bb and another to another block in the range, 3847 all blocks except first_bb don't have side-effects and 3848 last_bb ends with either GIMPLE_COND, or cast satisfying 3849 final_range_test_p. */ 3850 for (bb = last_bb; ; bb = single_pred (bb)) 3851 { 3852 enum tree_code code; 3853 tree lhs, rhs; 3854 inter_bb_range_test_entry bb_ent; 3855 3856 bb_ent.op = NULL_TREE; 3857 bb_ent.first_idx = ops.length (); 3858 bb_ent.last_idx = bb_ent.first_idx; 3859 e = find_edge (bb, other_bb); 3860 stmt = last_stmt (bb); 3861 gimple_set_visited (stmt, true); 3862 if (gimple_code (stmt) != GIMPLE_COND) 3863 { 3864 use_operand_p use_p; 3865 gimple *phi; 3866 edge e2; 3867 unsigned int d; 3868 3869 lhs = gimple_assign_lhs (stmt); 3870 rhs = gimple_assign_rhs1 (stmt); 3871 gcc_assert (bb == last_bb); 3872 3873 /* stmt is 3874 _123 = (int) _234; 3875 OR 3876 _234 = a_2(D) == 2; 3877 3878 followed by: 3879 <bb M>: 3880 # _345 = PHI <_123(N), 1(...), 1(...)> 3881 3882 or 0 instead of 1. If it is 0, the _234 3883 range test is anded together with all the 3884 other range tests, if it is 1, it is ored with 3885 them. */ 3886 single_imm_use (lhs, &use_p, &phi); 3887 gcc_assert (gimple_code (phi) == GIMPLE_PHI); 3888 e2 = find_edge (first_bb, other_bb); 3889 d = e2->dest_idx; 3890 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs); 3891 if (integer_zerop (gimple_phi_arg_def (phi, d))) 3892 code = BIT_AND_EXPR; 3893 else 3894 { 3895 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d))); 3896 code = BIT_IOR_EXPR; 3897 } 3898 3899 /* If _234 SSA_NAME_DEF_STMT is 3900 _234 = _567 | _789; 3901 (or &, corresponding to 1/0 in the phi arguments, 3902 push into ops the individual range test arguments 3903 of the bitwise or resp. and, recursively. */ 3904 if (TREE_CODE (rhs) == SSA_NAME 3905 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 3906 != tcc_comparison) 3907 && !get_ops (rhs, code, &ops, 3908 loop_containing_stmt (stmt)) 3909 && has_single_use (rhs)) 3910 { 3911 /* Otherwise, push the _234 range test itself. */ 3912 operand_entry *oe = operand_entry_pool.allocate (); 3913 3914 oe->op = rhs; 3915 oe->rank = code; 3916 oe->id = 0; 3917 oe->count = 1; 3918 oe->stmt_to_insert = NULL; 3919 ops.safe_push (oe); 3920 bb_ent.last_idx++; 3921 bb_ent.op = rhs; 3922 } 3923 else if (is_gimple_assign (stmt) 3924 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 3925 == tcc_comparison) 3926 && !get_ops (lhs, code, &ops, 3927 loop_containing_stmt (stmt)) 3928 && has_single_use (lhs)) 3929 { 3930 operand_entry *oe = operand_entry_pool.allocate (); 3931 oe->op = lhs; 3932 oe->rank = code; 3933 oe->id = 0; 3934 oe->count = 1; 3935 ops.safe_push (oe); 3936 bb_ent.last_idx++; 3937 bb_ent.op = lhs; 3938 } 3939 else 3940 { 3941 bb_ent.last_idx = ops.length (); 3942 bb_ent.op = rhs; 3943 } 3944 bbinfo.safe_push (bb_ent); 3945 continue; 3946 } 3947 /* Otherwise stmt is GIMPLE_COND. 
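   For example (SSA names purely illustrative): if the block ends in
   if (x_5 != 0) where x_5 is boolean, the rhs is boolean_false_node and
   the true edge leads to other_bb, then the BIT_IOR_EXPR operands of x_5
   (say x_5 = a_1 | b_2) are pushed individually; when that decomposition
   is not possible, the condition itself is recorded with oe->op == NULL
   and oe->id set to the basic block's index.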
*/ 3948 code = gimple_cond_code (stmt); 3949 lhs = gimple_cond_lhs (stmt); 3950 rhs = gimple_cond_rhs (stmt); 3951 if (TREE_CODE (lhs) == SSA_NAME 3952 && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 3953 && ((code != EQ_EXPR && code != NE_EXPR) 3954 || rhs != boolean_false_node 3955 /* Either push into ops the individual bitwise 3956 or resp. and operands, depending on which 3957 edge is other_bb. */ 3958 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0) 3959 ^ (code == EQ_EXPR)) 3960 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops, 3961 loop_containing_stmt (stmt)))) 3962 { 3963 /* Or push the GIMPLE_COND stmt itself. */ 3964 operand_entry *oe = operand_entry_pool.allocate (); 3965 3966 oe->op = NULL; 3967 oe->rank = (e->flags & EDGE_TRUE_VALUE) 3968 ? BIT_IOR_EXPR : BIT_AND_EXPR; 3969 /* oe->op = NULL signs that there is no SSA_NAME 3970 for the range test, and oe->id instead is the 3971 basic block number, at which's end the GIMPLE_COND 3972 is. */ 3973 oe->id = bb->index; 3974 oe->count = 1; 3975 oe->stmt_to_insert = NULL; 3976 ops.safe_push (oe); 3977 bb_ent.op = NULL; 3978 bb_ent.last_idx++; 3979 } 3980 else if (ops.length () > bb_ent.first_idx) 3981 { 3982 bb_ent.op = lhs; 3983 bb_ent.last_idx = ops.length (); 3984 } 3985 bbinfo.safe_push (bb_ent); 3986 if (bb == first_bb) 3987 break; 3988 } 3989 if (ops.length () > 1) 3990 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb); 3991 if (any_changes) 3992 { 3993 unsigned int idx, max_idx = 0; 3994 /* update_ops relies on has_single_use predicates returning the 3995 same values as it did during get_ops earlier. Additionally it 3996 never removes statements, only adds new ones and it should walk 3997 from the single imm use and check the predicate already before 3998 making those changes. 3999 On the other side, the handling of GIMPLE_COND directly can turn 4000 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so 4001 it needs to be done in a separate loop afterwards. */ 4002 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) 4003 { 4004 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx 4005 && bbinfo[idx].op != NULL_TREE) 4006 { 4007 tree new_op; 4008 4009 max_idx = idx; 4010 stmt = last_stmt (bb); 4011 new_op = update_ops (bbinfo[idx].op, 4012 (enum tree_code) 4013 ops[bbinfo[idx].first_idx]->rank, 4014 ops, &bbinfo[idx].first_idx, 4015 loop_containing_stmt (stmt)); 4016 if (new_op == NULL_TREE) 4017 { 4018 gcc_assert (bb == last_bb); 4019 new_op = ops[bbinfo[idx].first_idx++]->op; 4020 } 4021 if (bbinfo[idx].op != new_op) 4022 { 4023 imm_use_iterator iter; 4024 use_operand_p use_p; 4025 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL; 4026 4027 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op) 4028 if (is_gimple_debug (use_stmt)) 4029 continue; 4030 else if (gimple_code (use_stmt) == GIMPLE_COND 4031 || gimple_code (use_stmt) == GIMPLE_PHI) 4032 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 4033 SET_USE (use_p, new_op); 4034 else if ((is_gimple_assign (use_stmt) 4035 && (TREE_CODE_CLASS 4036 (gimple_assign_rhs_code (use_stmt)) 4037 == tcc_comparison))) 4038 cast_or_tcc_cmp_stmt = use_stmt; 4039 else if (gimple_assign_cast_p (use_stmt)) 4040 cast_or_tcc_cmp_stmt = use_stmt; 4041 else 4042 gcc_unreachable (); 4043 4044 if (cast_or_tcc_cmp_stmt) 4045 { 4046 gcc_assert (bb == last_bb); 4047 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt); 4048 tree new_lhs = make_ssa_name (TREE_TYPE (lhs)); 4049 enum tree_code rhs_code 4050 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt) 4051 ? 
gimple_assign_rhs_code (cast_or_tcc_cmp_stmt) 4052 : CONVERT_EXPR; 4053 gassign *g; 4054 if (is_gimple_min_invariant (new_op)) 4055 { 4056 new_op = fold_convert (TREE_TYPE (lhs), new_op); 4057 g = gimple_build_assign (new_lhs, new_op); 4058 } 4059 else 4060 g = gimple_build_assign (new_lhs, rhs_code, new_op); 4061 gimple_stmt_iterator gsi 4062 = gsi_for_stmt (cast_or_tcc_cmp_stmt); 4063 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt)); 4064 gimple_set_visited (g, true); 4065 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4066 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) 4067 if (is_gimple_debug (use_stmt)) 4068 continue; 4069 else if (gimple_code (use_stmt) == GIMPLE_COND 4070 || gimple_code (use_stmt) == GIMPLE_PHI) 4071 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 4072 SET_USE (use_p, new_lhs); 4073 else 4074 gcc_unreachable (); 4075 } 4076 } 4077 } 4078 if (bb == first_bb) 4079 break; 4080 } 4081 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) 4082 { 4083 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx 4084 && bbinfo[idx].op == NULL_TREE 4085 && ops[bbinfo[idx].first_idx]->op != NULL_TREE) 4086 { 4087 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb)); 4088 4089 if (idx > max_idx) 4090 max_idx = idx; 4091 4092 /* If we collapse the conditional to a true/false 4093 condition, then bubble that knowledge up to our caller. */ 4094 if (integer_zerop (ops[bbinfo[idx].first_idx]->op)) 4095 { 4096 gimple_cond_make_false (cond_stmt); 4097 cfg_cleanup_needed = true; 4098 } 4099 else if (integer_onep (ops[bbinfo[idx].first_idx]->op)) 4100 { 4101 gimple_cond_make_true (cond_stmt); 4102 cfg_cleanup_needed = true; 4103 } 4104 else 4105 { 4106 gimple_cond_set_code (cond_stmt, NE_EXPR); 4107 gimple_cond_set_lhs (cond_stmt, 4108 ops[bbinfo[idx].first_idx]->op); 4109 gimple_cond_set_rhs (cond_stmt, boolean_false_node); 4110 } 4111 update_stmt (cond_stmt); 4112 } 4113 if (bb == first_bb) 4114 break; 4115 } 4116 4117 /* The above changes could result in basic blocks after the first 4118 modified one, up to and including last_bb, to be executed even if 4119 they would not be in the original program. If the value ranges of 4120 assignment lhs' in those bbs were dependent on the conditions 4121 guarding those basic blocks which now can change, the VRs might 4122 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs 4123 are only used within the same bb, it should be not a big deal if 4124 we just reset all the VRs in those bbs. See PR68671. */ 4125 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++) 4126 reset_flow_sensitive_info_in_bb (bb); 4127 } 4128 return cfg_cleanup_needed; 4129 } 4130 4131 /* Return true if OPERAND is defined by a PHI node which uses the LHS 4132 of STMT in it's operands. This is also known as a "destructive 4133 update" operation. */ 4134 4135 static bool 4136 is_phi_for_stmt (gimple *stmt, tree operand) 4137 { 4138 gimple *def_stmt; 4139 gphi *def_phi; 4140 tree lhs; 4141 use_operand_p arg_p; 4142 ssa_op_iter i; 4143 4144 if (TREE_CODE (operand) != SSA_NAME) 4145 return false; 4146 4147 lhs = gimple_assign_lhs (stmt); 4148 4149 def_stmt = SSA_NAME_DEF_STMT (operand); 4150 def_phi = dyn_cast <gphi *> (def_stmt); 4151 if (!def_phi) 4152 return false; 4153 4154 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE) 4155 if (lhs == USE_FROM_PTR (arg_p)) 4156 return true; 4157 return false; 4158 } 4159 4160 /* Remove def stmt of VAR if VAR has zero uses and recurse 4161 on rhs1 operand if so. 
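   For instance (SSA names purely illustrative), given the visited chain
   t1_1 = a_2 + b_3; t2_4 = t1_1 + c_5; once t2_4 ends up with zero uses
   its definition is removed, and if that in turn leaves t1_1 unused, its
   definition is removed as well.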
*/ 4162 4163 static void 4164 remove_visited_stmt_chain (tree var) 4165 { 4166 gimple *stmt; 4167 gimple_stmt_iterator gsi; 4168 4169 while (1) 4170 { 4171 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) 4172 return; 4173 stmt = SSA_NAME_DEF_STMT (var); 4174 if (is_gimple_assign (stmt) && gimple_visited_p (stmt)) 4175 { 4176 var = gimple_assign_rhs1 (stmt); 4177 gsi = gsi_for_stmt (stmt); 4178 reassoc_remove_stmt (&gsi); 4179 release_defs (stmt); 4180 } 4181 else 4182 return; 4183 } 4184 } 4185 4186 /* This function checks three consecutive operands in 4187 the operand vector OPS starting at OPINDEX and 4188 swaps two operands if it is profitable for the binary operation 4189 consuming the OPINDEX + 1 and OPINDEX + 2 operands. 4190 4191 We pair ops with the same rank if possible. 4192 4193 The alternative we try is to see if STMT is a destructive 4194 update style statement, which is like: 4195 b = phi (a, ...) 4196 a = c + b; 4197 In that case, we want to use the destructive update form to 4198 expose the possible vectorizer sum reduction opportunity. 4199 In that case, the third operand will be the phi node. This 4200 check is not performed if STMT is null. 4201 4202 We could, of course, try to be better as noted above, and do a 4203 lot of work to try to find these opportunities in >3 operand 4204 cases, but it is unlikely to be worth it. */ 4205 4206 static void 4207 swap_ops_for_binary_stmt (vec<operand_entry *> ops, 4208 unsigned int opindex, gimple *stmt) 4209 { 4210 operand_entry *oe1, *oe2, *oe3; 4211 4212 oe1 = ops[opindex]; 4213 oe2 = ops[opindex + 1]; 4214 oe3 = ops[opindex + 2]; 4215 4216 if ((oe1->rank == oe2->rank 4217 && oe2->rank != oe3->rank) 4218 || (stmt && is_phi_for_stmt (stmt, oe3->op) 4219 && !is_phi_for_stmt (stmt, oe1->op) 4220 && !is_phi_for_stmt (stmt, oe2->op))) 4221 std::swap (*oe1, *oe3); 4222 else if ((oe1->rank == oe3->rank 4223 && oe2->rank != oe3->rank) 4224 || (stmt && is_phi_for_stmt (stmt, oe2->op) 4225 && !is_phi_for_stmt (stmt, oe1->op) 4226 && !is_phi_for_stmt (stmt, oe3->op))) 4227 std::swap (*oe1, *oe2); 4228 } 4229 4230 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those 4231 two definitions, otherwise return STMT. */ 4232 4233 static inline gimple * 4234 find_insert_point (gimple *stmt, tree rhs1, tree rhs2) 4235 { 4236 if (TREE_CODE (rhs1) == SSA_NAME 4237 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1))) 4238 stmt = SSA_NAME_DEF_STMT (rhs1); 4239 if (TREE_CODE (rhs2) == SSA_NAME 4240 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2))) 4241 stmt = SSA_NAME_DEF_STMT (rhs2); 4242 return stmt; 4243 } 4244 4245 /* If the stmt that defines operand has to be inserted, insert it 4246 before the use. */ 4247 static void 4248 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert) 4249 { 4250 gcc_assert (is_gimple_assign (stmt_to_insert)); 4251 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert); 4252 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert); 4253 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2); 4254 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point); 4255 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point)); 4256 4257 /* If the insert point is not stmt, then insert_point would be 4258 the point where operand rhs1 or rhs2 is defined. In this case, 4259 stmt_to_insert has to be inserted afterwards. This would 4260 only happen when the stmt insertion point is flexible.
*/ 4261 if (stmt == insert_point) 4262 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT); 4263 else 4264 insert_stmt_after (stmt_to_insert, insert_point); 4265 } 4266 4267 4268 /* Recursively rewrite our linearized statements so that the operators 4269 match those in OPS[OPINDEX], putting the computation in rank 4270 order. Return new lhs. 4271 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in 4272 the current stmt and during recursive invocations. 4273 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in 4274 recursive invocations. */ 4275 4276 static tree 4277 rewrite_expr_tree (gimple *stmt, unsigned int opindex, 4278 vec<operand_entry *> ops, bool changed, bool next_changed) 4279 { 4280 tree rhs1 = gimple_assign_rhs1 (stmt); 4281 tree rhs2 = gimple_assign_rhs2 (stmt); 4282 tree lhs = gimple_assign_lhs (stmt); 4283 operand_entry *oe; 4284 4285 /* The final recursion case for this function is that you have 4286 exactly two operations left. 4287 If we had exactly one op in the entire list to start with, we 4288 would have never called this function, and the tail recursion 4289 rewrites them one at a time. */ 4290 if (opindex + 2 == ops.length ()) 4291 { 4292 operand_entry *oe1, *oe2; 4293 4294 oe1 = ops[opindex]; 4295 oe2 = ops[opindex + 1]; 4296 4297 if (rhs1 != oe1->op || rhs2 != oe2->op) 4298 { 4299 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 4300 unsigned int uid = gimple_uid (stmt); 4301 4302 if (dump_file && (dump_flags & TDF_DETAILS)) 4303 { 4304 fprintf (dump_file, "Transforming "); 4305 print_gimple_stmt (dump_file, stmt, 0, 0); 4306 } 4307 4308 /* If the stmt that defines operand has to be inserted, insert it 4309 before the use. */ 4310 if (oe1->stmt_to_insert) 4311 insert_stmt_before_use (stmt, oe1->stmt_to_insert); 4312 if (oe2->stmt_to_insert) 4313 insert_stmt_before_use (stmt, oe2->stmt_to_insert); 4314 /* Even when changed is false, reassociation could have e.g. removed 4315 some redundant operations, so unless we are just swapping the 4316 arguments or unless there is no change at all (then we just 4317 return lhs), force creation of a new SSA_NAME. */ 4318 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex)) 4319 { 4320 gimple *insert_point 4321 = find_insert_point (stmt, oe1->op, oe2->op); 4322 lhs = make_ssa_name (TREE_TYPE (lhs)); 4323 stmt 4324 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt), 4325 oe1->op, oe2->op); 4326 gimple_set_uid (stmt, uid); 4327 gimple_set_visited (stmt, true); 4328 if (insert_point == gsi_stmt (gsi)) 4329 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 4330 else 4331 insert_stmt_after (stmt, insert_point); 4332 } 4333 else 4334 { 4335 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op) 4336 == stmt); 4337 gimple_assign_set_rhs1 (stmt, oe1->op); 4338 gimple_assign_set_rhs2 (stmt, oe2->op); 4339 update_stmt (stmt); 4340 } 4341 4342 if (rhs1 != oe1->op && rhs1 != oe2->op) 4343 remove_visited_stmt_chain (rhs1); 4344 4345 if (dump_file && (dump_flags & TDF_DETAILS)) 4346 { 4347 fprintf (dump_file, " into "); 4348 print_gimple_stmt (dump_file, stmt, 0, 0); 4349 } 4350 } 4351 return lhs; 4352 } 4353 4354 /* If we hit here, we should have 3 or more ops left. */ 4355 gcc_assert (opindex + 2 < ops.length ()); 4356 4357 /* Rewrite the next operator. */ 4358 oe = ops[opindex]; 4359 4360 /* If the stmt that defines operand has to be inserted, insert it 4361 before the use. 
*/ 4362 if (oe->stmt_to_insert) 4363 insert_stmt_before_use (stmt, oe->stmt_to_insert); 4364 4365 /* Recurse on the LHS of the binary operator, which is guaranteed to 4366 be the non-leaf side. */ 4367 tree new_rhs1 4368 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, 4369 changed || oe->op != rhs2 || next_changed, 4370 false); 4371 4372 if (oe->op != rhs2 || new_rhs1 != rhs1) 4373 { 4374 if (dump_file && (dump_flags & TDF_DETAILS)) 4375 { 4376 fprintf (dump_file, "Transforming "); 4377 print_gimple_stmt (dump_file, stmt, 0, 0); 4378 } 4379 4380 /* If changed is false, this is either opindex == 0 4381 or all outer rhs2's were equal to corresponding oe->op, 4382 and powi_result is NULL. 4383 That means lhs is equivalent before and after reassociation. 4384 Otherwise ensure the old lhs SSA_NAME is not reused and 4385 create a new stmt as well, so that any debug stmts will be 4386 properly adjusted. */ 4387 if (changed) 4388 { 4389 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 4390 unsigned int uid = gimple_uid (stmt); 4391 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op); 4392 4393 lhs = make_ssa_name (TREE_TYPE (lhs)); 4394 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt), 4395 new_rhs1, oe->op); 4396 gimple_set_uid (stmt, uid); 4397 gimple_set_visited (stmt, true); 4398 if (insert_point == gsi_stmt (gsi)) 4399 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 4400 else 4401 insert_stmt_after (stmt, insert_point); 4402 } 4403 else 4404 { 4405 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op) 4406 == stmt); 4407 gimple_assign_set_rhs1 (stmt, new_rhs1); 4408 gimple_assign_set_rhs2 (stmt, oe->op); 4409 update_stmt (stmt); 4410 } 4411 4412 if (dump_file && (dump_flags & TDF_DETAILS)) 4413 { 4414 fprintf (dump_file, " into "); 4415 print_gimple_stmt (dump_file, stmt, 0, 0); 4416 } 4417 } 4418 return lhs; 4419 } 4420 4421 /* Find out how many cycles we need to compute the statement chain. 4422 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the 4423 maximum number of independent statements we may execute per cycle. */ 4424 4425 static int 4426 get_required_cycles (int ops_num, int cpu_width) 4427 { 4428 int res; 4429 int elog; 4430 unsigned int rest; 4431 4432 /* While we have more than 2 * cpu_width operands 4433 we may reduce the number of operands by cpu_width 4434 per cycle. */ 4435 res = ops_num / (2 * cpu_width); 4436 4437 /* The remaining operand count may be halved each cycle 4438 until we have only one operand. */ 4439 rest = (unsigned)(ops_num - res * cpu_width); 4440 elog = exact_log2 (rest); 4441 if (elog >= 0) 4442 res += elog; 4443 else 4444 res += floor_log2 (rest) + 1; 4445 4446 return res; 4447 } 4448 4449 /* Returns an optimal number of registers to use for computation of 4450 the given statements. */ 4451 4452 static int 4453 get_reassociation_width (int ops_num, enum tree_code opc, 4454 machine_mode mode) 4455 { 4456 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH); 4457 int width; 4458 int width_min; 4459 int cycles_best; 4460 4461 if (param_width > 0) 4462 width = param_width; 4463 else 4464 width = targetm.sched.reassociation_width (opc, mode); 4465 4466 if (width == 1) 4467 return width; 4468 4469 /* Get the minimal time required for sequence computation. */ 4470 cycles_best = get_required_cycles (ops_num, width); 4471 4472 /* Check if we may use less width and still compute the sequence in 4473 the same time. That will allow us to reduce register usage.
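   As a worked example (numbers purely illustrative): with 5 operands and
   a width of 4 reported by the target, get_required_cycles returns 3
   cycles; a width of 2 still needs only 3 cycles, whereas a width of 1
   needs 4, so the search below would settle on a width of 2.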
4474 get_required_cycles is monotonically increasing with lower width 4475 so we can perform a binary search for the minimal width that still 4476 results in the optimal cycle count. */ 4477 width_min = 1; 4478 while (width > width_min) 4479 { 4480 int width_mid = (width + width_min) / 2; 4481 4482 if (get_required_cycles (ops_num, width_mid) == cycles_best) 4483 width = width_mid; 4484 else if (width_min < width_mid) 4485 width_min = width_mid; 4486 else 4487 break; 4488 } 4489 4490 return width; 4491 } 4492 4493 /* Recursively rewrite our linearized statements so that the operators 4494 match those in OPS[OPINDEX], putting the computation in rank 4495 order and trying to allow operations to be executed in 4496 parallel. */ 4497 4498 static void 4499 rewrite_expr_tree_parallel (gassign *stmt, int width, 4500 vec<operand_entry *> ops) 4501 { 4502 enum tree_code opcode = gimple_assign_rhs_code (stmt); 4503 int op_num = ops.length (); 4504 gcc_assert (op_num > 0); 4505 int stmt_num = op_num - 1; 4506 gimple **stmts = XALLOCAVEC (gimple *, stmt_num); 4507 int op_index = op_num - 1; 4508 int stmt_index = 0; 4509 int ready_stmts_end = 0; 4510 int i = 0; 4511 gimple *stmt1 = NULL, *stmt2 = NULL; 4512 tree last_rhs1 = gimple_assign_rhs1 (stmt); 4513 4514 /* We start expression rewriting from the top statements. 4515 So, in this loop we create a full list of statements 4516 we will work with. */ 4517 stmts[stmt_num - 1] = stmt; 4518 for (i = stmt_num - 2; i >= 0; i--) 4519 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1])); 4520 4521 for (i = 0; i < stmt_num; i++) 4522 { 4523 tree op1, op2; 4524 4525 /* Determine whether we should use results of 4526 already handled statements or not. */ 4527 if (ready_stmts_end == 0 4528 && (i - stmt_index >= width || op_index < 1)) 4529 ready_stmts_end = i; 4530 4531 /* Now we choose operands for the next statement. Non zero 4532 value in ready_stmts_end means here that we should use 4533 the result of already generated statements as new operand. */ 4534 if (ready_stmts_end > 0) 4535 { 4536 op1 = gimple_assign_lhs (stmts[stmt_index++]); 4537 if (ready_stmts_end > stmt_index) 4538 op2 = gimple_assign_lhs (stmts[stmt_index++]); 4539 else if (op_index >= 0) 4540 { 4541 operand_entry *oe = ops[op_index--]; 4542 stmt2 = oe->stmt_to_insert; 4543 op2 = oe->op; 4544 } 4545 else 4546 { 4547 gcc_assert (stmt_index < i); 4548 op2 = gimple_assign_lhs (stmts[stmt_index++]); 4549 } 4550 4551 if (stmt_index >= ready_stmts_end) 4552 ready_stmts_end = 0; 4553 } 4554 else 4555 { 4556 if (op_index > 1) 4557 swap_ops_for_binary_stmt (ops, op_index - 2, NULL); 4558 operand_entry *oe2 = ops[op_index--]; 4559 operand_entry *oe1 = ops[op_index--]; 4560 op2 = oe2->op; 4561 stmt2 = oe2->stmt_to_insert; 4562 op1 = oe1->op; 4563 stmt1 = oe1->stmt_to_insert; 4564 } 4565 4566 /* If we emit the last statement then we should put 4567 operands into the last statement. It will also 4568 break the loop. */ 4569 if (op_index < 0 && stmt_index == i) 4570 i = stmt_num - 1; 4571 4572 if (dump_file && (dump_flags & TDF_DETAILS)) 4573 { 4574 fprintf (dump_file, "Transforming "); 4575 print_gimple_stmt (dump_file, stmts[i], 0, 0); 4576 } 4577 4578 /* If the stmt that defines operand has to be inserted, insert it 4579 before the use. */ 4580 if (stmt1) 4581 insert_stmt_before_use (stmts[i], stmt1); 4582 if (stmt2) 4583 insert_stmt_before_use (stmts[i], stmt2); 4584 stmt1 = stmt2 = NULL; 4585 4586 /* We keep original statement only for the last one. All 4587 others are recreated. 
*/ 4588 if (i == stmt_num - 1) 4589 { 4590 gimple_assign_set_rhs1 (stmts[i], op1); 4591 gimple_assign_set_rhs2 (stmts[i], op2); 4592 update_stmt (stmts[i]); 4593 } 4594 else 4595 { 4596 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); 4597 gimple_set_visited (stmts[i], true); 4598 } 4599 if (dump_file && (dump_flags & TDF_DETAILS)) 4600 { 4601 fprintf (dump_file, " into "); 4602 print_gimple_stmt (dump_file, stmts[i], 0, 0); 4603 } 4604 } 4605 4606 remove_visited_stmt_chain (last_rhs1); 4607 } 4608 4609 /* Transform STMT, which is really (A +B) + (C + D) into the left 4610 linear form, ((A+B)+C)+D. 4611 Recurse on D if necessary. */ 4612 4613 static void 4614 linearize_expr (gimple *stmt) 4615 { 4616 gimple_stmt_iterator gsi; 4617 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); 4618 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 4619 gimple *oldbinrhs = binrhs; 4620 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 4621 gimple *newbinrhs = NULL; 4622 struct loop *loop = loop_containing_stmt (stmt); 4623 tree lhs = gimple_assign_lhs (stmt); 4624 4625 gcc_assert (is_reassociable_op (binlhs, rhscode, loop) 4626 && is_reassociable_op (binrhs, rhscode, loop)); 4627 4628 gsi = gsi_for_stmt (stmt); 4629 4630 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs)); 4631 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)), 4632 gimple_assign_rhs_code (binrhs), 4633 gimple_assign_lhs (binlhs), 4634 gimple_assign_rhs2 (binrhs)); 4635 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs)); 4636 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT); 4637 gimple_set_uid (binrhs, gimple_uid (stmt)); 4638 4639 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) 4640 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 4641 4642 if (dump_file && (dump_flags & TDF_DETAILS)) 4643 { 4644 fprintf (dump_file, "Linearized: "); 4645 print_gimple_stmt (dump_file, stmt, 0, 0); 4646 } 4647 4648 reassociate_stats.linearized++; 4649 update_stmt (stmt); 4650 4651 gsi = gsi_for_stmt (oldbinrhs); 4652 reassoc_remove_stmt (&gsi); 4653 release_defs (oldbinrhs); 4654 4655 gimple_set_visited (stmt, true); 4656 gimple_set_visited (binlhs, true); 4657 gimple_set_visited (binrhs, true); 4658 4659 /* Tail recurse on the new rhs if it still needs reassociation. */ 4660 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop)) 4661 /* ??? This should probably be linearize_expr (newbinrhs) but I don't 4662 want to change the algorithm while converting to tuples. */ 4663 linearize_expr (stmt); 4664 } 4665 4666 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return 4667 it. Otherwise, return NULL. */ 4668 4669 static gimple * 4670 get_single_immediate_use (tree lhs) 4671 { 4672 use_operand_p immuse; 4673 gimple *immusestmt; 4674 4675 if (TREE_CODE (lhs) == SSA_NAME 4676 && single_imm_use (lhs, &immuse, &immusestmt) 4677 && is_gimple_assign (immusestmt)) 4678 return immusestmt; 4679 4680 return NULL; 4681 } 4682 4683 /* Recursively negate the value of TONEGATE, and return the SSA_NAME 4684 representing the negated value. Insertions of any necessary 4685 instructions go before GSI. 4686 This function is recursive in that, if you hand it "a_5" as the 4687 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will 4688 transform b_3 + b_4 into a_5 = -b_3 + -b_4. 
*/ 4689 4690 static tree 4691 negate_value (tree tonegate, gimple_stmt_iterator *gsip) 4692 { 4693 gimple *negatedefstmt = NULL; 4694 tree resultofnegate; 4695 gimple_stmt_iterator gsi; 4696 unsigned int uid; 4697 4698 /* If we are trying to negate a name, defined by an add, negate the 4699 add operands instead. */ 4700 if (TREE_CODE (tonegate) == SSA_NAME) 4701 negatedefstmt = SSA_NAME_DEF_STMT (tonegate); 4702 if (TREE_CODE (tonegate) == SSA_NAME 4703 && is_gimple_assign (negatedefstmt) 4704 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME 4705 && has_single_use (gimple_assign_lhs (negatedefstmt)) 4706 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR) 4707 { 4708 tree rhs1 = gimple_assign_rhs1 (negatedefstmt); 4709 tree rhs2 = gimple_assign_rhs2 (negatedefstmt); 4710 tree lhs = gimple_assign_lhs (negatedefstmt); 4711 gimple *g; 4712 4713 gsi = gsi_for_stmt (negatedefstmt); 4714 rhs1 = negate_value (rhs1, &gsi); 4715 4716 gsi = gsi_for_stmt (negatedefstmt); 4717 rhs2 = negate_value (rhs2, &gsi); 4718 4719 gsi = gsi_for_stmt (negatedefstmt); 4720 lhs = make_ssa_name (TREE_TYPE (lhs)); 4721 gimple_set_visited (negatedefstmt, true); 4722 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2); 4723 gimple_set_uid (g, gimple_uid (negatedefstmt)); 4724 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4725 return lhs; 4726 } 4727 4728 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate); 4729 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true, 4730 NULL_TREE, true, GSI_SAME_STMT); 4731 gsi = *gsip; 4732 uid = gimple_uid (gsi_stmt (gsi)); 4733 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi)) 4734 { 4735 gimple *stmt = gsi_stmt (gsi); 4736 if (gimple_uid (stmt) != 0) 4737 break; 4738 gimple_set_uid (stmt, uid); 4739 } 4740 return resultofnegate; 4741 } 4742 4743 /* Return true if we should break up the subtract in STMT into an add 4744 with negate. This is true when we the subtract operands are really 4745 adds, or the subtract itself is used in an add expression. In 4746 either case, breaking up the subtract into an add with negate 4747 exposes the adds to reassociation. */ 4748 4749 static bool 4750 should_break_up_subtract (gimple *stmt) 4751 { 4752 tree lhs = gimple_assign_lhs (stmt); 4753 tree binlhs = gimple_assign_rhs1 (stmt); 4754 tree binrhs = gimple_assign_rhs2 (stmt); 4755 gimple *immusestmt; 4756 struct loop *loop = loop_containing_stmt (stmt); 4757 4758 if (TREE_CODE (binlhs) == SSA_NAME 4759 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) 4760 return true; 4761 4762 if (TREE_CODE (binrhs) == SSA_NAME 4763 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop)) 4764 return true; 4765 4766 if (TREE_CODE (lhs) == SSA_NAME 4767 && (immusestmt = get_single_immediate_use (lhs)) 4768 && is_gimple_assign (immusestmt) 4769 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR 4770 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR)) 4771 return true; 4772 return false; 4773 } 4774 4775 /* Transform STMT from A - B into A + -B. 
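   E.g. (SSA names purely illustrative) x_3 = a_1 - b_2 becomes
   t_4 = -b_2; x_3 = a_1 + t_4, where negate_value may instead fold the
   negation into b_2's defining PLUS_EXPR when b_2 has a single use.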
*/ 4776 4777 static void 4778 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip) 4779 { 4780 tree rhs1 = gimple_assign_rhs1 (stmt); 4781 tree rhs2 = gimple_assign_rhs2 (stmt); 4782 4783 if (dump_file && (dump_flags & TDF_DETAILS)) 4784 { 4785 fprintf (dump_file, "Breaking up subtract "); 4786 print_gimple_stmt (dump_file, stmt, 0, 0); 4787 } 4788 4789 rhs2 = negate_value (rhs2, gsip); 4790 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2); 4791 update_stmt (stmt); 4792 } 4793 4794 /* Determine whether STMT is a builtin call that raises an SSA name 4795 to an integer power and has only one use. If so, and this is early 4796 reassociation and unsafe math optimizations are permitted, place 4797 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE. 4798 If any of these conditions does not hold, return FALSE. */ 4799 4800 static bool 4801 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent) 4802 { 4803 tree arg1; 4804 REAL_VALUE_TYPE c, cint; 4805 4806 switch (gimple_call_combined_fn (stmt)) 4807 { 4808 CASE_CFN_POW: 4809 if (flag_errno_math) 4810 return false; 4811 4812 *base = gimple_call_arg (stmt, 0); 4813 arg1 = gimple_call_arg (stmt, 1); 4814 4815 if (TREE_CODE (arg1) != REAL_CST) 4816 return false; 4817 4818 c = TREE_REAL_CST (arg1); 4819 4820 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT) 4821 return false; 4822 4823 *exponent = real_to_integer (&c); 4824 real_from_integer (&cint, VOIDmode, *exponent, SIGNED); 4825 if (!real_identical (&c, &cint)) 4826 return false; 4827 4828 break; 4829 4830 CASE_CFN_POWI: 4831 *base = gimple_call_arg (stmt, 0); 4832 arg1 = gimple_call_arg (stmt, 1); 4833 4834 if (!tree_fits_shwi_p (arg1)) 4835 return false; 4836 4837 *exponent = tree_to_shwi (arg1); 4838 break; 4839 4840 default: 4841 return false; 4842 } 4843 4844 /* Expanding negative exponents is generally unproductive, so we don't 4845 complicate matters with those. Exponents of zero and one should 4846 have been handled by expression folding. */ 4847 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME) 4848 return false; 4849 4850 return true; 4851 } 4852 4853 /* Try to derive and add operand entry for OP to *OPS. Return false if 4854 unsuccessful. */ 4855 4856 static bool 4857 try_special_add_to_ops (vec<operand_entry *> *ops, 4858 enum tree_code code, 4859 tree op, gimple* def_stmt) 4860 { 4861 tree base = NULL_TREE; 4862 HOST_WIDE_INT exponent = 0; 4863 4864 if (TREE_CODE (op) != SSA_NAME 4865 || ! has_single_use (op)) 4866 return false; 4867 4868 if (code == MULT_EXPR 4869 && reassoc_insert_powi_p 4870 && flag_unsafe_math_optimizations 4871 && is_gimple_call (def_stmt) 4872 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent)) 4873 { 4874 add_repeat_to_ops_vec (ops, base, exponent); 4875 gimple_set_visited (def_stmt, true); 4876 return true; 4877 } 4878 else if (code == MULT_EXPR 4879 && is_gimple_assign (def_stmt) 4880 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR 4881 && !HONOR_SNANS (TREE_TYPE (op)) 4882 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op)) 4883 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))) 4884 { 4885 tree rhs1 = gimple_assign_rhs1 (def_stmt); 4886 tree cst = build_minus_one_cst (TREE_TYPE (op)); 4887 add_to_ops_vec (ops, rhs1); 4888 add_to_ops_vec (ops, cst); 4889 gimple_set_visited (def_stmt, true); 4890 return true; 4891 } 4892 4893 return false; 4894 } 4895 4896 /* Recursively linearize a binary expression that is the RHS of STMT. 4897 Place the operands of the expression tree in the vector named OPS. 
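   For instance (SSA names purely illustrative), for
   x_6 = ((a_1 + b_2) + c_3) + d_4 the vector ends up holding a_1, b_2,
   c_3 and d_4, except where try_special_add_to_ops substitutes a
   repeated-factor or negate-derived entry for an operand.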
*/ 4898 4899 static void 4900 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt, 4901 bool is_associative, bool set_visited) 4902 { 4903 tree binlhs = gimple_assign_rhs1 (stmt); 4904 tree binrhs = gimple_assign_rhs2 (stmt); 4905 gimple *binlhsdef = NULL, *binrhsdef = NULL; 4906 bool binlhsisreassoc = false; 4907 bool binrhsisreassoc = false; 4908 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 4909 struct loop *loop = loop_containing_stmt (stmt); 4910 4911 if (set_visited) 4912 gimple_set_visited (stmt, true); 4913 4914 if (TREE_CODE (binlhs) == SSA_NAME) 4915 { 4916 binlhsdef = SSA_NAME_DEF_STMT (binlhs); 4917 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop) 4918 && !stmt_could_throw_p (binlhsdef)); 4919 } 4920 4921 if (TREE_CODE (binrhs) == SSA_NAME) 4922 { 4923 binrhsdef = SSA_NAME_DEF_STMT (binrhs); 4924 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop) 4925 && !stmt_could_throw_p (binrhsdef)); 4926 } 4927 4928 /* If the LHS is not reassociable, but the RHS is, we need to swap 4929 them. If neither is reassociable, there is nothing we can do, so 4930 just put them in the ops vector. If the LHS is reassociable, 4931 linearize it. If both are reassociable, then linearize the RHS 4932 and the LHS. */ 4933 4934 if (!binlhsisreassoc) 4935 { 4936 /* If this is not a associative operation like division, give up. */ 4937 if (!is_associative) 4938 { 4939 add_to_ops_vec (ops, binrhs); 4940 return; 4941 } 4942 4943 if (!binrhsisreassoc) 4944 { 4945 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef)) 4946 add_to_ops_vec (ops, binrhs); 4947 4948 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef)) 4949 add_to_ops_vec (ops, binlhs); 4950 4951 return; 4952 } 4953 4954 if (dump_file && (dump_flags & TDF_DETAILS)) 4955 { 4956 fprintf (dump_file, "swapping operands of "); 4957 print_gimple_stmt (dump_file, stmt, 0, 0); 4958 } 4959 4960 swap_ssa_operands (stmt, 4961 gimple_assign_rhs1_ptr (stmt), 4962 gimple_assign_rhs2_ptr (stmt)); 4963 update_stmt (stmt); 4964 4965 if (dump_file && (dump_flags & TDF_DETAILS)) 4966 { 4967 fprintf (dump_file, " is now "); 4968 print_gimple_stmt (dump_file, stmt, 0, 0); 4969 } 4970 4971 /* We want to make it so the lhs is always the reassociative op, 4972 so swap. */ 4973 std::swap (binlhs, binrhs); 4974 } 4975 else if (binrhsisreassoc) 4976 { 4977 linearize_expr (stmt); 4978 binlhs = gimple_assign_rhs1 (stmt); 4979 binrhs = gimple_assign_rhs2 (stmt); 4980 } 4981 4982 gcc_assert (TREE_CODE (binrhs) != SSA_NAME 4983 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), 4984 rhscode, loop)); 4985 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs), 4986 is_associative, set_visited); 4987 4988 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef)) 4989 add_to_ops_vec (ops, binrhs); 4990 } 4991 4992 /* Repropagate the negates back into subtracts, since no other pass 4993 currently does it. */ 4994 4995 static void 4996 repropagate_negates (void) 4997 { 4998 unsigned int i = 0; 4999 tree negate; 5000 5001 FOR_EACH_VEC_ELT (plus_negates, i, negate) 5002 { 5003 gimple *user = get_single_immediate_use (negate); 5004 5005 if (!user || !is_gimple_assign (user)) 5006 continue; 5007 5008 /* The negate operand can be either operand of a PLUS_EXPR 5009 (it can be the LHS if the RHS is a constant for example). 5010 5011 Force the negate operand to the RHS of the PLUS_EXPR, then 5012 transform the PLUS_EXPR into a MINUS_EXPR. 
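   For example (SSA names purely illustrative), x_2 = -a_1 followed by
   y_3 = x_2 + b_4 becomes y_3 = b_4 - a_1; the negate definition turns
   dead if this was its only use and is left for DCE.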
*/ 5013 if (gimple_assign_rhs_code (user) == PLUS_EXPR) 5014 { 5015 /* If the negated operand appears on the LHS of the 5016 PLUS_EXPR, exchange the operands of the PLUS_EXPR 5017 to force the negated operand to the RHS of the PLUS_EXPR. */ 5018 if (gimple_assign_rhs1 (user) == negate) 5019 { 5020 swap_ssa_operands (user, 5021 gimple_assign_rhs1_ptr (user), 5022 gimple_assign_rhs2_ptr (user)); 5023 } 5024 5025 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace 5026 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */ 5027 if (gimple_assign_rhs2 (user) == negate) 5028 { 5029 tree rhs1 = gimple_assign_rhs1 (user); 5030 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate)); 5031 gimple_stmt_iterator gsi = gsi_for_stmt (user); 5032 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2); 5033 update_stmt (user); 5034 } 5035 } 5036 else if (gimple_assign_rhs_code (user) == MINUS_EXPR) 5037 { 5038 if (gimple_assign_rhs1 (user) == negate) 5039 { 5040 /* We have 5041 x = -a 5042 y = x - b 5043 which we transform into 5044 x = a + b 5045 y = -x . 5046 This pushes down the negate which we possibly can merge 5047 into some other operation, hence insert it into the 5048 plus_negates vector. */ 5049 gimple *feed = SSA_NAME_DEF_STMT (negate); 5050 tree a = gimple_assign_rhs1 (feed); 5051 tree b = gimple_assign_rhs2 (user); 5052 gimple_stmt_iterator gsi = gsi_for_stmt (feed); 5053 gimple_stmt_iterator gsi2 = gsi_for_stmt (user); 5054 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed))); 5055 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b); 5056 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); 5057 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x); 5058 user = gsi_stmt (gsi2); 5059 update_stmt (user); 5060 reassoc_remove_stmt (&gsi); 5061 release_defs (feed); 5062 plus_negates.safe_push (gimple_assign_lhs (user)); 5063 } 5064 else 5065 { 5066 /* Transform "x = -a; y = b - x" into "y = b + a", getting 5067 rid of one operation. */ 5068 gimple *feed = SSA_NAME_DEF_STMT (negate); 5069 tree a = gimple_assign_rhs1 (feed); 5070 tree rhs1 = gimple_assign_rhs1 (user); 5071 gimple_stmt_iterator gsi = gsi_for_stmt (user); 5072 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a); 5073 update_stmt (gsi_stmt (gsi)); 5074 } 5075 } 5076 } 5077 } 5078 5079 /* Returns true if OP is of a type for which we can do reassociation. 5080 That is for integral or non-saturating fixed-point types, and for 5081 floating point type when associative-math is enabled. */ 5082 5083 static bool 5084 can_reassociate_p (tree op) 5085 { 5086 tree type = TREE_TYPE (op); 5087 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op)) 5088 return false; 5089 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)) 5090 || NON_SAT_FIXED_POINT_TYPE_P (type) 5091 || (flag_associative_math && FLOAT_TYPE_P (type))) 5092 return true; 5093 return false; 5094 } 5095 5096 /* Break up subtract operations in block BB. 5097 5098 We do this top down because we don't know whether the subtract is 5099 part of a possible chain of reassociation except at the top. 5100 5101 IE given 5102 d = f + g 5103 c = a + e 5104 b = c - d 5105 q = b - r 5106 k = t - q 5107 5108 we want to break up k = t - q, but we won't until we've transformed q 5109 = b - r, which won't be broken up until we transform b = c - d. 5110 5111 En passant, clear the GIMPLE visited flag on every statement 5112 and set UIDs within each basic block. 
*/ 5113 5114 static void 5115 break_up_subtract_bb (basic_block bb) 5116 { 5117 gimple_stmt_iterator gsi; 5118 basic_block son; 5119 unsigned int uid = 1; 5120 5121 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 5122 { 5123 gimple *stmt = gsi_stmt (gsi); 5124 gimple_set_visited (stmt, false); 5125 gimple_set_uid (stmt, uid++); 5126 5127 if (!is_gimple_assign (stmt) 5128 || !can_reassociate_p (gimple_assign_lhs (stmt))) 5129 continue; 5130 5131 /* Look for simple gimple subtract operations. */ 5132 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR) 5133 { 5134 if (!can_reassociate_p (gimple_assign_rhs1 (stmt)) 5135 || !can_reassociate_p (gimple_assign_rhs2 (stmt))) 5136 continue; 5137 5138 /* Check for a subtract used only in an addition. If this 5139 is the case, transform it into add of a negate for better 5140 reassociation. IE transform C = A-B into C = A + -B if C 5141 is only used in an addition. */ 5142 if (should_break_up_subtract (stmt)) 5143 break_up_subtract (stmt, &gsi); 5144 } 5145 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR 5146 && can_reassociate_p (gimple_assign_rhs1 (stmt))) 5147 plus_negates.safe_push (gimple_assign_lhs (stmt)); 5148 } 5149 for (son = first_dom_son (CDI_DOMINATORS, bb); 5150 son; 5151 son = next_dom_son (CDI_DOMINATORS, son)) 5152 break_up_subtract_bb (son); 5153 } 5154 5155 /* Used for repeated factor analysis. */ 5156 struct repeat_factor 5157 { 5158 /* An SSA name that occurs in a multiply chain. */ 5159 tree factor; 5160 5161 /* Cached rank of the factor. */ 5162 unsigned rank; 5163 5164 /* Number of occurrences of the factor in the chain. */ 5165 HOST_WIDE_INT count; 5166 5167 /* An SSA name representing the product of this factor and 5168 all factors appearing later in the repeated factor vector. */ 5169 tree repr; 5170 }; 5171 5172 5173 static vec<repeat_factor> repeat_factor_vec; 5174 5175 /* Used for sorting the repeat factor vector. Sort primarily by 5176 ascending occurrence count, secondarily by descending rank. */ 5177 5178 static int 5179 compare_repeat_factors (const void *x1, const void *x2) 5180 { 5181 const repeat_factor *rf1 = (const repeat_factor *) x1; 5182 const repeat_factor *rf2 = (const repeat_factor *) x2; 5183 5184 if (rf1->count != rf2->count) 5185 return rf1->count - rf2->count; 5186 5187 return rf2->rank - rf1->rank; 5188 } 5189 5190 /* Look for repeated operands in OPS in the multiply tree rooted at 5191 STMT. Replace them with an optimal sequence of multiplies and powi 5192 builtin calls, and remove the used operands from OPS. Return an 5193 SSA name representing the value of the replacement sequence. */ 5194 5195 static tree 5196 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops) 5197 { 5198 unsigned i, j, vec_len; 5199 int ii; 5200 operand_entry *oe; 5201 repeat_factor *rf1, *rf2; 5202 repeat_factor rfnew; 5203 tree result = NULL_TREE; 5204 tree target_ssa, iter_result; 5205 tree type = TREE_TYPE (gimple_get_lhs (stmt)); 5206 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI); 5207 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 5208 gimple *mul_stmt, *pow_stmt; 5209 5210 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and 5211 target. */ 5212 if (!powi_fndecl) 5213 return NULL_TREE; 5214 5215 /* Allocate the repeated factor vector. */ 5216 repeat_factor_vec.create (10); 5217 5218 /* Scan the OPS vector for all SSA names in the product and build 5219 up a vector of occurrence counts for each factor. 
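   E.g. (SSA names purely illustrative) for a linearized product
   x_1 * x_1 * x_1 * y_2 * y_2 the vector records x_1 with count 3 and
   y_2 with count 2.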
*/ 5220 FOR_EACH_VEC_ELT (*ops, i, oe) 5221 { 5222 if (TREE_CODE (oe->op) == SSA_NAME) 5223 { 5224 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 5225 { 5226 if (rf1->factor == oe->op) 5227 { 5228 rf1->count += oe->count; 5229 break; 5230 } 5231 } 5232 5233 if (j >= repeat_factor_vec.length ()) 5234 { 5235 rfnew.factor = oe->op; 5236 rfnew.rank = oe->rank; 5237 rfnew.count = oe->count; 5238 rfnew.repr = NULL_TREE; 5239 repeat_factor_vec.safe_push (rfnew); 5240 } 5241 } 5242 } 5243 5244 /* Sort the repeated factor vector by (a) increasing occurrence count, 5245 and (b) decreasing rank. */ 5246 repeat_factor_vec.qsort (compare_repeat_factors); 5247 5248 /* It is generally best to combine as many base factors as possible 5249 into a product before applying __builtin_powi to the result. 5250 However, the sort order chosen for the repeated factor vector 5251 allows us to cache partial results for the product of the base 5252 factors for subsequent use. When we already have a cached partial 5253 result from a previous iteration, it is best to make use of it 5254 before looking for another __builtin_pow opportunity. 5255 5256 As an example, consider x * x * y * y * y * z * z * z * z. 5257 We want to first compose the product x * y * z, raise it to the 5258 second power, then multiply this by y * z, and finally multiply 5259 by z. This can be done in 5 multiplies provided we cache y * z 5260 for use in both expressions: 5261 5262 t1 = y * z 5263 t2 = t1 * x 5264 t3 = t2 * t2 5265 t4 = t1 * t3 5266 result = t4 * z 5267 5268 If we instead ignored the cached y * z and first multiplied by 5269 the __builtin_pow opportunity z * z, we would get the inferior: 5270 5271 t1 = y * z 5272 t2 = t1 * x 5273 t3 = t2 * t2 5274 t4 = z * z 5275 t5 = t3 * t4 5276 result = t5 * y */ 5277 5278 vec_len = repeat_factor_vec.length (); 5279 5280 /* Repeatedly look for opportunities to create a builtin_powi call. */ 5281 while (true) 5282 { 5283 HOST_WIDE_INT power; 5284 5285 /* First look for the largest cached product of factors from 5286 preceding iterations. If found, create a builtin_powi for 5287 it if the minimum occurrence count for its factors is at 5288 least 2, or just use this cached product as our next 5289 multiplicand if the minimum occurrence count is 1. 
*/ 5290 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 5291 { 5292 if (rf1->repr && rf1->count > 0) 5293 break; 5294 } 5295 5296 if (j < vec_len) 5297 { 5298 power = rf1->count; 5299 5300 if (power == 1) 5301 { 5302 iter_result = rf1->repr; 5303 5304 if (dump_file && (dump_flags & TDF_DETAILS)) 5305 { 5306 unsigned elt; 5307 repeat_factor *rf; 5308 fputs ("Multiplying by cached product ", dump_file); 5309 for (elt = j; elt < vec_len; elt++) 5310 { 5311 rf = &repeat_factor_vec[elt]; 5312 print_generic_expr (dump_file, rf->factor, 0); 5313 if (elt < vec_len - 1) 5314 fputs (" * ", dump_file); 5315 } 5316 fputs ("\n", dump_file); 5317 } 5318 } 5319 else 5320 { 5321 iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); 5322 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 5323 build_int_cst (integer_type_node, 5324 power)); 5325 gimple_call_set_lhs (pow_stmt, iter_result); 5326 gimple_set_location (pow_stmt, gimple_location (stmt)); 5327 gimple_set_uid (pow_stmt, gimple_uid (stmt)); 5328 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); 5329 5330 if (dump_file && (dump_flags & TDF_DETAILS)) 5331 { 5332 unsigned elt; 5333 repeat_factor *rf; 5334 fputs ("Building __builtin_pow call for cached product (", 5335 dump_file); 5336 for (elt = j; elt < vec_len; elt++) 5337 { 5338 rf = &repeat_factor_vec[elt]; 5339 print_generic_expr (dump_file, rf->factor, 0); 5340 if (elt < vec_len - 1) 5341 fputs (" * ", dump_file); 5342 } 5343 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", 5344 power); 5345 } 5346 } 5347 } 5348 else 5349 { 5350 /* Otherwise, find the first factor in the repeated factor 5351 vector whose occurrence count is at least 2. If no such 5352 factor exists, there are no builtin_powi opportunities 5353 remaining. */ 5354 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 5355 { 5356 if (rf1->count >= 2) 5357 break; 5358 } 5359 5360 if (j >= vec_len) 5361 break; 5362 5363 power = rf1->count; 5364 5365 if (dump_file && (dump_flags & TDF_DETAILS)) 5366 { 5367 unsigned elt; 5368 repeat_factor *rf; 5369 fputs ("Building __builtin_pow call for (", dump_file); 5370 for (elt = j; elt < vec_len; elt++) 5371 { 5372 rf = &repeat_factor_vec[elt]; 5373 print_generic_expr (dump_file, rf->factor, 0); 5374 if (elt < vec_len - 1) 5375 fputs (" * ", dump_file); 5376 } 5377 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power); 5378 } 5379 5380 reassociate_stats.pows_created++; 5381 5382 /* Visit each element of the vector in reverse order (so that 5383 high-occurrence elements are visited first, and within the 5384 same occurrence count, lower-ranked elements are visited 5385 first). Form a linear product of all elements in this order 5386 whose occurrencce count is at least that of element J. 5387 Record the SSA name representing the product of each element 5388 with all subsequent elements in the vector. */ 5389 if (j == vec_len - 1) 5390 rf1->repr = rf1->factor; 5391 else 5392 { 5393 for (ii = vec_len - 2; ii >= (int)j; ii--) 5394 { 5395 tree op1, op2; 5396 5397 rf1 = &repeat_factor_vec[ii]; 5398 rf2 = &repeat_factor_vec[ii + 1]; 5399 5400 /* Init the last factor's representative to be itself. 
*/ 5401 if (!rf2->repr) 5402 rf2->repr = rf2->factor; 5403 5404 op1 = rf1->factor; 5405 op2 = rf2->repr; 5406 5407 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow"); 5408 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR, 5409 op1, op2); 5410 gimple_set_location (mul_stmt, gimple_location (stmt)); 5411 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 5412 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); 5413 rf1->repr = target_ssa; 5414 5415 /* Don't reprocess the multiply we just introduced. */ 5416 gimple_set_visited (mul_stmt, true); 5417 } 5418 } 5419 5420 /* Form a call to __builtin_powi for the maximum product 5421 just formed, raised to the power obtained earlier. */ 5422 rf1 = &repeat_factor_vec[j]; 5423 iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); 5424 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 5425 build_int_cst (integer_type_node, 5426 power)); 5427 gimple_call_set_lhs (pow_stmt, iter_result); 5428 gimple_set_location (pow_stmt, gimple_location (stmt)); 5429 gimple_set_uid (pow_stmt, gimple_uid (stmt)); 5430 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); 5431 } 5432 5433 /* If we previously formed at least one other builtin_powi call, 5434 form the product of this one and those others. */ 5435 if (result) 5436 { 5437 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow"); 5438 mul_stmt = gimple_build_assign (new_result, MULT_EXPR, 5439 result, iter_result); 5440 gimple_set_location (mul_stmt, gimple_location (stmt)); 5441 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 5442 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); 5443 gimple_set_visited (mul_stmt, true); 5444 result = new_result; 5445 } 5446 else 5447 result = iter_result; 5448 5449 /* Decrement the occurrence count of each element in the product 5450 by the count found above, and remove this many copies of each 5451 factor from OPS. */ 5452 for (i = j; i < vec_len; i++) 5453 { 5454 unsigned k = power; 5455 unsigned n; 5456 5457 rf1 = &repeat_factor_vec[i]; 5458 rf1->count -= power; 5459 5460 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe) 5461 { 5462 if (oe->op == rf1->factor) 5463 { 5464 if (oe->count <= k) 5465 { 5466 ops->ordered_remove (n); 5467 k -= oe->count; 5468 5469 if (k == 0) 5470 break; 5471 } 5472 else 5473 { 5474 oe->count -= k; 5475 break; 5476 } 5477 } 5478 } 5479 } 5480 } 5481 5482 /* At this point all elements in the repeated factor vector have a 5483 remaining occurrence count of 0 or 1, and those with a count of 1 5484 don't have cached representatives. Re-sort the ops vector and 5485 clean up. */ 5486 ops->qsort (sort_by_operand_rank); 5487 repeat_factor_vec.release (); 5488 5489 /* Return the final product computed herein. Note that there may 5490 still be some elements with single occurrence count left in OPS; 5491 those will be handled by the normal reassociation logic. */ 5492 return result; 5493 } 5494 5495 /* Attempt to optimize 5496 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or 5497 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. 
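   E.g. (constants purely illustrative) 2.5 * copysign (4.0, y_1) becomes
   copysign (10.0, y_1); with a negative CST1 such as -2.5 the product is
   rewritten as a NEGATE_EXPR of copysign (-10.0, y_1).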
*/ 5498 5499 static void 5500 attempt_builtin_copysign (vec<operand_entry *> *ops) 5501 { 5502 operand_entry *oe; 5503 unsigned int i; 5504 unsigned int length = ops->length (); 5505 tree cst = ops->last ()->op; 5506 5507 if (length == 1 || TREE_CODE (cst) != REAL_CST) 5508 return; 5509 5510 FOR_EACH_VEC_ELT (*ops, i, oe) 5511 { 5512 if (TREE_CODE (oe->op) == SSA_NAME 5513 && has_single_use (oe->op)) 5514 { 5515 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op); 5516 if (gcall *old_call = dyn_cast <gcall *> (def_stmt)) 5517 { 5518 tree arg0, arg1; 5519 switch (gimple_call_combined_fn (old_call)) 5520 { 5521 CASE_CFN_COPYSIGN: 5522 arg0 = gimple_call_arg (old_call, 0); 5523 arg1 = gimple_call_arg (old_call, 1); 5524 /* The first argument of copysign must be a constant, 5525 otherwise there's nothing to do. */ 5526 if (TREE_CODE (arg0) == REAL_CST) 5527 { 5528 tree type = TREE_TYPE (arg0); 5529 tree mul = const_binop (MULT_EXPR, type, cst, arg0); 5530 /* If we couldn't fold to a single constant, skip it. 5531 That happens e.g. for inexact multiplication when 5532 -frounding-math. */ 5533 if (mul == NULL_TREE) 5534 break; 5535 /* Instead of adjusting OLD_CALL, let's build a new 5536 call to not leak the LHS and prevent keeping bogus 5537 debug statements. DCE will clean up the old call. */ 5538 gcall *new_call; 5539 if (gimple_call_internal_p (old_call)) 5540 new_call = gimple_build_call_internal 5541 (IFN_COPYSIGN, 2, mul, arg1); 5542 else 5543 new_call = gimple_build_call 5544 (gimple_call_fndecl (old_call), 2, mul, arg1); 5545 tree lhs = make_ssa_name (type); 5546 gimple_call_set_lhs (new_call, lhs); 5547 gimple_set_location (new_call, 5548 gimple_location (old_call)); 5549 insert_stmt_after (new_call, old_call); 5550 /* We've used the constant, get rid of it. */ 5551 ops->pop (); 5552 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst)); 5553 /* Handle the CST1 < 0 case by negating the result. */ 5554 if (cst1_neg) 5555 { 5556 tree negrhs = make_ssa_name (TREE_TYPE (lhs)); 5557 gimple *negate_stmt 5558 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs); 5559 insert_stmt_after (negate_stmt, new_call); 5560 oe->op = negrhs; 5561 } 5562 else 5563 oe->op = lhs; 5564 if (dump_file && (dump_flags & TDF_DETAILS)) 5565 { 5566 fprintf (dump_file, "Optimizing copysign: "); 5567 print_generic_expr (dump_file, cst, 0); 5568 fprintf (dump_file, " * COPYSIGN ("); 5569 print_generic_expr (dump_file, arg0, 0); 5570 fprintf (dump_file, ", "); 5571 print_generic_expr (dump_file, arg1, 0); 5572 fprintf (dump_file, ") into %sCOPYSIGN (", 5573 cst1_neg ? "-" : ""); 5574 print_generic_expr (dump_file, mul, 0); 5575 fprintf (dump_file, ", "); 5576 print_generic_expr (dump_file, arg1, 0); 5577 fprintf (dump_file, "\n"); 5578 } 5579 return; 5580 } 5581 break; 5582 default: 5583 break; 5584 } 5585 } 5586 } 5587 } 5588 } 5589 5590 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. 
*/ 5591 5592 static void 5593 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs) 5594 { 5595 tree rhs1; 5596 5597 if (dump_file && (dump_flags & TDF_DETAILS)) 5598 { 5599 fprintf (dump_file, "Transforming "); 5600 print_gimple_stmt (dump_file, stmt, 0, 0); 5601 } 5602 5603 rhs1 = gimple_assign_rhs1 (stmt); 5604 gimple_assign_set_rhs_from_tree (gsi, new_rhs); 5605 update_stmt (stmt); 5606 remove_visited_stmt_chain (rhs1); 5607 5608 if (dump_file && (dump_flags & TDF_DETAILS)) 5609 { 5610 fprintf (dump_file, " into "); 5611 print_gimple_stmt (dump_file, stmt, 0, 0); 5612 } 5613 } 5614 5615 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */ 5616 5617 static void 5618 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt, 5619 tree rhs1, tree rhs2) 5620 { 5621 if (dump_file && (dump_flags & TDF_DETAILS)) 5622 { 5623 fprintf (dump_file, "Transforming "); 5624 print_gimple_stmt (dump_file, stmt, 0, 0); 5625 } 5626 5627 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2); 5628 update_stmt (gsi_stmt (*gsi)); 5629 remove_visited_stmt_chain (rhs1); 5630 5631 if (dump_file && (dump_flags & TDF_DETAILS)) 5632 { 5633 fprintf (dump_file, " into "); 5634 print_gimple_stmt (dump_file, stmt, 0, 0); 5635 } 5636 } 5637 5638 /* Reassociate expressions in basic block BB and its post-dominator as 5639 children. 5640 5641 Bubble up return status from maybe_optimize_range_tests. */ 5642 5643 static bool 5644 reassociate_bb (basic_block bb) 5645 { 5646 gimple_stmt_iterator gsi; 5647 basic_block son; 5648 gimple *stmt = last_stmt (bb); 5649 bool cfg_cleanup_needed = false; 5650 5651 if (stmt && !gimple_visited_p (stmt)) 5652 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt); 5653 5654 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi)) 5655 { 5656 stmt = gsi_stmt (gsi); 5657 5658 if (is_gimple_assign (stmt) 5659 && !stmt_could_throw_p (stmt)) 5660 { 5661 tree lhs, rhs1, rhs2; 5662 enum tree_code rhs_code = gimple_assign_rhs_code (stmt); 5663 5664 /* If this is not a gimple binary expression, there is 5665 nothing for us to do with it. */ 5666 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) 5667 continue; 5668 5669 /* If this was part of an already processed statement, 5670 we don't need to touch it again. */ 5671 if (gimple_visited_p (stmt)) 5672 { 5673 /* This statement might have become dead because of previous 5674 reassociations. */ 5675 if (has_zero_uses (gimple_get_lhs (stmt))) 5676 { 5677 reassoc_remove_stmt (&gsi); 5678 release_defs (stmt); 5679 /* We might end up removing the last stmt above which 5680 places the iterator to the end of the sequence. 5681 Reset it to the last stmt in this case which might 5682 be the end of the sequence as well if we removed 5683 the last statement of the sequence. In which case 5684 we need to bail out. */ 5685 if (gsi_end_p (gsi)) 5686 { 5687 gsi = gsi_last_bb (bb); 5688 if (gsi_end_p (gsi)) 5689 break; 5690 } 5691 } 5692 continue; 5693 } 5694 5695 lhs = gimple_assign_lhs (stmt); 5696 rhs1 = gimple_assign_rhs1 (stmt); 5697 rhs2 = gimple_assign_rhs2 (stmt); 5698 5699 /* For non-bit or min/max operations we can't associate 5700 all types. Verify that here. 
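   For instance, wrapping integer types (unsigned, or signed with
   -fwrapv) and floating-point types under -fassociative-math pass
   can_reassociate_p, whereas plain signed integer arithmetic, whose
   overflow is undefined, is only handled for the bitwise and MIN/MAX
   codes checked just below.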
          if (rhs_code != BIT_IOR_EXPR
              && rhs_code != BIT_AND_EXPR
              && rhs_code != BIT_XOR_EXPR
              && rhs_code != MIN_EXPR
              && rhs_code != MAX_EXPR
              && (!can_reassociate_p (lhs)
                  || !can_reassociate_p (rhs1)
                  || !can_reassociate_p (rhs2)))
            continue;

          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry *> ops;
              tree powi_result = NULL_TREE;
              bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              int orig_len = ops.length ();
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }

              if (rhs_code == PLUS_EXPR
                  && transform_add_to_multiply (&ops))
                ops.qsort (sort_by_operand_rank);

              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                {
                  if (is_vector)
                    optimize_vec_cond_expr (rhs_code, &ops);
                  else
                    optimize_range_tests (rhs_code, &ops, NULL);
                }

              if (rhs_code == MULT_EXPR && !is_vector)
                {
                  attempt_builtin_copysign (&ops);

                  if (reassoc_insert_powi_p
                      && flag_unsafe_math_optimizations)
                    powi_result = attempt_builtin_powi (stmt, &ops);
                }
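
              /* A hedged sketch of the powi combination (hypothetical
                 operands, assuming a pass instance with insert_powi_p set
                 and -funsafe-math-optimizations): for

                   x = a * a * b * b * c

                 the factors a and b share an occurrence count of 2, so
                 attempt_builtin_powi may emit roughly

                   t_1 = a * b;
                   powi_result = __builtin_powi (t_1, 2);

                 leaving only { c } in OPS; POWI_RESULT is multiplied back
                 into the rewritten chain further below.  */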

              operand_entry *last;
              bool negate_result = false;
              if (ops.length () > 1
                  && rhs_code == MULT_EXPR)
                {
                  last = ops.last ();
                  if ((integer_minus_onep (last->op)
                       || real_minus_onep (last->op))
                      && !HONOR_SNANS (TREE_TYPE (lhs))
                      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
                          || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
                    {
                      ops.pop ();
                      negate_result = true;
                    }
                }

              tree new_lhs = lhs;
              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  /* If the stmt that defines operand has to be inserted,
                     insert it before the use.  */
                  if (ops.last ()->stmt_to_insert)
                    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width = get_reassociation_width (ops_num, rhs_code,
                                                       mode);

                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file,
                             "Width = %d was chosen for reassociation\n",
                             width);

                  if (width > 1
                      && ops.length () > 3)
                    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
                                                width, ops);
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3)
                        swap_ops_for_binary_stmt (ops, len - 3, stmt);

                      new_lhs = rewrite_expr_tree (stmt, 0, ops,
                                                   powi_result != NULL
                                                   || negate_result,
                                                   len != orig_len);
                    }

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  */
                  if (powi_result)
                    {
                      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocpow");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        {
                          target_ssa = new_lhs;
                          new_lhs = lhs;
                        }
                      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
                                                      powi_result,
                                                      target_ssa);
                      gimple_set_location (mul_stmt,
                                           gimple_location (stmt));
                      gimple_set_uid (mul_stmt, gimple_uid (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }
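
              /* Illustrative sketch of the negate handling below
                 (hypothetical GIMPLE, assuming -ffast-math float
                 arithmetic): for

                   x = a * b * -1.0

                 the trailing -1.0 was popped above and NEGATE_RESULT set,
                 so the chain is first rewritten as a plain product with X
                 as its result; the code below then moves that product into
                 a fresh temporary and appends a single negate:

                   _tmp = a * b;
                   x = -_tmp;  */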
              if (negate_result)
                {
                  stmt = SSA_NAME_DEF_STMT (lhs);
                  tree tmp = make_ssa_name (TREE_TYPE (lhs));
                  gimple_set_lhs (stmt, tmp);
                  if (lhs != new_lhs)
                    tmp = new_lhs;
                  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
                                                           tmp);
                  gimple_set_uid (neg_stmt, gimple_uid (stmt));
                  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
                  update_stmt (stmt);
                }
            }
        }
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    cfg_cleanup_needed |= reassociate_bb (son);

  return cfg_cleanup_needed;
}

/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...; // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...; // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
                  && is_gimple_assign (use_stmt)
                  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
                  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
                                     build_zero_cst (TREE_TYPE (var)),
                                     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = REG_BR_PROB_BASE / 2;
      etrue->count = cond_bb->count / 2;
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      efalse->count -= etrue->count;
      then_bb->count -= etrue->count;

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
        othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
        othervar = gimple_assign_rhs1 (use_stmt);
      else
        gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}

void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}

/* Bubble up return status from reassociate_bb.  */

static bool
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}

/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;
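
  /* Illustrative numbers only (they depend on the function): if two
     default definitions received ranks 3 and 4 above, the blocks in RPO
     now get ranks 5 << 16, 6 << 16, 7 << 16, ..., so every name defined
     in a later block outranks all names from earlier blocks as well as
     every default definition.  */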

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}

/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}

/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
   insertion of __builtin_powi calls.

   Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p)
{
  reassoc_insert_powi_p = insert_powi_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
    }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *)
    { return execute_reassoc (insert_powi_p); }

private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}
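
/* Note (informational, not quoted verbatim from passes.def): the pass is
   registered twice, an earlier instance parameterized with insert_powi_p
   set to false and a later instance with it set to true, along the lines
   of

     NEXT_PASS (pass_reassoc, false);
     ...
     NEXT_PASS (pass_reassoc, true);

   set_pass_param above receives that bool as parameter number 0.  */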