/* Reassociation for trees.
   Copyright (C) 2005-2019 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop.h"
#include "flags.h"
#include "tree-ssa.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "params.h"
#include "builtins.h"
#include "gimplify.h"
#include "case-cfn-macros.h"

/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (They do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_*

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   3a. Combine repeated factors with the same occurrence counts
   into a __builtin_powi call that will later be optimized into
   an optimal number of multiplies.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   I.e., if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1), b (1), c (1), d (2), e (2)


   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op.  So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or fewer ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */
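
/* For illustration only (not used by the pass, kept under "#if 0"): a
   standalone sketch of the leaves-to-roots, rank-ordered merging that the
   comment above describes -- and explains the pass deliberately does not
   implement.  This greedy variant always pairs the two lowest-rank entries
   and gives the result the maximum input rank plus one; the toy_op type and
   main driver are hypothetical and exist only for this sketch.  */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_op { int rank; char name[64]; };

static int
toy_rank_cmp (const void *pa, const void *pb)
{
  return ((const struct toy_op *) pa)->rank
	 - ((const struct toy_op *) pb)->rank;
}

int
main (void)
{
  /* The example from the comment above: a, b, c of rank 1; d, e of rank 2. */
  struct toy_op ops[] = { { 1, "a" }, { 1, "b" }, { 1, "c" },
			  { 2, "d" }, { 2, "e" } };
  int n = sizeof ops / sizeof ops[0];

  while (n > 1)
    {
      struct toy_op merged;

      /* Keep the worklist sorted so entries of equal (lowest) rank get
	 merged with each other first.  */
      qsort (ops, n, sizeof ops[0], toy_rank_cmp);

      merged.rank = (ops[0].rank > ops[1].rank ? ops[0].rank : ops[1].rank) + 1;
      snprintf (merged.name, sizeof merged.name, "(%s + %s)",
		ops[0].name, ops[1].name);
      printf ("rank %d: %s\n", merged.rank, merged.name);

      /* Replace the two merged entries with the merge result.  */
      ops[0] = merged;
      memmove (&ops[1], &ops[2], (n - 2) * sizeof ops[0]);
      n--;
    }
  return 0;
}
#endif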

/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
   point 3a in the pass header comment.  */
static bool reassoc_insert_powi_p;

/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;

/* Operator, rank pair.  */
struct operand_entry
{
  unsigned int rank;
  unsigned int id;
  tree op;
  unsigned int count;
  gimple *stmt_to_insert;
};

static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static unsigned int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, long> *operand_rank;

/* Vector of SSA_NAMEs for which, after reassociate_bb is done with
   all basic blocks, the CFG should be adjusted - the basic block is
   split right after that SSA_NAME's definition statement and before
   its only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);

/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}

/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple *use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
	  if (gimple_bb (def_stmt)->loop_father == father)
	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
	}
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}

/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */
static bool
loop_carried_phi (tree exp)
{
  gimple *phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}

/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */
static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}

/* Look up the operand rank structure for expression E.  */

static inline long
find_operand_rank (tree e)
{
  long *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, long rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}

/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      long rank;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
	return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	return phi_rank (stmt);

      if (!is_gimple_assign (stmt))
	return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
	 exception, remove the bias from loop-carried phis when propagating
	 the rank so that dependent operations are not also biased.  */
      /* Simply walk over all SSA uses - this takes advantage of the
	 fact that non-SSA operands are is_gimple_min_invariant and
	 thus have rank 0.  */
      rank = 0;
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	rank = propagate_rank (rank, op);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e);
	  fprintf (dump_file, " is %ld\n", (rank + 1));
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Constants, globals, etc., are rank 0.  */
  return 0;
}
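
/* For illustration only (not used by the pass, kept under "#if 0"): a
   minimal sketch of the ranking rule implemented by get_rank above --
   constant leaves get rank 0, parameters keep a pre-assigned rank, and the
   result of an operation gets the maximum operand rank plus one.  The
   toy_node type and toy_rank function are hypothetical and exist only for
   this sketch.  */
#if 0
#include <stdio.h>

/* A toy expression node: leaves have no operands; rank_of_leaf stands in
   for the pre-assigned rank of parameters (or 0 for constants).  */
struct toy_node
{
  struct toy_node *op0, *op1;	/* NULL for leaves.  */
  long rank_of_leaf;
};

static long
toy_rank (struct toy_node *n)
{
  if (!n->op0)
    return n->rank_of_leaf;
  long r0 = toy_rank (n->op0);
  long r1 = toy_rank (n->op1);
  return (r0 > r1 ? r0 : r1) + 1;
}

int
main (void)
{
  /* t = (a + b) + c with a, b, c being parameters of rank 1.  */
  struct toy_node a = { 0, 0, 1 }, b = { 0, 0, 1 }, c = { 0, 0, 1 };
  struct toy_node ab = { &a, &b, 0 };
  struct toy_node t = { &ab, &c, 0 };
  printf ("rank (a + b) = %ld, rank ((a + b) + c) = %ld\n",
	  toy_rank (&ab), toy_rank (&t));	/* Prints 2 and 3.  */
  return 0;
}
#endif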

/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 4
#define FLOAT_ONE_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    {
      /* Sort -1.0 and 1.0 constants last: while in some cases
	 const_binop can't optimize some inexact operations, multiplication
	 by -1.0 or 1.0 can always be merged with other operands.  */
      if (real_onep (t) || real_minus_onep (t))
	return FLOAT_ONE_CONST_TYPE;
      return FLOAT_CONST_TYPE;
    }
  else
    return OTHER_CONST_TYPE;
}

/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  if (oeb->rank != oea->rank)
    return oeb->rank > oea->rank ? 1 : -1;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oea->op) - constant_type (oeb->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id > oea->id ? 1 : -1;
    }

  if (TREE_CODE (oea->op) != SSA_NAME)
    {
      if (TREE_CODE (oeb->op) != SSA_NAME)
	return oeb->id > oea->id ? 1 : -1;
      else
	return 1;
    }
  else if (TREE_CODE (oeb->op) != SSA_NAME)
    return -1;

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
    {
      /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
	 versions of removed SSA_NAMEs, so if possible prefer to sort
	 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
	 See PR60418.  */
      gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
      gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
      basic_block bba = gimple_bb (stmta);
      basic_block bbb = gimple_bb (stmtb);
      if (bbb != bba)
	{
	  /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
	     but the other might not.  */
	  if (!bba)
	    return 1;
	  if (!bbb)
	    return -1;
	  /* If neither is, compare bb_rank.  */
	  if (bb_rank[bbb->index] != bb_rank[bba->index])
	    return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
	}

      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
      if (da != db)
	return da ? 1 : -1;

      return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
    }

  return oeb->id > oea->id ? 1 : -1;
}

/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  oe->stmt_to_insert = stmt_to_insert;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
		       HOST_WIDE_INT repeat)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  oe->stmt_to_insert = NULL;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}

/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      tree rhs2 = gimple_assign_rhs2 (stmt);
      if (TREE_CODE (rhs1) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
	return false;
      if (rhs2
	  && TREE_CODE (rhs2) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
	return false;
      return true;
    }

  return false;
}
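
/* For illustration only (not used by the pass, kept under "#if 0"): the
   core ordering that sort_by_operand_rank above implements -- decreasing
   rank, with ties broken by a unique insertion id so the result is
   deterministic across hosts.  The toy_entry type is hypothetical and
   exists only for this sketch; the real comparator has additional
   tie-breakers for constants and SSA_NAMEs.  */
#if 0
#include <stdio.h>
#include <stdlib.h>

struct toy_entry { unsigned int rank; unsigned int id; };

static int
toy_entry_cmp (const void *pa, const void *pb)
{
  const struct toy_entry *a = (const struct toy_entry *) pa;
  const struct toy_entry *b = (const struct toy_entry *) pb;

  /* Higher rank sorts earlier (decreasing order).  */
  if (a->rank != b->rank)
    return b->rank > a->rank ? 1 : -1;
  /* Equal ranks: fall back to the unique id for a deterministic order.  */
  return b->id > a->id ? 1 : -1;
}

int
main (void)
{
  struct toy_entry ops[] = { { 1, 0 }, { 3, 1 }, { 0, 2 }, { 3, 3 }, { 0, 4 } };
  int n = sizeof ops / sizeof ops[0];

  qsort (ops, n, sizeof ops[0], toy_entry_cmp);
  for (int i = 0; i < n; i++)
    printf ("rank %u (id %u)\n", ops[i].rank, ops[i].id);
  /* Prints ranks 3, 3, 1, 0, 0; entries of equal rank come out in
     decreasing id order, matching the comparator used by the pass.  */
  return 0;
}
#endif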

/* Return true if STMT is a nop-conversion.  */

static bool
gimple_nop_conversion_p (gimple *stmt)
{
  if (gassign *ass = dyn_cast <gassign *> (stmt))
    {
      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
	  && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
				    TREE_TYPE (gimple_assign_rhs1 (ass))))
	return true;
    }
  return false;
}

/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL_TREE.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple *stmt = SSA_NAME_DEF_STMT (name);

  /* Look through nop conversions (sign changes).  */
  if (gimple_nop_conversion_p (stmt)
      && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}

/* Return true if OP1 and OP2 have the same value if cast to either type.  */

static bool
ops_equal_values_p (tree op1, tree op2)
{
  if (op1 == op2)
    return true;

  tree orig_op1 = op1;
  if (TREE_CODE (op1) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op1);
      if (gimple_nop_conversion_p (stmt))
	{
	  op1 = gimple_assign_rhs1 (stmt);
	  if (op1 == op2)
	    return true;
	}
    }

  if (TREE_CODE (op2) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op2);
      if (gimple_nop_conversion_p (stmt))
	{
	  op2 = gimple_assign_rhs1 (stmt);
	  if (op1 == op2
	      || orig_op1 == op2)
	    return true;
	}
    }

  return false;
}


/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry *> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry *curr,
			  operand_entry *last)
{

  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op);
	    }

	  ops->ordered_remove (i);
	  reassociate_stats.ops_eliminated ++;

	  return true;

	case BIT_XOR_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  if (ops->length () == 2)
	    {
	      ops->truncate (0);
	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
	      *all_done = true;
	    }
	  else
	    {
	      ops->ordered_remove (i-1);
	      ops->ordered_remove (i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}
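
/* For illustration only (not used by the pass, kept under "#if 0"): the
   algebraic identities that eliminate_duplicate_pair above and
   eliminate_plus_minus_pair below rely on, spelled out on a sample value.
   The pass applies the same identities symbolically on the operand list;
   this standalone check is just a sketch.  */
#if 0
#include <assert.h>

int
main (void)
{
  int x = 42;

  assert ((x & x) == x);	/* duplicate under &, |, min, max: keep one  */
  assert ((x | x) == x);
  assert ((x ^ x) == 0);	/* duplicate under ^: both disappear  */
  assert (x + (-x) == 0);	/* negate pair under +: folds to 0  */
  assert (x + (~x) == -1);	/* bitwise-not pair under +: folds to -1  */
  return 0;
}
#endif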

static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
   not expression, look in OPS for a corresponding positive operation to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry *> *ops,
			   unsigned int currindex,
			   operand_entry *curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      if (negateop
	  && ops_equal_values_p (oe->op, negateop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> 0\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
      else if (notop
	       && ops_equal_values_p (oe->op, notop))
	{
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> -1\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_all_ones_cst (op_type));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  /* If CURR->OP is a negate expr without nop conversion in a plus expr:
     save it for later inspection in repropagate_negates().  */
  if (negateop != NULL_TREE
      && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
    plus_negates.safe_push (curr->op);

  return false;
}

/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, replace the whole operand list with
   0 (for AND) or -1 (for IOR) and return true.  Otherwise, return
   false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry *> *ops,
		     unsigned int currindex,
		     operand_entry *curr)
{
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  if (opcode == BIT_AND_EXPR)
	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

	  reassociate_stats.ops_eliminated += ops->length () - 1;
	  ops->truncate (0);
	  ops->quick_push (oe);
	  return true;
	}
    }

  return false;
}

/* Use a constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry *> *ops)
{
  operand_entry *oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (type)
		  && !HONOR_SIGNED_ZEROS (type)
		  && real_zerop (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (type)
		       && real_onep (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}


static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
				 bool, bool);
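
/* For illustration only (not used by the pass, kept under "#if 0"): the
   absorbing- and identity-element rules that eliminate_using_constants
   above applies to the operand list, spelled out on plain ints.  The pass
   does the same thing symbolically: an absorbing constant replaces the
   whole list, an identity constant is simply dropped.  */
#if 0
#include <assert.h>

int
main (void)
{
  int x = 123;

  /* Absorbing constants: every other operand can be removed.  */
  assert ((x & 0) == 0);
  assert ((x | -1) == -1);
  assert (x * 0 == 0);

  /* Identity constants: the constant itself can be removed.  */
  assert ((x & -1) == x);
  assert ((x | 0) == x);
  assert (x * 1 == x);
  assert (x + 0 == x);
  assert ((x ^ 0) == x);
  return 0;
}
#endif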

/* Structure for tracking and counting operands.  */
struct oecount {
  unsigned int cnt;
  unsigned int id;
  enum tree_code oecode;
  tree op;
};


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;


/* Oecount hashtable helpers.  */

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};

/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (int p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (int p1, int p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return c1->oecode == c2->oecode && c1->op == c2->op;
}

/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt > c2->cnt ? 1 : -1;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id > c2->id ? 1 : -1;
}

/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple *stmt, tree op)
{
  if (!is_gimple_call (stmt))
    return false;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}

/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple *stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_CFN_POWI:
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}

/* Replace the SSA name defined by STMT with a new SSA name, redirect
   all uses of the old name (creating a debug temp for debug uses) to
   the new one, and return the new SSA name.  */

static tree
make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
{
  gimple *use_stmt;
  use_operand_p use;
  imm_use_iterator iter;
  tree new_lhs, new_debug_lhs = NULL_TREE;
  tree lhs = gimple_get_lhs (stmt);

  new_lhs = make_ssa_name (TREE_TYPE (lhs));
  gimple_set_lhs (stmt, new_lhs);

  /* Also need to update GIMPLE_DEBUGs.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      tree repl = new_lhs;
      if (is_gimple_debug (use_stmt))
	{
	  if (new_debug_lhs == NULL_TREE)
	    {
	      new_debug_lhs = make_node (DEBUG_EXPR_DECL);
	      gdebug *def_temp
		= gimple_build_debug_bind (new_debug_lhs,
					   build2 (opcode, TREE_TYPE (lhs),
						   new_lhs, op),
					   stmt);
	      DECL_ARTIFICIAL (new_debug_lhs) = 1;
	      TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
	      SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
	      gimple_set_uid (def_temp, gimple_uid (stmt));
	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	      gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
	    }
	  repl = new_debug_lhs;
	}
      FOR_EACH_IMM_USE_ON_STMT (use, iter)
	SET_USE (use, repl);
      update_stmt (use_stmt);
    }
  return new_lhs;
}

/* For each statement in STMTS_TO_FIX, replace the SSA name it defines
   with a new SSA name and redirect its uses.  Also do this for the
   stmt that defines *DEF if *DEF is not OP.  */

static void
make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
			   vec<gimple *> &stmts_to_fix)
{
  unsigned i;
  gimple *stmt;

  if (*def != op
      && TREE_CODE (*def) == SSA_NAME
      && (stmt = SSA_NAME_DEF_STMT (*def))
      && gimple_code (stmt) != GIMPLE_NOP)
    *def = make_new_ssa_for_def (stmt, opcode, op);

  FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
    make_new_ssa_for_def (stmt, opcode, op);
}

/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}

/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE, removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  tree orig_def = *def;
  gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.  */
  auto_vec<gimple *, 64> stmts_to_fix;

  do
    {
      tree name;

      if (opcode == MULT_EXPR)
	{
	  if (stmt_is_power_of_op (stmt, op))
	    {
	      if (decrement_power (stmt) == 1)
		{
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (op, stmt, def);
		}
	      break;
	    }
	  else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (cst, stmt, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  gimple_assign_set_rhs_code
		    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
		  break;
		}
	    }
	}

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
	 is ours simply propagate the other operand into the stmts
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  if (stmts_to_fix.length () > 0)
	    stmts_to_fix.pop ();
	  propagate_op_to_single_use (name, stmt, def);
	  break;
	}

      /* We might have a multiply of two __builtin_pow* calls, and
	 the operand might be hiding in the rightmost one.  Likewise
	 this can happen for a negate.  */
      if (opcode == MULT_EXPR
	  && gimple_assign_rhs_code (stmt) == opcode
	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
	  && has_single_use (gimple_assign_rhs2 (stmt)))
	{
	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (stmt_is_power_of_op (stmt2, op))
	    {
	      if (decrement_power (stmt2) == 1)
		propagate_op_to_single_use (op, stmt2, def);
	      else
		stmts_to_fix.safe_push (stmt2);
	      break;
	    }
	  else if (is_gimple_assign (stmt2)
		   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt2) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  propagate_op_to_single_use (cst, stmt2, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  stmts_to_fix.safe_push (stmt2);
		  gimple_assign_set_rhs_code
		    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
		  break;
		}
	    }
	}

      /* Continue walking the chain.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
      stmts_to_fix.safe_push (stmt);
    }
  while (1);

  if (stmts_to_fix.length () > 0 || *def == orig_def)
    make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
}

/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of a (D)
     SSA_NAME.  Assume it lives at the beginning of the function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
	 executed all in parallel, if only one stmt is a PHI,
	 it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
	return true;

      if (gimple_code (s2) == GIMPLE_PHI)
	return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
	return true;

      if (gimple_uid (s1) > gimple_uid (s2))
	return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *s = gsi_stmt (gsi);
	  if (gimple_uid (s) != uid)
	    break;
	  if (s == s2)
	    return true;
	}

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}

/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple *stmt, gimple *insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else
    /* We assume INSERT_POINT is an SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}

/* Builds one statement performing OP1 OPCODE OP2 using a new SSA name
   of type TYPE for the result.  Places the statement after the
   definition of either OP1 or OP2.  Returns the new statement.  */

static gimple *
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple *op1def = NULL, *op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (gsi_end_p (gsi))
	{
	  gimple_stmt_iterator gsi2
	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gimple_set_uid (sum,
			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
	}
      else
	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple *insert_point;
      if ((!op1def || gimple_nop_p (op1def))
	  || (op2def && !gimple_nop_p (op2def)
	      && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
	insert_point = op2def;
      else
	insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  update_stmt (sum);

  return sum;
}
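
/* For illustration only (not used by the pass, kept under "#if 0"): a tiny
   numeric model of the un-distribution transform performed by
   undistribute_ops_list below (A*X + B*X -> (A+B)*X).  Each summand is a
   small factor list; we count how often each factor occurs across the
   summands, pick the most frequent one, and check that factoring it out
   does not change the value.  All names here are hypothetical and exist
   only for this sketch.  */
#if 0
#include <stdio.h>

#define NSUM 3
#define NFAC 2

int
main (void)
{
  /* a*x + b*x + c*y with a=2, b=3, c=5, x=7, y=11.  */
  double summand[NSUM][NFAC] = { { 2, 7 }, { 3, 7 }, { 5, 11 } };
  double factors[] = { 2, 3, 5, 7, 11 };
  int nfactors = sizeof factors / sizeof factors[0];

  /* Count occurrences of each distinct factor across all summands,
     mirroring the (operand, code) counting table built by the pass.  */
  int best = 0, best_cnt = 0;
  for (int f = 0; f < nfactors; f++)
    {
      int cnt = 0;
      for (int s = 0; s < NSUM; s++)
	for (int i = 0; i < NFAC; i++)
	  if (summand[s][i] == factors[f])
	    cnt++;
      if (cnt > best_cnt)
	{
	  best = f;
	  best_cnt = cnt;
	}
    }
  printf ("most common factor: %g (%d occurrences)\n",
	  factors[best], best_cnt);

  /* Original value and the un-distributed value (A + B) * X + rest.  */
  double orig = 2 * 7 + 3 * 7 + 5 * 11;
  double undistributed = (2 + 3) * 7 + 5 * 11;
  printf ("%g == %g\n", orig, undistributed);
  return 0;
}
#endif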

/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

   - First we walk the addition chain *OPS looking for summands that
     are defined by a multiplication or a real division.  This results
     in the candidates bitmap with relevant indices into *OPS.

   - Second we build the chains of multiplications or divisions for
     these candidates, counting the number of occurrences of (operand, code)
     pairs in all of the candidates chains.

   - Third we sort the (operand, code) pairs by number of occurrence and
     process them starting with the pair with the most uses.

     * For each such pair we walk the candidates again to build a
       second candidate bitmap noting all multiplication/division chains
       that have at least one occurrence of (operand, code).

     * We build an alternate addition chain only covering these
       candidates with one (operand, code) operation removed from their
       multiplication/division chain.

     * The first candidate gets replaced by the alternate addition chain
       multiplied/divided by the operand.

     * All candidate chains get disabled for further processing and
       processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */

static bool
undistribute_ops_list (enum tree_code opcode,
		       vec<operand_entry *> *ops, struct loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry *oe1;
  unsigned i, j;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry *> *subops;
  bool changed = false;
  unsigned int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  auto_sbitmap candidates (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple *oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
	continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
	   && dcode != RDIV_EXPR)
	  || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
			  (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);

  hash_table<oecount_hasher> ctable (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to work around that limitation.  */
  typedef vec<operand_entry *> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple *oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
			   associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (subops[i], j, oe1)
	{
	  oecount c;
	  int *slot;
	  int idx;
	  c.oecode = oecode;
	  c.cnt = 1;
	  c.id = next_oecount_id++;
	  c.op = oe1->op;
	  cvec.safe_push (c);
	  idx = cvec.length () + 41;
	  slot = ctable.find_slot (idx, INSERT);
	  if (!*slot)
	    {
	      *slot = idx;
	    }
	  else
	    {
	      cvec.pop ();
	      cvec[*slot - 42].cnt++;
	    }
	}
    }

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
	{
	  fprintf (dump_file, " %u %s: ", c->cnt,
		   c->oecode == MULT_EXPR
		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
	  print_generic_expr (dump_file, c->op);
	  fprintf (dump_file, "\n");
	}
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  auto_sbitmap candidates2 (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
	break;

      /* Now collect the operands in the outer chain that contain
	 the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
	{
	  gimple *oedef;
	  enum tree_code oecode;
	  unsigned j;
	  tree op = (*ops)[i]->op;

	  /* If we undistributed in this chain already this may be
	     a constant.  */
	  if (TREE_CODE (op) != SSA_NAME)
	    continue;

	  oedef = SSA_NAME_DEF_STMT (op);
	  oecode = gimple_assign_rhs_code (oedef);
	  if (oecode != c->oecode)
	    continue;

	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
	    {
	      if (oe1->op == c->op)
		{
		  bitmap_set_bit (candidates2, i);
		  ++nr_candidates2;
		  break;
		}
	    }
	}

      if (nr_candidates2 >= 2)
	{
	  operand_entry *oe1, *oe2;
	  gimple *prod;
	  int first = bitmap_first_set_bit (candidates2);

	  /* Build the new addition chain.  */
	  oe1 = (*ops)[first];
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Building (");
	      print_generic_expr (dump_file, oe1->op);
	    }
	  zero_one_operation (&oe1->op, c->oecode, c->op);
	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
	    {
	      gimple *sum;
	      oe2 = (*ops)[i];
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " + ");
		  print_generic_expr (dump_file, oe2->op);
		}
	      zero_one_operation (&oe2->op, c->oecode, c->op);
	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
				       oe1->op, oe2->op, opcode);
	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
	      oe2->rank = 0;
	      oe1->op = gimple_get_lhs (sum);
	    }

	  /* Apply the multiplication/division.  */
	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
				    oe1->op, c->op, c->oecode);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
	      print_generic_expr (dump_file, c->op);
	      fprintf (dump_file, "\n");
	    }

	  /* Record it in the addition chain and disable further
	     undistribution with this op.  */
	  oe1->op = gimple_assign_lhs (prod);
	  oe1->rank = get_rank (oe1->op);
	  subops[first].release ();

	  changed = true;
	}

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();

  return changed;
}

/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */

static bool
eliminate_redundant_comparison (enum tree_code opcode,
				vec<operand_entry *> *ops,
				unsigned int currindex,
				operand_entry *curr)
{
  tree op1, op2;
  enum tree_code lcode, rcode;
  gimple *def1, *def2;
  int i;
  operand_entry *oe;

  if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
    return false;

  /* Check that CURR is a comparison.  */
  if (TREE_CODE (curr->op) != SSA_NAME)
    return false;
  def1 = SSA_NAME_DEF_STMT (curr->op);
  if (!is_gimple_assign (def1))
    return false;
  lcode = gimple_assign_rhs_code (def1);
  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
    return false;
  op1 = gimple_assign_rhs1 (def1);
  op2 = gimple_assign_rhs2 (def1);

  /* Now look for a similar comparison in the remaining OPS.  */
  for (i = currindex + 1; ops->iterate (i, &oe); i++)
    {
      tree t;

      if (TREE_CODE (oe->op) != SSA_NAME)
	continue;
      def2 = SSA_NAME_DEF_STMT (oe->op);
      if (!is_gimple_assign (def2))
	continue;
      rcode = gimple_assign_rhs_code (def2);
      if (TREE_CODE_CLASS (rcode) != tcc_comparison)
	continue;

      /* If we got here, we have a match.  See if we can combine the
	 two comparisons.  */
      if (opcode == BIT_IOR_EXPR)
	t = maybe_fold_or_comparisons (lcode, op1, op2,
				       rcode, gimple_assign_rhs1 (def2),
				       gimple_assign_rhs2 (def2));
      else
	t = maybe_fold_and_comparisons (lcode, op1, op2,
					rcode, gimple_assign_rhs1 (def2),
					gimple_assign_rhs2 (def2));
      if (!t)
	continue;

      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
	 always give us a boolean_type_node value back.  If the original
	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
	 we need to convert.  */
      if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
	t = fold_convert (TREE_TYPE (curr->op), t);

      if (TREE_CODE (t) != INTEGER_CST
	  && !operand_equal_p (t, curr->op, 0))
	{
	  enum tree_code subcode;
	  tree newop1, newop2;
	  if (!COMPARISON_CLASS_P (t))
	    continue;
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
	    continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Equivalence: ");
	  print_generic_expr (dump_file, curr->op);
	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
	  print_generic_expr (dump_file, oe->op);
	  fprintf (dump_file, " -> ");
	  print_generic_expr (dump_file, t);
	  fprintf (dump_file, "\n");
	}

      /* Now we can delete oe, as it has been subsumed by the new combined
	 expression t.  */
      ops->ordered_remove (i);
      reassociate_stats.ops_eliminated ++;

      /* If t is the same as curr->op, we're done.  Otherwise we must
	 replace curr->op with t.  Special case is if we got a constant
	 back, in which case we add it to the end instead of in place of
	 the current entry.  */
      if (TREE_CODE (t) == INTEGER_CST)
	{
	  ops->ordered_remove (currindex);
	  add_to_ops_vec (ops, t);
	}
      else if (!operand_equal_p (t, curr->op, 0))
	{
	  gimple *sum;
	  enum tree_code subcode;
	  tree newop1;
	  tree newop2;
	  gcc_assert (COMPARISON_CLASS_P (t));
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  gcc_checking_assert (is_gimple_val (newop1)
			       && is_gimple_val (newop2));
	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
	  curr->op = gimple_get_lhs (sum);
	}
      return true;
    }

  return false;
}


/* Transform repeated addition of the same values into a multiplication
   by a constant.  */
*/ 1904 static bool 1905 transform_add_to_multiply (vec<operand_entry *> *ops) 1906 { 1907 operand_entry *oe; 1908 tree op = NULL_TREE; 1909 int j; 1910 int i, start = -1, end = 0, count = 0; 1911 auto_vec<std::pair <int, int> > indxs; 1912 bool changed = false; 1913 1914 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op)) 1915 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op)) 1916 || !flag_unsafe_math_optimizations)) 1917 return false; 1918 1919 /* Look for repeated operands. */ 1920 FOR_EACH_VEC_ELT (*ops, i, oe) 1921 { 1922 if (start == -1) 1923 { 1924 count = 1; 1925 op = oe->op; 1926 start = i; 1927 } 1928 else if (operand_equal_p (oe->op, op, 0)) 1929 { 1930 count++; 1931 end = i; 1932 } 1933 else 1934 { 1935 if (count > 1) 1936 indxs.safe_push (std::make_pair (start, end)); 1937 count = 1; 1938 op = oe->op; 1939 start = i; 1940 } 1941 } 1942 1943 if (count > 1) 1944 indxs.safe_push (std::make_pair (start, end)); 1945 1946 for (j = indxs.length () - 1; j >= 0; --j) 1947 { 1948 /* Convert repeated operand addition to multiplication. */ 1949 start = indxs[j].first; 1950 end = indxs[j].second; 1951 op = (*ops)[start]->op; 1952 count = end - start + 1; 1953 for (i = end; i >= start; --i) 1954 ops->unordered_remove (i); 1955 tree tmp = make_ssa_name (TREE_TYPE (op)); 1956 tree cst = build_int_cst (integer_type_node, count); 1957 gassign *mul_stmt 1958 = gimple_build_assign (tmp, MULT_EXPR, 1959 op, fold_convert (TREE_TYPE (op), cst)); 1960 gimple_set_visited (mul_stmt, true); 1961 add_to_ops_vec (ops, tmp, mul_stmt); 1962 changed = true; 1963 } 1964 1965 return changed; 1966 } 1967 1968 1969 /* Perform various identities and other optimizations on the list of 1970 operand entries, stored in OPS. The tree code for the binary 1971 operation between all the operands is OPCODE. */ 1972 1973 static void 1974 optimize_ops_list (enum tree_code opcode, 1975 vec<operand_entry *> *ops) 1976 { 1977 unsigned int length = ops->length (); 1978 unsigned int i; 1979 operand_entry *oe; 1980 operand_entry *oelast = NULL; 1981 bool iterate = false; 1982 1983 if (length == 1) 1984 return; 1985 1986 oelast = ops->last (); 1987 1988 /* If the last two are constants, pop the constants off, merge them 1989 and try the next two. 
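     E.g. (illustrative) for the operand list a, 5, 9 the two constants are
     folded into 14, and the recursive call below retries in case the merge
     exposed further foldable constants.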
*/ 1990 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) 1991 { 1992 operand_entry *oelm1 = (*ops)[length - 2]; 1993 1994 if (oelm1->rank == 0 1995 && is_gimple_min_invariant (oelm1->op) 1996 && useless_type_conversion_p (TREE_TYPE (oelm1->op), 1997 TREE_TYPE (oelast->op))) 1998 { 1999 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op), 2000 oelm1->op, oelast->op); 2001 2002 if (folded && is_gimple_min_invariant (folded)) 2003 { 2004 if (dump_file && (dump_flags & TDF_DETAILS)) 2005 fprintf (dump_file, "Merging constants\n"); 2006 2007 ops->pop (); 2008 ops->pop (); 2009 2010 add_to_ops_vec (ops, folded); 2011 reassociate_stats.constants_eliminated++; 2012 2013 optimize_ops_list (opcode, ops); 2014 return; 2015 } 2016 } 2017 } 2018 2019 eliminate_using_constants (opcode, ops); 2020 oelast = NULL; 2021 2022 for (i = 0; ops->iterate (i, &oe);) 2023 { 2024 bool done = false; 2025 2026 if (eliminate_not_pairs (opcode, ops, i, oe)) 2027 return; 2028 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast) 2029 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)) 2030 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe))) 2031 { 2032 if (done) 2033 return; 2034 iterate = true; 2035 oelast = NULL; 2036 continue; 2037 } 2038 oelast = oe; 2039 i++; 2040 } 2041 2042 length = ops->length (); 2043 oelast = ops->last (); 2044 2045 if (iterate) 2046 optimize_ops_list (opcode, ops); 2047 } 2048 2049 /* The following functions are subroutines to optimize_range_tests and allow 2050 it to try to change a logical combination of comparisons into a range 2051 test. 2052 2053 For example, both 2054 X == 2 || X == 5 || X == 3 || X == 4 2055 and 2056 X >= 2 && X <= 5 2057 are converted to 2058 (unsigned) (X - 2) <= 3 2059 2060 For more information see comments above fold_test_range in fold-const.c, 2061 this implementation is for GIMPLE. */ 2062 2063 struct range_entry 2064 { 2065 tree exp; 2066 tree low; 2067 tree high; 2068 bool in_p; 2069 bool strict_overflow_p; 2070 unsigned int idx, next; 2071 }; 2072 2073 /* This is similar to make_range in fold-const.c, but on top of 2074 GIMPLE instead of trees. If EXP is non-NULL, it should be 2075 an SSA_NAME and STMT argument is ignored, otherwise STMT 2076 argument should be a GIMPLE_COND. */ 2077 2078 static void 2079 init_range_entry (struct range_entry *r, tree exp, gimple *stmt) 2080 { 2081 int in_p; 2082 tree low, high; 2083 bool is_bool, strict_overflow_p; 2084 2085 r->exp = NULL_TREE; 2086 r->in_p = false; 2087 r->strict_overflow_p = false; 2088 r->low = NULL_TREE; 2089 r->high = NULL_TREE; 2090 if (exp != NULL_TREE 2091 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))) 2092 return; 2093 2094 /* Start with simply saying "EXP != 0" and then look at the code of EXP 2095 and see if we can refine the range. Some of the cases below may not 2096 happen, but it doesn't seem worth worrying about this. We "continue" 2097 the outer loop when we've changed something; otherwise we "break" 2098 the switch, which will "break" the while. */ 2099 low = exp ? 
build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node; 2100 high = low; 2101 in_p = 0; 2102 strict_overflow_p = false; 2103 is_bool = false; 2104 if (exp == NULL_TREE) 2105 is_bool = true; 2106 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) 2107 { 2108 if (TYPE_UNSIGNED (TREE_TYPE (exp))) 2109 is_bool = true; 2110 else 2111 return; 2112 } 2113 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) 2114 is_bool = true; 2115 2116 while (1) 2117 { 2118 enum tree_code code; 2119 tree arg0, arg1, exp_type; 2120 tree nexp; 2121 location_t loc; 2122 2123 if (exp != NULL_TREE) 2124 { 2125 if (TREE_CODE (exp) != SSA_NAME 2126 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp)) 2127 break; 2128 2129 stmt = SSA_NAME_DEF_STMT (exp); 2130 if (!is_gimple_assign (stmt)) 2131 break; 2132 2133 code = gimple_assign_rhs_code (stmt); 2134 arg0 = gimple_assign_rhs1 (stmt); 2135 arg1 = gimple_assign_rhs2 (stmt); 2136 exp_type = TREE_TYPE (exp); 2137 } 2138 else 2139 { 2140 code = gimple_cond_code (stmt); 2141 arg0 = gimple_cond_lhs (stmt); 2142 arg1 = gimple_cond_rhs (stmt); 2143 exp_type = boolean_type_node; 2144 } 2145 2146 if (TREE_CODE (arg0) != SSA_NAME 2147 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0)) 2148 break; 2149 loc = gimple_location (stmt); 2150 switch (code) 2151 { 2152 case BIT_NOT_EXPR: 2153 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE 2154 /* Ensure the range is either +[-,0], +[0,0], 2155 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or 2156 -[1,1]. If it is e.g. +[-,-] or -[-,-] 2157 or similar expression of unconditional true or 2158 false, it should not be negated. */ 2159 && ((high && integer_zerop (high)) 2160 || (low && integer_onep (low)))) 2161 { 2162 in_p = !in_p; 2163 exp = arg0; 2164 continue; 2165 } 2166 break; 2167 case SSA_NAME: 2168 exp = arg0; 2169 continue; 2170 CASE_CONVERT: 2171 if (is_bool) 2172 { 2173 if ((TYPE_PRECISION (exp_type) == 1 2174 || TREE_CODE (exp_type) == BOOLEAN_TYPE) 2175 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1) 2176 return; 2177 } 2178 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) 2179 { 2180 if (TYPE_UNSIGNED (TREE_TYPE (arg0))) 2181 is_bool = true; 2182 else 2183 return; 2184 } 2185 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) 2186 is_bool = true; 2187 goto do_default; 2188 case EQ_EXPR: 2189 case NE_EXPR: 2190 case LT_EXPR: 2191 case LE_EXPR: 2192 case GE_EXPR: 2193 case GT_EXPR: 2194 is_bool = true; 2195 /* FALLTHRU */ 2196 default: 2197 if (!is_bool) 2198 return; 2199 do_default: 2200 nexp = make_range_step (loc, code, arg0, arg1, exp_type, 2201 &low, &high, &in_p, 2202 &strict_overflow_p); 2203 if (nexp != NULL_TREE) 2204 { 2205 exp = nexp; 2206 gcc_assert (TREE_CODE (exp) == SSA_NAME); 2207 continue; 2208 } 2209 break; 2210 } 2211 break; 2212 } 2213 if (is_bool) 2214 { 2215 r->exp = exp; 2216 r->in_p = in_p; 2217 r->low = low; 2218 r->high = high; 2219 r->strict_overflow_p = strict_overflow_p; 2220 } 2221 } 2222 2223 /* Comparison function for qsort. Sort entries 2224 without SSA_NAME exp first, then with SSA_NAMEs sorted 2225 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs 2226 by increasing ->low and if ->low is the same, by increasing 2227 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE 2228 maximum. 
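   For example (illustrative), entries for the same SSA_NAME with ranges
   [NULL, 5], [1, 3], [1, 7] and [2, NULL] sort in exactly that order;
   remaining ties are broken by ascending idx.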
*/ 2229 2230 static int 2231 range_entry_cmp (const void *a, const void *b) 2232 { 2233 const struct range_entry *p = (const struct range_entry *) a; 2234 const struct range_entry *q = (const struct range_entry *) b; 2235 2236 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) 2237 { 2238 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 2239 { 2240 /* Group range_entries for the same SSA_NAME together. */ 2241 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) 2242 return -1; 2243 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) 2244 return 1; 2245 /* If ->low is different, NULL low goes first, then by 2246 ascending low. */ 2247 if (p->low != NULL_TREE) 2248 { 2249 if (q->low != NULL_TREE) 2250 { 2251 tree tem = fold_binary (LT_EXPR, boolean_type_node, 2252 p->low, q->low); 2253 if (tem && integer_onep (tem)) 2254 return -1; 2255 tem = fold_binary (GT_EXPR, boolean_type_node, 2256 p->low, q->low); 2257 if (tem && integer_onep (tem)) 2258 return 1; 2259 } 2260 else 2261 return 1; 2262 } 2263 else if (q->low != NULL_TREE) 2264 return -1; 2265 /* If ->high is different, NULL high goes last, before that by 2266 ascending high. */ 2267 if (p->high != NULL_TREE) 2268 { 2269 if (q->high != NULL_TREE) 2270 { 2271 tree tem = fold_binary (LT_EXPR, boolean_type_node, 2272 p->high, q->high); 2273 if (tem && integer_onep (tem)) 2274 return -1; 2275 tem = fold_binary (GT_EXPR, boolean_type_node, 2276 p->high, q->high); 2277 if (tem && integer_onep (tem)) 2278 return 1; 2279 } 2280 else 2281 return -1; 2282 } 2283 else if (q->high != NULL_TREE) 2284 return 1; 2285 /* If both ranges are the same, sort below by ascending idx. */ 2286 } 2287 else 2288 return 1; 2289 } 2290 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 2291 return -1; 2292 2293 if (p->idx < q->idx) 2294 return -1; 2295 else 2296 { 2297 gcc_checking_assert (p->idx > q->idx); 2298 return 1; 2299 } 2300 } 2301 2302 /* Helper function for update_range_test. Force EXPR into an SSA_NAME, 2303 insert needed statements BEFORE or after GSI. */ 2304 2305 static tree 2306 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before) 2307 { 2308 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING; 2309 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m); 2310 if (TREE_CODE (ret) != SSA_NAME) 2311 { 2312 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret); 2313 if (before) 2314 gsi_insert_before (gsi, g, GSI_SAME_STMT); 2315 else 2316 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING); 2317 ret = gimple_assign_lhs (g); 2318 } 2319 return ret; 2320 } 2321 2322 /* Helper routine of optimize_range_test. 2323 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for 2324 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, 2325 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE 2326 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to 2327 an array of COUNT pointers to other ranges. Return 2328 true if the range merge has been successful. 2329 If OPCODE is ERROR_MARK, this is called from within 2330 maybe_optimize_range_tests and is performing inter-bb range optimization. 2331 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in 2332 oe->rank. 
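   SEQ, if not NULL, is a sequence of statements needed to compute EXP;
   it is inserted at the location chosen for the rewritten range test.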
*/ 2333 2334 static bool 2335 update_range_test (struct range_entry *range, struct range_entry *otherrange, 2336 struct range_entry **otherrangep, 2337 unsigned int count, enum tree_code opcode, 2338 vec<operand_entry *> *ops, tree exp, gimple_seq seq, 2339 bool in_p, tree low, tree high, bool strict_overflow_p) 2340 { 2341 operand_entry *oe = (*ops)[range->idx]; 2342 tree op = oe->op; 2343 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) 2344 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 2345 location_t loc = gimple_location (stmt); 2346 tree optype = op ? TREE_TYPE (op) : boolean_type_node; 2347 tree tem = build_range_check (loc, optype, unshare_expr (exp), 2348 in_p, low, high); 2349 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 2350 gimple_stmt_iterator gsi; 2351 unsigned int i, uid; 2352 2353 if (tem == NULL_TREE) 2354 return false; 2355 2356 /* If op is default def SSA_NAME, there is no place to insert the 2357 new comparison. Give up, unless we can use OP itself as the 2358 range test. */ 2359 if (op && SSA_NAME_IS_DEFAULT_DEF (op)) 2360 { 2361 if (op == range->exp 2362 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype)) 2363 || TREE_CODE (optype) == BOOLEAN_TYPE) 2364 && (op == tem 2365 || (TREE_CODE (tem) == EQ_EXPR 2366 && TREE_OPERAND (tem, 0) == op 2367 && integer_onep (TREE_OPERAND (tem, 1)))) 2368 && opcode != BIT_IOR_EXPR 2369 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR)) 2370 { 2371 stmt = NULL; 2372 tem = op; 2373 } 2374 else 2375 return false; 2376 } 2377 2378 if (strict_overflow_p && issue_strict_overflow_warning (wc)) 2379 warning_at (loc, OPT_Wstrict_overflow, 2380 "assuming signed overflow does not occur " 2381 "when simplifying range test"); 2382 2383 if (dump_file && (dump_flags & TDF_DETAILS)) 2384 { 2385 struct range_entry *r; 2386 fprintf (dump_file, "Optimizing range tests "); 2387 print_generic_expr (dump_file, range->exp); 2388 fprintf (dump_file, " %c[", range->in_p ? '+' : '-'); 2389 print_generic_expr (dump_file, range->low); 2390 fprintf (dump_file, ", "); 2391 print_generic_expr (dump_file, range->high); 2392 fprintf (dump_file, "]"); 2393 for (i = 0; i < count; i++) 2394 { 2395 if (otherrange) 2396 r = otherrange + i; 2397 else 2398 r = otherrangep[i]; 2399 if (r->exp 2400 && r->exp != range->exp 2401 && TREE_CODE (r->exp) == SSA_NAME) 2402 { 2403 fprintf (dump_file, " and "); 2404 print_generic_expr (dump_file, r->exp); 2405 } 2406 else 2407 fprintf (dump_file, " and"); 2408 fprintf (dump_file, " %c[", r->in_p ? '+' : '-'); 2409 print_generic_expr (dump_file, r->low); 2410 fprintf (dump_file, ", "); 2411 print_generic_expr (dump_file, r->high); 2412 fprintf (dump_file, "]"); 2413 } 2414 fprintf (dump_file, "\n into "); 2415 print_generic_expr (dump_file, tem); 2416 fprintf (dump_file, "\n"); 2417 } 2418 2419 if (opcode == BIT_IOR_EXPR 2420 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 2421 tem = invert_truthvalue_loc (loc, tem); 2422 2423 tem = fold_convert_loc (loc, optype, tem); 2424 if (stmt) 2425 { 2426 gsi = gsi_for_stmt (stmt); 2427 uid = gimple_uid (stmt); 2428 } 2429 else 2430 { 2431 gsi = gsi_none (); 2432 uid = 0; 2433 } 2434 if (stmt == NULL) 2435 gcc_checking_assert (tem == op); 2436 /* In rare cases range->exp can be equal to lhs of stmt. 2437 In that case we have to insert after the stmt rather then before 2438 it. If stmt is a PHI, insert it at the start of the basic block. 
*/ 2439 else if (op != range->exp) 2440 { 2441 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 2442 tem = force_into_ssa_name (&gsi, tem, true); 2443 gsi_prev (&gsi); 2444 } 2445 else if (gimple_code (stmt) != GIMPLE_PHI) 2446 { 2447 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); 2448 tem = force_into_ssa_name (&gsi, tem, false); 2449 } 2450 else 2451 { 2452 gsi = gsi_after_labels (gimple_bb (stmt)); 2453 if (!gsi_end_p (gsi)) 2454 uid = gimple_uid (gsi_stmt (gsi)); 2455 else 2456 { 2457 gsi = gsi_start_bb (gimple_bb (stmt)); 2458 uid = 1; 2459 while (!gsi_end_p (gsi)) 2460 { 2461 uid = gimple_uid (gsi_stmt (gsi)); 2462 gsi_next (&gsi); 2463 } 2464 } 2465 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 2466 tem = force_into_ssa_name (&gsi, tem, true); 2467 if (gsi_end_p (gsi)) 2468 gsi = gsi_last_bb (gimple_bb (stmt)); 2469 else 2470 gsi_prev (&gsi); 2471 } 2472 for (; !gsi_end_p (gsi); gsi_prev (&gsi)) 2473 if (gimple_uid (gsi_stmt (gsi))) 2474 break; 2475 else 2476 gimple_set_uid (gsi_stmt (gsi), uid); 2477 2478 oe->op = tem; 2479 range->exp = exp; 2480 range->low = low; 2481 range->high = high; 2482 range->in_p = in_p; 2483 range->strict_overflow_p = false; 2484 2485 for (i = 0; i < count; i++) 2486 { 2487 if (otherrange) 2488 range = otherrange + i; 2489 else 2490 range = otherrangep[i]; 2491 oe = (*ops)[range->idx]; 2492 /* Now change all the other range test immediate uses, so that 2493 those tests will be optimized away. */ 2494 if (opcode == ERROR_MARK) 2495 { 2496 if (oe->op) 2497 oe->op = build_int_cst (TREE_TYPE (oe->op), 2498 oe->rank == BIT_IOR_EXPR ? 0 : 1); 2499 else 2500 oe->op = (oe->rank == BIT_IOR_EXPR 2501 ? boolean_false_node : boolean_true_node); 2502 } 2503 else 2504 oe->op = error_mark_node; 2505 range->exp = NULL_TREE; 2506 range->low = NULL_TREE; 2507 range->high = NULL_TREE; 2508 } 2509 return true; 2510 } 2511 2512 /* Optimize X == CST1 || X == CST2 2513 if popcount (CST1 ^ CST2) == 1 into 2514 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). 2515 Similarly for ranges. E.g. 2516 X != 2 && X != 3 && X != 10 && X != 11 2517 will be transformed by the previous optimization into 2518 !((X - 2U) <= 1U || (X - 10U) <= 1U) 2519 and this loop can transform that into 2520 !(((X & ~8) - 2U) <= 1U). */ 2521 2522 static bool 2523 optimize_range_tests_xor (enum tree_code opcode, tree type, 2524 tree lowi, tree lowj, tree highi, tree highj, 2525 vec<operand_entry *> *ops, 2526 struct range_entry *rangei, 2527 struct range_entry *rangej) 2528 { 2529 tree lowxor, highxor, tem, exp; 2530 /* Check lowi ^ lowj == highi ^ highj and 2531 popcount (lowi ^ lowj) == 1. 
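     E.g. (illustrative) for X == 4 || X == 6 we have lowi ^ lowj
     == highi ^ highj == 2, a power of two, so the pair becomes
     (X & ~2) == 4.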
*/ 2532 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); 2533 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) 2534 return false; 2535 if (!integer_pow2p (lowxor)) 2536 return false; 2537 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); 2538 if (!tree_int_cst_equal (lowxor, highxor)) 2539 return false; 2540 2541 exp = rangei->exp; 2542 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type)); 2543 int prec = GET_MODE_PRECISION (mode); 2544 if (TYPE_PRECISION (type) < prec 2545 || (wi::to_wide (TYPE_MIN_VALUE (type)) 2546 != wi::min_value (prec, TYPE_SIGN (type))) 2547 || (wi::to_wide (TYPE_MAX_VALUE (type)) 2548 != wi::max_value (prec, TYPE_SIGN (type)))) 2549 { 2550 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type)); 2551 exp = fold_convert (type, exp); 2552 lowxor = fold_convert (type, lowxor); 2553 lowi = fold_convert (type, lowi); 2554 highi = fold_convert (type, highi); 2555 } 2556 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); 2557 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem); 2558 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); 2559 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); 2560 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp, 2561 NULL, rangei->in_p, lowj, highj, 2562 rangei->strict_overflow_p 2563 || rangej->strict_overflow_p)) 2564 return true; 2565 return false; 2566 } 2567 2568 /* Optimize X == CST1 || X == CST2 2569 if popcount (CST2 - CST1) == 1 into 2570 ((X - CST1) & ~(CST2 - CST1)) == 0. 2571 Similarly for ranges. E.g. 2572 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46 2573 || X == 75 || X == 45 2574 will be transformed by the previous optimization into 2575 (X - 43U) <= 3U || (X - 75U) <= 3U 2576 and this loop can transform that into 2577 ((X - 43U) & ~(75U - 43U)) <= 3U. */ 2578 static bool 2579 optimize_range_tests_diff (enum tree_code opcode, tree type, 2580 tree lowi, tree lowj, tree highi, tree highj, 2581 vec<operand_entry *> *ops, 2582 struct range_entry *rangei, 2583 struct range_entry *rangej) 2584 { 2585 tree tem1, tem2, mask; 2586 /* Check highi - lowi == highj - lowj. */ 2587 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi); 2588 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 2589 return false; 2590 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj); 2591 if (!tree_int_cst_equal (tem1, tem2)) 2592 return false; 2593 /* Check popcount (lowj - lowi) == 1. 
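     Continuing the example in the function comment above: lowj - lowi
     == 75 - 43 == 32 is a power of two and highi - lowi == highj - lowj
     == 3, so the two range tests can be merged.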
*/ 2594 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi); 2595 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 2596 return false; 2597 if (!integer_pow2p (tem1)) 2598 return false; 2599 2600 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type)); 2601 int prec = GET_MODE_PRECISION (mode); 2602 if (TYPE_PRECISION (type) < prec 2603 || (wi::to_wide (TYPE_MIN_VALUE (type)) 2604 != wi::min_value (prec, TYPE_SIGN (type))) 2605 || (wi::to_wide (TYPE_MAX_VALUE (type)) 2606 != wi::max_value (prec, TYPE_SIGN (type)))) 2607 type = build_nonstandard_integer_type (prec, 1); 2608 else 2609 type = unsigned_type_for (type); 2610 tem1 = fold_convert (type, tem1); 2611 tem2 = fold_convert (type, tem2); 2612 lowi = fold_convert (type, lowi); 2613 mask = fold_build1 (BIT_NOT_EXPR, type, tem1); 2614 tem1 = fold_build2 (MINUS_EXPR, type, 2615 fold_convert (type, rangei->exp), lowi); 2616 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask); 2617 lowj = build_int_cst (type, 0); 2618 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1, 2619 NULL, rangei->in_p, lowj, tem2, 2620 rangei->strict_overflow_p 2621 || rangej->strict_overflow_p)) 2622 return true; 2623 return false; 2624 } 2625 2626 /* It does some common checks for function optimize_range_tests_xor and 2627 optimize_range_tests_diff. 2628 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor. 2629 Else it calls optimize_range_tests_diff. */ 2630 2631 static bool 2632 optimize_range_tests_1 (enum tree_code opcode, int first, int length, 2633 bool optimize_xor, vec<operand_entry *> *ops, 2634 struct range_entry *ranges) 2635 { 2636 int i, j; 2637 bool any_changes = false; 2638 for (i = first; i < length; i++) 2639 { 2640 tree lowi, highi, lowj, highj, type, tem; 2641 2642 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 2643 continue; 2644 type = TREE_TYPE (ranges[i].exp); 2645 if (!INTEGRAL_TYPE_P (type)) 2646 continue; 2647 lowi = ranges[i].low; 2648 if (lowi == NULL_TREE) 2649 lowi = TYPE_MIN_VALUE (type); 2650 highi = ranges[i].high; 2651 if (highi == NULL_TREE) 2652 continue; 2653 for (j = i + 1; j < length && j < i + 64; j++) 2654 { 2655 bool changes; 2656 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p) 2657 continue; 2658 lowj = ranges[j].low; 2659 if (lowj == NULL_TREE) 2660 continue; 2661 highj = ranges[j].high; 2662 if (highj == NULL_TREE) 2663 highj = TYPE_MAX_VALUE (type); 2664 /* Check lowj > highi. */ 2665 tem = fold_binary (GT_EXPR, boolean_type_node, 2666 lowj, highi); 2667 if (tem == NULL_TREE || !integer_onep (tem)) 2668 continue; 2669 if (optimize_xor) 2670 changes = optimize_range_tests_xor (opcode, type, lowi, lowj, 2671 highi, highj, ops, 2672 ranges + i, ranges + j); 2673 else 2674 changes = optimize_range_tests_diff (opcode, type, lowi, lowj, 2675 highi, highj, ops, 2676 ranges + i, ranges + j); 2677 if (changes) 2678 { 2679 any_changes = true; 2680 break; 2681 } 2682 } 2683 } 2684 return any_changes; 2685 } 2686 2687 /* Helper function of optimize_range_tests_to_bit_test. Handle a single 2688 range, EXP, LOW, HIGH, compute bit mask of bits to test and return 2689 EXP on success, NULL otherwise. 
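   For instance (illustrative), with PREC == 64, TOTALLOW == LOW == 43 and
   HIGH == 46 the returned mask has bits 0 through 3 set; for LOW == 75 and
   HIGH == 78 relative to the same TOTALLOW the same four-bit mask is
   shifted left by 32.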
*/ 2690 2691 static tree 2692 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high, 2693 wide_int *mask, tree *totallowp) 2694 { 2695 tree tem = int_const_binop (MINUS_EXPR, high, low); 2696 if (tem == NULL_TREE 2697 || TREE_CODE (tem) != INTEGER_CST 2698 || TREE_OVERFLOW (tem) 2699 || tree_int_cst_sgn (tem) == -1 2700 || compare_tree_int (tem, prec) != -1) 2701 return NULL_TREE; 2702 2703 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1; 2704 *mask = wi::shifted_mask (0, max, false, prec); 2705 if (TREE_CODE (exp) == BIT_AND_EXPR 2706 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) 2707 { 2708 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1)); 2709 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp))); 2710 if (wi::popcount (msk) == 1 2711 && wi::ltu_p (msk, prec - max)) 2712 { 2713 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec); 2714 max += msk.to_uhwi (); 2715 exp = TREE_OPERAND (exp, 0); 2716 if (integer_zerop (low) 2717 && TREE_CODE (exp) == PLUS_EXPR 2718 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) 2719 { 2720 tree ret = TREE_OPERAND (exp, 0); 2721 STRIP_NOPS (ret); 2722 widest_int bias 2723 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)), 2724 TYPE_PRECISION (TREE_TYPE (low)))); 2725 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias); 2726 if (totallowp) 2727 { 2728 *totallowp = tbias; 2729 return ret; 2730 } 2731 else if (!tree_int_cst_lt (totallow, tbias)) 2732 return NULL_TREE; 2733 bias = wi::to_widest (tbias); 2734 bias -= wi::to_widest (totallow); 2735 if (bias >= 0 && bias < prec - max) 2736 { 2737 *mask = wi::lshift (*mask, bias); 2738 return ret; 2739 } 2740 } 2741 } 2742 } 2743 if (totallowp) 2744 return exp; 2745 if (!tree_int_cst_lt (totallow, low)) 2746 return exp; 2747 tem = int_const_binop (MINUS_EXPR, low, totallow); 2748 if (tem == NULL_TREE 2749 || TREE_CODE (tem) != INTEGER_CST 2750 || TREE_OVERFLOW (tem) 2751 || compare_tree_int (tem, prec - max) == 1) 2752 return NULL_TREE; 2753 2754 *mask = wi::lshift (*mask, wi::to_widest (tem)); 2755 return exp; 2756 } 2757 2758 /* Attempt to optimize small range tests using bit test. 2759 E.g. 
2760 X != 43 && X != 76 && X != 44 && X != 78 && X != 49 2761 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82 2762 has been by earlier optimizations optimized into: 2763 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82 2764 As all the 43 through 82 range is less than 64 numbers, 2765 for 64-bit word targets optimize that into: 2766 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */ 2767 2768 static bool 2769 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length, 2770 vec<operand_entry *> *ops, 2771 struct range_entry *ranges) 2772 { 2773 int i, j; 2774 bool any_changes = false; 2775 int prec = GET_MODE_BITSIZE (word_mode); 2776 auto_vec<struct range_entry *, 64> candidates; 2777 2778 for (i = first; i < length - 2; i++) 2779 { 2780 tree lowi, highi, lowj, highj, type; 2781 2782 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 2783 continue; 2784 type = TREE_TYPE (ranges[i].exp); 2785 if (!INTEGRAL_TYPE_P (type)) 2786 continue; 2787 lowi = ranges[i].low; 2788 if (lowi == NULL_TREE) 2789 lowi = TYPE_MIN_VALUE (type); 2790 highi = ranges[i].high; 2791 if (highi == NULL_TREE) 2792 continue; 2793 wide_int mask; 2794 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi, 2795 highi, &mask, &lowi); 2796 if (exp == NULL_TREE) 2797 continue; 2798 bool strict_overflow_p = ranges[i].strict_overflow_p; 2799 candidates.truncate (0); 2800 int end = MIN (i + 64, length); 2801 for (j = i + 1; j < end; j++) 2802 { 2803 tree exp2; 2804 if (ranges[j].exp == NULL_TREE || ranges[j].in_p) 2805 continue; 2806 if (ranges[j].exp == exp) 2807 ; 2808 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR) 2809 { 2810 exp2 = TREE_OPERAND (ranges[j].exp, 0); 2811 if (exp2 == exp) 2812 ; 2813 else if (TREE_CODE (exp2) == PLUS_EXPR) 2814 { 2815 exp2 = TREE_OPERAND (exp2, 0); 2816 STRIP_NOPS (exp2); 2817 if (exp2 != exp) 2818 continue; 2819 } 2820 else 2821 continue; 2822 } 2823 else 2824 continue; 2825 lowj = ranges[j].low; 2826 if (lowj == NULL_TREE) 2827 continue; 2828 highj = ranges[j].high; 2829 if (highj == NULL_TREE) 2830 highj = TYPE_MAX_VALUE (type); 2831 wide_int mask2; 2832 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj, 2833 highj, &mask2, NULL); 2834 if (exp2 != exp) 2835 continue; 2836 mask |= mask2; 2837 strict_overflow_p |= ranges[j].strict_overflow_p; 2838 candidates.safe_push (&ranges[j]); 2839 } 2840 2841 /* If we need otherwise 3 or more comparisons, use a bit test. */ 2842 if (candidates.length () >= 2) 2843 { 2844 tree high = wide_int_to_tree (TREE_TYPE (lowi), 2845 wi::to_widest (lowi) 2846 + prec - 1 - wi::clz (mask)); 2847 operand_entry *oe = (*ops)[ranges[i].idx]; 2848 tree op = oe->op; 2849 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) 2850 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 2851 location_t loc = gimple_location (stmt); 2852 tree optype = op ? TREE_TYPE (op) : boolean_type_node; 2853 2854 /* See if it isn't cheaper to pretend the minimum value of the 2855 range is 0, if maximum value is small enough. 2856 We can avoid then subtraction of the minimum value, but the 2857 mask constant could be perhaps more expensive. 
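             The RTX cost computation below compares the cost of the
             subtraction plus an AND with MASK against an AND with
             (MASK << lowi) alone, and drops the subtraction only when
             the latter is cheaper.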
             */
          if (compare_tree_int (lowi, 0) > 0
              && compare_tree_int (high, prec) < 0)
            {
              int cost_diff;
              HOST_WIDE_INT m = tree_to_uhwi (lowi);
              rtx reg = gen_raw_REG (word_mode, 10000);
              bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
              cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
                                                      GEN_INT (-m)), speed_p);
              rtx r = immed_wide_int_const (mask, word_mode);
              cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
                                         word_mode, speed_p);
              r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
              cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
                                         word_mode, speed_p);
              if (cost_diff > 0)
                {
                  mask = wi::lshift (mask, m);
                  lowi = build_zero_cst (TREE_TYPE (lowi));
                }
            }

          tree tem = build_range_check (loc, optype, unshare_expr (exp),
                                        false, lowi, high);
          if (tem == NULL_TREE || is_gimple_val (tem))
            continue;
          tree etype = unsigned_type_for (TREE_TYPE (exp));
          exp = fold_build2_loc (loc, MINUS_EXPR, etype,
                                 fold_convert_loc (loc, etype, exp),
                                 fold_convert_loc (loc, etype, lowi));
          exp = fold_convert_loc (loc, integer_type_node, exp);
          tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
          exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
                                 build_int_cst (word_type, 1), exp);
          exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
                                 wide_int_to_tree (word_type, mask));
          exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
                                 build_zero_cst (word_type));
          if (is_gimple_val (exp))
            continue;

          /* The shift might have undefined behavior if TEM is true,
             but reassociate_bb isn't prepared to have basic blocks
             split when it is running.  So, temporarily emit the code
             with BIT_IOR_EXPR instead of &&, and fix it up in
             branch_fixup.  */
          gimple_seq seq;
          tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
          gcc_assert (TREE_CODE (tem) == SSA_NAME);
          gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
          gimple_seq seq2;
          exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
          gimple_seq_add_seq_without_update (&seq, seq2);
          gcc_assert (TREE_CODE (exp) == SSA_NAME);
          gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
          gimple *g = gimple_build_assign (make_ssa_name (optype),
                                           BIT_IOR_EXPR, tem, exp);
          gimple_set_location (g, loc);
          gimple_seq_add_stmt_without_update (&seq, g);
          exp = gimple_assign_lhs (g);
          tree val = build_zero_cst (optype);
          if (update_range_test (&ranges[i], NULL, candidates.address (),
                                 candidates.length (), opcode, ops, exp,
                                 seq, false, val, val, strict_overflow_p))
            {
              any_changes = true;
              reassoc_branch_fixups.safe_push (tem);
            }
          else
            gimple_seq_discard (seq);
        }
    }
  return any_changes;
}

/* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
   and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
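   This is valid because the bitwise OR is zero iff every operand is zero
   and the bitwise AND is all ones iff every operand is all ones, so the
   whole chain collapses into a single comparison.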
*/ 2935 2936 static bool 2937 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length, 2938 vec<operand_entry *> *ops, 2939 struct range_entry *ranges) 2940 { 2941 int i; 2942 unsigned int b; 2943 bool any_changes = false; 2944 auto_vec<int, 128> buckets; 2945 auto_vec<int, 32> chains; 2946 auto_vec<struct range_entry *, 32> candidates; 2947 2948 for (i = first; i < length; i++) 2949 { 2950 if (ranges[i].exp == NULL_TREE 2951 || TREE_CODE (ranges[i].exp) != SSA_NAME 2952 || !ranges[i].in_p 2953 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1 2954 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE 2955 || ranges[i].low == NULL_TREE 2956 || ranges[i].low != ranges[i].high) 2957 continue; 2958 2959 bool zero_p = integer_zerop (ranges[i].low); 2960 if (!zero_p && !integer_all_onesp (ranges[i].low)) 2961 continue; 2962 2963 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p; 2964 if (buckets.length () <= b) 2965 buckets.safe_grow_cleared (b + 1); 2966 if (chains.length () <= (unsigned) i) 2967 chains.safe_grow (i + 1); 2968 chains[i] = buckets[b]; 2969 buckets[b] = i + 1; 2970 } 2971 2972 FOR_EACH_VEC_ELT (buckets, b, i) 2973 if (i && chains[i - 1]) 2974 { 2975 int j, k = i; 2976 for (j = chains[i - 1]; j; j = chains[j - 1]) 2977 { 2978 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp); 2979 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp); 2980 if (reassoc_stmt_dominates_stmt_p (gk, gj)) 2981 k = j; 2982 } 2983 tree type1 = TREE_TYPE (ranges[k - 1].exp); 2984 tree type2 = NULL_TREE; 2985 bool strict_overflow_p = false; 2986 candidates.truncate (0); 2987 for (j = i; j; j = chains[j - 1]) 2988 { 2989 tree type = TREE_TYPE (ranges[j - 1].exp); 2990 strict_overflow_p |= ranges[j - 1].strict_overflow_p; 2991 if (j == k 2992 || useless_type_conversion_p (type1, type)) 2993 ; 2994 else if (type2 == NULL_TREE 2995 || useless_type_conversion_p (type2, type)) 2996 { 2997 if (type2 == NULL_TREE) 2998 type2 = type; 2999 candidates.safe_push (&ranges[j - 1]); 3000 } 3001 } 3002 unsigned l = candidates.length (); 3003 for (j = i; j; j = chains[j - 1]) 3004 { 3005 tree type = TREE_TYPE (ranges[j - 1].exp); 3006 if (j == k) 3007 continue; 3008 if (useless_type_conversion_p (type1, type)) 3009 ; 3010 else if (type2 == NULL_TREE 3011 || useless_type_conversion_p (type2, type)) 3012 continue; 3013 candidates.safe_push (&ranges[j - 1]); 3014 } 3015 gimple_seq seq = NULL; 3016 tree op = NULL_TREE; 3017 unsigned int id; 3018 struct range_entry *r; 3019 candidates.safe_push (&ranges[k - 1]); 3020 FOR_EACH_VEC_ELT (candidates, id, r) 3021 { 3022 gimple *g; 3023 if (id == 0) 3024 { 3025 op = r->exp; 3026 continue; 3027 } 3028 if (id == l) 3029 { 3030 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op); 3031 gimple_seq_add_stmt_without_update (&seq, g); 3032 op = gimple_assign_lhs (g); 3033 } 3034 tree type = TREE_TYPE (r->exp); 3035 tree exp = r->exp; 3036 if (id >= l && !useless_type_conversion_p (type1, type)) 3037 { 3038 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp); 3039 gimple_seq_add_stmt_without_update (&seq, g); 3040 exp = gimple_assign_lhs (g); 3041 } 3042 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2), 3043 (b & 1) ? 
BIT_AND_EXPR : BIT_IOR_EXPR, 3044 op, exp); 3045 gimple_seq_add_stmt_without_update (&seq, g); 3046 op = gimple_assign_lhs (g); 3047 } 3048 candidates.pop (); 3049 if (update_range_test (&ranges[k - 1], NULL, candidates.address (), 3050 candidates.length (), opcode, ops, op, 3051 seq, true, ranges[k - 1].low, 3052 ranges[k - 1].low, strict_overflow_p)) 3053 any_changes = true; 3054 else 3055 gimple_seq_discard (seq); 3056 } 3057 3058 return any_changes; 3059 } 3060 3061 /* Attempt to optimize for signed a and b where b is known to be >= 0: 3062 a >= 0 && a < b into (unsigned) a < (unsigned) b 3063 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */ 3064 3065 static bool 3066 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length, 3067 vec<operand_entry *> *ops, 3068 struct range_entry *ranges, 3069 basic_block first_bb) 3070 { 3071 int i; 3072 bool any_changes = false; 3073 hash_map<tree, int> *map = NULL; 3074 3075 for (i = first; i < length; i++) 3076 { 3077 if (ranges[i].exp == NULL_TREE 3078 || TREE_CODE (ranges[i].exp) != SSA_NAME 3079 || !ranges[i].in_p) 3080 continue; 3081 3082 tree type = TREE_TYPE (ranges[i].exp); 3083 if (!INTEGRAL_TYPE_P (type) 3084 || TYPE_UNSIGNED (type) 3085 || ranges[i].low == NULL_TREE 3086 || !integer_zerop (ranges[i].low) 3087 || ranges[i].high != NULL_TREE) 3088 continue; 3089 /* EXP >= 0 here. */ 3090 if (map == NULL) 3091 map = new hash_map <tree, int>; 3092 map->put (ranges[i].exp, i); 3093 } 3094 3095 if (map == NULL) 3096 return false; 3097 3098 for (i = 0; i < length; i++) 3099 { 3100 bool in_p = ranges[i].in_p; 3101 if (ranges[i].low == NULL_TREE 3102 || ranges[i].high == NULL_TREE) 3103 continue; 3104 if (!integer_zerop (ranges[i].low) 3105 || !integer_zerop (ranges[i].high)) 3106 { 3107 if (ranges[i].exp 3108 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1 3109 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp)) 3110 && integer_onep (ranges[i].low) 3111 && integer_onep (ranges[i].high)) 3112 in_p = !in_p; 3113 else 3114 continue; 3115 } 3116 3117 gimple *stmt; 3118 tree_code ccode; 3119 tree rhs1, rhs2; 3120 if (ranges[i].exp) 3121 { 3122 if (TREE_CODE (ranges[i].exp) != SSA_NAME) 3123 continue; 3124 stmt = SSA_NAME_DEF_STMT (ranges[i].exp); 3125 if (!is_gimple_assign (stmt)) 3126 continue; 3127 ccode = gimple_assign_rhs_code (stmt); 3128 rhs1 = gimple_assign_rhs1 (stmt); 3129 rhs2 = gimple_assign_rhs2 (stmt); 3130 } 3131 else 3132 { 3133 operand_entry *oe = (*ops)[ranges[i].idx]; 3134 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 3135 if (gimple_code (stmt) != GIMPLE_COND) 3136 continue; 3137 ccode = gimple_cond_code (stmt); 3138 rhs1 = gimple_cond_lhs (stmt); 3139 rhs2 = gimple_cond_rhs (stmt); 3140 } 3141 3142 if (TREE_CODE (rhs1) != SSA_NAME 3143 || rhs2 == NULL_TREE 3144 || TREE_CODE (rhs2) != SSA_NAME) 3145 continue; 3146 3147 switch (ccode) 3148 { 3149 case GT_EXPR: 3150 case GE_EXPR: 3151 case LT_EXPR: 3152 case LE_EXPR: 3153 break; 3154 default: 3155 continue; 3156 } 3157 if (in_p) 3158 ccode = invert_tree_comparison (ccode, false); 3159 switch (ccode) 3160 { 3161 case GT_EXPR: 3162 case GE_EXPR: 3163 std::swap (rhs1, rhs2); 3164 ccode = swap_tree_comparison (ccode); 3165 break; 3166 case LT_EXPR: 3167 case LE_EXPR: 3168 break; 3169 default: 3170 gcc_unreachable (); 3171 } 3172 3173 int *idx = map->get (rhs1); 3174 if (idx == NULL) 3175 continue; 3176 3177 /* maybe_optimize_range_tests allows statements without side-effects 3178 in the basic blocks as long as they are consumed in the same bb. 
         Make sure rhs2's def stmt is not among them, otherwise we can't
         safely use get_nonzero_bits on it.  E.g. in:
          # RANGE [-83, 1] NONZERO 173
          # k_32 = PHI <k_47(13), k_12(9)>
         ...
         if (k_32 >= 0)
           goto <bb 5>; [26.46%]
         else
           goto <bb 9>; [73.54%]

         <bb 5> [local count: 140323371]:
         # RANGE [0, 1] NONZERO 1
         _5 = (int) k_32;
         # RANGE [0, 4] NONZERO 4
         _21 = _5 << 2;
         # RANGE [0, 4] NONZERO 4
         iftmp.0_44 = (char) _21;
         if (k_32 < iftmp.0_44)
           goto <bb 6>; [84.48%]
         else
           goto <bb 9>; [15.52%]
         the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
         k_32 >= 0.  If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
         to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
         those stmts even for negative k_32 and the value ranges would be no
         longer guaranteed and so the optimization would be invalid.  */
      while (opcode == ERROR_MARK)
        {
          gimple *g = SSA_NAME_DEF_STMT (rhs2);
          basic_block bb2 = gimple_bb (g);
          if (bb2
              && bb2 != first_bb
              && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
            {
              /* As an exception, handle a few common cases.  */
              if (gimple_assign_cast_p (g)
                  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
                {
                  tree op0 = gimple_assign_rhs1 (g);
                  if (TYPE_UNSIGNED (TREE_TYPE (op0))
                      && (TYPE_PRECISION (TREE_TYPE (rhs2))
                          > TYPE_PRECISION (TREE_TYPE (op0))))
                    /* Zero-extension is always ok.  */
                    break;
                  else if (TYPE_PRECISION (TREE_TYPE (rhs2))
                           == TYPE_PRECISION (TREE_TYPE (op0))
                           && TREE_CODE (op0) == SSA_NAME)
                    {
                      /* Cast from signed to unsigned or vice versa.  Retry
                         with the op0 as new rhs2.  */
                      rhs2 = op0;
                      continue;
                    }
                }
              else if (is_gimple_assign (g)
                       && gimple_assign_rhs_code (g) == BIT_AND_EXPR
                       && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
                       && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
                /* Masking with INTEGER_CST with MSB clear is always ok
                   too.  */
                break;
              rhs2 = NULL_TREE;
            }
          break;
        }
      if (rhs2 == NULL_TREE)
        continue;

      wide_int nz = get_nonzero_bits (rhs2);
      if (wi::neg_p (nz))
        continue;

      /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
         and RHS2 is known to be RHS2 >= 0.
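         Both values therefore fit in the non-negative half of the signed
         type, so doing the comparison in the corresponding unsigned type
         gives the same result and makes a separate EXP >= 0 test redundant.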
*/ 3253 tree utype = unsigned_type_for (TREE_TYPE (rhs1)); 3254 3255 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 3256 if ((ranges[*idx].strict_overflow_p 3257 || ranges[i].strict_overflow_p) 3258 && issue_strict_overflow_warning (wc)) 3259 warning_at (gimple_location (stmt), OPT_Wstrict_overflow, 3260 "assuming signed overflow does not occur " 3261 "when simplifying range test"); 3262 3263 if (dump_file && (dump_flags & TDF_DETAILS)) 3264 { 3265 struct range_entry *r = &ranges[*idx]; 3266 fprintf (dump_file, "Optimizing range test "); 3267 print_generic_expr (dump_file, r->exp); 3268 fprintf (dump_file, " +["); 3269 print_generic_expr (dump_file, r->low); 3270 fprintf (dump_file, ", "); 3271 print_generic_expr (dump_file, r->high); 3272 fprintf (dump_file, "] and comparison "); 3273 print_generic_expr (dump_file, rhs1); 3274 fprintf (dump_file, " %s ", op_symbol_code (ccode)); 3275 print_generic_expr (dump_file, rhs2); 3276 fprintf (dump_file, "\n into ("); 3277 print_generic_expr (dump_file, utype); 3278 fprintf (dump_file, ") "); 3279 print_generic_expr (dump_file, rhs1); 3280 fprintf (dump_file, " %s (", op_symbol_code (ccode)); 3281 print_generic_expr (dump_file, utype); 3282 fprintf (dump_file, ") "); 3283 print_generic_expr (dump_file, rhs2); 3284 fprintf (dump_file, "\n"); 3285 } 3286 3287 operand_entry *oe = (*ops)[ranges[i].idx]; 3288 ranges[i].in_p = 0; 3289 if (opcode == BIT_IOR_EXPR 3290 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 3291 { 3292 ranges[i].in_p = 1; 3293 ccode = invert_tree_comparison (ccode, false); 3294 } 3295 3296 unsigned int uid = gimple_uid (stmt); 3297 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 3298 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1); 3299 gimple_set_uid (g, uid); 3300 rhs1 = gimple_assign_lhs (g); 3301 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3302 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2))) 3303 { 3304 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2); 3305 gimple_set_uid (g, uid); 3306 rhs2 = gimple_assign_lhs (g); 3307 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3308 } 3309 if (tree_swap_operands_p (rhs1, rhs2)) 3310 { 3311 std::swap (rhs1, rhs2); 3312 ccode = swap_tree_comparison (ccode); 3313 } 3314 if (gimple_code (stmt) == GIMPLE_COND) 3315 { 3316 gcond *c = as_a <gcond *> (stmt); 3317 gimple_cond_set_code (c, ccode); 3318 gimple_cond_set_lhs (c, rhs1); 3319 gimple_cond_set_rhs (c, rhs2); 3320 update_stmt (stmt); 3321 } 3322 else 3323 { 3324 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node; 3325 if (!INTEGRAL_TYPE_P (ctype) 3326 || (TREE_CODE (ctype) != BOOLEAN_TYPE 3327 && TYPE_PRECISION (ctype) != 1)) 3328 ctype = boolean_type_node; 3329 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2); 3330 gimple_set_uid (g, uid); 3331 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3332 if (oe->op && ctype != TREE_TYPE (oe->op)) 3333 { 3334 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)), 3335 NOP_EXPR, gimple_assign_lhs (g)); 3336 gimple_set_uid (g, uid); 3337 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3338 } 3339 ranges[i].exp = gimple_assign_lhs (g); 3340 oe->op = ranges[i].exp; 3341 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp)); 3342 ranges[i].high = ranges[i].low; 3343 } 3344 ranges[i].strict_overflow_p = false; 3345 oe = (*ops)[ranges[*idx].idx]; 3346 /* Now change all the other range test immediate uses, so that 3347 those tests will be optimized away. 
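         The subsumed test is replaced by the neutral value of its own
         operation: 0 for a || chain and 1 for a && chain in the inter-bb
         case, and by error_mark_node (filtered out by the caller)
         otherwise.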
*/ 3348 if (opcode == ERROR_MARK) 3349 { 3350 if (oe->op) 3351 oe->op = build_int_cst (TREE_TYPE (oe->op), 3352 oe->rank == BIT_IOR_EXPR ? 0 : 1); 3353 else 3354 oe->op = (oe->rank == BIT_IOR_EXPR 3355 ? boolean_false_node : boolean_true_node); 3356 } 3357 else 3358 oe->op = error_mark_node; 3359 ranges[*idx].exp = NULL_TREE; 3360 ranges[*idx].low = NULL_TREE; 3361 ranges[*idx].high = NULL_TREE; 3362 any_changes = true; 3363 } 3364 3365 delete map; 3366 return any_changes; 3367 } 3368 3369 /* Optimize range tests, similarly how fold_range_test optimizes 3370 it on trees. The tree code for the binary 3371 operation between all the operands is OPCODE. 3372 If OPCODE is ERROR_MARK, optimize_range_tests is called from within 3373 maybe_optimize_range_tests for inter-bb range optimization. 3374 In that case if oe->op is NULL, oe->id is bb->index whose 3375 GIMPLE_COND is && or ||ed into the test, and oe->rank says 3376 the actual opcode. 3377 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */ 3378 3379 static bool 3380 optimize_range_tests (enum tree_code opcode, 3381 vec<operand_entry *> *ops, basic_block first_bb) 3382 { 3383 unsigned int length = ops->length (), i, j, first; 3384 operand_entry *oe; 3385 struct range_entry *ranges; 3386 bool any_changes = false; 3387 3388 if (length == 1) 3389 return false; 3390 3391 ranges = XNEWVEC (struct range_entry, length); 3392 for (i = 0; i < length; i++) 3393 { 3394 oe = (*ops)[i]; 3395 ranges[i].idx = i; 3396 init_range_entry (ranges + i, oe->op, 3397 oe->op 3398 ? NULL 3399 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id))); 3400 /* For | invert it now, we will invert it again before emitting 3401 the optimized expression. */ 3402 if (opcode == BIT_IOR_EXPR 3403 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 3404 ranges[i].in_p = !ranges[i].in_p; 3405 } 3406 3407 qsort (ranges, length, sizeof (*ranges), range_entry_cmp); 3408 for (i = 0; i < length; i++) 3409 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) 3410 break; 3411 3412 /* Try to merge ranges. */ 3413 for (first = i; i < length; i++) 3414 { 3415 tree low = ranges[i].low; 3416 tree high = ranges[i].high; 3417 int in_p = ranges[i].in_p; 3418 bool strict_overflow_p = ranges[i].strict_overflow_p; 3419 int update_fail_count = 0; 3420 3421 for (j = i + 1; j < length; j++) 3422 { 3423 if (ranges[i].exp != ranges[j].exp) 3424 break; 3425 if (!merge_ranges (&in_p, &low, &high, in_p, low, high, 3426 ranges[j].in_p, ranges[j].low, ranges[j].high)) 3427 break; 3428 strict_overflow_p |= ranges[j].strict_overflow_p; 3429 } 3430 3431 if (j == i + 1) 3432 continue; 3433 3434 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1, 3435 opcode, ops, ranges[i].exp, NULL, in_p, 3436 low, high, strict_overflow_p)) 3437 { 3438 i = j - 1; 3439 any_changes = true; 3440 } 3441 /* Avoid quadratic complexity if all merge_ranges calls would succeed, 3442 while update_range_test would fail. 
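         After 64 such failures we simply skip past the whole mergeable
         group instead of retrying it for every remaining start position.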
*/ 3443 else if (update_fail_count == 64) 3444 i = j - 1; 3445 else 3446 ++update_fail_count; 3447 } 3448 3449 any_changes |= optimize_range_tests_1 (opcode, first, length, true, 3450 ops, ranges); 3451 3452 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2) 3453 any_changes |= optimize_range_tests_1 (opcode, first, length, false, 3454 ops, ranges); 3455 if (lshift_cheap_p (optimize_function_for_speed_p (cfun))) 3456 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length, 3457 ops, ranges); 3458 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length, 3459 ops, ranges); 3460 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops, 3461 ranges, first_bb); 3462 3463 if (any_changes && opcode != ERROR_MARK) 3464 { 3465 j = 0; 3466 FOR_EACH_VEC_ELT (*ops, i, oe) 3467 { 3468 if (oe->op == error_mark_node) 3469 continue; 3470 else if (i != j) 3471 (*ops)[j] = oe; 3472 j++; 3473 } 3474 ops->truncate (j); 3475 } 3476 3477 XDELETEVEC (ranges); 3478 return any_changes; 3479 } 3480 3481 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize 3482 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure, 3483 otherwise the comparison code. */ 3484 3485 static tree_code 3486 ovce_extract_ops (tree var, gassign **rets, bool *reti) 3487 { 3488 if (TREE_CODE (var) != SSA_NAME) 3489 return ERROR_MARK; 3490 3491 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var)); 3492 if (stmt == NULL) 3493 return ERROR_MARK; 3494 3495 /* ??? If we start creating more COND_EXPR, we could perform 3496 this same optimization with them. For now, simplify. */ 3497 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR) 3498 return ERROR_MARK; 3499 3500 tree cond = gimple_assign_rhs1 (stmt); 3501 tree_code cmp = TREE_CODE (cond); 3502 if (TREE_CODE_CLASS (cmp) != tcc_comparison) 3503 return ERROR_MARK; 3504 3505 /* ??? For now, allow only canonical true and false result vectors. 3506 We could expand this to other constants should the need arise, 3507 but at the moment we don't create them. */ 3508 tree t = gimple_assign_rhs2 (stmt); 3509 tree f = gimple_assign_rhs3 (stmt); 3510 bool inv; 3511 if (integer_all_onesp (t)) 3512 inv = false; 3513 else if (integer_all_onesp (f)) 3514 { 3515 cmp = invert_tree_comparison (cmp, false); 3516 inv = true; 3517 } 3518 else 3519 return ERROR_MARK; 3520 if (!integer_zerop (f)) 3521 return ERROR_MARK; 3522 3523 /* Success! */ 3524 if (rets) 3525 *rets = stmt; 3526 if (reti) 3527 *reti = inv; 3528 return cmp; 3529 } 3530 3531 /* Optimize the condition of VEC_COND_EXPRs which have been combined 3532 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). 
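   E.g. (illustrative) the conditions of (x < y ? -1 : 0) & (x != y ? -1 : 0)
   can be folded so that the combined result is just (x < y ? -1 : 0).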
*/ 3533 3534 static bool 3535 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops) 3536 { 3537 unsigned int length = ops->length (), i, j; 3538 bool any_changes = false; 3539 3540 if (length == 1) 3541 return false; 3542 3543 for (i = 0; i < length; ++i) 3544 { 3545 tree elt0 = (*ops)[i]->op; 3546 3547 gassign *stmt0; 3548 bool invert; 3549 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert); 3550 if (cmp0 == ERROR_MARK) 3551 continue; 3552 3553 for (j = i + 1; j < length; ++j) 3554 { 3555 tree &elt1 = (*ops)[j]->op; 3556 3557 gassign *stmt1; 3558 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL); 3559 if (cmp1 == ERROR_MARK) 3560 continue; 3561 3562 tree cond0 = gimple_assign_rhs1 (stmt0); 3563 tree x0 = TREE_OPERAND (cond0, 0); 3564 tree y0 = TREE_OPERAND (cond0, 1); 3565 3566 tree cond1 = gimple_assign_rhs1 (stmt1); 3567 tree x1 = TREE_OPERAND (cond1, 0); 3568 tree y1 = TREE_OPERAND (cond1, 1); 3569 3570 tree comb; 3571 if (opcode == BIT_AND_EXPR) 3572 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1); 3573 else if (opcode == BIT_IOR_EXPR) 3574 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1); 3575 else 3576 gcc_unreachable (); 3577 if (comb == NULL) 3578 continue; 3579 3580 /* Success! */ 3581 if (dump_file && (dump_flags & TDF_DETAILS)) 3582 { 3583 fprintf (dump_file, "Transforming "); 3584 print_generic_expr (dump_file, cond0); 3585 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|'); 3586 print_generic_expr (dump_file, cond1); 3587 fprintf (dump_file, " into "); 3588 print_generic_expr (dump_file, comb); 3589 fputc ('\n', dump_file); 3590 } 3591 3592 gimple_assign_set_rhs1 (stmt0, comb); 3593 if (invert) 3594 std::swap (*gimple_assign_rhs2_ptr (stmt0), 3595 *gimple_assign_rhs3_ptr (stmt0)); 3596 update_stmt (stmt0); 3597 3598 elt1 = error_mark_node; 3599 any_changes = true; 3600 } 3601 } 3602 3603 if (any_changes) 3604 { 3605 operand_entry *oe; 3606 j = 0; 3607 FOR_EACH_VEC_ELT (*ops, i, oe) 3608 { 3609 if (oe->op == error_mark_node) 3610 continue; 3611 else if (i != j) 3612 (*ops)[j] = oe; 3613 j++; 3614 } 3615 ops->truncate (j); 3616 } 3617 3618 return any_changes; 3619 } 3620 3621 /* Return true if STMT is a cast like: 3622 <bb N>: 3623 ... 3624 _123 = (int) _234; 3625 3626 <bb M>: 3627 # _345 = PHI <_123(N), 1(...), 1(...)> 3628 where _234 has bool type, _123 has single use and 3629 bb N has a single successor M. This is commonly used in 3630 the last block of a range test. 3631 3632 Also Return true if STMT is tcc_compare like: 3633 <bb N>: 3634 ... 3635 _234 = a_2(D) == 2; 3636 3637 <bb M>: 3638 # _345 = PHI <_234(N), 1(...), 1(...)> 3639 _346 = (int) _345; 3640 where _234 has booltype, single use and 3641 bb N has a single successor M. This is commonly used in 3642 the last block of a range test. 
*/ 3643 3644 static bool 3645 final_range_test_p (gimple *stmt) 3646 { 3647 basic_block bb, rhs_bb, lhs_bb; 3648 edge e; 3649 tree lhs, rhs; 3650 use_operand_p use_p; 3651 gimple *use_stmt; 3652 3653 if (!gimple_assign_cast_p (stmt) 3654 && (!is_gimple_assign (stmt) 3655 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 3656 != tcc_comparison))) 3657 return false; 3658 bb = gimple_bb (stmt); 3659 if (!single_succ_p (bb)) 3660 return false; 3661 e = single_succ_edge (bb); 3662 if (e->flags & EDGE_COMPLEX) 3663 return false; 3664 3665 lhs = gimple_assign_lhs (stmt); 3666 rhs = gimple_assign_rhs1 (stmt); 3667 if (gimple_assign_cast_p (stmt) 3668 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 3669 || TREE_CODE (rhs) != SSA_NAME 3670 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)) 3671 return false; 3672 3673 if (!gimple_assign_cast_p (stmt) 3674 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)) 3675 return false; 3676 3677 /* Test whether lhs is consumed only by a PHI in the only successor bb. */ 3678 if (!single_imm_use (lhs, &use_p, &use_stmt)) 3679 return false; 3680 3681 if (gimple_code (use_stmt) != GIMPLE_PHI 3682 || gimple_bb (use_stmt) != e->dest) 3683 return false; 3684 3685 /* And that the rhs is defined in the same loop. */ 3686 if (gimple_assign_cast_p (stmt)) 3687 { 3688 if (TREE_CODE (rhs) != SSA_NAME 3689 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs))) 3690 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb)) 3691 return false; 3692 } 3693 else 3694 { 3695 if (TREE_CODE (lhs) != SSA_NAME 3696 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs))) 3697 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb)) 3698 return false; 3699 } 3700 3701 return true; 3702 } 3703 3704 /* Return true if BB is suitable basic block for inter-bb range test 3705 optimization. If BACKWARD is true, BB should be the only predecessor 3706 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine, 3707 or compared with to find a common basic block to which all conditions 3708 branch to if true resp. false. If BACKWARD is false, TEST_BB should 3709 be the only predecessor of BB. */ 3710 3711 static bool 3712 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb, 3713 bool backward) 3714 { 3715 edge_iterator ei, ei2; 3716 edge e, e2; 3717 gimple *stmt; 3718 gphi_iterator gsi; 3719 bool other_edge_seen = false; 3720 bool is_cond; 3721 3722 if (test_bb == bb) 3723 return false; 3724 /* Check last stmt first. */ 3725 stmt = last_stmt (bb); 3726 if (stmt == NULL 3727 || (gimple_code (stmt) != GIMPLE_COND 3728 && (backward || !final_range_test_p (stmt))) 3729 || gimple_visited_p (stmt) 3730 || stmt_could_throw_p (cfun, stmt) 3731 || *other_bb == bb) 3732 return false; 3733 is_cond = gimple_code (stmt) == GIMPLE_COND; 3734 if (is_cond) 3735 { 3736 /* If last stmt is GIMPLE_COND, verify that one of the succ edges 3737 goes to the next bb (if BACKWARD, it is TEST_BB), and the other 3738 to *OTHER_BB (if not set yet, try to find it out). 
*/ 3739 if (EDGE_COUNT (bb->succs) != 2) 3740 return false; 3741 FOR_EACH_EDGE (e, ei, bb->succs) 3742 { 3743 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 3744 return false; 3745 if (e->dest == test_bb) 3746 { 3747 if (backward) 3748 continue; 3749 else 3750 return false; 3751 } 3752 if (e->dest == bb) 3753 return false; 3754 if (*other_bb == NULL) 3755 { 3756 FOR_EACH_EDGE (e2, ei2, test_bb->succs) 3757 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 3758 return false; 3759 else if (e->dest == e2->dest) 3760 *other_bb = e->dest; 3761 if (*other_bb == NULL) 3762 return false; 3763 } 3764 if (e->dest == *other_bb) 3765 other_edge_seen = true; 3766 else if (backward) 3767 return false; 3768 } 3769 if (*other_bb == NULL || !other_edge_seen) 3770 return false; 3771 } 3772 else if (single_succ (bb) != *other_bb) 3773 return false; 3774 3775 /* Now check all PHIs of *OTHER_BB. */ 3776 e = find_edge (bb, *other_bb); 3777 e2 = find_edge (test_bb, *other_bb); 3778 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) 3779 { 3780 gphi *phi = gsi.phi (); 3781 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments 3782 corresponding to BB and TEST_BB predecessor must be the same. */ 3783 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx), 3784 gimple_phi_arg_def (phi, e2->dest_idx), 0)) 3785 { 3786 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND, 3787 one of the PHIs should have the lhs of the last stmt in 3788 that block as PHI arg and that PHI should have 0 or 1 3789 corresponding to it in all other range test basic blocks 3790 considered. */ 3791 if (!is_cond) 3792 { 3793 if (gimple_phi_arg_def (phi, e->dest_idx) 3794 == gimple_assign_lhs (stmt) 3795 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx)) 3796 || integer_onep (gimple_phi_arg_def (phi, 3797 e2->dest_idx)))) 3798 continue; 3799 } 3800 else 3801 { 3802 gimple *test_last = last_stmt (test_bb); 3803 if (gimple_code (test_last) != GIMPLE_COND 3804 && gimple_phi_arg_def (phi, e2->dest_idx) 3805 == gimple_assign_lhs (test_last) 3806 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx)) 3807 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx)))) 3808 continue; 3809 } 3810 3811 return false; 3812 } 3813 } 3814 return true; 3815 } 3816 3817 /* Return true if BB doesn't have side-effects that would disallow 3818 range test optimization, all SSA_NAMEs set in the bb are consumed 3819 in the bb and there are no PHIs. 
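   Such a block can then be safely bypassed: once its final condition has
   been folded into the combined range test, all of its statements become
   dead and the following CFG cleanup can remove them.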
*/ 3820 3821 static bool 3822 no_side_effect_bb (basic_block bb) 3823 { 3824 gimple_stmt_iterator gsi; 3825 gimple *last; 3826 3827 if (!gimple_seq_empty_p (phi_nodes (bb))) 3828 return false; 3829 last = last_stmt (bb); 3830 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 3831 { 3832 gimple *stmt = gsi_stmt (gsi); 3833 tree lhs; 3834 imm_use_iterator imm_iter; 3835 use_operand_p use_p; 3836 3837 if (is_gimple_debug (stmt)) 3838 continue; 3839 if (gimple_has_side_effects (stmt)) 3840 return false; 3841 if (stmt == last) 3842 return true; 3843 if (!is_gimple_assign (stmt)) 3844 return false; 3845 lhs = gimple_assign_lhs (stmt); 3846 if (TREE_CODE (lhs) != SSA_NAME) 3847 return false; 3848 if (gimple_assign_rhs_could_trap_p (stmt)) 3849 return false; 3850 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) 3851 { 3852 gimple *use_stmt = USE_STMT (use_p); 3853 if (is_gimple_debug (use_stmt)) 3854 continue; 3855 if (gimple_bb (use_stmt) != bb) 3856 return false; 3857 } 3858 } 3859 return false; 3860 } 3861 3862 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, 3863 return true and fill in *OPS recursively. */ 3864 3865 static bool 3866 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops, 3867 struct loop *loop) 3868 { 3869 gimple *stmt = SSA_NAME_DEF_STMT (var); 3870 tree rhs[2]; 3871 int i; 3872 3873 if (!is_reassociable_op (stmt, code, loop)) 3874 return false; 3875 3876 rhs[0] = gimple_assign_rhs1 (stmt); 3877 rhs[1] = gimple_assign_rhs2 (stmt); 3878 gimple_set_visited (stmt, true); 3879 for (i = 0; i < 2; i++) 3880 if (TREE_CODE (rhs[i]) == SSA_NAME 3881 && !get_ops (rhs[i], code, ops, loop) 3882 && has_single_use (rhs[i])) 3883 { 3884 operand_entry *oe = operand_entry_pool.allocate (); 3885 3886 oe->op = rhs[i]; 3887 oe->rank = code; 3888 oe->id = 0; 3889 oe->count = 1; 3890 oe->stmt_to_insert = NULL; 3891 ops->safe_push (oe); 3892 } 3893 return true; 3894 } 3895 3896 /* Find the ops that were added by get_ops starting from VAR, see if 3897 they were changed during update_range_test and if yes, create new 3898 stmts. */ 3899 3900 static tree 3901 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops, 3902 unsigned int *pidx, struct loop *loop) 3903 { 3904 gimple *stmt = SSA_NAME_DEF_STMT (var); 3905 tree rhs[4]; 3906 int i; 3907 3908 if (!is_reassociable_op (stmt, code, loop)) 3909 return NULL; 3910 3911 rhs[0] = gimple_assign_rhs1 (stmt); 3912 rhs[1] = gimple_assign_rhs2 (stmt); 3913 rhs[2] = rhs[0]; 3914 rhs[3] = rhs[1]; 3915 for (i = 0; i < 2; i++) 3916 if (TREE_CODE (rhs[i]) == SSA_NAME) 3917 { 3918 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop); 3919 if (rhs[2 + i] == NULL_TREE) 3920 { 3921 if (has_single_use (rhs[i])) 3922 rhs[2 + i] = ops[(*pidx)++]->op; 3923 else 3924 rhs[2 + i] = rhs[i]; 3925 } 3926 } 3927 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1]) 3928 && (rhs[2] != rhs[1] || rhs[3] != rhs[0])) 3929 { 3930 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 3931 var = make_ssa_name (TREE_TYPE (var)); 3932 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt), 3933 rhs[2], rhs[3]); 3934 gimple_set_uid (g, gimple_uid (stmt)); 3935 gimple_set_visited (g, true); 3936 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3937 } 3938 return var; 3939 } 3940 3941 /* Structure to track the initial value passed to get_ops and 3942 the range in the ops vector for each basic block. 
*/ 3943 3944 struct inter_bb_range_test_entry 3945 { 3946 tree op; 3947 unsigned int first_idx, last_idx; 3948 }; 3949 3950 /* Inter-bb range test optimization. 3951 3952 Returns TRUE if a gimple conditional is optimized to a true/false, 3953 otherwise return FALSE. 3954 3955 This indicates to the caller that it should run a CFG cleanup pass 3956 once reassociation is completed. */ 3957 3958 static bool 3959 maybe_optimize_range_tests (gimple *stmt) 3960 { 3961 basic_block first_bb = gimple_bb (stmt); 3962 basic_block last_bb = first_bb; 3963 basic_block other_bb = NULL; 3964 basic_block bb; 3965 edge_iterator ei; 3966 edge e; 3967 auto_vec<operand_entry *> ops; 3968 auto_vec<inter_bb_range_test_entry> bbinfo; 3969 bool any_changes = false; 3970 bool cfg_cleanup_needed = false; 3971 3972 /* Consider only basic blocks that end with GIMPLE_COND or 3973 a cast statement satisfying final_range_test_p. All 3974 but the last bb in the first_bb .. last_bb range 3975 should end with GIMPLE_COND. */ 3976 if (gimple_code (stmt) == GIMPLE_COND) 3977 { 3978 if (EDGE_COUNT (first_bb->succs) != 2) 3979 return cfg_cleanup_needed; 3980 } 3981 else if (final_range_test_p (stmt)) 3982 other_bb = single_succ (first_bb); 3983 else 3984 return cfg_cleanup_needed; 3985 3986 if (stmt_could_throw_p (cfun, stmt)) 3987 return cfg_cleanup_needed; 3988 3989 /* As relative ordering of post-dominator sons isn't fixed, 3990 maybe_optimize_range_tests can be called first on any 3991 bb in the range we want to optimize. So, start searching 3992 backwards, if first_bb can be set to a predecessor. */ 3993 while (single_pred_p (first_bb)) 3994 { 3995 basic_block pred_bb = single_pred (first_bb); 3996 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true)) 3997 break; 3998 if (!no_side_effect_bb (first_bb)) 3999 break; 4000 first_bb = pred_bb; 4001 } 4002 /* If first_bb is last_bb, other_bb hasn't been computed yet. 4003 Before starting forward search in last_bb successors, find 4004 out the other_bb. */ 4005 if (first_bb == last_bb) 4006 { 4007 other_bb = NULL; 4008 /* As non-GIMPLE_COND last stmt always terminates the range, 4009 if forward search didn't discover anything, just give up. */ 4010 if (gimple_code (stmt) != GIMPLE_COND) 4011 return cfg_cleanup_needed; 4012 /* Look at both successors. Either it ends with a GIMPLE_COND 4013 and satisfies suitable_cond_bb, or ends with a cast and 4014 other_bb is that cast's successor. */ 4015 FOR_EACH_EDGE (e, ei, first_bb->succs) 4016 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)) 4017 || e->dest == first_bb) 4018 return cfg_cleanup_needed; 4019 else if (single_pred_p (e->dest)) 4020 { 4021 stmt = last_stmt (e->dest); 4022 if (stmt 4023 && gimple_code (stmt) == GIMPLE_COND 4024 && EDGE_COUNT (e->dest->succs) == 2) 4025 { 4026 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true)) 4027 break; 4028 else 4029 other_bb = NULL; 4030 } 4031 else if (stmt 4032 && final_range_test_p (stmt) 4033 && find_edge (first_bb, single_succ (e->dest))) 4034 { 4035 other_bb = single_succ (e->dest); 4036 if (other_bb == first_bb) 4037 other_bb = NULL; 4038 } 4039 } 4040 if (other_bb == NULL) 4041 return cfg_cleanup_needed; 4042 } 4043 /* Now do the forward search, moving last_bb to successor bbs 4044 that aren't other_bb. 
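     As an illustrative sketch (not taken from any particular testcase),
     for source like "if (a_1 == 3 || a_1 == 5 || a_1 == 7)" the blocks
     being collected look roughly like
       <bb 2> if (a_1 == 3) goto <other_bb>; else goto <bb 3>;
       <bb 3> if (a_1 == 5) goto <other_bb>; else goto <bb 4>;
       <bb 4> if (a_1 == 7) goto <other_bb>; else goto <bb 5>;
     with first_bb == <bb 2> and last_bb == <bb 4>; every block but
     last_bb branches either into the common other_bb or on to the next
     test in the range.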
*/ 4045 while (EDGE_COUNT (last_bb->succs) == 2) 4046 { 4047 FOR_EACH_EDGE (e, ei, last_bb->succs) 4048 if (e->dest != other_bb) 4049 break; 4050 if (e == NULL) 4051 break; 4052 if (!single_pred_p (e->dest)) 4053 break; 4054 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false)) 4055 break; 4056 if (!no_side_effect_bb (e->dest)) 4057 break; 4058 last_bb = e->dest; 4059 } 4060 if (first_bb == last_bb) 4061 return cfg_cleanup_needed; 4062 /* Here basic blocks first_bb through last_bb's predecessor 4063 end with GIMPLE_COND, all of them have one of the edges to 4064 other_bb and another to another block in the range, 4065 all blocks except first_bb don't have side-effects and 4066 last_bb ends with either GIMPLE_COND, or cast satisfying 4067 final_range_test_p. */ 4068 for (bb = last_bb; ; bb = single_pred (bb)) 4069 { 4070 enum tree_code code; 4071 tree lhs, rhs; 4072 inter_bb_range_test_entry bb_ent; 4073 4074 bb_ent.op = NULL_TREE; 4075 bb_ent.first_idx = ops.length (); 4076 bb_ent.last_idx = bb_ent.first_idx; 4077 e = find_edge (bb, other_bb); 4078 stmt = last_stmt (bb); 4079 gimple_set_visited (stmt, true); 4080 if (gimple_code (stmt) != GIMPLE_COND) 4081 { 4082 use_operand_p use_p; 4083 gimple *phi; 4084 edge e2; 4085 unsigned int d; 4086 4087 lhs = gimple_assign_lhs (stmt); 4088 rhs = gimple_assign_rhs1 (stmt); 4089 gcc_assert (bb == last_bb); 4090 4091 /* stmt is 4092 _123 = (int) _234; 4093 OR 4094 _234 = a_2(D) == 2; 4095 4096 followed by: 4097 <bb M>: 4098 # _345 = PHI <_123(N), 1(...), 1(...)> 4099 4100 or 0 instead of 1. If it is 0, the _234 4101 range test is anded together with all the 4102 other range tests, if it is 1, it is ored with 4103 them. */ 4104 single_imm_use (lhs, &use_p, &phi); 4105 gcc_assert (gimple_code (phi) == GIMPLE_PHI); 4106 e2 = find_edge (first_bb, other_bb); 4107 d = e2->dest_idx; 4108 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs); 4109 if (integer_zerop (gimple_phi_arg_def (phi, d))) 4110 code = BIT_AND_EXPR; 4111 else 4112 { 4113 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d))); 4114 code = BIT_IOR_EXPR; 4115 } 4116 4117 /* If _234 SSA_NAME_DEF_STMT is 4118 _234 = _567 | _789; 4119 (or &, corresponding to 1/0 in the phi arguments, 4120 push into ops the individual range test arguments 4121 of the bitwise or resp. and, recursively. */ 4122 if (TREE_CODE (rhs) == SSA_NAME 4123 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 4124 != tcc_comparison) 4125 && !get_ops (rhs, code, &ops, 4126 loop_containing_stmt (stmt)) 4127 && has_single_use (rhs)) 4128 { 4129 /* Otherwise, push the _234 range test itself. */ 4130 operand_entry *oe = operand_entry_pool.allocate (); 4131 4132 oe->op = rhs; 4133 oe->rank = code; 4134 oe->id = 0; 4135 oe->count = 1; 4136 oe->stmt_to_insert = NULL; 4137 ops.safe_push (oe); 4138 bb_ent.last_idx++; 4139 bb_ent.op = rhs; 4140 } 4141 else if (is_gimple_assign (stmt) 4142 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 4143 == tcc_comparison) 4144 && !get_ops (lhs, code, &ops, 4145 loop_containing_stmt (stmt)) 4146 && has_single_use (lhs)) 4147 { 4148 operand_entry *oe = operand_entry_pool.allocate (); 4149 oe->op = lhs; 4150 oe->rank = code; 4151 oe->id = 0; 4152 oe->count = 1; 4153 ops.safe_push (oe); 4154 bb_ent.last_idx++; 4155 bb_ent.op = lhs; 4156 } 4157 else 4158 { 4159 bb_ent.last_idx = ops.length (); 4160 bb_ent.op = rhs; 4161 } 4162 bbinfo.safe_push (bb_ent); 4163 continue; 4164 } 4165 /* Otherwise stmt is GIMPLE_COND. 
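	 As a hedged, concrete illustration: if the condition is
	 "if (x_5 != 0)" with _Bool x_5 = _1 | _2 and the true edge
	 leading to other_bb, get_ops pushes the individual operands
	 _1 and _2; when the operands cannot be extracted that way,
	 the GIMPLE_COND itself is recorded with oe->op == NULL as
	 described below.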
*/ 4166 code = gimple_cond_code (stmt); 4167 lhs = gimple_cond_lhs (stmt); 4168 rhs = gimple_cond_rhs (stmt); 4169 if (TREE_CODE (lhs) == SSA_NAME 4170 && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 4171 && ((code != EQ_EXPR && code != NE_EXPR) 4172 || rhs != boolean_false_node 4173 /* Either push into ops the individual bitwise 4174 or resp. and operands, depending on which 4175 edge is other_bb. */ 4176 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0) 4177 ^ (code == EQ_EXPR)) 4178 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops, 4179 loop_containing_stmt (stmt)))) 4180 { 4181 /* Or push the GIMPLE_COND stmt itself. */ 4182 operand_entry *oe = operand_entry_pool.allocate (); 4183 4184 oe->op = NULL; 4185 oe->rank = (e->flags & EDGE_TRUE_VALUE) 4186 ? BIT_IOR_EXPR : BIT_AND_EXPR; 4187 /* oe->op = NULL signs that there is no SSA_NAME 4188 for the range test, and oe->id instead is the 4189 basic block number, at which's end the GIMPLE_COND 4190 is. */ 4191 oe->id = bb->index; 4192 oe->count = 1; 4193 oe->stmt_to_insert = NULL; 4194 ops.safe_push (oe); 4195 bb_ent.op = NULL; 4196 bb_ent.last_idx++; 4197 } 4198 else if (ops.length () > bb_ent.first_idx) 4199 { 4200 bb_ent.op = lhs; 4201 bb_ent.last_idx = ops.length (); 4202 } 4203 bbinfo.safe_push (bb_ent); 4204 if (bb == first_bb) 4205 break; 4206 } 4207 if (ops.length () > 1) 4208 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb); 4209 if (any_changes) 4210 { 4211 unsigned int idx, max_idx = 0; 4212 /* update_ops relies on has_single_use predicates returning the 4213 same values as it did during get_ops earlier. Additionally it 4214 never removes statements, only adds new ones and it should walk 4215 from the single imm use and check the predicate already before 4216 making those changes. 4217 On the other side, the handling of GIMPLE_COND directly can turn 4218 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so 4219 it needs to be done in a separate loop afterwards. */ 4220 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) 4221 { 4222 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx 4223 && bbinfo[idx].op != NULL_TREE) 4224 { 4225 tree new_op; 4226 4227 max_idx = idx; 4228 stmt = last_stmt (bb); 4229 new_op = update_ops (bbinfo[idx].op, 4230 (enum tree_code) 4231 ops[bbinfo[idx].first_idx]->rank, 4232 ops, &bbinfo[idx].first_idx, 4233 loop_containing_stmt (stmt)); 4234 if (new_op == NULL_TREE) 4235 { 4236 gcc_assert (bb == last_bb); 4237 new_op = ops[bbinfo[idx].first_idx++]->op; 4238 } 4239 if (bbinfo[idx].op != new_op) 4240 { 4241 imm_use_iterator iter; 4242 use_operand_p use_p; 4243 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL; 4244 4245 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op) 4246 if (is_gimple_debug (use_stmt)) 4247 continue; 4248 else if (gimple_code (use_stmt) == GIMPLE_COND 4249 || gimple_code (use_stmt) == GIMPLE_PHI) 4250 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 4251 SET_USE (use_p, new_op); 4252 else if ((is_gimple_assign (use_stmt) 4253 && (TREE_CODE_CLASS 4254 (gimple_assign_rhs_code (use_stmt)) 4255 == tcc_comparison))) 4256 cast_or_tcc_cmp_stmt = use_stmt; 4257 else if (gimple_assign_cast_p (use_stmt)) 4258 cast_or_tcc_cmp_stmt = use_stmt; 4259 else 4260 gcc_unreachable (); 4261 4262 if (cast_or_tcc_cmp_stmt) 4263 { 4264 gcc_assert (bb == last_bb); 4265 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt); 4266 tree new_lhs = make_ssa_name (TREE_TYPE (lhs)); 4267 enum tree_code rhs_code 4268 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt) 4269 ? 
gimple_assign_rhs_code (cast_or_tcc_cmp_stmt) 4270 : CONVERT_EXPR; 4271 gassign *g; 4272 if (is_gimple_min_invariant (new_op)) 4273 { 4274 new_op = fold_convert (TREE_TYPE (lhs), new_op); 4275 g = gimple_build_assign (new_lhs, new_op); 4276 } 4277 else 4278 g = gimple_build_assign (new_lhs, rhs_code, new_op); 4279 gimple_stmt_iterator gsi 4280 = gsi_for_stmt (cast_or_tcc_cmp_stmt); 4281 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt)); 4282 gimple_set_visited (g, true); 4283 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4284 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) 4285 if (is_gimple_debug (use_stmt)) 4286 continue; 4287 else if (gimple_code (use_stmt) == GIMPLE_COND 4288 || gimple_code (use_stmt) == GIMPLE_PHI) 4289 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 4290 SET_USE (use_p, new_lhs); 4291 else 4292 gcc_unreachable (); 4293 } 4294 } 4295 } 4296 if (bb == first_bb) 4297 break; 4298 } 4299 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) 4300 { 4301 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx 4302 && bbinfo[idx].op == NULL_TREE 4303 && ops[bbinfo[idx].first_idx]->op != NULL_TREE) 4304 { 4305 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb)); 4306 4307 if (idx > max_idx) 4308 max_idx = idx; 4309 4310 /* If we collapse the conditional to a true/false 4311 condition, then bubble that knowledge up to our caller. */ 4312 if (integer_zerop (ops[bbinfo[idx].first_idx]->op)) 4313 { 4314 gimple_cond_make_false (cond_stmt); 4315 cfg_cleanup_needed = true; 4316 } 4317 else if (integer_onep (ops[bbinfo[idx].first_idx]->op)) 4318 { 4319 gimple_cond_make_true (cond_stmt); 4320 cfg_cleanup_needed = true; 4321 } 4322 else 4323 { 4324 gimple_cond_set_code (cond_stmt, NE_EXPR); 4325 gimple_cond_set_lhs (cond_stmt, 4326 ops[bbinfo[idx].first_idx]->op); 4327 gimple_cond_set_rhs (cond_stmt, boolean_false_node); 4328 } 4329 update_stmt (cond_stmt); 4330 } 4331 if (bb == first_bb) 4332 break; 4333 } 4334 4335 /* The above changes could result in basic blocks after the first 4336 modified one, up to and including last_bb, to be executed even if 4337 they would not be in the original program. If the value ranges of 4338 assignment lhs' in those bbs were dependent on the conditions 4339 guarding those basic blocks which now can change, the VRs might 4340 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs 4341 are only used within the same bb, it should be not a big deal if 4342 we just reset all the VRs in those bbs. See PR68671. */ 4343 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++) 4344 reset_flow_sensitive_info_in_bb (bb); 4345 } 4346 return cfg_cleanup_needed; 4347 } 4348 4349 /* Return true if OPERAND is defined by a PHI node which uses the LHS 4350 of STMT in it's operands. This is also known as a "destructive 4351 update" operation. */ 4352 4353 static bool 4354 is_phi_for_stmt (gimple *stmt, tree operand) 4355 { 4356 gimple *def_stmt; 4357 gphi *def_phi; 4358 tree lhs; 4359 use_operand_p arg_p; 4360 ssa_op_iter i; 4361 4362 if (TREE_CODE (operand) != SSA_NAME) 4363 return false; 4364 4365 lhs = gimple_assign_lhs (stmt); 4366 4367 def_stmt = SSA_NAME_DEF_STMT (operand); 4368 def_phi = dyn_cast <gphi *> (def_stmt); 4369 if (!def_phi) 4370 return false; 4371 4372 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE) 4373 if (lhs == USE_FROM_PTR (arg_p)) 4374 return true; 4375 return false; 4376 } 4377 4378 /* Remove def stmt of VAR if VAR has zero uses and recurse 4379 on rhs1 operand if so. 
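   For example (an illustrative sketch only): if rewriting left the chain
     _7 = _5 + d_4;
     _5 = _3 + c_2;
   dead, the loop below removes _7's definition, steps to its rhs1 _5,
   removes that definition as well, and stops once it reaches a name
   that still has uses or whose defining statement was not visited by
   this pass.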
*/

static void
remove_visited_stmt_chain (tree var)
{
  gimple *stmt;
  gimple_stmt_iterator gsi;

  while (1)
    {
      if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
        return;
      stmt = SSA_NAME_DEF_STMT (var);
      if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
        {
          var = gimple_assign_rhs1 (stmt);
          gsi = gsi_for_stmt (stmt);
          reassoc_remove_stmt (&gsi);
          release_defs (stmt);
        }
      else
        return;
    }
}

/* This function checks three consecutive operands in the
   operand vector OPS starting from OPINDEX and swaps two of
   them if it is profitable for the binary operation consuming
   the OPINDEX + 1 and OPINDEX + 2 operands.

   We pair ops with the same rank if possible.

   The alternative we try is to see if STMT is a destructive
   update style statement, which is like:
   b = phi (a, ...)
   a = c + b;
   In that case, we want to use the destructive update form to
   expose the possible vectorizer sum reduction opportunity.
   If so, the third operand will be the phi node.  This
   check is not performed if STMT is null.

   We could, of course, try to be better as noted above, and do a
   lot of work to try to find these opportunities in >3 operand
   cases, but it is unlikely to be worth it.  */

static void
swap_ops_for_binary_stmt (vec<operand_entry *> ops,
                          unsigned int opindex, gimple *stmt)
{
  operand_entry *oe1, *oe2, *oe3;

  oe1 = ops[opindex];
  oe2 = ops[opindex + 1];
  oe3 = ops[opindex + 2];

  if ((oe1->rank == oe2->rank
       && oe2->rank != oe3->rank)
      || (stmt && is_phi_for_stmt (stmt, oe3->op)
          && !is_phi_for_stmt (stmt, oe1->op)
          && !is_phi_for_stmt (stmt, oe2->op)))
    std::swap (*oe1, *oe3);
  else if ((oe1->rank == oe3->rank
            && oe2->rank != oe3->rank)
           || (stmt && is_phi_for_stmt (stmt, oe2->op)
               && !is_phi_for_stmt (stmt, oe1->op)
               && !is_phi_for_stmt (stmt, oe3->op)))
    std::swap (*oe1, *oe2);
}

/* If the definition of RHS1 or RHS2 dominates STMT, return the later of those
   two definitions, otherwise return STMT.  */

static inline gimple *
find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
{
  if (TREE_CODE (rhs1) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
    stmt = SSA_NAME_DEF_STMT (rhs1);
  if (TREE_CODE (rhs2) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
    stmt = SSA_NAME_DEF_STMT (rhs2);
  return stmt;
}

/* If the stmt that defines operand has to be inserted, insert it
   before the use.  */
static void
insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
{
  gcc_assert (is_gimple_assign (stmt_to_insert));
  tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
  tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
  gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
  gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
  gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));

  /* If the insert point is not stmt, then insert_point would be
     the point where operand rhs1 or rhs2 is defined.  In this case,
     stmt_to_insert has to be inserted afterwards.  This would
     only happen when the stmt insertion point is flexible.
*/ 4479 if (stmt == insert_point) 4480 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT); 4481 else 4482 insert_stmt_after (stmt_to_insert, insert_point); 4483 } 4484 4485 4486 /* Recursively rewrite our linearized statements so that the operators 4487 match those in OPS[OPINDEX], putting the computation in rank 4488 order. Return new lhs. 4489 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in 4490 the current stmt and during recursive invocations. 4491 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in 4492 recursive invocations. */ 4493 4494 static tree 4495 rewrite_expr_tree (gimple *stmt, unsigned int opindex, 4496 vec<operand_entry *> ops, bool changed, bool next_changed) 4497 { 4498 tree rhs1 = gimple_assign_rhs1 (stmt); 4499 tree rhs2 = gimple_assign_rhs2 (stmt); 4500 tree lhs = gimple_assign_lhs (stmt); 4501 operand_entry *oe; 4502 4503 /* The final recursion case for this function is that you have 4504 exactly two operations left. 4505 If we had exactly one op in the entire list to start with, we 4506 would have never called this function, and the tail recursion 4507 rewrites them one at a time. */ 4508 if (opindex + 2 == ops.length ()) 4509 { 4510 operand_entry *oe1, *oe2; 4511 4512 oe1 = ops[opindex]; 4513 oe2 = ops[opindex + 1]; 4514 4515 if (rhs1 != oe1->op || rhs2 != oe2->op) 4516 { 4517 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 4518 unsigned int uid = gimple_uid (stmt); 4519 4520 if (dump_file && (dump_flags & TDF_DETAILS)) 4521 { 4522 fprintf (dump_file, "Transforming "); 4523 print_gimple_stmt (dump_file, stmt, 0); 4524 } 4525 4526 /* If the stmt that defines operand has to be inserted, insert it 4527 before the use. */ 4528 if (oe1->stmt_to_insert) 4529 insert_stmt_before_use (stmt, oe1->stmt_to_insert); 4530 if (oe2->stmt_to_insert) 4531 insert_stmt_before_use (stmt, oe2->stmt_to_insert); 4532 /* Even when changed is false, reassociation could have e.g. removed 4533 some redundant operations, so unless we are just swapping the 4534 arguments or unless there is no change at all (then we just 4535 return lhs), force creation of a new SSA_NAME. */ 4536 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex)) 4537 { 4538 gimple *insert_point 4539 = find_insert_point (stmt, oe1->op, oe2->op); 4540 lhs = make_ssa_name (TREE_TYPE (lhs)); 4541 stmt 4542 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt), 4543 oe1->op, oe2->op); 4544 gimple_set_uid (stmt, uid); 4545 gimple_set_visited (stmt, true); 4546 if (insert_point == gsi_stmt (gsi)) 4547 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 4548 else 4549 insert_stmt_after (stmt, insert_point); 4550 } 4551 else 4552 { 4553 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op) 4554 == stmt); 4555 gimple_assign_set_rhs1 (stmt, oe1->op); 4556 gimple_assign_set_rhs2 (stmt, oe2->op); 4557 update_stmt (stmt); 4558 } 4559 4560 if (rhs1 != oe1->op && rhs1 != oe2->op) 4561 remove_visited_stmt_chain (rhs1); 4562 4563 if (dump_file && (dump_flags & TDF_DETAILS)) 4564 { 4565 fprintf (dump_file, " into "); 4566 print_gimple_stmt (dump_file, stmt, 0); 4567 } 4568 } 4569 return lhs; 4570 } 4571 4572 /* If we hit here, we should have 3 or more ops left. */ 4573 gcc_assert (opindex + 2 < ops.length ()); 4574 4575 /* Rewrite the next operator. */ 4576 oe = ops[opindex]; 4577 4578 /* If the stmt that defines operand has to be inserted, insert it 4579 before the use. 
*/
  if (oe->stmt_to_insert)
    insert_stmt_before_use (stmt, oe->stmt_to_insert);

  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  tree new_rhs1
    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
                         changed || oe->op != rhs2 || next_changed,
                         false);

  if (oe->op != rhs2 || new_rhs1 != rhs1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Transforming ");
          print_gimple_stmt (dump_file, stmt, 0);
        }

      /* If changed is false, this is either opindex == 0
         or all outer rhs2's were equal to corresponding oe->op,
         and powi_result is NULL.
         That means lhs is equivalent before and after reassociation.
         Otherwise ensure the old lhs SSA_NAME is not reused and
         create a new stmt as well, so that any debug stmts will be
         properly adjusted.  */
      if (changed)
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
          unsigned int uid = gimple_uid (stmt);
          gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);

          lhs = make_ssa_name (TREE_TYPE (lhs));
          stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
                                      new_rhs1, oe->op);
          gimple_set_uid (stmt, uid);
          gimple_set_visited (stmt, true);
          if (insert_point == gsi_stmt (gsi))
            gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
          else
            insert_stmt_after (stmt, insert_point);
        }
      else
        {
          gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
                               == stmt);
          gimple_assign_set_rhs1 (stmt, new_rhs1);
          gimple_assign_set_rhs2 (stmt, oe->op);
          update_stmt (stmt);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " into ");
          print_gimple_stmt (dump_file, stmt, 0);
        }
    }
  return lhs;
}

/* Find out how many cycles we need to compute the statement chain.
   OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is
   the maximum number of independent statements we may execute per
   cycle.  */

static int
get_required_cycles (int ops_num, int cpu_width)
{
  int res;
  int elog;
  unsigned int rest;

  /* While we have more than 2 * cpu_width operands
     we may reduce the number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* The remaining operand count can be halved each cycle
     until we have only one operand.  */
  rest = (unsigned)(ops_num - res * cpu_width);
  elog = exact_log2 (rest);
  if (elog >= 0)
    res += elog;
  else
    res += floor_log2 (rest) + 1;

  return res;
}

/* Return an optimal number of registers to use for computation of
   the given statements.  */

static int
get_reassociation_width (int ops_num, enum tree_code opc,
                         machine_mode mode)
{
  int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
  int width;
  int width_min;
  int cycles_best;

  if (param_width > 0)
    width = param_width;
  else
    width = targetm.sched.reassociation_width (opc, mode);

  if (width == 1)
    return width;

  /* Get the minimal time required for sequence computation.  */
  cycles_best = get_required_cycles (ops_num, width);

  /* Check if we may use less width and still compute the sequence in
     the same number of cycles.  This allows us to reduce register
     usage.
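     For instance (illustrative numbers only): with ops_num == 6 and a
     target-reported width of 4, get_required_cycles returns 3 for
     width 4 and also for width 2, but 5 for width 1, so a width of 2
     is already optimal and is what the search below settles on.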
4692 get_required_cycles is monotonically increasing with lower width 4693 so we can perform a binary search for the minimal width that still 4694 results in the optimal cycle count. */ 4695 width_min = 1; 4696 while (width > width_min) 4697 { 4698 int width_mid = (width + width_min) / 2; 4699 4700 if (get_required_cycles (ops_num, width_mid) == cycles_best) 4701 width = width_mid; 4702 else if (width_min < width_mid) 4703 width_min = width_mid; 4704 else 4705 break; 4706 } 4707 4708 return width; 4709 } 4710 4711 /* Recursively rewrite our linearized statements so that the operators 4712 match those in OPS[OPINDEX], putting the computation in rank 4713 order and trying to allow operations to be executed in 4714 parallel. */ 4715 4716 static void 4717 rewrite_expr_tree_parallel (gassign *stmt, int width, 4718 vec<operand_entry *> ops) 4719 { 4720 enum tree_code opcode = gimple_assign_rhs_code (stmt); 4721 int op_num = ops.length (); 4722 gcc_assert (op_num > 0); 4723 int stmt_num = op_num - 1; 4724 gimple **stmts = XALLOCAVEC (gimple *, stmt_num); 4725 int op_index = op_num - 1; 4726 int stmt_index = 0; 4727 int ready_stmts_end = 0; 4728 int i = 0; 4729 gimple *stmt1 = NULL, *stmt2 = NULL; 4730 tree last_rhs1 = gimple_assign_rhs1 (stmt); 4731 4732 /* We start expression rewriting from the top statements. 4733 So, in this loop we create a full list of statements 4734 we will work with. */ 4735 stmts[stmt_num - 1] = stmt; 4736 for (i = stmt_num - 2; i >= 0; i--) 4737 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1])); 4738 4739 for (i = 0; i < stmt_num; i++) 4740 { 4741 tree op1, op2; 4742 4743 /* Determine whether we should use results of 4744 already handled statements or not. */ 4745 if (ready_stmts_end == 0 4746 && (i - stmt_index >= width || op_index < 1)) 4747 ready_stmts_end = i; 4748 4749 /* Now we choose operands for the next statement. Non zero 4750 value in ready_stmts_end means here that we should use 4751 the result of already generated statements as new operand. */ 4752 if (ready_stmts_end > 0) 4753 { 4754 op1 = gimple_assign_lhs (stmts[stmt_index++]); 4755 if (ready_stmts_end > stmt_index) 4756 op2 = gimple_assign_lhs (stmts[stmt_index++]); 4757 else if (op_index >= 0) 4758 { 4759 operand_entry *oe = ops[op_index--]; 4760 stmt2 = oe->stmt_to_insert; 4761 op2 = oe->op; 4762 } 4763 else 4764 { 4765 gcc_assert (stmt_index < i); 4766 op2 = gimple_assign_lhs (stmts[stmt_index++]); 4767 } 4768 4769 if (stmt_index >= ready_stmts_end) 4770 ready_stmts_end = 0; 4771 } 4772 else 4773 { 4774 if (op_index > 1) 4775 swap_ops_for_binary_stmt (ops, op_index - 2, NULL); 4776 operand_entry *oe2 = ops[op_index--]; 4777 operand_entry *oe1 = ops[op_index--]; 4778 op2 = oe2->op; 4779 stmt2 = oe2->stmt_to_insert; 4780 op1 = oe1->op; 4781 stmt1 = oe1->stmt_to_insert; 4782 } 4783 4784 /* If we emit the last statement then we should put 4785 operands into the last statement. It will also 4786 break the loop. */ 4787 if (op_index < 0 && stmt_index == i) 4788 i = stmt_num - 1; 4789 4790 if (dump_file && (dump_flags & TDF_DETAILS)) 4791 { 4792 fprintf (dump_file, "Transforming "); 4793 print_gimple_stmt (dump_file, stmts[i], 0); 4794 } 4795 4796 /* If the stmt that defines operand has to be inserted, insert it 4797 before the use. */ 4798 if (stmt1) 4799 insert_stmt_before_use (stmts[i], stmt1); 4800 if (stmt2) 4801 insert_stmt_before_use (stmts[i], stmt2); 4802 stmt1 = stmt2 = NULL; 4803 4804 /* We keep original statement only for the last one. All 4805 others are recreated. 
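	 As a rough illustration (SSA names invented): rewriting
	 x_9 = ((a_1 + b_2) + c_3) + d_4 with width 2 builds two new
	 partial sums, e.g.
	   _t1 = c_3 + d_4;
	   _t2 = a_1 + b_2;
	 and then reuses the original statement as x_9 = _t1 + _t2.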
*/ 4806 if (i == stmt_num - 1) 4807 { 4808 gimple_assign_set_rhs1 (stmts[i], op1); 4809 gimple_assign_set_rhs2 (stmts[i], op2); 4810 update_stmt (stmts[i]); 4811 } 4812 else 4813 { 4814 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); 4815 gimple_set_visited (stmts[i], true); 4816 } 4817 if (dump_file && (dump_flags & TDF_DETAILS)) 4818 { 4819 fprintf (dump_file, " into "); 4820 print_gimple_stmt (dump_file, stmts[i], 0); 4821 } 4822 } 4823 4824 remove_visited_stmt_chain (last_rhs1); 4825 } 4826 4827 /* Transform STMT, which is really (A +B) + (C + D) into the left 4828 linear form, ((A+B)+C)+D. 4829 Recurse on D if necessary. */ 4830 4831 static void 4832 linearize_expr (gimple *stmt) 4833 { 4834 gimple_stmt_iterator gsi; 4835 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); 4836 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 4837 gimple *oldbinrhs = binrhs; 4838 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 4839 gimple *newbinrhs = NULL; 4840 struct loop *loop = loop_containing_stmt (stmt); 4841 tree lhs = gimple_assign_lhs (stmt); 4842 4843 gcc_assert (is_reassociable_op (binlhs, rhscode, loop) 4844 && is_reassociable_op (binrhs, rhscode, loop)); 4845 4846 gsi = gsi_for_stmt (stmt); 4847 4848 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs)); 4849 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)), 4850 gimple_assign_rhs_code (binrhs), 4851 gimple_assign_lhs (binlhs), 4852 gimple_assign_rhs2 (binrhs)); 4853 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs)); 4854 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT); 4855 gimple_set_uid (binrhs, gimple_uid (stmt)); 4856 4857 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) 4858 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 4859 4860 if (dump_file && (dump_flags & TDF_DETAILS)) 4861 { 4862 fprintf (dump_file, "Linearized: "); 4863 print_gimple_stmt (dump_file, stmt, 0); 4864 } 4865 4866 reassociate_stats.linearized++; 4867 update_stmt (stmt); 4868 4869 gsi = gsi_for_stmt (oldbinrhs); 4870 reassoc_remove_stmt (&gsi); 4871 release_defs (oldbinrhs); 4872 4873 gimple_set_visited (stmt, true); 4874 gimple_set_visited (binlhs, true); 4875 gimple_set_visited (binrhs, true); 4876 4877 /* Tail recurse on the new rhs if it still needs reassociation. */ 4878 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop)) 4879 /* ??? This should probably be linearize_expr (newbinrhs) but I don't 4880 want to change the algorithm while converting to tuples. */ 4881 linearize_expr (stmt); 4882 } 4883 4884 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return 4885 it. Otherwise, return NULL. */ 4886 4887 static gimple * 4888 get_single_immediate_use (tree lhs) 4889 { 4890 use_operand_p immuse; 4891 gimple *immusestmt; 4892 4893 if (TREE_CODE (lhs) == SSA_NAME 4894 && single_imm_use (lhs, &immuse, &immusestmt) 4895 && is_gimple_assign (immusestmt)) 4896 return immusestmt; 4897 4898 return NULL; 4899 } 4900 4901 /* Recursively negate the value of TONEGATE, and return the SSA_NAME 4902 representing the negated value. Insertions of any necessary 4903 instructions go before GSI. 4904 This function is recursive in that, if you hand it "a_5" as the 4905 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will 4906 transform b_3 + b_4 into a_5 = -b_3 + -b_4. 
*/

static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsip)
{
  gimple *negatedefstmt = NULL;
  tree resultofnegate;
  gimple_stmt_iterator gsi;
  unsigned int uid;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
      tree lhs = gimple_assign_lhs (negatedefstmt);
      gimple *g;

      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      lhs = make_ssa_name (TREE_TYPE (lhs));
      gimple_set_visited (negatedefstmt, true);
      g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
      gimple_set_uid (g, gimple_uid (negatedefstmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      return lhs;
    }

  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
                                             NULL_TREE, true, GSI_SAME_STMT);
  gsi = *gsip;
  uid = gimple_uid (gsi_stmt (gsi));
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gimple_uid (stmt) != 0)
        break;
      gimple_set_uid (stmt, uid);
    }
  return resultofnegate;
}

/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or when the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */

static bool
should_break_up_subtract (gimple *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple *immusestmt;
  struct loop *loop = loop_containing_stmt (stmt);

  if (TREE_CODE (binlhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (binrhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (lhs) == SSA_NAME
      && (immusestmt = get_single_immediate_use (lhs))
      && is_gimple_assign (immusestmt)
      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
          || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
              && gimple_assign_rhs1 (immusestmt) == lhs)
          || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
    return true;
  return false;
}

/* Transform STMT from A - B into A + -B.
*/ 4996 4997 static void 4998 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip) 4999 { 5000 tree rhs1 = gimple_assign_rhs1 (stmt); 5001 tree rhs2 = gimple_assign_rhs2 (stmt); 5002 5003 if (dump_file && (dump_flags & TDF_DETAILS)) 5004 { 5005 fprintf (dump_file, "Breaking up subtract "); 5006 print_gimple_stmt (dump_file, stmt, 0); 5007 } 5008 5009 rhs2 = negate_value (rhs2, gsip); 5010 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2); 5011 update_stmt (stmt); 5012 } 5013 5014 /* Determine whether STMT is a builtin call that raises an SSA name 5015 to an integer power and has only one use. If so, and this is early 5016 reassociation and unsafe math optimizations are permitted, place 5017 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE. 5018 If any of these conditions does not hold, return FALSE. */ 5019 5020 static bool 5021 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent) 5022 { 5023 tree arg1; 5024 REAL_VALUE_TYPE c, cint; 5025 5026 switch (gimple_call_combined_fn (stmt)) 5027 { 5028 CASE_CFN_POW: 5029 if (flag_errno_math) 5030 return false; 5031 5032 *base = gimple_call_arg (stmt, 0); 5033 arg1 = gimple_call_arg (stmt, 1); 5034 5035 if (TREE_CODE (arg1) != REAL_CST) 5036 return false; 5037 5038 c = TREE_REAL_CST (arg1); 5039 5040 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT) 5041 return false; 5042 5043 *exponent = real_to_integer (&c); 5044 real_from_integer (&cint, VOIDmode, *exponent, SIGNED); 5045 if (!real_identical (&c, &cint)) 5046 return false; 5047 5048 break; 5049 5050 CASE_CFN_POWI: 5051 *base = gimple_call_arg (stmt, 0); 5052 arg1 = gimple_call_arg (stmt, 1); 5053 5054 if (!tree_fits_shwi_p (arg1)) 5055 return false; 5056 5057 *exponent = tree_to_shwi (arg1); 5058 break; 5059 5060 default: 5061 return false; 5062 } 5063 5064 /* Expanding negative exponents is generally unproductive, so we don't 5065 complicate matters with those. Exponents of zero and one should 5066 have been handled by expression folding. */ 5067 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME) 5068 return false; 5069 5070 return true; 5071 } 5072 5073 /* Try to derive and add operand entry for OP to *OPS. Return false if 5074 unsuccessful. */ 5075 5076 static bool 5077 try_special_add_to_ops (vec<operand_entry *> *ops, 5078 enum tree_code code, 5079 tree op, gimple* def_stmt) 5080 { 5081 tree base = NULL_TREE; 5082 HOST_WIDE_INT exponent = 0; 5083 5084 if (TREE_CODE (op) != SSA_NAME 5085 || ! has_single_use (op)) 5086 return false; 5087 5088 if (code == MULT_EXPR 5089 && reassoc_insert_powi_p 5090 && flag_unsafe_math_optimizations 5091 && is_gimple_call (def_stmt) 5092 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent)) 5093 { 5094 add_repeat_to_ops_vec (ops, base, exponent); 5095 gimple_set_visited (def_stmt, true); 5096 return true; 5097 } 5098 else if (code == MULT_EXPR 5099 && is_gimple_assign (def_stmt) 5100 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR 5101 && !HONOR_SNANS (TREE_TYPE (op)) 5102 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op)) 5103 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))) 5104 { 5105 tree rhs1 = gimple_assign_rhs1 (def_stmt); 5106 tree cst = build_minus_one_cst (TREE_TYPE (op)); 5107 add_to_ops_vec (ops, rhs1); 5108 add_to_ops_vec (ops, cst); 5109 gimple_set_visited (def_stmt, true); 5110 return true; 5111 } 5112 5113 return false; 5114 } 5115 5116 /* Recursively linearize a binary expression that is the RHS of STMT. 5117 Place the operands of the expression tree in the vector named OPS. 
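   As an illustrative sketch: for a fully reassociable chain such as
     x_8 = ((a_1 + b_2) + c_3) + d_4;
   the leaves a_1, b_2, c_3 and d_4 end up in OPS, whereas operands
   whose defining statements are not reassociable are pushed as-is.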
*/ 5118 5119 static void 5120 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt, 5121 bool is_associative, bool set_visited) 5122 { 5123 tree binlhs = gimple_assign_rhs1 (stmt); 5124 tree binrhs = gimple_assign_rhs2 (stmt); 5125 gimple *binlhsdef = NULL, *binrhsdef = NULL; 5126 bool binlhsisreassoc = false; 5127 bool binrhsisreassoc = false; 5128 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 5129 struct loop *loop = loop_containing_stmt (stmt); 5130 5131 if (set_visited) 5132 gimple_set_visited (stmt, true); 5133 5134 if (TREE_CODE (binlhs) == SSA_NAME) 5135 { 5136 binlhsdef = SSA_NAME_DEF_STMT (binlhs); 5137 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop) 5138 && !stmt_could_throw_p (cfun, binlhsdef)); 5139 } 5140 5141 if (TREE_CODE (binrhs) == SSA_NAME) 5142 { 5143 binrhsdef = SSA_NAME_DEF_STMT (binrhs); 5144 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop) 5145 && !stmt_could_throw_p (cfun, binrhsdef)); 5146 } 5147 5148 /* If the LHS is not reassociable, but the RHS is, we need to swap 5149 them. If neither is reassociable, there is nothing we can do, so 5150 just put them in the ops vector. If the LHS is reassociable, 5151 linearize it. If both are reassociable, then linearize the RHS 5152 and the LHS. */ 5153 5154 if (!binlhsisreassoc) 5155 { 5156 /* If this is not a associative operation like division, give up. */ 5157 if (!is_associative) 5158 { 5159 add_to_ops_vec (ops, binrhs); 5160 return; 5161 } 5162 5163 if (!binrhsisreassoc) 5164 { 5165 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef)) 5166 add_to_ops_vec (ops, binrhs); 5167 5168 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef)) 5169 add_to_ops_vec (ops, binlhs); 5170 5171 return; 5172 } 5173 5174 if (dump_file && (dump_flags & TDF_DETAILS)) 5175 { 5176 fprintf (dump_file, "swapping operands of "); 5177 print_gimple_stmt (dump_file, stmt, 0); 5178 } 5179 5180 swap_ssa_operands (stmt, 5181 gimple_assign_rhs1_ptr (stmt), 5182 gimple_assign_rhs2_ptr (stmt)); 5183 update_stmt (stmt); 5184 5185 if (dump_file && (dump_flags & TDF_DETAILS)) 5186 { 5187 fprintf (dump_file, " is now "); 5188 print_gimple_stmt (dump_file, stmt, 0); 5189 } 5190 5191 /* We want to make it so the lhs is always the reassociative op, 5192 so swap. */ 5193 std::swap (binlhs, binrhs); 5194 } 5195 else if (binrhsisreassoc) 5196 { 5197 linearize_expr (stmt); 5198 binlhs = gimple_assign_rhs1 (stmt); 5199 binrhs = gimple_assign_rhs2 (stmt); 5200 } 5201 5202 gcc_assert (TREE_CODE (binrhs) != SSA_NAME 5203 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), 5204 rhscode, loop)); 5205 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs), 5206 is_associative, set_visited); 5207 5208 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef)) 5209 add_to_ops_vec (ops, binrhs); 5210 } 5211 5212 /* Repropagate the negates back into subtracts, since no other pass 5213 currently does it. */ 5214 5215 static void 5216 repropagate_negates (void) 5217 { 5218 unsigned int i = 0; 5219 tree negate; 5220 5221 FOR_EACH_VEC_ELT (plus_negates, i, negate) 5222 { 5223 gimple *user = get_single_immediate_use (negate); 5224 5225 if (!user || !is_gimple_assign (user)) 5226 continue; 5227 5228 /* The negate operand can be either operand of a PLUS_EXPR 5229 (it can be the LHS if the RHS is a constant for example). 5230 5231 Force the negate operand to the RHS of the PLUS_EXPR, then 5232 transform the PLUS_EXPR into a MINUS_EXPR. 
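	 For example (illustrative SSA names): from
	   x_3 = -a_1;
	   y_4 = x_3 + b_2;
	 we produce y_4 = b_2 - a_1; the negate, if it becomes unused,
	 is cleaned up later.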
*/ 5233 if (gimple_assign_rhs_code (user) == PLUS_EXPR) 5234 { 5235 /* If the negated operand appears on the LHS of the 5236 PLUS_EXPR, exchange the operands of the PLUS_EXPR 5237 to force the negated operand to the RHS of the PLUS_EXPR. */ 5238 if (gimple_assign_rhs1 (user) == negate) 5239 { 5240 swap_ssa_operands (user, 5241 gimple_assign_rhs1_ptr (user), 5242 gimple_assign_rhs2_ptr (user)); 5243 } 5244 5245 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace 5246 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */ 5247 if (gimple_assign_rhs2 (user) == negate) 5248 { 5249 tree rhs1 = gimple_assign_rhs1 (user); 5250 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate)); 5251 gimple_stmt_iterator gsi = gsi_for_stmt (user); 5252 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2); 5253 update_stmt (user); 5254 } 5255 } 5256 else if (gimple_assign_rhs_code (user) == MINUS_EXPR) 5257 { 5258 if (gimple_assign_rhs1 (user) == negate) 5259 { 5260 /* We have 5261 x = -a 5262 y = x - b 5263 which we transform into 5264 x = a + b 5265 y = -x . 5266 This pushes down the negate which we possibly can merge 5267 into some other operation, hence insert it into the 5268 plus_negates vector. */ 5269 gimple *feed = SSA_NAME_DEF_STMT (negate); 5270 tree a = gimple_assign_rhs1 (feed); 5271 tree b = gimple_assign_rhs2 (user); 5272 gimple_stmt_iterator gsi = gsi_for_stmt (feed); 5273 gimple_stmt_iterator gsi2 = gsi_for_stmt (user); 5274 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed))); 5275 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b); 5276 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); 5277 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x); 5278 user = gsi_stmt (gsi2); 5279 update_stmt (user); 5280 reassoc_remove_stmt (&gsi); 5281 release_defs (feed); 5282 plus_negates.safe_push (gimple_assign_lhs (user)); 5283 } 5284 else 5285 { 5286 /* Transform "x = -a; y = b - x" into "y = b + a", getting 5287 rid of one operation. */ 5288 gimple *feed = SSA_NAME_DEF_STMT (negate); 5289 tree a = gimple_assign_rhs1 (feed); 5290 tree rhs1 = gimple_assign_rhs1 (user); 5291 gimple_stmt_iterator gsi = gsi_for_stmt (user); 5292 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a); 5293 update_stmt (gsi_stmt (gsi)); 5294 } 5295 } 5296 } 5297 } 5298 5299 /* Returns true if OP is of a type for which we can do reassociation. 5300 That is for integral or non-saturating fixed-point types, and for 5301 floating point type when associative-math is enabled. */ 5302 5303 static bool 5304 can_reassociate_p (tree op) 5305 { 5306 tree type = TREE_TYPE (op); 5307 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op)) 5308 return false; 5309 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)) 5310 || NON_SAT_FIXED_POINT_TYPE_P (type) 5311 || (flag_associative_math && FLOAT_TYPE_P (type))) 5312 return true; 5313 return false; 5314 } 5315 5316 /* Break up subtract operations in block BB. 5317 5318 We do this top down because we don't know whether the subtract is 5319 part of a possible chain of reassociation except at the top. 5320 5321 IE given 5322 d = f + g 5323 c = a + e 5324 b = c - d 5325 q = b - r 5326 k = t - q 5327 5328 we want to break up k = t - q, but we won't until we've transformed q 5329 = b - r, which won't be broken up until we transform b = c - d. 5330 5331 En passant, clear the GIMPLE visited flag on every statement 5332 and set UIDs within each basic block. 
*/ 5333 5334 static void 5335 break_up_subtract_bb (basic_block bb) 5336 { 5337 gimple_stmt_iterator gsi; 5338 basic_block son; 5339 unsigned int uid = 1; 5340 5341 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 5342 { 5343 gimple *stmt = gsi_stmt (gsi); 5344 gimple_set_visited (stmt, false); 5345 gimple_set_uid (stmt, uid++); 5346 5347 if (!is_gimple_assign (stmt) 5348 || !can_reassociate_p (gimple_assign_lhs (stmt))) 5349 continue; 5350 5351 /* Look for simple gimple subtract operations. */ 5352 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR) 5353 { 5354 if (!can_reassociate_p (gimple_assign_rhs1 (stmt)) 5355 || !can_reassociate_p (gimple_assign_rhs2 (stmt))) 5356 continue; 5357 5358 /* Check for a subtract used only in an addition. If this 5359 is the case, transform it into add of a negate for better 5360 reassociation. IE transform C = A-B into C = A + -B if C 5361 is only used in an addition. */ 5362 if (should_break_up_subtract (stmt)) 5363 break_up_subtract (stmt, &gsi); 5364 } 5365 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR 5366 && can_reassociate_p (gimple_assign_rhs1 (stmt))) 5367 plus_negates.safe_push (gimple_assign_lhs (stmt)); 5368 } 5369 for (son = first_dom_son (CDI_DOMINATORS, bb); 5370 son; 5371 son = next_dom_son (CDI_DOMINATORS, son)) 5372 break_up_subtract_bb (son); 5373 } 5374 5375 /* Used for repeated factor analysis. */ 5376 struct repeat_factor 5377 { 5378 /* An SSA name that occurs in a multiply chain. */ 5379 tree factor; 5380 5381 /* Cached rank of the factor. */ 5382 unsigned rank; 5383 5384 /* Number of occurrences of the factor in the chain. */ 5385 HOST_WIDE_INT count; 5386 5387 /* An SSA name representing the product of this factor and 5388 all factors appearing later in the repeated factor vector. */ 5389 tree repr; 5390 }; 5391 5392 5393 static vec<repeat_factor> repeat_factor_vec; 5394 5395 /* Used for sorting the repeat factor vector. Sort primarily by 5396 ascending occurrence count, secondarily by descending rank. */ 5397 5398 static int 5399 compare_repeat_factors (const void *x1, const void *x2) 5400 { 5401 const repeat_factor *rf1 = (const repeat_factor *) x1; 5402 const repeat_factor *rf2 = (const repeat_factor *) x2; 5403 5404 if (rf1->count != rf2->count) 5405 return rf1->count - rf2->count; 5406 5407 return rf2->rank - rf1->rank; 5408 } 5409 5410 /* Look for repeated operands in OPS in the multiply tree rooted at 5411 STMT. Replace them with an optimal sequence of multiplies and powi 5412 builtin calls, and remove the used operands from OPS. Return an 5413 SSA name representing the value of the replacement sequence. */ 5414 5415 static tree 5416 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops) 5417 { 5418 unsigned i, j, vec_len; 5419 int ii; 5420 operand_entry *oe; 5421 repeat_factor *rf1, *rf2; 5422 repeat_factor rfnew; 5423 tree result = NULL_TREE; 5424 tree target_ssa, iter_result; 5425 tree type = TREE_TYPE (gimple_get_lhs (stmt)); 5426 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI); 5427 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 5428 gimple *mul_stmt, *pow_stmt; 5429 5430 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and 5431 target. */ 5432 if (!powi_fndecl) 5433 return NULL_TREE; 5434 5435 /* Allocate the repeated factor vector. */ 5436 repeat_factor_vec.create (10); 5437 5438 /* Scan the OPS vector for all SSA names in the product and build 5439 up a vector of occurrence counts for each factor. 
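     For instance (illustrative): for the product x * x * y * y * y * z
     this records roughly { x: 2, y: 3, z: 1 }.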
*/ 5440 FOR_EACH_VEC_ELT (*ops, i, oe) 5441 { 5442 if (TREE_CODE (oe->op) == SSA_NAME) 5443 { 5444 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 5445 { 5446 if (rf1->factor == oe->op) 5447 { 5448 rf1->count += oe->count; 5449 break; 5450 } 5451 } 5452 5453 if (j >= repeat_factor_vec.length ()) 5454 { 5455 rfnew.factor = oe->op; 5456 rfnew.rank = oe->rank; 5457 rfnew.count = oe->count; 5458 rfnew.repr = NULL_TREE; 5459 repeat_factor_vec.safe_push (rfnew); 5460 } 5461 } 5462 } 5463 5464 /* Sort the repeated factor vector by (a) increasing occurrence count, 5465 and (b) decreasing rank. */ 5466 repeat_factor_vec.qsort (compare_repeat_factors); 5467 5468 /* It is generally best to combine as many base factors as possible 5469 into a product before applying __builtin_powi to the result. 5470 However, the sort order chosen for the repeated factor vector 5471 allows us to cache partial results for the product of the base 5472 factors for subsequent use. When we already have a cached partial 5473 result from a previous iteration, it is best to make use of it 5474 before looking for another __builtin_pow opportunity. 5475 5476 As an example, consider x * x * y * y * y * z * z * z * z. 5477 We want to first compose the product x * y * z, raise it to the 5478 second power, then multiply this by y * z, and finally multiply 5479 by z. This can be done in 5 multiplies provided we cache y * z 5480 for use in both expressions: 5481 5482 t1 = y * z 5483 t2 = t1 * x 5484 t3 = t2 * t2 5485 t4 = t1 * t3 5486 result = t4 * z 5487 5488 If we instead ignored the cached y * z and first multiplied by 5489 the __builtin_pow opportunity z * z, we would get the inferior: 5490 5491 t1 = y * z 5492 t2 = t1 * x 5493 t3 = t2 * t2 5494 t4 = z * z 5495 t5 = t3 * t4 5496 result = t5 * y */ 5497 5498 vec_len = repeat_factor_vec.length (); 5499 5500 /* Repeatedly look for opportunities to create a builtin_powi call. */ 5501 while (true) 5502 { 5503 HOST_WIDE_INT power; 5504 5505 /* First look for the largest cached product of factors from 5506 preceding iterations. If found, create a builtin_powi for 5507 it if the minimum occurrence count for its factors is at 5508 least 2, or just use this cached product as our next 5509 multiplicand if the minimum occurrence count is 1. 
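	 Continuing the x * x * y * y * y * z * z * z * z illustration
	 from above: after the first iteration forms (x * y * z)^2, the
	 cached product y * z is left with a minimum occurrence count
	 of 1, so it is simply used as the next multiplicand, and
	 finally the remaining z is multiplied in.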
*/ 5510 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 5511 { 5512 if (rf1->repr && rf1->count > 0) 5513 break; 5514 } 5515 5516 if (j < vec_len) 5517 { 5518 power = rf1->count; 5519 5520 if (power == 1) 5521 { 5522 iter_result = rf1->repr; 5523 5524 if (dump_file && (dump_flags & TDF_DETAILS)) 5525 { 5526 unsigned elt; 5527 repeat_factor *rf; 5528 fputs ("Multiplying by cached product ", dump_file); 5529 for (elt = j; elt < vec_len; elt++) 5530 { 5531 rf = &repeat_factor_vec[elt]; 5532 print_generic_expr (dump_file, rf->factor); 5533 if (elt < vec_len - 1) 5534 fputs (" * ", dump_file); 5535 } 5536 fputs ("\n", dump_file); 5537 } 5538 } 5539 else 5540 { 5541 iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); 5542 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 5543 build_int_cst (integer_type_node, 5544 power)); 5545 gimple_call_set_lhs (pow_stmt, iter_result); 5546 gimple_set_location (pow_stmt, gimple_location (stmt)); 5547 gimple_set_uid (pow_stmt, gimple_uid (stmt)); 5548 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); 5549 5550 if (dump_file && (dump_flags & TDF_DETAILS)) 5551 { 5552 unsigned elt; 5553 repeat_factor *rf; 5554 fputs ("Building __builtin_pow call for cached product (", 5555 dump_file); 5556 for (elt = j; elt < vec_len; elt++) 5557 { 5558 rf = &repeat_factor_vec[elt]; 5559 print_generic_expr (dump_file, rf->factor); 5560 if (elt < vec_len - 1) 5561 fputs (" * ", dump_file); 5562 } 5563 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", 5564 power); 5565 } 5566 } 5567 } 5568 else 5569 { 5570 /* Otherwise, find the first factor in the repeated factor 5571 vector whose occurrence count is at least 2. If no such 5572 factor exists, there are no builtin_powi opportunities 5573 remaining. */ 5574 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 5575 { 5576 if (rf1->count >= 2) 5577 break; 5578 } 5579 5580 if (j >= vec_len) 5581 break; 5582 5583 power = rf1->count; 5584 5585 if (dump_file && (dump_flags & TDF_DETAILS)) 5586 { 5587 unsigned elt; 5588 repeat_factor *rf; 5589 fputs ("Building __builtin_pow call for (", dump_file); 5590 for (elt = j; elt < vec_len; elt++) 5591 { 5592 rf = &repeat_factor_vec[elt]; 5593 print_generic_expr (dump_file, rf->factor); 5594 if (elt < vec_len - 1) 5595 fputs (" * ", dump_file); 5596 } 5597 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power); 5598 } 5599 5600 reassociate_stats.pows_created++; 5601 5602 /* Visit each element of the vector in reverse order (so that 5603 high-occurrence elements are visited first, and within the 5604 same occurrence count, lower-ranked elements are visited 5605 first). Form a linear product of all elements in this order 5606 whose occurrencce count is at least that of element J. 5607 Record the SSA name representing the product of each element 5608 with all subsequent elements in the vector. */ 5609 if (j == vec_len - 1) 5610 rf1->repr = rf1->factor; 5611 else 5612 { 5613 for (ii = vec_len - 2; ii >= (int)j; ii--) 5614 { 5615 tree op1, op2; 5616 5617 rf1 = &repeat_factor_vec[ii]; 5618 rf2 = &repeat_factor_vec[ii + 1]; 5619 5620 /* Init the last factor's representative to be itself. 
                  /* Init the last factor's representative to be itself.  */
                  if (!rf2->repr)
                    rf2->repr = rf2->factor;

                  op1 = rf1->factor;
                  op2 = rf2->repr;

                  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
                  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
                                                  op1, op2);
                  gimple_set_location (mul_stmt, gimple_location (stmt));
                  gimple_set_uid (mul_stmt, gimple_uid (stmt));
                  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
                  rf1->repr = target_ssa;

                  /* Don't reprocess the multiply we just introduced.  */
                  gimple_set_visited (mul_stmt, true);
                }
            }

          /* Form a call to __builtin_powi for the maximum product
             just formed, raised to the power obtained earlier.  */
          rf1 = &repeat_factor_vec[j];
          iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
          pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                        build_int_cst (integer_type_node,
                                                       power));
          gimple_call_set_lhs (pow_stmt, iter_result);
          gimple_set_location (pow_stmt, gimple_location (stmt));
          gimple_set_uid (pow_stmt, gimple_uid (stmt));
          gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
        }

      /* If we previously formed at least one other builtin_powi call,
         form the product of this one and those others.  */
      if (result)
        {
          tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
          mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
                                          result, iter_result);
          gimple_set_location (mul_stmt, gimple_location (stmt));
          gimple_set_uid (mul_stmt, gimple_uid (stmt));
          gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
          gimple_set_visited (mul_stmt, true);
          result = new_result;
        }
      else
        result = iter_result;

      /* Decrement the occurrence count of each element in the product
         by the count found above, and remove this many copies of each
         factor from OPS.  */
      for (i = j; i < vec_len; i++)
        {
          unsigned k = power;
          unsigned n;

          rf1 = &repeat_factor_vec[i];
          rf1->count -= power;

          FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
            {
              if (oe->op == rf1->factor)
                {
                  if (oe->count <= k)
                    {
                      ops->ordered_remove (n);
                      k -= oe->count;

                      if (k == 0)
                        break;
                    }
                  else
                    {
                      oe->count -= k;
                      break;
                    }
                }
            }
        }
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}

/* Attempt to optimize
   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
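
/* An illustrative example (SSA names invented, not taken from the sources):
   given a multiplication chain whose operand list is { t_1, z_4, 2.0 } with

     t_1 = __builtin_copysign (3.0, y_2);

   the routine below folds 2.0 * 3.0, emits a fresh call

     t_5 = __builtin_copysign (6.0, y_2);

   pops the constant 2.0 from the operand list, and replaces t_1 by t_5 in
   it, so the later rewrite produces t_5 * z_4.  The now-dead original call
   is left for DCE.  With a negative constant, a NEGATE_EXPR of the new
   call's result is used instead, per the second formula above.  */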

static void
attempt_builtin_copysign (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int length = ops->length ();
  tree cst = ops->last ()->op;

  if (length == 1 || TREE_CODE (cst) != REAL_CST)
    return;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME
          && has_single_use (oe->op))
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
          if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
            {
              tree arg0, arg1;
              switch (gimple_call_combined_fn (old_call))
                {
                CASE_CFN_COPYSIGN:
                CASE_CFN_COPYSIGN_FN:
                  arg0 = gimple_call_arg (old_call, 0);
                  arg1 = gimple_call_arg (old_call, 1);
                  /* The first argument of copysign must be a constant,
                     otherwise there's nothing to do.  */
                  if (TREE_CODE (arg0) == REAL_CST)
                    {
                      tree type = TREE_TYPE (arg0);
                      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
                      /* If we couldn't fold to a single constant, skip it.
                         That happens e.g. for inexact multiplication when
                         -frounding-math.  */
                      if (mul == NULL_TREE)
                        break;
                      /* Instead of adjusting OLD_CALL, let's build a new
                         call to not leak the LHS and prevent keeping bogus
                         debug statements.  DCE will clean up the old call.  */
                      gcall *new_call;
                      if (gimple_call_internal_p (old_call))
                        new_call = gimple_build_call_internal
                          (IFN_COPYSIGN, 2, mul, arg1);
                      else
                        new_call = gimple_build_call
                          (gimple_call_fndecl (old_call), 2, mul, arg1);
                      tree lhs = make_ssa_name (type);
                      gimple_call_set_lhs (new_call, lhs);
                      gimple_set_location (new_call,
                                           gimple_location (old_call));
                      insert_stmt_after (new_call, old_call);
                      /* We've used the constant, get rid of it.  */
                      ops->pop ();
                      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
                      /* Handle the CST1 < 0 case by negating the result.  */
                      if (cst1_neg)
                        {
                          tree negrhs = make_ssa_name (TREE_TYPE (lhs));
                          gimple *negate_stmt
                            = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
                          insert_stmt_after (negate_stmt, new_call);
                          oe->op = negrhs;
                        }
                      else
                        oe->op = lhs;
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        {
                          fprintf (dump_file, "Optimizing copysign: ");
                          print_generic_expr (dump_file, cst);
                          fprintf (dump_file, " * COPYSIGN (");
                          print_generic_expr (dump_file, arg0);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1);
                          fprintf (dump_file, ") into %sCOPYSIGN (",
                                   cst1_neg ? "-" : "");
                          print_generic_expr (dump_file, mul);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1);
                          fprintf (dump_file, ")\n");
                        }
                      return;
                    }
                  break;
                default:
                  break;
                }
            }
        }
    }
}

/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
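
/* For example (illustrative only, SSA names invented): if STMT is
     x_1 = a_2 * b_3
   and NEW_RHS is t_7, the statement becomes the copy
     x_1 = t_7
   and the chain of already-visited statements feeding the old rhs1 (a_2)
   is cleaned up via remove_visited_stmt_chain.  */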

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (stmt);
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}

/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
                            tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}

/* Reassociate expressions in basic block BB and, recursively, in the
   blocks that are its children in the post-dominator tree.

   Bubble up return status from maybe_optimize_range_tests.  */

static bool
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple *stmt = last_stmt (bb);
  bool cfg_cleanup_needed = false;

  if (stmt && !gimple_visited_p (stmt))
    cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (cfun, stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  reassoc_remove_stmt (&gsi);
                  release_defs (stmt);
                  /* Removing the statement above may have left the
                     iterator at the end of the sequence.  Reset it to
                     the last statement in that case; if the sequence is
                     now empty because we removed its last statement,
                     bail out.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      if (gsi_end_p (gsi))
                        break;
                    }
                }
              continue;
            }

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);

          /* For non-bit or min/max operations we can't associate
             all types.  Verify that here.  */
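          /* Illustrative note (see can_reassociate_p earlier in this file
             for the authoritative rules): roughly, a wrapping unsigned
             addition may be freely reassociated, a plain signed addition
             may not (overflow is undefined), and a floating-point addition
             only under -fassociative-math.  Bitwise AND/IOR/XOR and
             MIN/MAX are associative in any type, which is why they are
             exempted from the check below.  */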
          if (rhs_code != BIT_IOR_EXPR
              && rhs_code != BIT_AND_EXPR
              && rhs_code != BIT_XOR_EXPR
              && rhs_code != MIN_EXPR
              && rhs_code != MAX_EXPR
              && (!can_reassociate_p (lhs)
                  || !can_reassociate_p (rhs1)
                  || !can_reassociate_p (rhs2)))
            continue;

          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry *> ops;
              tree powi_result = NULL_TREE;
              bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              int orig_len = ops.length ();
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }

              if (rhs_code == PLUS_EXPR
                  && transform_add_to_multiply (&ops))
                ops.qsort (sort_by_operand_rank);

              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                {
                  if (is_vector)
                    optimize_vec_cond_expr (rhs_code, &ops);
                  else
                    optimize_range_tests (rhs_code, &ops, NULL);
                }

              if (rhs_code == MULT_EXPR && !is_vector)
                {
                  attempt_builtin_copysign (&ops);

                  if (reassoc_insert_powi_p
                      && flag_unsafe_math_optimizations)
                    powi_result = attempt_builtin_powi (stmt, &ops);
                }

              operand_entry *last;
              bool negate_result = false;
              if (ops.length () > 1
                  && rhs_code == MULT_EXPR)
                {
                  last = ops.last ();
                  if ((integer_minus_onep (last->op)
                       || real_minus_onep (last->op))
                      && !HONOR_SNANS (TREE_TYPE (lhs))
                      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
                          || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
                    {
                      ops.pop ();
                      negate_result = true;
                    }
                }

              tree new_lhs = lhs;
              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  /* If the stmt that defines the operand has to be
                     inserted, insert it before the use.  */
                  if (ops.last ()->stmt_to_insert)
                    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width;

                  /* For binary bit operations, if there are at least 3
                     operands and the last operand in OPS is a constant,
                     move it to the front.  This helps ensure that we
                     generate (X & Y) & C rather than (X & C) & Y.  The
                     former will often match a canonical bit test when we
                     get to RTL.  */
                  if (ops.length () > 2
                      && (rhs_code == BIT_AND_EXPR
                          || rhs_code == BIT_IOR_EXPR
                          || rhs_code == BIT_XOR_EXPR)
                      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
                    std::swap (*ops[0], *ops[ops_num - 1]);

                  /* Only rewrite the expression tree into a parallel form
                     in the last reassoc pass to avoid useless work
                     back-and-forth with initial linearization.  */
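                  /* Illustrative example (not from the sources): for
                     x = a + b + c + d with a chosen width of 2,
                     rewrite_expr_tree_parallel emits

                       t1 = a + b;
                       t2 = c + d;
                       x  = t1 + t2;

                     instead of the serial chain ((a + b) + c) + d,
                     exposing two independent additions to the
                     scheduler.  */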
                  if (!reassoc_insert_powi_p
                      && ops.length () > 3
                      && (width = get_reassociation_width (ops_num, rhs_code,
                                                           mode)) > 1)
                    {
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        fprintf (dump_file,
                                 "Width = %d was chosen for reassociation\n",
                                 width);
                      rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
                                                  width, ops);
                    }
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3)
                        swap_ops_for_binary_stmt (ops, len - 3, stmt);

                      new_lhs = rewrite_expr_tree (stmt, 0, ops,
                                                   powi_result != NULL
                                                   || negate_result,
                                                   len != orig_len);
                    }

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  */
                  if (powi_result)
                    {
                      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocpow");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        {
                          target_ssa = new_lhs;
                          new_lhs = lhs;
                        }
                      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
                                                      powi_result, target_ssa);
                      gimple_set_location (mul_stmt, gimple_location (stmt));
                      gimple_set_uid (mul_stmt, gimple_uid (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }

              if (negate_result)
                {
                  stmt = SSA_NAME_DEF_STMT (lhs);
                  tree tmp = make_ssa_name (TREE_TYPE (lhs));
                  gimple_set_lhs (stmt, tmp);
                  if (lhs != new_lhs)
                    tmp = new_lhs;
                  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
                                                           tmp);
                  gimple_set_uid (neg_stmt, gimple_uid (stmt));
                  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
                  update_stmt (stmt);
                }
            }
        }
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    cfg_cleanup_needed |= reassociate_bb (son);

  return cfg_cleanup_needed;
}
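
/* Illustrative example for the negate_result handling above (SSA names
   invented): for  x_5 = a_1 * b_2 * -1.0  the trailing -1.0 is popped from
   the operand list, the remaining product is rewritten into a fresh
   temporary, and a final  x_5 = -tmp_6  is emitted after it, so the
   negation is applied once rather than carried through the chain.  */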

/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...; // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...; // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
                  && is_gimple_assign (use_stmt)
                  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
                  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
                                     build_zero_cst (TREE_TYPE (var)),
                                     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = profile_probability::even ();
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      then_bb->count -= etrue->count ();

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
        othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
        othervar = gimple_assign_rhs1 (use_stmt);
      else
        gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}

void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}

/* Bubble up return status from reassociate_bb.  */

static bool
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}

/* Initialize the reassociation pass.  */
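
/* Illustrative note on the rank numbering set up below (values invented):
   with RANK starting at 2, two default definitions such as the parameters
   a_1(D) and b_2(D) receive ranks 3 and 4, and the basic blocks then
   receive 5 << 16, 6 << 16, ... in reverse post order, so anything defined
   in a later block outranks the default definitions and everything defined
   in earlier blocks.  */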

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* RPO (reverse post order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}

/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}

/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
   insertion of __builtin_powi calls.

   Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p)
{
  reassoc_insert_powi_p = insert_powi_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}
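
/* Usage note (an assumption about the usual dump-file naming, not taken
   from this file): the "Transforming ..." and "Width = ..." messages
   above are emitted under the per-instance detail dumps, e.g.

     gcc -O2 -ffast-math -fdump-tree-reassoc1-details foo.c

   should write them to a dump named roughly foo.c.*.reassoc1, with the
   second pass instance dumping to *.reassoc2.  */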

namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
    }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *)
    { return execute_reassoc (insert_powi_p); }

 private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}
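
/* Registration note (paraphrasing gcc/passes.def from memory rather than
   anything in this file): the pass is instantiated twice, roughly as
     NEXT_PASS (pass_reassoc, true);   <- insert_powi_p
     ...
     NEXT_PASS (pass_reassoc, false);
   so the early instance may introduce __builtin_powi calls, while the late
   instance, with insert_powi_p clear, is the one allowed to rewrite
   expression trees in parallel form (see reassociate_bb above).  */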