/* Reassociation for trees.
   Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop.h"
#include "flags.h"
#include "tree-ssa.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "builtins.h"
#include "gimplify.h"
#include "case-cfn-macros.h"

/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (They do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_*

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   3a. Combine repeated factors with the same occurrence counts
   into a __builtin_powi call that will later be optimized into
   an optimal number of multiplies.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   I.e., if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1),  b (1),  c (1),  d (2), e (2)


   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op.  So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   Because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or fewer ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */
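
/* A small end-to-end illustration (an informal sketch, not taken from any
   testcase): for

     t = (a - b) + (c - a)

   step 1 rewrites the subtractions as  a + (-b)  and  c + (-a),  step 2
   linearizes the addition into the operand list  {a, -b, c, -a},  step 3
   cancels  a  against  -a,  and steps 4/5 rewrite what is left and
   repropagate the negate, leaving  t = c - b.  */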

/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
   point 3a in the pass header comment.  */
static bool reassoc_insert_powi_p;

/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;

/* Operator, rank pair.  */
struct operand_entry
{
  unsigned int rank;
  unsigned int id;
  tree op;
  unsigned int count;
  gimple *stmt_to_insert;
};

static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static unsigned int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static int64_t *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, int64_t> *operand_rank;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static int64_t get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);

/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}

/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static int64_t
phi_rank (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);
  class loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple *use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this
     phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (arg))
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
          if (gimple_bb (def_stmt)->loop_father == father)
            return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
        }
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}

/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */
static bool
loop_carried_phi (tree exp)
{
  gimple *phi_stmt;
  int64_t block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}

/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */
static int64_t
propagate_rank (int64_t rank, tree op)
{
  int64_t op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}

/* Look up the operand rank structure for expression E.  */

static inline int64_t
find_operand_rank (tree e)
{
  int64_t *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, int64_t rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}

/* Given an expression E, return the rank of the expression.  */

static int64_t
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      int64_t rank;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
        return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
        return phi_rank (stmt);

      if (!is_gimple_assign (stmt))
        return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
         exception, remove the bias from loop-carried phis when propagating
         the rank so that dependent operations are not also biased.  */
      /* Simply walk over all SSA uses - this takes advantage of the
         fact that non-SSA operands are is_gimple_min_invariant and
         thus have rank 0.  */
      rank = 0;
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
        rank = propagate_rank (rank, op);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e);
          fprintf (dump_file, " is %" PRId64 "\n", (rank + 1));
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}


/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 4
#define FLOAT_ONE_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    {
      /* Sort -1.0 and 1.0 constants last, while in some cases
         const_binop can't optimize some inexact operations, multiplication
         by -1.0 or 1.0 can be always merged with others.  */
      if (real_onep (t) || real_minus_onep (t))
        return FLOAT_ONE_CONST_TYPE;
      return FLOAT_CONST_TYPE;
    }
  else
    return OTHER_CONST_TYPE;
}
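
/* Illustrative example (informal, not from a testcase): given PLUS_EXPR
   operands  {x_1 (rank 1), 5, c_3 (rank 2)},  sort_by_operand_rank below
   orders them  {c_3, x_1, 5}:  decreasing rank with the rank-0 constant
   last.  Among several constants the classes above push integer constants
   to the very end, where folding benefits the most.  */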

/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  if (oeb->rank != oea->rank)
    return oeb->rank > oea->rank ? 1 : -1;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
        return constant_type (oea->op) - constant_type (oeb->op);
      else
        /* To make sorting result stable, we use unique IDs to determine
           order.  */
        return oeb->id > oea->id ? 1 : -1;
    }

  if (TREE_CODE (oea->op) != SSA_NAME)
    {
      if (TREE_CODE (oeb->op) != SSA_NAME)
        return oeb->id > oea->id ? 1 : -1;
      else
        return 1;
    }
  else if (TREE_CODE (oeb->op) != SSA_NAME)
    return -1;

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
    {
      /* SSA_NAME_VERSION is assigned pretty randomly because we reuse
         versions of removed SSA_NAMEs, so if possible, prefer to sort
         based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
         See PR60418.  */
      gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
      gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
      basic_block bba = gimple_bb (stmta);
      basic_block bbb = gimple_bb (stmtb);
      if (bbb != bba)
        {
          /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
             but the other might not.  */
          if (!bba)
            return 1;
          if (!bbb)
            return -1;
          /* If neither is, compare bb_rank.  */
          if (bb_rank[bbb->index] != bb_rank[bba->index])
            return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
        }

      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
      if (da != db)
        return da ? 1 : -1;

      return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
    }

  return oeb->id > oea->id ? 1 : -1;
}

/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  oe->stmt_to_insert = stmt_to_insert;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
                       HOST_WIDE_INT repeat)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  oe->stmt_to_insert = NULL;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}

/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      tree rhs2 = gimple_assign_rhs2 (stmt);
      if (TREE_CODE (rhs1) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
        return false;
      if (rhs2
          && TREE_CODE (rhs2) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
        return false;
      return true;
    }

  return false;
}


/* Return true if STMT is a nop-conversion.  */

static bool
gimple_nop_conversion_p (gimple *stmt)
{
  if (gassign *ass = dyn_cast <gassign *> (stmt))
    {
      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
          && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
                                    TREE_TYPE (gimple_assign_rhs1 (ass))))
        return true;
    }
  return false;
}

/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple *stmt = SSA_NAME_DEF_STMT (name);

  /* Look through nop conversions (sign changes).  */
  if (gimple_nop_conversion_p (stmt)
      && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}

/* Return true if OP1 and OP2 have the same value when cast to either
   type.  */

static bool
ops_equal_values_p (tree op1, tree op2)
{
  if (op1 == op2)
    return true;

  tree orig_op1 = op1;
  if (TREE_CODE (op1) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op1);
      if (gimple_nop_conversion_p (stmt))
        {
          op1 = gimple_assign_rhs1 (stmt);
          if (op1 == op2)
            return true;
        }
    }

  if (TREE_CODE (op2) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op2);
      if (gimple_nop_conversion_p (stmt))
        {
          op2 = gimple_assign_rhs1 (stmt);
          if (op1 == op2
              || orig_op1 == op2)
            return true;
        }
    }

  return false;
}


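/* Illustration (informal): after linearizing  x & y & x  the operand list
   contains  x  twice; eliminate_duplicate_pair below drops one of the two
   copies.  For  x ^ y ^ x  both copies of  x  are removed instead.  */
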
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
                          vec<operand_entry *> *ops,
                          bool *all_done,
                          unsigned int i,
                          operand_entry *curr,
                          operand_entry *last)
{

  /* If we have two of the same op, and the opcode is &, |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
        {
        case MAX_EXPR:
        case MIN_EXPR:
        case BIT_IOR_EXPR:
        case BIT_AND_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op);
              fprintf (dump_file, " [&|minmax] ");
              print_generic_expr (dump_file, last->op);
              fprintf (dump_file, " -> ");
              print_generic_stmt (dump_file, last->op);
            }

          ops->ordered_remove (i);
          reassociate_stats.ops_eliminated ++;

          return true;

        case BIT_XOR_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op);
              fprintf (dump_file, " ^ ");
              print_generic_expr (dump_file, last->op);
              fprintf (dump_file, " -> nothing\n");
            }

          reassociate_stats.ops_eliminated += 2;

          if (ops->length () == 2)
            {
              ops->truncate (0);
              add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
              *all_done = true;
            }
          else
            {
              ops->ordered_remove (i-1);
              ops->ordered_remove (i-1);
            }

          return true;

        default:
          break;
        }
    }
  return false;
}

static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
   not expression, look in OPS for a corresponding positive operation to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
                           vec<operand_entry *> *ops,
                           unsigned int currindex,
                           operand_entry *curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (negateop
          && ops_equal_values_p (oe->op, negateop))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, negateop);
              fprintf (dump_file, " + -");
              print_generic_expr (dump_file, oe->op);
              fprintf (dump_file, " -> 0\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated ++;

          return true;
        }
      else if (notop
               && ops_equal_values_p (oe->op, notop))
        {
          tree op_type = TREE_TYPE (oe->op);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop);
              fprintf (dump_file, " + ~");
              print_generic_expr (dump_file, oe->op);
              fprintf (dump_file, " -> -1\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_all_ones_cst (op_type));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated ++;

          return true;
        }
    }

  /* If CURR->OP is a negate expr without nop conversion in a plus expr:
     save it for later inspection in repropagate_negates().  */
  if (negateop != NULL_TREE
      && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
    plus_negates.safe_push (curr->op);

  return false;
}

/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
                     vec<operand_entry *> *ops,
                     unsigned int currindex,
                     operand_entry *curr)
{
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " & ~");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " | ~");
              print_generic_expr (dump_file, oe->op);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " -> 0\n");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " -> -1\n");
            }

          if (opcode == BIT_AND_EXPR)
            oe->op = build_zero_cst (TREE_TYPE (oe->op));
          else if (opcode == BIT_IOR_EXPR)
            oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

          reassociate_stats.ops_eliminated += ops->length () - 1;
          ops->truncate (0);
          ops->quick_push (oe);
          return true;
        }
    }

  return false;
}

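/* Illustration (informal): if the eliminations above or constant merging
   left the operand list of a BIT_AND_EXPR chain as  {a, b, 0},  the
   routine below truncates it to just  {0};  for a MULT_EXPR list
   {a, 1.0}  it simply drops the  1.0  (when SNaNs need not be honored).  */
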
/* Use a constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
                           vec<operand_entry *> *ops)
{
  operand_entry *oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
        {
        case BIT_AND_EXPR:
          if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & -1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case BIT_IOR_EXPR:
          if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | -1, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case MULT_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && !HONOR_NANS (type)
                  && !HONOR_SIGNED_ZEROS (type)
                  && real_zerop (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;
                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_onep (oelast->op)
                   || (FLOAT_TYPE_P (type)
                       && !HONOR_SNANS (type)
                       && real_onep (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        case BIT_XOR_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
                  && fold_real_zero_addition_p (type, oelast->op,
                                                opcode == MINUS_EXPR)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found [|^+] 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        default:
          break;
        }
    }
}


static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
                                 bool, bool);

/* Structure for tracking and counting operands.  */
struct oecount {
  unsigned int cnt;
  unsigned int id;
  enum tree_code oecode;
  tree op;
};


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;


/* Oecount hashtable helpers.  */

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};

/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (int p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (int p1, int p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return c1->oecode == c2->oecode && c1->op == c2->op;
}

/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt > c2->cnt ? 1 : -1;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id > c2->id ? 1 : -1;
}

/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple *stmt, tree op)
{
  if (!is_gimple_call (stmt))
    return false;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}

/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple *stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_CFN_POWI:
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}

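/* Illustration (informal): for a call  __builtin_pow (x, 5.0)  the routine
   above rewrites the call in place to  __builtin_pow (x, 4.0)  and returns
   4; for  __builtin_powi (x, 3)  it becomes  __builtin_powi (x, 2)  and 2
   is returned.  */
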
/* Replace the SSA name defined by STMT with a new SSA name and replace
   all uses of the old name with the new one.  Return the new SSA name.  */

static tree
make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
{
  gimple *use_stmt;
  use_operand_p use;
  imm_use_iterator iter;
  tree new_lhs, new_debug_lhs = NULL_TREE;
  tree lhs = gimple_get_lhs (stmt);

  new_lhs = make_ssa_name (TREE_TYPE (lhs));
  gimple_set_lhs (stmt, new_lhs);

  /* Also need to update GIMPLE_DEBUGs.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      tree repl = new_lhs;
      if (is_gimple_debug (use_stmt))
        {
          if (new_debug_lhs == NULL_TREE)
            {
              new_debug_lhs = make_node (DEBUG_EXPR_DECL);
              gdebug *def_temp
                = gimple_build_debug_bind (new_debug_lhs,
                                           build2 (opcode, TREE_TYPE (lhs),
                                                   new_lhs, op),
                                           stmt);
              DECL_ARTIFICIAL (new_debug_lhs) = 1;
              TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
              SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
              gimple_set_uid (def_temp, gimple_uid (stmt));
              gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
              gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
            }
          repl = new_debug_lhs;
        }
      FOR_EACH_IMM_USE_ON_STMT (use, iter)
        SET_USE (use, repl);
      update_stmt (use_stmt);
    }
  return new_lhs;
}

/* Replace all SSA names defined in STMTS_TO_FIX and replace their
   uses with new SSA names.  Also do this for the stmt that defines DEF
   if *DEF is not OP.  */

static void
make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
                           vec<gimple *> &stmts_to_fix)
{
  unsigned i;
  gimple *stmt;

  if (*def != op
      && TREE_CODE (*def) == SSA_NAME
      && (stmt = SSA_NAME_DEF_STMT (*def))
      && gimple_code (stmt) != GIMPLE_NOP)
    *def = make_new_ssa_for_def (stmt, opcode, op);

  FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
    make_new_ssa_for_def (stmt, opcode, op);
}

/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}

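/* Illustration (informal, assuming single-use chains): given the chain
     t1 = a * b;  t2 = t1 * c;
   calling zero_one_operation below with *DEF == t2, OPCODE == MULT_EXPR
   and OP == b strips the factor  b,  so the remaining chain computes
   a * c  (under a fresh SSA name, see the PR72835 note below).  */
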
/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  tree orig_def = *def;
  gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.  */
  auto_vec<gimple *, 64> stmts_to_fix;

  do
    {
      tree name;

      if (opcode == MULT_EXPR)
        {
          if (stmt_is_power_of_op (stmt, op))
            {
              if (decrement_power (stmt) == 1)
                {
                  if (stmts_to_fix.length () > 0)
                    stmts_to_fix.pop ();
                  propagate_op_to_single_use (op, stmt, def);
                }
              break;
            }
          else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
            {
              if (gimple_assign_rhs1 (stmt) == op)
                {
                  tree cst = build_minus_one_cst (TREE_TYPE (op));
                  if (stmts_to_fix.length () > 0)
                    stmts_to_fix.pop ();
                  propagate_op_to_single_use (cst, stmt, def);
                  break;
                }
              else if (integer_minus_onep (op)
                       || real_minus_onep (op))
                {
                  gimple_assign_set_rhs_code
                    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
                  break;
                }
            }
        }

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
         is ours simply propagate the other operand into the stmts
         single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
          && (name == op
              || gimple_assign_rhs2 (stmt) == op))
        {
          if (name == op)
            name = gimple_assign_rhs2 (stmt);
          if (stmts_to_fix.length () > 0)
            stmts_to_fix.pop ();
          propagate_op_to_single_use (name, stmt, def);
          break;
        }

      /* We might have a multiply of two __builtin_pow* calls, and
         the operand might be hiding in the rightmost one.  Likewise
         this can happen for a negate.  */
      if (opcode == MULT_EXPR
          && gimple_assign_rhs_code (stmt) == opcode
          && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
          && has_single_use (gimple_assign_rhs2 (stmt)))
        {
          gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
          if (stmt_is_power_of_op (stmt2, op))
            {
              if (decrement_power (stmt2) == 1)
                propagate_op_to_single_use (op, stmt2, def);
              else
                stmts_to_fix.safe_push (stmt2);
              break;
            }
          else if (is_gimple_assign (stmt2)
                   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
            {
              if (gimple_assign_rhs1 (stmt2) == op)
                {
                  tree cst = build_minus_one_cst (TREE_TYPE (op));
                  propagate_op_to_single_use (cst, stmt2, def);
                  break;
                }
              else if (integer_minus_onep (op)
                       || real_minus_onep (op))
                {
                  stmts_to_fix.safe_push (stmt2);
                  gimple_assign_set_rhs_code
                    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
                  break;
                }
            }
        }

      /* Continue walking the chain.  */
      gcc_assert (name != op
                  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
      stmts_to_fix.safe_push (stmt);
    }
  while (1);

  if (stmts_to_fix.length () > 0 || *def == orig_def)
    make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
}

/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
         executed all in parallel, if only one stmt is a PHI,
         it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
        return true;

      if (gimple_code (s2) == GIMPLE_PHI)
        return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
        return true;

      if (gimple_uid (s1) > gimple_uid (s2))
        return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *s = gsi_stmt (gsi);
          if (gimple_uid (s) != uid)
            break;
          if (s == s2)
            return true;
        }

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}

/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple *stmt, gimple *insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
                      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}

/* Builds one statement performing OP1 OPCODE OP2, creating a new SSA name
   for the result.  Places the statement after the definition of either
   OP1 or OP2.  Returns the new statement.  */

static gimple *
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple *op1def = NULL, *op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (gsi_end_p (gsi))
        {
          gimple_stmt_iterator gsi2
            = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
          gimple_set_uid (sum,
                          gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
        }
      else
        gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple *insert_point;
      if ((!op1def || gimple_nop_p (op1def))
          || (op2def && !gimple_nop_p (op2def)
              && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
        insert_point = op2def;
      else
        insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  update_stmt (sum);

  return sum;
}

/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
        that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
        multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */

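/* A short worked example (informal): for the chain  a*x + b*x + c*x + d
   the counting table records  (x, *) -> 3,  (a, *) -> 1,  and so on.
   Processing  (x, *)  first builds the alternate chain  (a + b) + c,
   multiplies it by  x,  and replaces the first candidate with that
   product while the other two candidates become 0, leaving
   (a + b + c) * x + 0 + 0 + d  for the main pass to finish.  */
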
static bool
undistribute_ops_list (enum tree_code opcode,
                       vec<operand_entry *> *ops, class loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry *oe1;
  unsigned i, j;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry *> *subops;
  bool changed = false;
  unsigned int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  auto_sbitmap candidates (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple *oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
        continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
        continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
           && dcode != RDIV_EXPR)
          || !is_reassociable_op (oe1def, dcode, loop))
        continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
                          (*ops)[bitmap_first_set_bit (candidates)]->op,
                          TDF_NONE);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);

  hash_table<oecount_hasher> ctable (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to workaround that limitation.  */
  typedef vec<operand_entry *> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple *oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
                           associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (subops[i], j, oe1)
        {
          oecount c;
          int *slot;
          int idx;
          c.oecode = oecode;
          c.cnt = 1;
          c.id = next_oecount_id++;
          c.op = oe1->op;
          cvec.safe_push (c);
          idx = cvec.length () + 41;
          slot = ctable.find_slot (idx, INSERT);
          if (!*slot)
            {
              *slot = idx;
            }
          else
            {
              cvec.pop ();
              cvec[*slot - 42].cnt++;
            }
        }
    }

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
        {
          fprintf (dump_file, "  %u %s: ", c->cnt,
                   c->oecode == MULT_EXPR
                   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
          print_generic_expr (dump_file, c->op);
          fprintf (dump_file, "\n");
        }
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  auto_sbitmap candidates2 (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
        break;

      /* Now collect the operands in the outer chain that contain
         the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
        {
          gimple *oedef;
          enum tree_code oecode;
          unsigned j;
          tree op = (*ops)[i]->op;

          /* If we undistributed in this chain already this may be
             a constant.  */
          if (TREE_CODE (op) != SSA_NAME)
            continue;

          oedef = SSA_NAME_DEF_STMT (op);
          oecode = gimple_assign_rhs_code (oedef);
          if (oecode != c->oecode)
            continue;

          FOR_EACH_VEC_ELT (subops[i], j, oe1)
            {
              if (oe1->op == c->op)
                {
                  bitmap_set_bit (candidates2, i);
                  ++nr_candidates2;
                  break;
                }
            }
        }

      if (nr_candidates2 >= 2)
        {
          operand_entry *oe1, *oe2;
          gimple *prod;
          int first = bitmap_first_set_bit (candidates2);

          /* Build the new addition chain.  */
          oe1 = (*ops)[first];
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Building (");
              print_generic_expr (dump_file, oe1->op);
            }
          zero_one_operation (&oe1->op, c->oecode, c->op);
          EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
            {
              gimple *sum;
              oe2 = (*ops)[i];
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, " + ");
                  print_generic_expr (dump_file, oe2->op);
                }
              zero_one_operation (&oe2->op, c->oecode, c->op);
              sum = build_and_add_sum (TREE_TYPE (oe1->op),
                                       oe1->op, oe2->op, opcode);
              oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
              oe2->rank = 0;
              oe1->op = gimple_get_lhs (sum);
            }

          /* Apply the multiplication/division.  */
          prod = build_and_add_sum (TREE_TYPE (oe1->op),
                                    oe1->op, c->op, c->oecode);
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
              print_generic_expr (dump_file, c->op);
              fprintf (dump_file, "\n");
            }

          /* Record it in the addition chain and disable further
             undistribution with this op.  */
          oe1->op = gimple_assign_lhs (prod);
          oe1->rank = get_rank (oe1->op);
          subops[first].release ();

          changed = true;
        }

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();

  return changed;
}

/* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
   first: element index for each relevant BIT_FIELD_REF.
   second: the index into the ops vector for each relevant BIT_FIELD_REF.  */
typedef std::pair<unsigned, unsigned> v_info_elem;
struct v_info {
  tree vec_type;
  auto_vec<v_info_elem, 32> vec;
};
typedef v_info *v_info_ptr;

/* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode.  */
static int
sort_by_mach_mode (const void *p_i, const void *p_j)
{
  const tree tr1 = *((const tree *) p_i);
  const tree tr2 = *((const tree *) p_j);
  unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
  unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
  if (mode1 > mode2)
    return 1;
  else if (mode1 < mode2)
    return -1;
  if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
    return -1;
  else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
    return 1;
  return 0;
}

/* Cleanup hash map for VECTOR information.  */
static void
cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
{
  for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
       it != info_map.end (); ++it)
    {
      v_info_ptr info = (*it).second;
      delete info;
      (*it).second = NULL;
    }
}

/* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
     V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
   is transformed to
     Vs = (V1 + V2 + ... + Vn)
     Vs[0] + Vs[1] + ... + Vs[k]

   The basic steps are listed below:

    1) Check the addition chain *OPS by looking for summands that come from
       a BIT_FIELD_REF on a VECTOR type.  Put the information into
       v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.

    2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs
       are contiguous and cover the whole VECTOR perfectly without any holes.
       Obtain one VECTOR list which contains the candidates to be transformed.

    3) Sort the VECTOR list by machine mode of VECTOR type, and for each
       group of candidates with the same mode, build the addition statements
       for them and generate BIT_FIELD_REFs accordingly.

   TODO:
       The current implementation requires the whole VECTOR to be fully
       covered, but it could be extended to support partial coverage
       (adjacent elements that do not fill the whole VECTOR); that would
       need some cost model to decide where to draw the boundary.  */
static bool
undistribute_bitref_for_vector (enum tree_code opcode,
                                vec<operand_entry *> *ops, struct loop *loop)
{
  if (ops->length () <= 1)
    return false;

  if (opcode != PLUS_EXPR
      && opcode != MULT_EXPR
      && opcode != BIT_XOR_EXPR
      && opcode != BIT_IOR_EXPR
      && opcode != BIT_AND_EXPR)
    return false;

  hash_map<tree, v_info_ptr> v_info_map;
  operand_entry *oe1;
  unsigned i;

  /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
     information into map.  */
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple *oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
        continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
        continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
        continue;

      tree rhs = gimple_assign_rhs1 (oe1def);
      tree vec = TREE_OPERAND (rhs, 0);
      tree vec_type = TREE_TYPE (vec);

      if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
        continue;

      /* Ignore it if target machine can't support this VECTOR type.  */
      if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
        continue;

      /* Check const vector type, constrain BIT_FIELD_REF offset and size.  */
      if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
        continue;

      if (VECTOR_TYPE_P (TREE_TYPE (rhs))
          || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
        continue;

*/ 1898 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type))) 1899 { 1900 machine_mode simd_mode; 1901 unsigned HOST_WIDE_INT size, nunits; 1902 unsigned HOST_WIDE_INT elem_size 1903 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs))); 1904 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size)) 1905 continue; 1906 if (size <= elem_size || (size % elem_size) != 0) 1907 continue; 1908 nunits = size / elem_size; 1909 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)), 1910 nunits).exists (&simd_mode)) 1911 continue; 1912 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode); 1913 1914 /* Ignore it if target machine can't support this VECTOR type. */ 1915 if (!VECTOR_MODE_P (TYPE_MODE (vec_type))) 1916 continue; 1917 1918 /* Check const vector type, constrain BIT_FIELD_REF offset and 1919 size. */ 1920 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ()) 1921 continue; 1922 1923 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)), 1924 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec))))) 1925 continue; 1926 } 1927 1928 tree elem_type = TREE_TYPE (vec_type); 1929 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type)); 1930 if (maybe_ne (bit_field_size (rhs), elem_size)) 1931 continue; 1932 1933 unsigned idx; 1934 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx)) 1935 continue; 1936 1937 /* Ignore it if target machine can't support this type of VECTOR 1938 operation. */ 1939 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector); 1940 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing) 1941 continue; 1942 1943 bool existed; 1944 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed); 1945 if (!existed) 1946 { 1947 info = new v_info; 1948 info->vec_type = vec_type; 1949 } 1950 else if (!types_compatible_p (vec_type, info->vec_type)) 1951 continue; 1952 info->vec.safe_push (std::make_pair (idx, i)); 1953 } 1954 1955 /* At least two VECTOR to combine. */ 1956 if (v_info_map.elements () <= 1) 1957 { 1958 cleanup_vinfo_map (v_info_map); 1959 return false; 1960 } 1961 1962 /* Verify all VECTOR candidates by checking two conditions: 1963 1) sorted offsets are adjacent, no holes. 1964 2) can fill the whole VECTOR perfectly. 1965 And add the valid candidates to a vector for further handling. */ 1966 auto_vec<tree> valid_vecs (v_info_map.elements ()); 1967 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin (); 1968 it != v_info_map.end (); ++it) 1969 { 1970 tree cand_vec = (*it).first; 1971 v_info_ptr cand_info = (*it).second; 1972 unsigned int num_elems 1973 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant (); 1974 if (cand_info->vec.length () != num_elems) 1975 continue; 1976 sbitmap holes = sbitmap_alloc (num_elems); 1977 bitmap_ones (holes); 1978 bool valid = true; 1979 v_info_elem *curr; 1980 FOR_EACH_VEC_ELT (cand_info->vec, i, curr) 1981 { 1982 if (!bitmap_bit_p (holes, curr->first)) 1983 { 1984 valid = false; 1985 break; 1986 } 1987 else 1988 bitmap_clear_bit (holes, curr->first); 1989 } 1990 if (valid && bitmap_empty_p (holes)) 1991 valid_vecs.quick_push (cand_vec); 1992 sbitmap_free (holes); 1993 } 1994 1995 /* At least two VECTOR to combine. */ 1996 if (valid_vecs.length () <= 1) 1997 { 1998 cleanup_vinfo_map (v_info_map); 1999 return false; 2000 } 2001 2002 valid_vecs.qsort (sort_by_mach_mode); 2003 /* Go through all candidates by machine mode order, query the mode_to_total 2004 to get the total number for each mode and skip the single one. 
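   As a rough end-to-end illustration (a hypothetical source-level example
   using the GCC vector extension; whether a chain reaches this pass in
   exactly this shape depends on earlier passes):

     typedef int v4si __attribute__ ((vector_size (16)));
     int sum (v4si a, v4si b)
     {
       return a[0] + a[1] + a[2] + a[3] + b[0] + b[1] + b[2] + b[3];
     }

   is handled as if it had been written

     v4si t = a + b;
     return t[0] + t[1] + t[2] + t[3];

   The loop below emits the vector additions; the BIT_FIELD_REFs on the
   combined vector are generated after it.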
*/ 2005 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i) 2006 { 2007 tree tvec = valid_vecs[i]; 2008 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec)); 2009 2010 /* Skip modes with only a single candidate. */ 2011 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode) 2012 continue; 2013 2014 unsigned int idx, j; 2015 gimple *sum = NULL; 2016 tree sum_vec = tvec; 2017 v_info_ptr info_ptr = *(v_info_map.get (tvec)); 2018 v_info_elem *elem; 2019 tree vec_type = info_ptr->vec_type; 2020 2021 /* Build the sum for all candidates with same mode. */ 2022 do 2023 { 2024 sum = build_and_add_sum (vec_type, sum_vec, 2025 valid_vecs[i + 1], opcode); 2026 if (!useless_type_conversion_p (vec_type, 2027 TREE_TYPE (valid_vecs[i + 1]))) 2028 { 2029 /* Update the operands only after build_and_add_sum, 2030 so that we don't have to repeat the placement algorithm 2031 of build_and_add_sum. */ 2032 gimple_stmt_iterator gsi = gsi_for_stmt (sum); 2033 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, 2034 valid_vecs[i + 1]); 2035 tree lhs = make_ssa_name (vec_type); 2036 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce); 2037 gimple_set_uid (g, gimple_uid (sum)); 2038 gsi_insert_before (&gsi, g, GSI_NEW_STMT); 2039 gimple_assign_set_rhs2 (sum, lhs); 2040 if (sum_vec == tvec) 2041 { 2042 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec); 2043 lhs = make_ssa_name (vec_type); 2044 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce); 2045 gimple_set_uid (g, gimple_uid (sum)); 2046 gsi_insert_before (&gsi, g, GSI_NEW_STMT); 2047 gimple_assign_set_rhs1 (sum, lhs); 2048 } 2049 update_stmt (sum); 2050 } 2051 sum_vec = gimple_get_lhs (sum); 2052 info_ptr = *(v_info_map.get (valid_vecs[i + 1])); 2053 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type)); 2054 /* Update those related ops of current candidate VECTOR. */ 2055 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem) 2056 { 2057 idx = elem->second; 2058 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op); 2059 /* Set this then op definition will get DCEd later. */ 2060 gimple_set_visited (def, true); 2061 if (opcode == PLUS_EXPR 2062 || opcode == BIT_XOR_EXPR 2063 || opcode == BIT_IOR_EXPR) 2064 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op)); 2065 else if (opcode == MULT_EXPR) 2066 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op)); 2067 else 2068 { 2069 gcc_assert (opcode == BIT_AND_EXPR); 2070 (*ops)[idx]->op 2071 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op)); 2072 } 2073 (*ops)[idx]->rank = 0; 2074 } 2075 if (dump_file && (dump_flags & TDF_DETAILS)) 2076 { 2077 fprintf (dump_file, "Generating addition -> "); 2078 print_gimple_stmt (dump_file, sum, 0); 2079 } 2080 i++; 2081 } 2082 while ((i < valid_vecs.length () - 1) 2083 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode); 2084 2085 /* Referring to first valid VECTOR with this mode, generate the 2086 BIT_FIELD_REF statements accordingly. */ 2087 info_ptr = *(v_info_map.get (tvec)); 2088 gcc_assert (sum); 2089 tree elem_type = TREE_TYPE (vec_type); 2090 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem) 2091 { 2092 idx = elem->second; 2093 tree dst = make_ssa_name (elem_type); 2094 tree pos = bitsize_int (elem->first 2095 * tree_to_uhwi (TYPE_SIZE (elem_type))); 2096 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec, 2097 TYPE_SIZE (elem_type), pos); 2098 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr); 2099 insert_stmt_after (gs, sum); 2100 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op); 2101 /* Set this then op definition will get DCEd later. 
*/ 2102 gimple_set_visited (def, true); 2103 (*ops)[idx]->op = gimple_assign_lhs (gs); 2104 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op); 2105 if (dump_file && (dump_flags & TDF_DETAILS)) 2106 { 2107 fprintf (dump_file, "Generating bit_field_ref -> "); 2108 print_gimple_stmt (dump_file, gs, 0); 2109 } 2110 } 2111 } 2112 2113 if (dump_file && (dump_flags & TDF_DETAILS)) 2114 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n"); 2115 2116 cleanup_vinfo_map (v_info_map); 2117 2118 return true; 2119 } 2120 2121 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison 2122 expression, examine the other OPS to see if any of them are comparisons 2123 of the same values, which we may be able to combine or eliminate. 2124 For example, we can rewrite (a < b) | (a == b) as (a <= b). */ 2125 2126 static bool 2127 eliminate_redundant_comparison (enum tree_code opcode, 2128 vec<operand_entry *> *ops, 2129 unsigned int currindex, 2130 operand_entry *curr) 2131 { 2132 tree op1, op2; 2133 enum tree_code lcode, rcode; 2134 gimple *def1, *def2; 2135 int i; 2136 operand_entry *oe; 2137 2138 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) 2139 return false; 2140 2141 /* Check that CURR is a comparison. */ 2142 if (TREE_CODE (curr->op) != SSA_NAME) 2143 return false; 2144 def1 = SSA_NAME_DEF_STMT (curr->op); 2145 if (!is_gimple_assign (def1)) 2146 return false; 2147 lcode = gimple_assign_rhs_code (def1); 2148 if (TREE_CODE_CLASS (lcode) != tcc_comparison) 2149 return false; 2150 op1 = gimple_assign_rhs1 (def1); 2151 op2 = gimple_assign_rhs2 (def1); 2152 2153 /* Now look for a similar comparison in the remaining OPS. */ 2154 for (i = currindex + 1; ops->iterate (i, &oe); i++) 2155 { 2156 tree t; 2157 2158 if (TREE_CODE (oe->op) != SSA_NAME) 2159 continue; 2160 def2 = SSA_NAME_DEF_STMT (oe->op); 2161 if (!is_gimple_assign (def2)) 2162 continue; 2163 rcode = gimple_assign_rhs_code (def2); 2164 if (TREE_CODE_CLASS (rcode) != tcc_comparison) 2165 continue; 2166 2167 /* If we got here, we have a match. See if we can combine the 2168 two comparisons. */ 2169 tree type = TREE_TYPE (gimple_assign_lhs (def1)); 2170 if (opcode == BIT_IOR_EXPR) 2171 t = maybe_fold_or_comparisons (type, 2172 lcode, op1, op2, 2173 rcode, gimple_assign_rhs1 (def2), 2174 gimple_assign_rhs2 (def2)); 2175 else 2176 t = maybe_fold_and_comparisons (type, 2177 lcode, op1, op2, 2178 rcode, gimple_assign_rhs1 (def2), 2179 gimple_assign_rhs2 (def2)); 2180 if (!t) 2181 continue; 2182 2183 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons 2184 always give us a boolean_type_node value back. If the original 2185 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type, 2186 we need to convert.
*/ 2187 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t))) 2188 t = fold_convert (TREE_TYPE (curr->op), t); 2189 2190 if (TREE_CODE (t) != INTEGER_CST 2191 && !operand_equal_p (t, curr->op, 0)) 2192 { 2193 enum tree_code subcode; 2194 tree newop1, newop2; 2195 if (!COMPARISON_CLASS_P (t)) 2196 continue; 2197 extract_ops_from_tree (t, &subcode, &newop1, &newop2); 2198 STRIP_USELESS_TYPE_CONVERSION (newop1); 2199 STRIP_USELESS_TYPE_CONVERSION (newop2); 2200 if (!is_gimple_val (newop1) || !is_gimple_val (newop2)) 2201 continue; 2202 } 2203 2204 if (dump_file && (dump_flags & TDF_DETAILS)) 2205 { 2206 fprintf (dump_file, "Equivalence: "); 2207 print_generic_expr (dump_file, curr->op); 2208 fprintf (dump_file, " %s ", op_symbol_code (opcode)); 2209 print_generic_expr (dump_file, oe->op); 2210 fprintf (dump_file, " -> "); 2211 print_generic_expr (dump_file, t); 2212 fprintf (dump_file, "\n"); 2213 } 2214 2215 /* Now we can delete oe, as it has been subsumed by the new combined 2216 expression t. */ 2217 ops->ordered_remove (i); 2218 reassociate_stats.ops_eliminated ++; 2219 2220 /* If t is the same as curr->op, we're done. Otherwise we must 2221 replace curr->op with t. Special case is if we got a constant 2222 back, in which case we add it to the end instead of in place of 2223 the current entry. */ 2224 if (TREE_CODE (t) == INTEGER_CST) 2225 { 2226 ops->ordered_remove (currindex); 2227 add_to_ops_vec (ops, t); 2228 } 2229 else if (!operand_equal_p (t, curr->op, 0)) 2230 { 2231 gimple *sum; 2232 enum tree_code subcode; 2233 tree newop1; 2234 tree newop2; 2235 gcc_assert (COMPARISON_CLASS_P (t)); 2236 extract_ops_from_tree (t, &subcode, &newop1, &newop2); 2237 STRIP_USELESS_TYPE_CONVERSION (newop1); 2238 STRIP_USELESS_TYPE_CONVERSION (newop2); 2239 gcc_checking_assert (is_gimple_val (newop1) 2240 && is_gimple_val (newop2)); 2241 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode); 2242 curr->op = gimple_get_lhs (sum); 2243 } 2244 return true; 2245 } 2246 2247 return false; 2248 } 2249 2250 2251 /* Transform repeated addition of same values into multiply with 2252 constant. */ 2253 static bool 2254 transform_add_to_multiply (vec<operand_entry *> *ops) 2255 { 2256 operand_entry *oe; 2257 tree op = NULL_TREE; 2258 int j; 2259 int i, start = -1, end = 0, count = 0; 2260 auto_vec<std::pair <int, int> > indxs; 2261 bool changed = false; 2262 2263 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op)) 2264 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op)) 2265 || !flag_unsafe_math_optimizations)) 2266 return false; 2267 2268 /* Look for repeated operands. */ 2269 FOR_EACH_VEC_ELT (*ops, i, oe) 2270 { 2271 if (start == -1) 2272 { 2273 count = 1; 2274 op = oe->op; 2275 start = i; 2276 } 2277 else if (operand_equal_p (oe->op, op, 0)) 2278 { 2279 count++; 2280 end = i; 2281 } 2282 else 2283 { 2284 if (count > 1) 2285 indxs.safe_push (std::make_pair (start, end)); 2286 count = 1; 2287 op = oe->op; 2288 start = i; 2289 } 2290 } 2291 2292 if (count > 1) 2293 indxs.safe_push (std::make_pair (start, end)); 2294 2295 for (j = indxs.length () - 1; j >= 0; --j) 2296 { 2297 /* Convert repeated operand addition to multiplication. 
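       For instance (hypothetical operands), an operand list  { x, x, x, y }
       has the run of three x entries removed and a new entry defined by

         t_1 = x * 3;

       appended, so the chain is rewritten as  y + t_1.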
*/ 2298 start = indxs[j].first; 2299 end = indxs[j].second; 2300 op = (*ops)[start]->op; 2301 count = end - start + 1; 2302 for (i = end; i >= start; --i) 2303 ops->unordered_remove (i); 2304 tree tmp = make_ssa_name (TREE_TYPE (op)); 2305 tree cst = build_int_cst (integer_type_node, count); 2306 gassign *mul_stmt 2307 = gimple_build_assign (tmp, MULT_EXPR, 2308 op, fold_convert (TREE_TYPE (op), cst)); 2309 gimple_set_visited (mul_stmt, true); 2310 add_to_ops_vec (ops, tmp, mul_stmt); 2311 changed = true; 2312 } 2313 2314 return changed; 2315 } 2316 2317 2318 /* Perform various identities and other optimizations on the list of 2319 operand entries, stored in OPS. The tree code for the binary 2320 operation between all the operands is OPCODE. */ 2321 2322 static void 2323 optimize_ops_list (enum tree_code opcode, 2324 vec<operand_entry *> *ops) 2325 { 2326 unsigned int length = ops->length (); 2327 unsigned int i; 2328 operand_entry *oe; 2329 operand_entry *oelast = NULL; 2330 bool iterate = false; 2331 2332 if (length == 1) 2333 return; 2334 2335 oelast = ops->last (); 2336 2337 /* If the last two are constants, pop the constants off, merge them 2338 and try the next two. */ 2339 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) 2340 { 2341 operand_entry *oelm1 = (*ops)[length - 2]; 2342 2343 if (oelm1->rank == 0 2344 && is_gimple_min_invariant (oelm1->op) 2345 && useless_type_conversion_p (TREE_TYPE (oelm1->op), 2346 TREE_TYPE (oelast->op))) 2347 { 2348 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op), 2349 oelm1->op, oelast->op); 2350 2351 if (folded && is_gimple_min_invariant (folded)) 2352 { 2353 if (dump_file && (dump_flags & TDF_DETAILS)) 2354 fprintf (dump_file, "Merging constants\n"); 2355 2356 ops->pop (); 2357 ops->pop (); 2358 2359 add_to_ops_vec (ops, folded); 2360 reassociate_stats.constants_eliminated++; 2361 2362 optimize_ops_list (opcode, ops); 2363 return; 2364 } 2365 } 2366 } 2367 2368 eliminate_using_constants (opcode, ops); 2369 oelast = NULL; 2370 2371 for (i = 0; ops->iterate (i, &oe);) 2372 { 2373 bool done = false; 2374 2375 if (eliminate_not_pairs (opcode, ops, i, oe)) 2376 return; 2377 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast) 2378 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)) 2379 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe))) 2380 { 2381 if (done) 2382 return; 2383 iterate = true; 2384 oelast = NULL; 2385 continue; 2386 } 2387 oelast = oe; 2388 i++; 2389 } 2390 2391 if (iterate) 2392 optimize_ops_list (opcode, ops); 2393 } 2394 2395 /* The following functions are subroutines to optimize_range_tests and allow 2396 it to try to change a logical combination of comparisons into a range 2397 test. 2398 2399 For example, both 2400 X == 2 || X == 5 || X == 3 || X == 4 2401 and 2402 X >= 2 && X <= 5 2403 are converted to 2404 (unsigned) (X - 2) <= 3 2405 2406 For more information see comments above fold_test_range in fold-const.c, 2407 this implementation is for GIMPLE. */ 2408 2409 struct range_entry 2410 { 2411 tree exp; 2412 tree low; 2413 tree high; 2414 bool in_p; 2415 bool strict_overflow_p; 2416 unsigned int idx, next; 2417 }; 2418 2419 /* This is similar to make_range in fold-const.c, but on top of 2420 GIMPLE instead of trees. If EXP is non-NULL, it should be 2421 an SSA_NAME and STMT argument is ignored, otherwise STMT 2422 argument should be a GIMPLE_COND. 
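   As a rough example (SSA names invented): for an EXP defined by

     _1 = x_2 > 5;

   with integer x_2, the entry is filled in roughly as exp = x_2,
   in_p = true, low = 6, high = NULL, i.e. the range +[6, -], NULL
   standing for the maximum of the type.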
*/ 2423 2424 static void 2425 init_range_entry (struct range_entry *r, tree exp, gimple *stmt) 2426 { 2427 int in_p; 2428 tree low, high; 2429 bool is_bool, strict_overflow_p; 2430 2431 r->exp = NULL_TREE; 2432 r->in_p = false; 2433 r->strict_overflow_p = false; 2434 r->low = NULL_TREE; 2435 r->high = NULL_TREE; 2436 if (exp != NULL_TREE 2437 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))) 2438 return; 2439 2440 /* Start with simply saying "EXP != 0" and then look at the code of EXP 2441 and see if we can refine the range. Some of the cases below may not 2442 happen, but it doesn't seem worth worrying about this. We "continue" 2443 the outer loop when we've changed something; otherwise we "break" 2444 the switch, which will "break" the while. */ 2445 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node; 2446 high = low; 2447 in_p = 0; 2448 strict_overflow_p = false; 2449 is_bool = false; 2450 if (exp == NULL_TREE) 2451 is_bool = true; 2452 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) 2453 { 2454 if (TYPE_UNSIGNED (TREE_TYPE (exp))) 2455 is_bool = true; 2456 else 2457 return; 2458 } 2459 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) 2460 is_bool = true; 2461 2462 while (1) 2463 { 2464 enum tree_code code; 2465 tree arg0, arg1, exp_type; 2466 tree nexp; 2467 location_t loc; 2468 2469 if (exp != NULL_TREE) 2470 { 2471 if (TREE_CODE (exp) != SSA_NAME 2472 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp)) 2473 break; 2474 2475 stmt = SSA_NAME_DEF_STMT (exp); 2476 if (!is_gimple_assign (stmt)) 2477 break; 2478 2479 code = gimple_assign_rhs_code (stmt); 2480 arg0 = gimple_assign_rhs1 (stmt); 2481 arg1 = gimple_assign_rhs2 (stmt); 2482 exp_type = TREE_TYPE (exp); 2483 } 2484 else 2485 { 2486 code = gimple_cond_code (stmt); 2487 arg0 = gimple_cond_lhs (stmt); 2488 arg1 = gimple_cond_rhs (stmt); 2489 exp_type = boolean_type_node; 2490 } 2491 2492 if (TREE_CODE (arg0) != SSA_NAME 2493 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0)) 2494 break; 2495 loc = gimple_location (stmt); 2496 switch (code) 2497 { 2498 case BIT_NOT_EXPR: 2499 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE 2500 /* Ensure the range is either +[-,0], +[0,0], 2501 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or 2502 -[1,1]. If it is e.g. +[-,-] or -[-,-] 2503 or similar expression of unconditional true or 2504 false, it should not be negated. 
*/ 2505 && ((high && integer_zerop (high)) 2506 || (low && integer_onep (low)))) 2507 { 2508 in_p = !in_p; 2509 exp = arg0; 2510 continue; 2511 } 2512 break; 2513 case SSA_NAME: 2514 exp = arg0; 2515 continue; 2516 CASE_CONVERT: 2517 if (is_bool) 2518 { 2519 if ((TYPE_PRECISION (exp_type) == 1 2520 || TREE_CODE (exp_type) == BOOLEAN_TYPE) 2521 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1) 2522 return; 2523 } 2524 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) 2525 { 2526 if (TYPE_UNSIGNED (TREE_TYPE (arg0))) 2527 is_bool = true; 2528 else 2529 return; 2530 } 2531 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) 2532 is_bool = true; 2533 goto do_default; 2534 case EQ_EXPR: 2535 case NE_EXPR: 2536 case LT_EXPR: 2537 case LE_EXPR: 2538 case GE_EXPR: 2539 case GT_EXPR: 2540 is_bool = true; 2541 /* FALLTHRU */ 2542 default: 2543 if (!is_bool) 2544 return; 2545 do_default: 2546 nexp = make_range_step (loc, code, arg0, arg1, exp_type, 2547 &low, &high, &in_p, 2548 &strict_overflow_p); 2549 if (nexp != NULL_TREE) 2550 { 2551 exp = nexp; 2552 gcc_assert (TREE_CODE (exp) == SSA_NAME); 2553 continue; 2554 } 2555 break; 2556 } 2557 break; 2558 } 2559 if (is_bool) 2560 { 2561 r->exp = exp; 2562 r->in_p = in_p; 2563 r->low = low; 2564 r->high = high; 2565 r->strict_overflow_p = strict_overflow_p; 2566 } 2567 } 2568 2569 /* Comparison function for qsort. Sort entries 2570 without SSA_NAME exp first, then with SSA_NAMEs sorted 2571 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs 2572 by increasing ->low and if ->low is the same, by increasing 2573 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE 2574 maximum. */ 2575 2576 static int 2577 range_entry_cmp (const void *a, const void *b) 2578 { 2579 const struct range_entry *p = (const struct range_entry *) a; 2580 const struct range_entry *q = (const struct range_entry *) b; 2581 2582 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) 2583 { 2584 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 2585 { 2586 /* Group range_entries for the same SSA_NAME together. */ 2587 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) 2588 return -1; 2589 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) 2590 return 1; 2591 /* If ->low is different, NULL low goes first, then by 2592 ascending low. */ 2593 if (p->low != NULL_TREE) 2594 { 2595 if (q->low != NULL_TREE) 2596 { 2597 tree tem = fold_binary (LT_EXPR, boolean_type_node, 2598 p->low, q->low); 2599 if (tem && integer_onep (tem)) 2600 return -1; 2601 tem = fold_binary (GT_EXPR, boolean_type_node, 2602 p->low, q->low); 2603 if (tem && integer_onep (tem)) 2604 return 1; 2605 } 2606 else 2607 return 1; 2608 } 2609 else if (q->low != NULL_TREE) 2610 return -1; 2611 /* If ->high is different, NULL high goes last, before that by 2612 ascending high. */ 2613 if (p->high != NULL_TREE) 2614 { 2615 if (q->high != NULL_TREE) 2616 { 2617 tree tem = fold_binary (LT_EXPR, boolean_type_node, 2618 p->high, q->high); 2619 if (tem && integer_onep (tem)) 2620 return -1; 2621 tem = fold_binary (GT_EXPR, boolean_type_node, 2622 p->high, q->high); 2623 if (tem && integer_onep (tem)) 2624 return 1; 2625 } 2626 else 2627 return -1; 2628 } 2629 else if (q->high != NULL_TREE) 2630 return 1; 2631 /* If both ranges are the same, sort below by ascending idx. 
*/ 2632 } 2633 else 2634 return 1; 2635 } 2636 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 2637 return -1; 2638 2639 if (p->idx < q->idx) 2640 return -1; 2641 else 2642 { 2643 gcc_checking_assert (p->idx > q->idx); 2644 return 1; 2645 } 2646 } 2647 2648 /* Helper function for update_range_test. Force EXPR into an SSA_NAME, 2649 insert needed statements BEFORE or after GSI. */ 2650 2651 static tree 2652 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before) 2653 { 2654 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING; 2655 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m); 2656 if (TREE_CODE (ret) != SSA_NAME) 2657 { 2658 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret); 2659 if (before) 2660 gsi_insert_before (gsi, g, GSI_SAME_STMT); 2661 else 2662 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING); 2663 ret = gimple_assign_lhs (g); 2664 } 2665 return ret; 2666 } 2667 2668 /* Helper routine of optimize_range_test. 2669 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for 2670 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, 2671 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE 2672 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to 2673 an array of COUNT pointers to other ranges. Return 2674 true if the range merge has been successful. 2675 If OPCODE is ERROR_MARK, this is called from within 2676 maybe_optimize_range_tests and is performing inter-bb range optimization. 2677 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in 2678 oe->rank. */ 2679 2680 static bool 2681 update_range_test (struct range_entry *range, struct range_entry *otherrange, 2682 struct range_entry **otherrangep, 2683 unsigned int count, enum tree_code opcode, 2684 vec<operand_entry *> *ops, tree exp, gimple_seq seq, 2685 bool in_p, tree low, tree high, bool strict_overflow_p) 2686 { 2687 operand_entry *oe = (*ops)[range->idx]; 2688 tree op = oe->op; 2689 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) 2690 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 2691 location_t loc = gimple_location (stmt); 2692 tree optype = op ? TREE_TYPE (op) : boolean_type_node; 2693 tree tem = build_range_check (loc, optype, unshare_expr (exp), 2694 in_p, low, high); 2695 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 2696 gimple_stmt_iterator gsi; 2697 unsigned int i, uid; 2698 2699 if (tem == NULL_TREE) 2700 return false; 2701 2702 /* If op is default def SSA_NAME, there is no place to insert the 2703 new comparison. Give up, unless we can use OP itself as the 2704 range test. 
*/ 2705 if (op && SSA_NAME_IS_DEFAULT_DEF (op)) 2706 { 2707 if (op == range->exp 2708 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype)) 2709 || TREE_CODE (optype) == BOOLEAN_TYPE) 2710 && (op == tem 2711 || (TREE_CODE (tem) == EQ_EXPR 2712 && TREE_OPERAND (tem, 0) == op 2713 && integer_onep (TREE_OPERAND (tem, 1)))) 2714 && opcode != BIT_IOR_EXPR 2715 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR)) 2716 { 2717 stmt = NULL; 2718 tem = op; 2719 } 2720 else 2721 return false; 2722 } 2723 2724 if (strict_overflow_p && issue_strict_overflow_warning (wc)) 2725 warning_at (loc, OPT_Wstrict_overflow, 2726 "assuming signed overflow does not occur " 2727 "when simplifying range test"); 2728 2729 if (dump_file && (dump_flags & TDF_DETAILS)) 2730 { 2731 struct range_entry *r; 2732 fprintf (dump_file, "Optimizing range tests "); 2733 print_generic_expr (dump_file, range->exp); 2734 fprintf (dump_file, " %c[", range->in_p ? '+' : '-'); 2735 print_generic_expr (dump_file, range->low); 2736 fprintf (dump_file, ", "); 2737 print_generic_expr (dump_file, range->high); 2738 fprintf (dump_file, "]"); 2739 for (i = 0; i < count; i++) 2740 { 2741 if (otherrange) 2742 r = otherrange + i; 2743 else 2744 r = otherrangep[i]; 2745 if (r->exp 2746 && r->exp != range->exp 2747 && TREE_CODE (r->exp) == SSA_NAME) 2748 { 2749 fprintf (dump_file, " and "); 2750 print_generic_expr (dump_file, r->exp); 2751 } 2752 else 2753 fprintf (dump_file, " and"); 2754 fprintf (dump_file, " %c[", r->in_p ? '+' : '-'); 2755 print_generic_expr (dump_file, r->low); 2756 fprintf (dump_file, ", "); 2757 print_generic_expr (dump_file, r->high); 2758 fprintf (dump_file, "]"); 2759 } 2760 fprintf (dump_file, "\n into "); 2761 print_generic_expr (dump_file, tem); 2762 fprintf (dump_file, "\n"); 2763 } 2764 2765 if (opcode == BIT_IOR_EXPR 2766 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 2767 tem = invert_truthvalue_loc (loc, tem); 2768 2769 tem = fold_convert_loc (loc, optype, tem); 2770 if (stmt) 2771 { 2772 gsi = gsi_for_stmt (stmt); 2773 uid = gimple_uid (stmt); 2774 } 2775 else 2776 { 2777 gsi = gsi_none (); 2778 uid = 0; 2779 } 2780 if (stmt == NULL) 2781 gcc_checking_assert (tem == op); 2782 /* In rare cases range->exp can be equal to lhs of stmt. 2783 In that case we have to insert after the stmt rather then before 2784 it. If stmt is a PHI, insert it at the start of the basic block. 
*/ 2785 else if (op != range->exp) 2786 { 2787 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 2788 tem = force_into_ssa_name (&gsi, tem, true); 2789 gsi_prev (&gsi); 2790 } 2791 else if (gimple_code (stmt) != GIMPLE_PHI) 2792 { 2793 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); 2794 tem = force_into_ssa_name (&gsi, tem, false); 2795 } 2796 else 2797 { 2798 gsi = gsi_after_labels (gimple_bb (stmt)); 2799 if (!gsi_end_p (gsi)) 2800 uid = gimple_uid (gsi_stmt (gsi)); 2801 else 2802 { 2803 gsi = gsi_start_bb (gimple_bb (stmt)); 2804 uid = 1; 2805 while (!gsi_end_p (gsi)) 2806 { 2807 uid = gimple_uid (gsi_stmt (gsi)); 2808 gsi_next (&gsi); 2809 } 2810 } 2811 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 2812 tem = force_into_ssa_name (&gsi, tem, true); 2813 if (gsi_end_p (gsi)) 2814 gsi = gsi_last_bb (gimple_bb (stmt)); 2815 else 2816 gsi_prev (&gsi); 2817 } 2818 for (; !gsi_end_p (gsi); gsi_prev (&gsi)) 2819 if (gimple_uid (gsi_stmt (gsi))) 2820 break; 2821 else 2822 gimple_set_uid (gsi_stmt (gsi), uid); 2823 2824 oe->op = tem; 2825 range->exp = exp; 2826 range->low = low; 2827 range->high = high; 2828 range->in_p = in_p; 2829 range->strict_overflow_p = false; 2830 2831 for (i = 0; i < count; i++) 2832 { 2833 if (otherrange) 2834 range = otherrange + i; 2835 else 2836 range = otherrangep[i]; 2837 oe = (*ops)[range->idx]; 2838 /* Now change all the other range test immediate uses, so that 2839 those tests will be optimized away. */ 2840 if (opcode == ERROR_MARK) 2841 { 2842 if (oe->op) 2843 oe->op = build_int_cst (TREE_TYPE (oe->op), 2844 oe->rank == BIT_IOR_EXPR ? 0 : 1); 2845 else 2846 oe->op = (oe->rank == BIT_IOR_EXPR 2847 ? boolean_false_node : boolean_true_node); 2848 } 2849 else 2850 oe->op = error_mark_node; 2851 range->exp = NULL_TREE; 2852 range->low = NULL_TREE; 2853 range->high = NULL_TREE; 2854 } 2855 return true; 2856 } 2857 2858 /* Optimize X == CST1 || X == CST2 2859 if popcount (CST1 ^ CST2) == 1 into 2860 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). 2861 Similarly for ranges. E.g. 2862 X != 2 && X != 3 && X != 10 && X != 11 2863 will be transformed by the previous optimization into 2864 !((X - 2U) <= 1U || (X - 10U) <= 1U) 2865 and this loop can transform that into 2866 !(((X & ~8) - 2U) <= 1U). */ 2867 2868 static bool 2869 optimize_range_tests_xor (enum tree_code opcode, tree type, 2870 tree lowi, tree lowj, tree highi, tree highj, 2871 vec<operand_entry *> *ops, 2872 struct range_entry *rangei, 2873 struct range_entry *rangej) 2874 { 2875 tree lowxor, highxor, tem, exp; 2876 /* Check lowi ^ lowj == highi ^ highj and 2877 popcount (lowi ^ lowj) == 1. 
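     E.g. (hypothetical values) for  X == 2 || X == 6  we have
     lowi == highi == 2 and lowj == highj == 6, so lowxor == highxor == 4,
     a power of two, and the pair is replaced by  (X & ~4) == 2.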
*/ 2878 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); 2879 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) 2880 return false; 2881 if (!integer_pow2p (lowxor)) 2882 return false; 2883 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); 2884 if (!tree_int_cst_equal (lowxor, highxor)) 2885 return false; 2886 2887 exp = rangei->exp; 2888 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type)); 2889 int prec = GET_MODE_PRECISION (mode); 2890 if (TYPE_PRECISION (type) < prec 2891 || (wi::to_wide (TYPE_MIN_VALUE (type)) 2892 != wi::min_value (prec, TYPE_SIGN (type))) 2893 || (wi::to_wide (TYPE_MAX_VALUE (type)) 2894 != wi::max_value (prec, TYPE_SIGN (type)))) 2895 { 2896 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type)); 2897 exp = fold_convert (type, exp); 2898 lowxor = fold_convert (type, lowxor); 2899 lowi = fold_convert (type, lowi); 2900 highi = fold_convert (type, highi); 2901 } 2902 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); 2903 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem); 2904 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); 2905 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); 2906 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp, 2907 NULL, rangei->in_p, lowj, highj, 2908 rangei->strict_overflow_p 2909 || rangej->strict_overflow_p)) 2910 return true; 2911 return false; 2912 } 2913 2914 /* Optimize X == CST1 || X == CST2 2915 if popcount (CST2 - CST1) == 1 into 2916 ((X - CST1) & ~(CST2 - CST1)) == 0. 2917 Similarly for ranges. E.g. 2918 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46 2919 || X == 75 || X == 45 2920 will be transformed by the previous optimization into 2921 (X - 43U) <= 3U || (X - 75U) <= 3U 2922 and this loop can transform that into 2923 ((X - 43U) & ~(75U - 43U)) <= 3U. */ 2924 static bool 2925 optimize_range_tests_diff (enum tree_code opcode, tree type, 2926 tree lowi, tree lowj, tree highi, tree highj, 2927 vec<operand_entry *> *ops, 2928 struct range_entry *rangei, 2929 struct range_entry *rangej) 2930 { 2931 tree tem1, tem2, mask; 2932 /* Check highi - lowi == highj - lowj. */ 2933 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi); 2934 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 2935 return false; 2936 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj); 2937 if (!tree_int_cst_equal (tem1, tem2)) 2938 return false; 2939 /* Check popcount (lowj - lowi) == 1. 
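     E.g. (hypothetical values) for  X == 43 || X == 75  we have
     highi - lowi == highj - lowj == 0 and lowj - lowi == 32, a power of
     two, so the pair is replaced by  ((X - 43U) & ~32U) == 0.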
*/ 2940 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi); 2941 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 2942 return false; 2943 if (!integer_pow2p (tem1)) 2944 return false; 2945 2946 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type)); 2947 int prec = GET_MODE_PRECISION (mode); 2948 if (TYPE_PRECISION (type) < prec 2949 || (wi::to_wide (TYPE_MIN_VALUE (type)) 2950 != wi::min_value (prec, TYPE_SIGN (type))) 2951 || (wi::to_wide (TYPE_MAX_VALUE (type)) 2952 != wi::max_value (prec, TYPE_SIGN (type)))) 2953 type = build_nonstandard_integer_type (prec, 1); 2954 else 2955 type = unsigned_type_for (type); 2956 tem1 = fold_convert (type, tem1); 2957 tem2 = fold_convert (type, tem2); 2958 lowi = fold_convert (type, lowi); 2959 mask = fold_build1 (BIT_NOT_EXPR, type, tem1); 2960 tem1 = fold_build2 (MINUS_EXPR, type, 2961 fold_convert (type, rangei->exp), lowi); 2962 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask); 2963 lowj = build_int_cst (type, 0); 2964 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1, 2965 NULL, rangei->in_p, lowj, tem2, 2966 rangei->strict_overflow_p 2967 || rangej->strict_overflow_p)) 2968 return true; 2969 return false; 2970 } 2971 2972 /* It does some common checks for function optimize_range_tests_xor and 2973 optimize_range_tests_diff. 2974 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor. 2975 Else it calls optimize_range_tests_diff. */ 2976 2977 static bool 2978 optimize_range_tests_1 (enum tree_code opcode, int first, int length, 2979 bool optimize_xor, vec<operand_entry *> *ops, 2980 struct range_entry *ranges) 2981 { 2982 int i, j; 2983 bool any_changes = false; 2984 for (i = first; i < length; i++) 2985 { 2986 tree lowi, highi, lowj, highj, type, tem; 2987 2988 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 2989 continue; 2990 type = TREE_TYPE (ranges[i].exp); 2991 if (!INTEGRAL_TYPE_P (type)) 2992 continue; 2993 lowi = ranges[i].low; 2994 if (lowi == NULL_TREE) 2995 lowi = TYPE_MIN_VALUE (type); 2996 highi = ranges[i].high; 2997 if (highi == NULL_TREE) 2998 continue; 2999 for (j = i + 1; j < length && j < i + 64; j++) 3000 { 3001 bool changes; 3002 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p) 3003 continue; 3004 lowj = ranges[j].low; 3005 if (lowj == NULL_TREE) 3006 continue; 3007 highj = ranges[j].high; 3008 if (highj == NULL_TREE) 3009 highj = TYPE_MAX_VALUE (type); 3010 /* Check lowj > highi. */ 3011 tem = fold_binary (GT_EXPR, boolean_type_node, 3012 lowj, highi); 3013 if (tem == NULL_TREE || !integer_onep (tem)) 3014 continue; 3015 if (optimize_xor) 3016 changes = optimize_range_tests_xor (opcode, type, lowi, lowj, 3017 highi, highj, ops, 3018 ranges + i, ranges + j); 3019 else 3020 changes = optimize_range_tests_diff (opcode, type, lowi, lowj, 3021 highi, highj, ops, 3022 ranges + i, ranges + j); 3023 if (changes) 3024 { 3025 any_changes = true; 3026 break; 3027 } 3028 } 3029 } 3030 return any_changes; 3031 } 3032 3033 /* Helper function of optimize_range_tests_to_bit_test. Handle a single 3034 range, EXP, LOW, HIGH, compute bit mask of bits to test and return 3035 EXP on success, NULL otherwise. 
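   For instance (hypothetical values), with PREC == 64, TOTALLOW == LOW == 43
   and HIGH == 46 the computed *MASK has bits 0..3 set; a second call with
   TOTALLOW == 43, LOW == 75, HIGH == 78 yields a mask with bits 32..35 set,
   so both ranges can later be tested with a single AND against one 64-bit
   constant.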
*/ 3036 3037 static tree 3038 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high, 3039 wide_int *mask, tree *totallowp) 3040 { 3041 tree tem = int_const_binop (MINUS_EXPR, high, low); 3042 if (tem == NULL_TREE 3043 || TREE_CODE (tem) != INTEGER_CST 3044 || TREE_OVERFLOW (tem) 3045 || tree_int_cst_sgn (tem) == -1 3046 || compare_tree_int (tem, prec) != -1) 3047 return NULL_TREE; 3048 3049 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1; 3050 *mask = wi::shifted_mask (0, max, false, prec); 3051 if (TREE_CODE (exp) == BIT_AND_EXPR 3052 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) 3053 { 3054 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1)); 3055 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp))); 3056 if (wi::popcount (msk) == 1 3057 && wi::ltu_p (msk, prec - max)) 3058 { 3059 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec); 3060 max += msk.to_uhwi (); 3061 exp = TREE_OPERAND (exp, 0); 3062 if (integer_zerop (low) 3063 && TREE_CODE (exp) == PLUS_EXPR 3064 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) 3065 { 3066 tree ret = TREE_OPERAND (exp, 0); 3067 STRIP_NOPS (ret); 3068 widest_int bias 3069 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)), 3070 TYPE_PRECISION (TREE_TYPE (low)))); 3071 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias); 3072 if (totallowp) 3073 { 3074 *totallowp = tbias; 3075 return ret; 3076 } 3077 else if (!tree_int_cst_lt (totallow, tbias)) 3078 return NULL_TREE; 3079 bias = wi::to_widest (tbias); 3080 bias -= wi::to_widest (totallow); 3081 if (bias >= 0 && bias < prec - max) 3082 { 3083 *mask = wi::lshift (*mask, bias); 3084 return ret; 3085 } 3086 } 3087 } 3088 } 3089 if (totallowp) 3090 return exp; 3091 if (!tree_int_cst_lt (totallow, low)) 3092 return exp; 3093 tem = int_const_binop (MINUS_EXPR, low, totallow); 3094 if (tem == NULL_TREE 3095 || TREE_CODE (tem) != INTEGER_CST 3096 || TREE_OVERFLOW (tem) 3097 || compare_tree_int (tem, prec - max) == 1) 3098 return NULL_TREE; 3099 3100 *mask = wi::lshift (*mask, wi::to_widest (tem)); 3101 return exp; 3102 } 3103 3104 /* Attempt to optimize small range tests using bit test. 3105 E.g. 
3106 X != 43 && X != 76 && X != 44 && X != 78 && X != 49 3107 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82 3108 has been by earlier optimizations optimized into: 3109 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82 3110 As all the 43 through 82 range is less than 64 numbers, 3111 for 64-bit word targets optimize that into: 3112 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */ 3113 3114 static bool 3115 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length, 3116 vec<operand_entry *> *ops, 3117 struct range_entry *ranges) 3118 { 3119 int i, j; 3120 bool any_changes = false; 3121 int prec = GET_MODE_BITSIZE (word_mode); 3122 auto_vec<struct range_entry *, 64> candidates; 3123 3124 for (i = first; i < length - 2; i++) 3125 { 3126 tree lowi, highi, lowj, highj, type; 3127 3128 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 3129 continue; 3130 type = TREE_TYPE (ranges[i].exp); 3131 if (!INTEGRAL_TYPE_P (type)) 3132 continue; 3133 lowi = ranges[i].low; 3134 if (lowi == NULL_TREE) 3135 lowi = TYPE_MIN_VALUE (type); 3136 highi = ranges[i].high; 3137 if (highi == NULL_TREE) 3138 continue; 3139 wide_int mask; 3140 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi, 3141 highi, &mask, &lowi); 3142 if (exp == NULL_TREE) 3143 continue; 3144 bool strict_overflow_p = ranges[i].strict_overflow_p; 3145 candidates.truncate (0); 3146 int end = MIN (i + 64, length); 3147 for (j = i + 1; j < end; j++) 3148 { 3149 tree exp2; 3150 if (ranges[j].exp == NULL_TREE || ranges[j].in_p) 3151 continue; 3152 if (ranges[j].exp == exp) 3153 ; 3154 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR) 3155 { 3156 exp2 = TREE_OPERAND (ranges[j].exp, 0); 3157 if (exp2 == exp) 3158 ; 3159 else if (TREE_CODE (exp2) == PLUS_EXPR) 3160 { 3161 exp2 = TREE_OPERAND (exp2, 0); 3162 STRIP_NOPS (exp2); 3163 if (exp2 != exp) 3164 continue; 3165 } 3166 else 3167 continue; 3168 } 3169 else 3170 continue; 3171 lowj = ranges[j].low; 3172 if (lowj == NULL_TREE) 3173 continue; 3174 highj = ranges[j].high; 3175 if (highj == NULL_TREE) 3176 highj = TYPE_MAX_VALUE (type); 3177 wide_int mask2; 3178 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj, 3179 highj, &mask2, NULL); 3180 if (exp2 != exp) 3181 continue; 3182 mask |= mask2; 3183 strict_overflow_p |= ranges[j].strict_overflow_p; 3184 candidates.safe_push (&ranges[j]); 3185 } 3186 3187 /* If we need otherwise 3 or more comparisons, use a bit test. */ 3188 if (candidates.length () >= 2) 3189 { 3190 tree high = wide_int_to_tree (TREE_TYPE (lowi), 3191 wi::to_widest (lowi) 3192 + prec - 1 - wi::clz (mask)); 3193 operand_entry *oe = (*ops)[ranges[i].idx]; 3194 tree op = oe->op; 3195 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) 3196 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 3197 location_t loc = gimple_location (stmt); 3198 tree optype = op ? TREE_TYPE (op) : boolean_type_node; 3199 3200 /* See if it isn't cheaper to pretend the minimum value of the 3201 range is 0, if maximum value is small enough. 3202 We can avoid then subtraction of the minimum value, but the 3203 mask constant could be perhaps more expensive. 
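      E.g. (hypothetical) with lowi == 1 and high well below the word
      precision, testing  (1 << X) & (mask << 1)  instead of
      (1 << (X - 1)) & mask  saves the subtraction; the RTX cost
      comparison below decides whether the more expensive shifted mask
      constant is worth it.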
*/ 3204 if (compare_tree_int (lowi, 0) > 0 3205 && compare_tree_int (high, prec) < 0) 3206 { 3207 int cost_diff; 3208 HOST_WIDE_INT m = tree_to_uhwi (lowi); 3209 rtx reg = gen_raw_REG (word_mode, 10000); 3210 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt)); 3211 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg, 3212 GEN_INT (-m)), speed_p); 3213 rtx r = immed_wide_int_const (mask, word_mode); 3214 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r), 3215 word_mode, speed_p); 3216 r = immed_wide_int_const (wi::lshift (mask, m), word_mode); 3217 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r), 3218 word_mode, speed_p); 3219 if (cost_diff > 0) 3220 { 3221 mask = wi::lshift (mask, m); 3222 lowi = build_zero_cst (TREE_TYPE (lowi)); 3223 } 3224 } 3225 3226 tree tem = build_range_check (loc, optype, unshare_expr (exp), 3227 false, lowi, high); 3228 if (tem == NULL_TREE || is_gimple_val (tem)) 3229 continue; 3230 tree etype = unsigned_type_for (TREE_TYPE (exp)); 3231 exp = fold_build2_loc (loc, MINUS_EXPR, etype, 3232 fold_convert_loc (loc, etype, exp), 3233 fold_convert_loc (loc, etype, lowi)); 3234 exp = fold_convert_loc (loc, integer_type_node, exp); 3235 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1); 3236 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type, 3237 build_int_cst (word_type, 1), exp); 3238 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp, 3239 wide_int_to_tree (word_type, mask)); 3240 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp, 3241 build_zero_cst (word_type)); 3242 if (is_gimple_val (exp)) 3243 continue; 3244 3245 /* The shift might have undefined behavior if TEM is true, 3246 but reassociate_bb isn't prepared to have basic blocks 3247 split when it is running. So, temporarily emit a code 3248 with BIT_IOR_EXPR instead of &&, and fix it up in 3249 branch_fixup. */ 3250 gimple_seq seq; 3251 tem = force_gimple_operand (tem, &seq, true, NULL_TREE); 3252 gcc_assert (TREE_CODE (tem) == SSA_NAME); 3253 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true); 3254 gimple_seq seq2; 3255 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE); 3256 gimple_seq_add_seq_without_update (&seq, seq2); 3257 gcc_assert (TREE_CODE (exp) == SSA_NAME); 3258 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true); 3259 gimple *g = gimple_build_assign (make_ssa_name (optype), 3260 BIT_IOR_EXPR, tem, exp); 3261 gimple_set_location (g, loc); 3262 gimple_seq_add_stmt_without_update (&seq, g); 3263 exp = gimple_assign_lhs (g); 3264 tree val = build_zero_cst (optype); 3265 if (update_range_test (&ranges[i], NULL, candidates.address (), 3266 candidates.length (), opcode, ops, exp, 3267 seq, false, val, val, strict_overflow_p)) 3268 { 3269 any_changes = true; 3270 reassoc_branch_fixups.safe_push (tem); 3271 } 3272 else 3273 gimple_seq_discard (seq); 3274 } 3275 } 3276 return any_changes; 3277 } 3278 3279 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0 3280 and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1. 
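   A minimal sketch of the statements this builds for three same-typed
   operands x, y and z (SSA names invented):

     _1 = x | y;
     _2 = _1 | z;

   (the all-ones variant uses & instead); the single surviving range test
   is then performed on _2 and the other operand entries are neutralized.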
*/ 3281 3282 static bool 3283 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length, 3284 vec<operand_entry *> *ops, 3285 struct range_entry *ranges) 3286 { 3287 int i; 3288 unsigned int b; 3289 bool any_changes = false; 3290 auto_vec<int, 128> buckets; 3291 auto_vec<int, 32> chains; 3292 auto_vec<struct range_entry *, 32> candidates; 3293 3294 for (i = first; i < length; i++) 3295 { 3296 if (ranges[i].exp == NULL_TREE 3297 || TREE_CODE (ranges[i].exp) != SSA_NAME 3298 || !ranges[i].in_p 3299 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1 3300 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE 3301 || ranges[i].low == NULL_TREE 3302 || ranges[i].low != ranges[i].high) 3303 continue; 3304 3305 bool zero_p = integer_zerop (ranges[i].low); 3306 if (!zero_p && !integer_all_onesp (ranges[i].low)) 3307 continue; 3308 3309 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p; 3310 if (buckets.length () <= b) 3311 buckets.safe_grow_cleared (b + 1); 3312 if (chains.length () <= (unsigned) i) 3313 chains.safe_grow (i + 1); 3314 chains[i] = buckets[b]; 3315 buckets[b] = i + 1; 3316 } 3317 3318 FOR_EACH_VEC_ELT (buckets, b, i) 3319 if (i && chains[i - 1]) 3320 { 3321 int j, k = i; 3322 for (j = chains[i - 1]; j; j = chains[j - 1]) 3323 { 3324 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp); 3325 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp); 3326 if (reassoc_stmt_dominates_stmt_p (gk, gj)) 3327 k = j; 3328 } 3329 tree type1 = TREE_TYPE (ranges[k - 1].exp); 3330 tree type2 = NULL_TREE; 3331 bool strict_overflow_p = false; 3332 candidates.truncate (0); 3333 for (j = i; j; j = chains[j - 1]) 3334 { 3335 tree type = TREE_TYPE (ranges[j - 1].exp); 3336 strict_overflow_p |= ranges[j - 1].strict_overflow_p; 3337 if (j == k 3338 || useless_type_conversion_p (type1, type)) 3339 ; 3340 else if (type2 == NULL_TREE 3341 || useless_type_conversion_p (type2, type)) 3342 { 3343 if (type2 == NULL_TREE) 3344 type2 = type; 3345 candidates.safe_push (&ranges[j - 1]); 3346 } 3347 } 3348 unsigned l = candidates.length (); 3349 for (j = i; j; j = chains[j - 1]) 3350 { 3351 tree type = TREE_TYPE (ranges[j - 1].exp); 3352 if (j == k) 3353 continue; 3354 if (useless_type_conversion_p (type1, type)) 3355 ; 3356 else if (type2 == NULL_TREE 3357 || useless_type_conversion_p (type2, type)) 3358 continue; 3359 candidates.safe_push (&ranges[j - 1]); 3360 } 3361 gimple_seq seq = NULL; 3362 tree op = NULL_TREE; 3363 unsigned int id; 3364 struct range_entry *r; 3365 candidates.safe_push (&ranges[k - 1]); 3366 FOR_EACH_VEC_ELT (candidates, id, r) 3367 { 3368 gimple *g; 3369 if (id == 0) 3370 { 3371 op = r->exp; 3372 continue; 3373 } 3374 if (id == l) 3375 { 3376 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op); 3377 gimple_seq_add_stmt_without_update (&seq, g); 3378 op = gimple_assign_lhs (g); 3379 } 3380 tree type = TREE_TYPE (r->exp); 3381 tree exp = r->exp; 3382 if (id >= l && !useless_type_conversion_p (type1, type)) 3383 { 3384 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp); 3385 gimple_seq_add_stmt_without_update (&seq, g); 3386 exp = gimple_assign_lhs (g); 3387 } 3388 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2), 3389 (b & 1) ? 
BIT_AND_EXPR : BIT_IOR_EXPR, 3390 op, exp); 3391 gimple_seq_add_stmt_without_update (&seq, g); 3392 op = gimple_assign_lhs (g); 3393 } 3394 candidates.pop (); 3395 if (update_range_test (&ranges[k - 1], NULL, candidates.address (), 3396 candidates.length (), opcode, ops, op, 3397 seq, true, ranges[k - 1].low, 3398 ranges[k - 1].low, strict_overflow_p)) 3399 any_changes = true; 3400 else 3401 gimple_seq_discard (seq); 3402 } 3403 3404 return any_changes; 3405 } 3406 3407 /* Attempt to optimize for signed a and b where b is known to be >= 0: 3408 a >= 0 && a < b into (unsigned) a < (unsigned) b 3409 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */ 3410 3411 static bool 3412 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length, 3413 vec<operand_entry *> *ops, 3414 struct range_entry *ranges, 3415 basic_block first_bb) 3416 { 3417 int i; 3418 bool any_changes = false; 3419 hash_map<tree, int> *map = NULL; 3420 3421 for (i = first; i < length; i++) 3422 { 3423 if (ranges[i].exp == NULL_TREE 3424 || TREE_CODE (ranges[i].exp) != SSA_NAME 3425 || !ranges[i].in_p) 3426 continue; 3427 3428 tree type = TREE_TYPE (ranges[i].exp); 3429 if (!INTEGRAL_TYPE_P (type) 3430 || TYPE_UNSIGNED (type) 3431 || ranges[i].low == NULL_TREE 3432 || !integer_zerop (ranges[i].low) 3433 || ranges[i].high != NULL_TREE) 3434 continue; 3435 /* EXP >= 0 here. */ 3436 if (map == NULL) 3437 map = new hash_map <tree, int>; 3438 map->put (ranges[i].exp, i); 3439 } 3440 3441 if (map == NULL) 3442 return false; 3443 3444 for (i = 0; i < length; i++) 3445 { 3446 bool in_p = ranges[i].in_p; 3447 if (ranges[i].low == NULL_TREE 3448 || ranges[i].high == NULL_TREE) 3449 continue; 3450 if (!integer_zerop (ranges[i].low) 3451 || !integer_zerop (ranges[i].high)) 3452 { 3453 if (ranges[i].exp 3454 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1 3455 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp)) 3456 && integer_onep (ranges[i].low) 3457 && integer_onep (ranges[i].high)) 3458 in_p = !in_p; 3459 else 3460 continue; 3461 } 3462 3463 gimple *stmt; 3464 tree_code ccode; 3465 tree rhs1, rhs2; 3466 if (ranges[i].exp) 3467 { 3468 if (TREE_CODE (ranges[i].exp) != SSA_NAME) 3469 continue; 3470 stmt = SSA_NAME_DEF_STMT (ranges[i].exp); 3471 if (!is_gimple_assign (stmt)) 3472 continue; 3473 ccode = gimple_assign_rhs_code (stmt); 3474 rhs1 = gimple_assign_rhs1 (stmt); 3475 rhs2 = gimple_assign_rhs2 (stmt); 3476 } 3477 else 3478 { 3479 operand_entry *oe = (*ops)[ranges[i].idx]; 3480 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 3481 if (gimple_code (stmt) != GIMPLE_COND) 3482 continue; 3483 ccode = gimple_cond_code (stmt); 3484 rhs1 = gimple_cond_lhs (stmt); 3485 rhs2 = gimple_cond_rhs (stmt); 3486 } 3487 3488 if (TREE_CODE (rhs1) != SSA_NAME 3489 || rhs2 == NULL_TREE 3490 || TREE_CODE (rhs2) != SSA_NAME) 3491 continue; 3492 3493 switch (ccode) 3494 { 3495 case GT_EXPR: 3496 case GE_EXPR: 3497 case LT_EXPR: 3498 case LE_EXPR: 3499 break; 3500 default: 3501 continue; 3502 } 3503 if (in_p) 3504 ccode = invert_tree_comparison (ccode, false); 3505 switch (ccode) 3506 { 3507 case GT_EXPR: 3508 case GE_EXPR: 3509 std::swap (rhs1, rhs2); 3510 ccode = swap_tree_comparison (ccode); 3511 break; 3512 case LT_EXPR: 3513 case LE_EXPR: 3514 break; 3515 default: 3516 gcc_unreachable (); 3517 } 3518 3519 int *idx = map->get (rhs1); 3520 if (idx == NULL) 3521 continue; 3522 3523 /* maybe_optimize_range_tests allows statements without side-effects 3524 in the basic blocks as long as they are consumed in the same bb. 
3525 Make sure rhs2's def stmt is not among them, otherwise we can't 3526 use safely get_nonzero_bits on it. E.g. in: 3527 # RANGE [-83, 1] NONZERO 173 3528 # k_32 = PHI <k_47(13), k_12(9)> 3529 ... 3530 if (k_32 >= 0) 3531 goto <bb 5>; [26.46%] 3532 else 3533 goto <bb 9>; [73.54%] 3534 3535 <bb 5> [local count: 140323371]: 3536 # RANGE [0, 1] NONZERO 1 3537 _5 = (int) k_32; 3538 # RANGE [0, 4] NONZERO 4 3539 _21 = _5 << 2; 3540 # RANGE [0, 4] NONZERO 4 3541 iftmp.0_44 = (char) _21; 3542 if (k_32 < iftmp.0_44) 3543 goto <bb 6>; [84.48%] 3544 else 3545 goto <bb 9>; [15.52%] 3546 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that 3547 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44 3548 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute 3549 those stmts even for negative k_32 and the value ranges would be no 3550 longer guaranteed and so the optimization would be invalid. */ 3551 while (opcode == ERROR_MARK) 3552 { 3553 gimple *g = SSA_NAME_DEF_STMT (rhs2); 3554 basic_block bb2 = gimple_bb (g); 3555 if (bb2 3556 && bb2 != first_bb 3557 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb)) 3558 { 3559 /* As an exception, handle a few common cases. */ 3560 if (gimple_assign_cast_p (g) 3561 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g)))) 3562 { 3563 tree op0 = gimple_assign_rhs1 (g); 3564 if (TYPE_UNSIGNED (TREE_TYPE (op0)) 3565 && (TYPE_PRECISION (TREE_TYPE (rhs2)) 3566 > TYPE_PRECISION (TREE_TYPE (op0)))) 3567 /* Zero-extension is always ok. */ 3568 break; 3569 else if (TYPE_PRECISION (TREE_TYPE (rhs2)) 3570 == TYPE_PRECISION (TREE_TYPE (op0)) 3571 && TREE_CODE (op0) == SSA_NAME) 3572 { 3573 /* Cast from signed to unsigned or vice versa. Retry 3574 with the op0 as new rhs2. */ 3575 rhs2 = op0; 3576 continue; 3577 } 3578 } 3579 else if (is_gimple_assign (g) 3580 && gimple_assign_rhs_code (g) == BIT_AND_EXPR 3581 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST 3582 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g)))) 3583 /* Masking with INTEGER_CST with MSB clear is always ok 3584 too. */ 3585 break; 3586 rhs2 = NULL_TREE; 3587 } 3588 break; 3589 } 3590 if (rhs2 == NULL_TREE) 3591 continue; 3592 3593 wide_int nz = get_nonzero_bits (rhs2); 3594 if (wi::neg_p (nz)) 3595 continue; 3596 3597 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0 3598 and RHS2 is known to be RHS2 >= 0. 
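     The statements emitted below then look roughly like (SSA names
     invented, assuming 'int' operands):

       _1 = (unsigned int) a_5;
       _2 = (unsigned int) b_7;
       _3 = _1 < _2;

     replacing the signed  a_5 >= 0 && a_5 < b_7  pair.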
*/ 3599 tree utype = unsigned_type_for (TREE_TYPE (rhs1)); 3600 3601 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 3602 if ((ranges[*idx].strict_overflow_p 3603 || ranges[i].strict_overflow_p) 3604 && issue_strict_overflow_warning (wc)) 3605 warning_at (gimple_location (stmt), OPT_Wstrict_overflow, 3606 "assuming signed overflow does not occur " 3607 "when simplifying range test"); 3608 3609 if (dump_file && (dump_flags & TDF_DETAILS)) 3610 { 3611 struct range_entry *r = &ranges[*idx]; 3612 fprintf (dump_file, "Optimizing range test "); 3613 print_generic_expr (dump_file, r->exp); 3614 fprintf (dump_file, " +["); 3615 print_generic_expr (dump_file, r->low); 3616 fprintf (dump_file, ", "); 3617 print_generic_expr (dump_file, r->high); 3618 fprintf (dump_file, "] and comparison "); 3619 print_generic_expr (dump_file, rhs1); 3620 fprintf (dump_file, " %s ", op_symbol_code (ccode)); 3621 print_generic_expr (dump_file, rhs2); 3622 fprintf (dump_file, "\n into ("); 3623 print_generic_expr (dump_file, utype); 3624 fprintf (dump_file, ") "); 3625 print_generic_expr (dump_file, rhs1); 3626 fprintf (dump_file, " %s (", op_symbol_code (ccode)); 3627 print_generic_expr (dump_file, utype); 3628 fprintf (dump_file, ") "); 3629 print_generic_expr (dump_file, rhs2); 3630 fprintf (dump_file, "\n"); 3631 } 3632 3633 operand_entry *oe = (*ops)[ranges[i].idx]; 3634 ranges[i].in_p = 0; 3635 if (opcode == BIT_IOR_EXPR 3636 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 3637 { 3638 ranges[i].in_p = 1; 3639 ccode = invert_tree_comparison (ccode, false); 3640 } 3641 3642 unsigned int uid = gimple_uid (stmt); 3643 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 3644 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1); 3645 gimple_set_uid (g, uid); 3646 rhs1 = gimple_assign_lhs (g); 3647 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3648 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2))) 3649 { 3650 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2); 3651 gimple_set_uid (g, uid); 3652 rhs2 = gimple_assign_lhs (g); 3653 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3654 } 3655 if (tree_swap_operands_p (rhs1, rhs2)) 3656 { 3657 std::swap (rhs1, rhs2); 3658 ccode = swap_tree_comparison (ccode); 3659 } 3660 if (gimple_code (stmt) == GIMPLE_COND) 3661 { 3662 gcond *c = as_a <gcond *> (stmt); 3663 gimple_cond_set_code (c, ccode); 3664 gimple_cond_set_lhs (c, rhs1); 3665 gimple_cond_set_rhs (c, rhs2); 3666 update_stmt (stmt); 3667 } 3668 else 3669 { 3670 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node; 3671 if (!INTEGRAL_TYPE_P (ctype) 3672 || (TREE_CODE (ctype) != BOOLEAN_TYPE 3673 && TYPE_PRECISION (ctype) != 1)) 3674 ctype = boolean_type_node; 3675 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2); 3676 gimple_set_uid (g, uid); 3677 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3678 if (oe->op && ctype != TREE_TYPE (oe->op)) 3679 { 3680 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)), 3681 NOP_EXPR, gimple_assign_lhs (g)); 3682 gimple_set_uid (g, uid); 3683 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3684 } 3685 ranges[i].exp = gimple_assign_lhs (g); 3686 oe->op = ranges[i].exp; 3687 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp)); 3688 ranges[i].high = ranges[i].low; 3689 } 3690 ranges[i].strict_overflow_p = false; 3691 oe = (*ops)[ranges[*idx].idx]; 3692 /* Now change all the other range test immediate uses, so that 3693 those tests will be optimized away. 
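     That is, the subsumed  a >= 0  test is either dropped from OPS or,
     in the inter-bb (ERROR_MARK) case, replaced by the constant that is
     neutral for its && / || context, so later cleanup removes it.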
*/ 3694 if (opcode == ERROR_MARK) 3695 { 3696 if (oe->op) 3697 oe->op = build_int_cst (TREE_TYPE (oe->op), 3698 oe->rank == BIT_IOR_EXPR ? 0 : 1); 3699 else 3700 oe->op = (oe->rank == BIT_IOR_EXPR 3701 ? boolean_false_node : boolean_true_node); 3702 } 3703 else 3704 oe->op = error_mark_node; 3705 ranges[*idx].exp = NULL_TREE; 3706 ranges[*idx].low = NULL_TREE; 3707 ranges[*idx].high = NULL_TREE; 3708 any_changes = true; 3709 } 3710 3711 delete map; 3712 return any_changes; 3713 } 3714 3715 /* Optimize range tests, similarly how fold_range_test optimizes 3716 it on trees. The tree code for the binary 3717 operation between all the operands is OPCODE. 3718 If OPCODE is ERROR_MARK, optimize_range_tests is called from within 3719 maybe_optimize_range_tests for inter-bb range optimization. 3720 In that case if oe->op is NULL, oe->id is bb->index whose 3721 GIMPLE_COND is && or ||ed into the test, and oe->rank says 3722 the actual opcode. 3723 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */ 3724 3725 static bool 3726 optimize_range_tests (enum tree_code opcode, 3727 vec<operand_entry *> *ops, basic_block first_bb) 3728 { 3729 unsigned int length = ops->length (), i, j, first; 3730 operand_entry *oe; 3731 struct range_entry *ranges; 3732 bool any_changes = false; 3733 3734 if (length == 1) 3735 return false; 3736 3737 ranges = XNEWVEC (struct range_entry, length); 3738 for (i = 0; i < length; i++) 3739 { 3740 oe = (*ops)[i]; 3741 ranges[i].idx = i; 3742 init_range_entry (ranges + i, oe->op, 3743 oe->op 3744 ? NULL 3745 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id))); 3746 /* For | invert it now, we will invert it again before emitting 3747 the optimized expression. */ 3748 if (opcode == BIT_IOR_EXPR 3749 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 3750 ranges[i].in_p = !ranges[i].in_p; 3751 } 3752 3753 qsort (ranges, length, sizeof (*ranges), range_entry_cmp); 3754 for (i = 0; i < length; i++) 3755 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) 3756 break; 3757 3758 /* Try to merge ranges. */ 3759 for (first = i; i < length; i++) 3760 { 3761 tree low = ranges[i].low; 3762 tree high = ranges[i].high; 3763 int in_p = ranges[i].in_p; 3764 bool strict_overflow_p = ranges[i].strict_overflow_p; 3765 int update_fail_count = 0; 3766 3767 for (j = i + 1; j < length; j++) 3768 { 3769 if (ranges[i].exp != ranges[j].exp) 3770 break; 3771 if (!merge_ranges (&in_p, &low, &high, in_p, low, high, 3772 ranges[j].in_p, ranges[j].low, ranges[j].high)) 3773 break; 3774 strict_overflow_p |= ranges[j].strict_overflow_p; 3775 } 3776 3777 if (j == i + 1) 3778 continue; 3779 3780 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1, 3781 opcode, ops, ranges[i].exp, NULL, in_p, 3782 low, high, strict_overflow_p)) 3783 { 3784 i = j - 1; 3785 any_changes = true; 3786 } 3787 /* Avoid quadratic complexity if all merge_ranges calls would succeed, 3788 while update_range_test would fail. 
*/ 3789 else if (update_fail_count == 64) 3790 i = j - 1; 3791 else 3792 ++update_fail_count; 3793 } 3794 3795 any_changes |= optimize_range_tests_1 (opcode, first, length, true, 3796 ops, ranges); 3797 3798 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2) 3799 any_changes |= optimize_range_tests_1 (opcode, first, length, false, 3800 ops, ranges); 3801 if (lshift_cheap_p (optimize_function_for_speed_p (cfun))) 3802 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length, 3803 ops, ranges); 3804 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length, 3805 ops, ranges); 3806 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops, 3807 ranges, first_bb); 3808 3809 if (any_changes && opcode != ERROR_MARK) 3810 { 3811 j = 0; 3812 FOR_EACH_VEC_ELT (*ops, i, oe) 3813 { 3814 if (oe->op == error_mark_node) 3815 continue; 3816 else if (i != j) 3817 (*ops)[j] = oe; 3818 j++; 3819 } 3820 ops->truncate (j); 3821 } 3822 3823 XDELETEVEC (ranges); 3824 return any_changes; 3825 } 3826 3827 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize 3828 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure, 3829 otherwise the comparison code. TYPE is a return value that is set 3830 to type of comparison. */ 3831 3832 static tree_code 3833 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type) 3834 { 3835 if (TREE_CODE (var) != SSA_NAME) 3836 return ERROR_MARK; 3837 3838 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var)); 3839 if (stmt == NULL) 3840 return ERROR_MARK; 3841 3842 /* ??? If we start creating more COND_EXPR, we could perform 3843 this same optimization with them. For now, simplify. */ 3844 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR) 3845 return ERROR_MARK; 3846 3847 tree cond = gimple_assign_rhs1 (stmt); 3848 tree_code cmp = TREE_CODE (cond); 3849 if (TREE_CODE_CLASS (cmp) != tcc_comparison) 3850 return ERROR_MARK; 3851 3852 /* ??? For now, allow only canonical true and false result vectors. 3853 We could expand this to other constants should the need arise, 3854 but at the moment we don't create them. */ 3855 tree t = gimple_assign_rhs2 (stmt); 3856 tree f = gimple_assign_rhs3 (stmt); 3857 bool inv; 3858 if (integer_all_onesp (t)) 3859 inv = false; 3860 else if (integer_all_onesp (f)) 3861 { 3862 cmp = invert_tree_comparison (cmp, false); 3863 inv = true; 3864 } 3865 else 3866 return ERROR_MARK; 3867 if (!integer_zerop (f)) 3868 return ERROR_MARK; 3869 3870 /* Success! */ 3871 if (rets) 3872 *rets = stmt; 3873 if (reti) 3874 *reti = inv; 3875 if (type) 3876 *type = TREE_TYPE (cond); 3877 return cmp; 3878 } 3879 3880 /* Optimize the condition of VEC_COND_EXPRs which have been combined 3881 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). 
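For instance, if two of the combined VEC_COND_EXPRs compare the same operands, say v_1 < w_2 and v_1 <= w_2, maybe_fold_{and,or}_comparisons can merge the two comparisons into a single one; the combined condition is then stored in the first VEC_COND_EXPR and the other operand entry is dropped from the ops vector.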
*/ 3882 3883 static bool 3884 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops) 3885 { 3886 unsigned int length = ops->length (), i, j; 3887 bool any_changes = false; 3888 3889 if (length == 1) 3890 return false; 3891 3892 for (i = 0; i < length; ++i) 3893 { 3894 tree elt0 = (*ops)[i]->op; 3895 3896 gassign *stmt0; 3897 bool invert; 3898 tree type; 3899 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type); 3900 if (cmp0 == ERROR_MARK) 3901 continue; 3902 3903 for (j = i + 1; j < length; ++j) 3904 { 3905 tree &elt1 = (*ops)[j]->op; 3906 3907 gassign *stmt1; 3908 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL); 3909 if (cmp1 == ERROR_MARK) 3910 continue; 3911 3912 tree cond0 = gimple_assign_rhs1 (stmt0); 3913 tree x0 = TREE_OPERAND (cond0, 0); 3914 tree y0 = TREE_OPERAND (cond0, 1); 3915 3916 tree cond1 = gimple_assign_rhs1 (stmt1); 3917 tree x1 = TREE_OPERAND (cond1, 0); 3918 tree y1 = TREE_OPERAND (cond1, 1); 3919 3920 tree comb; 3921 if (opcode == BIT_AND_EXPR) 3922 comb = maybe_fold_and_comparisons (type, cmp0, x0, y0, cmp1, x1, 3923 y1); 3924 else if (opcode == BIT_IOR_EXPR) 3925 comb = maybe_fold_or_comparisons (type, cmp0, x0, y0, cmp1, x1, 3926 y1); 3927 else 3928 gcc_unreachable (); 3929 if (comb == NULL) 3930 continue; 3931 3932 /* Success! */ 3933 if (dump_file && (dump_flags & TDF_DETAILS)) 3934 { 3935 fprintf (dump_file, "Transforming "); 3936 print_generic_expr (dump_file, cond0); 3937 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|'); 3938 print_generic_expr (dump_file, cond1); 3939 fprintf (dump_file, " into "); 3940 print_generic_expr (dump_file, comb); 3941 fputc ('\n', dump_file); 3942 } 3943 3944 gimple_assign_set_rhs1 (stmt0, comb); 3945 if (invert) 3946 std::swap (*gimple_assign_rhs2_ptr (stmt0), 3947 *gimple_assign_rhs3_ptr (stmt0)); 3948 update_stmt (stmt0); 3949 3950 elt1 = error_mark_node; 3951 any_changes = true; 3952 } 3953 } 3954 3955 if (any_changes) 3956 { 3957 operand_entry *oe; 3958 j = 0; 3959 FOR_EACH_VEC_ELT (*ops, i, oe) 3960 { 3961 if (oe->op == error_mark_node) 3962 continue; 3963 else if (i != j) 3964 (*ops)[j] = oe; 3965 j++; 3966 } 3967 ops->truncate (j); 3968 } 3969 3970 return any_changes; 3971 } 3972 3973 /* Return true if STMT is a cast like: 3974 <bb N>: 3975 ... 3976 _123 = (int) _234; 3977 3978 <bb M>: 3979 # _345 = PHI <_123(N), 1(...), 1(...)> 3980 where _234 has bool type, _123 has single use and 3981 bb N has a single successor M. This is commonly used in 3982 the last block of a range test. 3983 3984 Also Return true if STMT is tcc_compare like: 3985 <bb N>: 3986 ... 3987 _234 = a_2(D) == 2; 3988 3989 <bb M>: 3990 # _345 = PHI <_234(N), 1(...), 1(...)> 3991 _346 = (int) _345; 3992 where _234 has booltype, single use and 3993 bb N has a single successor M. This is commonly used in 3994 the last block of a range test. 
*/ 3995 3996 static bool 3997 final_range_test_p (gimple *stmt) 3998 { 3999 basic_block bb, rhs_bb, lhs_bb; 4000 edge e; 4001 tree lhs, rhs; 4002 use_operand_p use_p; 4003 gimple *use_stmt; 4004 4005 if (!gimple_assign_cast_p (stmt) 4006 && (!is_gimple_assign (stmt) 4007 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 4008 != tcc_comparison))) 4009 return false; 4010 bb = gimple_bb (stmt); 4011 if (!single_succ_p (bb)) 4012 return false; 4013 e = single_succ_edge (bb); 4014 if (e->flags & EDGE_COMPLEX) 4015 return false; 4016 4017 lhs = gimple_assign_lhs (stmt); 4018 rhs = gimple_assign_rhs1 (stmt); 4019 if (gimple_assign_cast_p (stmt) 4020 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 4021 || TREE_CODE (rhs) != SSA_NAME 4022 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)) 4023 return false; 4024 4025 if (!gimple_assign_cast_p (stmt) 4026 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)) 4027 return false; 4028 4029 /* Test whether lhs is consumed only by a PHI in the only successor bb. */ 4030 if (!single_imm_use (lhs, &use_p, &use_stmt)) 4031 return false; 4032 4033 if (gimple_code (use_stmt) != GIMPLE_PHI 4034 || gimple_bb (use_stmt) != e->dest) 4035 return false; 4036 4037 /* And that the rhs is defined in the same loop. */ 4038 if (gimple_assign_cast_p (stmt)) 4039 { 4040 if (TREE_CODE (rhs) != SSA_NAME 4041 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs))) 4042 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb)) 4043 return false; 4044 } 4045 else 4046 { 4047 if (TREE_CODE (lhs) != SSA_NAME 4048 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs))) 4049 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb)) 4050 return false; 4051 } 4052 4053 return true; 4054 } 4055 4056 /* Return true if BB is suitable basic block for inter-bb range test 4057 optimization. If BACKWARD is true, BB should be the only predecessor 4058 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine, 4059 or compared with to find a common basic block to which all conditions 4060 branch to if true resp. false. If BACKWARD is false, TEST_BB should 4061 be the only predecessor of BB. */ 4062 4063 static bool 4064 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb, 4065 bool backward) 4066 { 4067 edge_iterator ei, ei2; 4068 edge e, e2; 4069 gimple *stmt; 4070 gphi_iterator gsi; 4071 bool other_edge_seen = false; 4072 bool is_cond; 4073 4074 if (test_bb == bb) 4075 return false; 4076 /* Check last stmt first. */ 4077 stmt = last_stmt (bb); 4078 if (stmt == NULL 4079 || (gimple_code (stmt) != GIMPLE_COND 4080 && (backward || !final_range_test_p (stmt))) 4081 || gimple_visited_p (stmt) 4082 || stmt_could_throw_p (cfun, stmt) 4083 || *other_bb == bb) 4084 return false; 4085 is_cond = gimple_code (stmt) == GIMPLE_COND; 4086 if (is_cond) 4087 { 4088 /* If last stmt is GIMPLE_COND, verify that one of the succ edges 4089 goes to the next bb (if BACKWARD, it is TEST_BB), and the other 4090 to *OTHER_BB (if not set yet, try to find it out). 
*/ 4091 if (EDGE_COUNT (bb->succs) != 2) 4092 return false; 4093 FOR_EACH_EDGE (e, ei, bb->succs) 4094 { 4095 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 4096 return false; 4097 if (e->dest == test_bb) 4098 { 4099 if (backward) 4100 continue; 4101 else 4102 return false; 4103 } 4104 if (e->dest == bb) 4105 return false; 4106 if (*other_bb == NULL) 4107 { 4108 FOR_EACH_EDGE (e2, ei2, test_bb->succs) 4109 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 4110 return false; 4111 else if (e->dest == e2->dest) 4112 *other_bb = e->dest; 4113 if (*other_bb == NULL) 4114 return false; 4115 } 4116 if (e->dest == *other_bb) 4117 other_edge_seen = true; 4118 else if (backward) 4119 return false; 4120 } 4121 if (*other_bb == NULL || !other_edge_seen) 4122 return false; 4123 } 4124 else if (single_succ (bb) != *other_bb) 4125 return false; 4126 4127 /* Now check all PHIs of *OTHER_BB. */ 4128 e = find_edge (bb, *other_bb); 4129 e2 = find_edge (test_bb, *other_bb); 4130 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) 4131 { 4132 gphi *phi = gsi.phi (); 4133 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments 4134 corresponding to BB and TEST_BB predecessor must be the same. */ 4135 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx), 4136 gimple_phi_arg_def (phi, e2->dest_idx), 0)) 4137 { 4138 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND, 4139 one of the PHIs should have the lhs of the last stmt in 4140 that block as PHI arg and that PHI should have 0 or 1 4141 corresponding to it in all other range test basic blocks 4142 considered. */ 4143 if (!is_cond) 4144 { 4145 if (gimple_phi_arg_def (phi, e->dest_idx) 4146 == gimple_assign_lhs (stmt) 4147 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx)) 4148 || integer_onep (gimple_phi_arg_def (phi, 4149 e2->dest_idx)))) 4150 continue; 4151 } 4152 else 4153 { 4154 gimple *test_last = last_stmt (test_bb); 4155 if (gimple_code (test_last) != GIMPLE_COND 4156 && gimple_phi_arg_def (phi, e2->dest_idx) 4157 == gimple_assign_lhs (test_last) 4158 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx)) 4159 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx)))) 4160 continue; 4161 } 4162 4163 return false; 4164 } 4165 } 4166 return true; 4167 } 4168 4169 /* Return true if BB doesn't have side-effects that would disallow 4170 range test optimization, all SSA_NAMEs set in the bb are consumed 4171 in the bb and there are no PHIs. 
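For instance, a block containing only simple SSA assignments such as _5 = a_1(D) & 7; _6 = _5 == 3; whose results are not used outside the block qualifies, while a block containing a call, a store or a statement whose rhs could trap does not.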
*/ 4172 4173 static bool 4174 no_side_effect_bb (basic_block bb) 4175 { 4176 gimple_stmt_iterator gsi; 4177 gimple *last; 4178 4179 if (!gimple_seq_empty_p (phi_nodes (bb))) 4180 return false; 4181 last = last_stmt (bb); 4182 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 4183 { 4184 gimple *stmt = gsi_stmt (gsi); 4185 tree lhs; 4186 imm_use_iterator imm_iter; 4187 use_operand_p use_p; 4188 4189 if (is_gimple_debug (stmt)) 4190 continue; 4191 if (gimple_has_side_effects (stmt)) 4192 return false; 4193 if (stmt == last) 4194 return true; 4195 if (!is_gimple_assign (stmt)) 4196 return false; 4197 lhs = gimple_assign_lhs (stmt); 4198 if (TREE_CODE (lhs) != SSA_NAME) 4199 return false; 4200 if (gimple_assign_rhs_could_trap_p (stmt)) 4201 return false; 4202 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) 4203 { 4204 gimple *use_stmt = USE_STMT (use_p); 4205 if (is_gimple_debug (use_stmt)) 4206 continue; 4207 if (gimple_bb (use_stmt) != bb) 4208 return false; 4209 } 4210 } 4211 return false; 4212 } 4213 4214 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, 4215 return true and fill in *OPS recursively. */ 4216 4217 static bool 4218 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops, 4219 class loop *loop) 4220 { 4221 gimple *stmt = SSA_NAME_DEF_STMT (var); 4222 tree rhs[2]; 4223 int i; 4224 4225 if (!is_reassociable_op (stmt, code, loop)) 4226 return false; 4227 4228 rhs[0] = gimple_assign_rhs1 (stmt); 4229 rhs[1] = gimple_assign_rhs2 (stmt); 4230 gimple_set_visited (stmt, true); 4231 for (i = 0; i < 2; i++) 4232 if (TREE_CODE (rhs[i]) == SSA_NAME 4233 && !get_ops (rhs[i], code, ops, loop) 4234 && has_single_use (rhs[i])) 4235 { 4236 operand_entry *oe = operand_entry_pool.allocate (); 4237 4238 oe->op = rhs[i]; 4239 oe->rank = code; 4240 oe->id = 0; 4241 oe->count = 1; 4242 oe->stmt_to_insert = NULL; 4243 ops->safe_push (oe); 4244 } 4245 return true; 4246 } 4247 4248 /* Find the ops that were added by get_ops starting from VAR, see if 4249 they were changed during update_range_test and if yes, create new 4250 stmts. */ 4251 4252 static tree 4253 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops, 4254 unsigned int *pidx, class loop *loop) 4255 { 4256 gimple *stmt = SSA_NAME_DEF_STMT (var); 4257 tree rhs[4]; 4258 int i; 4259 4260 if (!is_reassociable_op (stmt, code, loop)) 4261 return NULL; 4262 4263 rhs[0] = gimple_assign_rhs1 (stmt); 4264 rhs[1] = gimple_assign_rhs2 (stmt); 4265 rhs[2] = rhs[0]; 4266 rhs[3] = rhs[1]; 4267 for (i = 0; i < 2; i++) 4268 if (TREE_CODE (rhs[i]) == SSA_NAME) 4269 { 4270 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop); 4271 if (rhs[2 + i] == NULL_TREE) 4272 { 4273 if (has_single_use (rhs[i])) 4274 rhs[2 + i] = ops[(*pidx)++]->op; 4275 else 4276 rhs[2 + i] = rhs[i]; 4277 } 4278 } 4279 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1]) 4280 && (rhs[2] != rhs[1] || rhs[3] != rhs[0])) 4281 { 4282 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 4283 var = make_ssa_name (TREE_TYPE (var)); 4284 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt), 4285 rhs[2], rhs[3]); 4286 gimple_set_uid (g, gimple_uid (stmt)); 4287 gimple_set_visited (g, true); 4288 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4289 } 4290 return var; 4291 } 4292 4293 /* Structure to track the initial value passed to get_ops and 4294 the range in the ops vector for each basic block. 
*/ 4295 4296 struct inter_bb_range_test_entry 4297 { 4298 tree op; 4299 unsigned int first_idx, last_idx; 4300 }; 4301 4302 /* Inter-bb range test optimization. 4303 4304 Returns TRUE if a gimple conditional is optimized to a true/false, 4305 otherwise return FALSE. 4306 4307 This indicates to the caller that it should run a CFG cleanup pass 4308 once reassociation is completed. */ 4309 4310 static bool 4311 maybe_optimize_range_tests (gimple *stmt) 4312 { 4313 basic_block first_bb = gimple_bb (stmt); 4314 basic_block last_bb = first_bb; 4315 basic_block other_bb = NULL; 4316 basic_block bb; 4317 edge_iterator ei; 4318 edge e; 4319 auto_vec<operand_entry *> ops; 4320 auto_vec<inter_bb_range_test_entry> bbinfo; 4321 bool any_changes = false; 4322 bool cfg_cleanup_needed = false; 4323 4324 /* Consider only basic blocks that end with GIMPLE_COND or 4325 a cast statement satisfying final_range_test_p. All 4326 but the last bb in the first_bb .. last_bb range 4327 should end with GIMPLE_COND. */ 4328 if (gimple_code (stmt) == GIMPLE_COND) 4329 { 4330 if (EDGE_COUNT (first_bb->succs) != 2) 4331 return cfg_cleanup_needed; 4332 } 4333 else if (final_range_test_p (stmt)) 4334 other_bb = single_succ (first_bb); 4335 else 4336 return cfg_cleanup_needed; 4337 4338 if (stmt_could_throw_p (cfun, stmt)) 4339 return cfg_cleanup_needed; 4340 4341 /* As relative ordering of post-dominator sons isn't fixed, 4342 maybe_optimize_range_tests can be called first on any 4343 bb in the range we want to optimize. So, start searching 4344 backwards, if first_bb can be set to a predecessor. */ 4345 while (single_pred_p (first_bb)) 4346 { 4347 basic_block pred_bb = single_pred (first_bb); 4348 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true)) 4349 break; 4350 if (!no_side_effect_bb (first_bb)) 4351 break; 4352 first_bb = pred_bb; 4353 } 4354 /* If first_bb is last_bb, other_bb hasn't been computed yet. 4355 Before starting forward search in last_bb successors, find 4356 out the other_bb. */ 4357 if (first_bb == last_bb) 4358 { 4359 other_bb = NULL; 4360 /* As non-GIMPLE_COND last stmt always terminates the range, 4361 if forward search didn't discover anything, just give up. */ 4362 if (gimple_code (stmt) != GIMPLE_COND) 4363 return cfg_cleanup_needed; 4364 /* Look at both successors. Either it ends with a GIMPLE_COND 4365 and satisfies suitable_cond_bb, or ends with a cast and 4366 other_bb is that cast's successor. */ 4367 FOR_EACH_EDGE (e, ei, first_bb->succs) 4368 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)) 4369 || e->dest == first_bb) 4370 return cfg_cleanup_needed; 4371 else if (single_pred_p (e->dest)) 4372 { 4373 stmt = last_stmt (e->dest); 4374 if (stmt 4375 && gimple_code (stmt) == GIMPLE_COND 4376 && EDGE_COUNT (e->dest->succs) == 2) 4377 { 4378 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true)) 4379 break; 4380 else 4381 other_bb = NULL; 4382 } 4383 else if (stmt 4384 && final_range_test_p (stmt) 4385 && find_edge (first_bb, single_succ (e->dest))) 4386 { 4387 other_bb = single_succ (e->dest); 4388 if (other_bb == first_bb) 4389 other_bb = NULL; 4390 } 4391 } 4392 if (other_bb == NULL) 4393 return cfg_cleanup_needed; 4394 } 4395 /* Now do the forward search, moving last_bb to successor bbs 4396 that aren't other_bb. 
*/ 4397 while (EDGE_COUNT (last_bb->succs) == 2) 4398 { 4399 FOR_EACH_EDGE (e, ei, last_bb->succs) 4400 if (e->dest != other_bb) 4401 break; 4402 if (e == NULL) 4403 break; 4404 if (!single_pred_p (e->dest)) 4405 break; 4406 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false)) 4407 break; 4408 if (!no_side_effect_bb (e->dest)) 4409 break; 4410 last_bb = e->dest; 4411 } 4412 if (first_bb == last_bb) 4413 return cfg_cleanup_needed; 4414 /* Here basic blocks first_bb through last_bb's predecessor 4415 end with GIMPLE_COND, all of them have one of the edges to 4416 other_bb and another to another block in the range, 4417 all blocks except first_bb don't have side-effects and 4418 last_bb ends with either GIMPLE_COND, or cast satisfying 4419 final_range_test_p. */ 4420 for (bb = last_bb; ; bb = single_pred (bb)) 4421 { 4422 enum tree_code code; 4423 tree lhs, rhs; 4424 inter_bb_range_test_entry bb_ent; 4425 4426 bb_ent.op = NULL_TREE; 4427 bb_ent.first_idx = ops.length (); 4428 bb_ent.last_idx = bb_ent.first_idx; 4429 e = find_edge (bb, other_bb); 4430 stmt = last_stmt (bb); 4431 gimple_set_visited (stmt, true); 4432 if (gimple_code (stmt) != GIMPLE_COND) 4433 { 4434 use_operand_p use_p; 4435 gimple *phi; 4436 edge e2; 4437 unsigned int d; 4438 4439 lhs = gimple_assign_lhs (stmt); 4440 rhs = gimple_assign_rhs1 (stmt); 4441 gcc_assert (bb == last_bb); 4442 4443 /* stmt is 4444 _123 = (int) _234; 4445 OR 4446 _234 = a_2(D) == 2; 4447 4448 followed by: 4449 <bb M>: 4450 # _345 = PHI <_123(N), 1(...), 1(...)> 4451 4452 or 0 instead of 1. If it is 0, the _234 4453 range test is anded together with all the 4454 other range tests, if it is 1, it is ored with 4455 them. */ 4456 single_imm_use (lhs, &use_p, &phi); 4457 gcc_assert (gimple_code (phi) == GIMPLE_PHI); 4458 e2 = find_edge (first_bb, other_bb); 4459 d = e2->dest_idx; 4460 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs); 4461 if (integer_zerop (gimple_phi_arg_def (phi, d))) 4462 code = BIT_AND_EXPR; 4463 else 4464 { 4465 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d))); 4466 code = BIT_IOR_EXPR; 4467 } 4468 4469 /* If _234 SSA_NAME_DEF_STMT is 4470 _234 = _567 | _789; 4471 (or &, corresponding to 1/0 in the phi arguments, 4472 push into ops the individual range test arguments 4473 of the bitwise or resp. and, recursively. */ 4474 if (TREE_CODE (rhs) == SSA_NAME 4475 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 4476 != tcc_comparison) 4477 && !get_ops (rhs, code, &ops, 4478 loop_containing_stmt (stmt)) 4479 && has_single_use (rhs)) 4480 { 4481 /* Otherwise, push the _234 range test itself. */ 4482 operand_entry *oe = operand_entry_pool.allocate (); 4483 4484 oe->op = rhs; 4485 oe->rank = code; 4486 oe->id = 0; 4487 oe->count = 1; 4488 oe->stmt_to_insert = NULL; 4489 ops.safe_push (oe); 4490 bb_ent.last_idx++; 4491 bb_ent.op = rhs; 4492 } 4493 else if (is_gimple_assign (stmt) 4494 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 4495 == tcc_comparison) 4496 && !get_ops (lhs, code, &ops, 4497 loop_containing_stmt (stmt)) 4498 && has_single_use (lhs)) 4499 { 4500 operand_entry *oe = operand_entry_pool.allocate (); 4501 oe->op = lhs; 4502 oe->rank = code; 4503 oe->id = 0; 4504 oe->count = 1; 4505 ops.safe_push (oe); 4506 bb_ent.last_idx++; 4507 bb_ent.op = lhs; 4508 } 4509 else 4510 { 4511 bb_ent.last_idx = ops.length (); 4512 bb_ent.op = rhs; 4513 } 4514 bbinfo.safe_push (bb_ent); 4515 continue; 4516 } 4517 /* Otherwise stmt is GIMPLE_COND. 
*/ 4518 code = gimple_cond_code (stmt); 4519 lhs = gimple_cond_lhs (stmt); 4520 rhs = gimple_cond_rhs (stmt); 4521 if (TREE_CODE (lhs) == SSA_NAME 4522 && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 4523 && ((code != EQ_EXPR && code != NE_EXPR) 4524 || rhs != boolean_false_node 4525 /* Either push into ops the individual bitwise 4526 or resp. and operands, depending on which 4527 edge is other_bb. */ 4528 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0) 4529 ^ (code == EQ_EXPR)) 4530 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops, 4531 loop_containing_stmt (stmt)))) 4532 { 4533 /* Or push the GIMPLE_COND stmt itself. */ 4534 operand_entry *oe = operand_entry_pool.allocate (); 4535 4536 oe->op = NULL; 4537 oe->rank = (e->flags & EDGE_TRUE_VALUE) 4538 ? BIT_IOR_EXPR : BIT_AND_EXPR; 4539 /* oe->op = NULL signs that there is no SSA_NAME 4540 for the range test, and oe->id instead is the 4541 basic block number, at which's end the GIMPLE_COND 4542 is. */ 4543 oe->id = bb->index; 4544 oe->count = 1; 4545 oe->stmt_to_insert = NULL; 4546 ops.safe_push (oe); 4547 bb_ent.op = NULL; 4548 bb_ent.last_idx++; 4549 } 4550 else if (ops.length () > bb_ent.first_idx) 4551 { 4552 bb_ent.op = lhs; 4553 bb_ent.last_idx = ops.length (); 4554 } 4555 bbinfo.safe_push (bb_ent); 4556 if (bb == first_bb) 4557 break; 4558 } 4559 if (ops.length () > 1) 4560 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb); 4561 if (any_changes) 4562 { 4563 unsigned int idx, max_idx = 0; 4564 /* update_ops relies on has_single_use predicates returning the 4565 same values as it did during get_ops earlier. Additionally it 4566 never removes statements, only adds new ones and it should walk 4567 from the single imm use and check the predicate already before 4568 making those changes. 4569 On the other side, the handling of GIMPLE_COND directly can turn 4570 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so 4571 it needs to be done in a separate loop afterwards. */ 4572 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) 4573 { 4574 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx 4575 && bbinfo[idx].op != NULL_TREE) 4576 { 4577 tree new_op; 4578 4579 max_idx = idx; 4580 stmt = last_stmt (bb); 4581 new_op = update_ops (bbinfo[idx].op, 4582 (enum tree_code) 4583 ops[bbinfo[idx].first_idx]->rank, 4584 ops, &bbinfo[idx].first_idx, 4585 loop_containing_stmt (stmt)); 4586 if (new_op == NULL_TREE) 4587 { 4588 gcc_assert (bb == last_bb); 4589 new_op = ops[bbinfo[idx].first_idx++]->op; 4590 } 4591 if (bbinfo[idx].op != new_op) 4592 { 4593 imm_use_iterator iter; 4594 use_operand_p use_p; 4595 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL; 4596 4597 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op) 4598 if (is_gimple_debug (use_stmt)) 4599 continue; 4600 else if (gimple_code (use_stmt) == GIMPLE_COND 4601 || gimple_code (use_stmt) == GIMPLE_PHI) 4602 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 4603 SET_USE (use_p, new_op); 4604 else if ((is_gimple_assign (use_stmt) 4605 && (TREE_CODE_CLASS 4606 (gimple_assign_rhs_code (use_stmt)) 4607 == tcc_comparison))) 4608 cast_or_tcc_cmp_stmt = use_stmt; 4609 else if (gimple_assign_cast_p (use_stmt)) 4610 cast_or_tcc_cmp_stmt = use_stmt; 4611 else 4612 gcc_unreachable (); 4613 4614 if (cast_or_tcc_cmp_stmt) 4615 { 4616 gcc_assert (bb == last_bb); 4617 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt); 4618 tree new_lhs = make_ssa_name (TREE_TYPE (lhs)); 4619 enum tree_code rhs_code 4620 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt) 4621 ? 
gimple_assign_rhs_code (cast_or_tcc_cmp_stmt) 4622 : CONVERT_EXPR; 4623 gassign *g; 4624 if (is_gimple_min_invariant (new_op)) 4625 { 4626 new_op = fold_convert (TREE_TYPE (lhs), new_op); 4627 g = gimple_build_assign (new_lhs, new_op); 4628 } 4629 else 4630 g = gimple_build_assign (new_lhs, rhs_code, new_op); 4631 gimple_stmt_iterator gsi 4632 = gsi_for_stmt (cast_or_tcc_cmp_stmt); 4633 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt)); 4634 gimple_set_visited (g, true); 4635 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4636 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) 4637 if (is_gimple_debug (use_stmt)) 4638 continue; 4639 else if (gimple_code (use_stmt) == GIMPLE_COND 4640 || gimple_code (use_stmt) == GIMPLE_PHI) 4641 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 4642 SET_USE (use_p, new_lhs); 4643 else 4644 gcc_unreachable (); 4645 } 4646 } 4647 } 4648 if (bb == first_bb) 4649 break; 4650 } 4651 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) 4652 { 4653 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx 4654 && bbinfo[idx].op == NULL_TREE 4655 && ops[bbinfo[idx].first_idx]->op != NULL_TREE) 4656 { 4657 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb)); 4658 4659 if (idx > max_idx) 4660 max_idx = idx; 4661 4662 /* If we collapse the conditional to a true/false 4663 condition, then bubble that knowledge up to our caller. */ 4664 if (integer_zerop (ops[bbinfo[idx].first_idx]->op)) 4665 { 4666 gimple_cond_make_false (cond_stmt); 4667 cfg_cleanup_needed = true; 4668 } 4669 else if (integer_onep (ops[bbinfo[idx].first_idx]->op)) 4670 { 4671 gimple_cond_make_true (cond_stmt); 4672 cfg_cleanup_needed = true; 4673 } 4674 else 4675 { 4676 gimple_cond_set_code (cond_stmt, NE_EXPR); 4677 gimple_cond_set_lhs (cond_stmt, 4678 ops[bbinfo[idx].first_idx]->op); 4679 gimple_cond_set_rhs (cond_stmt, boolean_false_node); 4680 } 4681 update_stmt (cond_stmt); 4682 } 4683 if (bb == first_bb) 4684 break; 4685 } 4686 4687 /* The above changes could result in basic blocks after the first 4688 modified one, up to and including last_bb, to be executed even if 4689 they would not be in the original program. If the value ranges of 4690 assignment lhs' in those bbs were dependent on the conditions 4691 guarding those basic blocks which now can change, the VRs might 4692 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs 4693 are only used within the same bb, it should be not a big deal if 4694 we just reset all the VRs in those bbs. See PR68671. */ 4695 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++) 4696 reset_flow_sensitive_info_in_bb (bb); 4697 } 4698 return cfg_cleanup_needed; 4699 } 4700 4701 /* Return true if OPERAND is defined by a PHI node which uses the LHS 4702 of STMT in it's operands. This is also known as a "destructive 4703 update" operation. */ 4704 4705 static bool 4706 is_phi_for_stmt (gimple *stmt, tree operand) 4707 { 4708 gimple *def_stmt; 4709 gphi *def_phi; 4710 tree lhs; 4711 use_operand_p arg_p; 4712 ssa_op_iter i; 4713 4714 if (TREE_CODE (operand) != SSA_NAME) 4715 return false; 4716 4717 lhs = gimple_assign_lhs (stmt); 4718 4719 def_stmt = SSA_NAME_DEF_STMT (operand); 4720 def_phi = dyn_cast <gphi *> (def_stmt); 4721 if (!def_phi) 4722 return false; 4723 4724 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE) 4725 if (lhs == USE_FROM_PTR (arg_p)) 4726 return true; 4727 return false; 4728 } 4729 4730 /* Remove def stmt of VAR if VAR has zero uses and recurse 4731 on rhs1 operand if so. 
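For instance, if rewriting has left the old result _9 = _7 + z_4 without uses, its (visited) definition is removed, which may in turn leave _7 unused, so the walk continues along the rhs1 operands until a live or unvisited definition is reached.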
*/ 4732 4733 static void 4734 remove_visited_stmt_chain (tree var) 4735 { 4736 gimple *stmt; 4737 gimple_stmt_iterator gsi; 4738 4739 while (1) 4740 { 4741 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) 4742 return; 4743 stmt = SSA_NAME_DEF_STMT (var); 4744 if (is_gimple_assign (stmt) && gimple_visited_p (stmt)) 4745 { 4746 var = gimple_assign_rhs1 (stmt); 4747 gsi = gsi_for_stmt (stmt); 4748 reassoc_remove_stmt (&gsi); 4749 release_defs (stmt); 4750 } 4751 else 4752 return; 4753 } 4754 } 4755 4756 /* This function checks three consecutive operands in 4757 passed operands vector OPS starting from OPINDEX and 4758 swaps two operands if it is profitable for binary operation 4759 consuming OPINDEX + 1 and OPINDEX + 2 operands. 4760 4761 We pair ops with the same rank if possible. 4762 4763 The alternative we try is to see if STMT is a destructive 4764 update style statement, which is like: 4765 b = phi (a, ...) 4766 a = c + b; 4767 In that case, we want to use the destructive update form to 4768 expose the possible vectorizer sum reduction opportunity. 4769 In that case, the third operand will be the phi node. This 4770 check is not performed if STMT is null. 4771 4772 We could, of course, try to be better as noted above, and do a 4773 lot of work to try to find these opportunities in >3 operand 4774 cases, but it is unlikely to be worth it. */ 4775 4776 static void 4777 swap_ops_for_binary_stmt (vec<operand_entry *> ops, 4778 unsigned int opindex, gimple *stmt) 4779 { 4780 operand_entry *oe1, *oe2, *oe3; 4781 4782 oe1 = ops[opindex]; 4783 oe2 = ops[opindex + 1]; 4784 oe3 = ops[opindex + 2]; 4785 4786 if ((oe1->rank == oe2->rank 4787 && oe2->rank != oe3->rank) 4788 || (stmt && is_phi_for_stmt (stmt, oe3->op) 4789 && !is_phi_for_stmt (stmt, oe1->op) 4790 && !is_phi_for_stmt (stmt, oe2->op))) 4791 std::swap (*oe1, *oe3); 4792 else if ((oe1->rank == oe3->rank 4793 && oe2->rank != oe3->rank) 4794 || (stmt && is_phi_for_stmt (stmt, oe2->op) 4795 && !is_phi_for_stmt (stmt, oe1->op) 4796 && !is_phi_for_stmt (stmt, oe3->op))) 4797 std::swap (*oe1, *oe2); 4798 } 4799 4800 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those 4801 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate 4802 whether RHS1 op RHS2 can be inserted before or needs to be inserted 4803 after the returned stmt. */ 4804 4805 static inline gimple * 4806 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before) 4807 { 4808 insert_before = true; 4809 if (TREE_CODE (rhs1) == SSA_NAME 4810 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1))) 4811 { 4812 stmt = SSA_NAME_DEF_STMT (rhs1); 4813 insert_before = false; 4814 } 4815 if (TREE_CODE (rhs2) == SSA_NAME 4816 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2))) 4817 { 4818 stmt = SSA_NAME_DEF_STMT (rhs2); 4819 insert_before = false; 4820 } 4821 return stmt; 4822 } 4823 4824 /* If the stmt that defines operand has to be inserted, insert it 4825 before the use.
*/ 4826 static void 4827 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert) 4828 { 4829 gcc_assert (is_gimple_assign (stmt_to_insert)); 4830 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert); 4831 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert); 4832 bool insert_before; 4833 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before); 4834 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point); 4835 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point)); 4836 4837 /* If the insert point is not stmt, then insert_point would be 4838 the point where operand rhs1 or rhs2 is defined. In this case, 4839 stmt_to_insert has to be inserted afterwards. This would 4840 only happen when the stmt insertion point is flexible. */ 4841 if (insert_before) 4842 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT); 4843 else 4844 insert_stmt_after (stmt_to_insert, insert_point); 4845 } 4846 4847 4848 /* Recursively rewrite our linearized statements so that the operators 4849 match those in OPS[OPINDEX], putting the computation in rank 4850 order. Return new lhs. 4851 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in 4852 the current stmt and during recursive invocations. 4853 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in 4854 recursive invocations. */ 4855 4856 static tree 4857 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex, 4858 vec<operand_entry *> ops, bool changed, bool next_changed) 4859 { 4860 tree rhs1 = gimple_assign_rhs1 (stmt); 4861 tree rhs2 = gimple_assign_rhs2 (stmt); 4862 tree lhs = gimple_assign_lhs (stmt); 4863 operand_entry *oe; 4864 4865 /* The final recursion case for this function is that you have 4866 exactly two operations left. 4867 If we had exactly one op in the entire list to start with, we 4868 would have never called this function, and the tail recursion 4869 rewrites them one at a time. */ 4870 if (opindex + 2 == ops.length ()) 4871 { 4872 operand_entry *oe1, *oe2; 4873 4874 oe1 = ops[opindex]; 4875 oe2 = ops[opindex + 1]; 4876 4877 if (rhs1 != oe1->op || rhs2 != oe2->op) 4878 { 4879 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 4880 unsigned int uid = gimple_uid (stmt); 4881 4882 if (dump_file && (dump_flags & TDF_DETAILS)) 4883 { 4884 fprintf (dump_file, "Transforming "); 4885 print_gimple_stmt (dump_file, stmt, 0); 4886 } 4887 4888 /* If the stmt that defines operand has to be inserted, insert it 4889 before the use. */ 4890 if (oe1->stmt_to_insert) 4891 insert_stmt_before_use (stmt, oe1->stmt_to_insert); 4892 if (oe2->stmt_to_insert) 4893 insert_stmt_before_use (stmt, oe2->stmt_to_insert); 4894 /* Even when changed is false, reassociation could have e.g. removed 4895 some redundant operations, so unless we are just swapping the 4896 arguments or unless there is no change at all (then we just 4897 return lhs), force creation of a new SSA_NAME. 
*/ 4898 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex)) 4899 { 4900 bool insert_before; 4901 gimple *insert_point 4902 = find_insert_point (stmt, oe1->op, oe2->op, insert_before); 4903 lhs = make_ssa_name (TREE_TYPE (lhs)); 4904 stmt 4905 = gimple_build_assign (lhs, rhs_code, 4906 oe1->op, oe2->op); 4907 gimple_set_uid (stmt, uid); 4908 gimple_set_visited (stmt, true); 4909 if (insert_before) 4910 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 4911 else 4912 insert_stmt_after (stmt, insert_point); 4913 } 4914 else 4915 { 4916 bool insert_before; 4917 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op, 4918 insert_before) 4919 == stmt); 4920 gimple_assign_set_rhs1 (stmt, oe1->op); 4921 gimple_assign_set_rhs2 (stmt, oe2->op); 4922 update_stmt (stmt); 4923 } 4924 4925 if (rhs1 != oe1->op && rhs1 != oe2->op) 4926 remove_visited_stmt_chain (rhs1); 4927 4928 if (dump_file && (dump_flags & TDF_DETAILS)) 4929 { 4930 fprintf (dump_file, " into "); 4931 print_gimple_stmt (dump_file, stmt, 0); 4932 } 4933 } 4934 return lhs; 4935 } 4936 4937 /* If we hit here, we should have 3 or more ops left. */ 4938 gcc_assert (opindex + 2 < ops.length ()); 4939 4940 /* Rewrite the next operator. */ 4941 oe = ops[opindex]; 4942 4943 /* If the stmt that defines operand has to be inserted, insert it 4944 before the use. */ 4945 if (oe->stmt_to_insert) 4946 insert_stmt_before_use (stmt, oe->stmt_to_insert); 4947 4948 /* Recurse on the LHS of the binary operator, which is guaranteed to 4949 be the non-leaf side. */ 4950 tree new_rhs1 4951 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops, 4952 changed || oe->op != rhs2 || next_changed, 4953 false); 4954 4955 if (oe->op != rhs2 || new_rhs1 != rhs1) 4956 { 4957 if (dump_file && (dump_flags & TDF_DETAILS)) 4958 { 4959 fprintf (dump_file, "Transforming "); 4960 print_gimple_stmt (dump_file, stmt, 0); 4961 } 4962 4963 /* If changed is false, this is either opindex == 0 4964 or all outer rhs2's were equal to corresponding oe->op, 4965 and powi_result is NULL. 4966 That means lhs is equivalent before and after reassociation. 4967 Otherwise ensure the old lhs SSA_NAME is not reused and 4968 create a new stmt as well, so that any debug stmts will be 4969 properly adjusted. */ 4970 if (changed) 4971 { 4972 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 4973 unsigned int uid = gimple_uid (stmt); 4974 bool insert_before; 4975 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op, 4976 insert_before); 4977 4978 lhs = make_ssa_name (TREE_TYPE (lhs)); 4979 stmt = gimple_build_assign (lhs, rhs_code, 4980 new_rhs1, oe->op); 4981 gimple_set_uid (stmt, uid); 4982 gimple_set_visited (stmt, true); 4983 if (insert_before) 4984 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 4985 else 4986 insert_stmt_after (stmt, insert_point); 4987 } 4988 else 4989 { 4990 bool insert_before; 4991 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op, 4992 insert_before) 4993 == stmt); 4994 gimple_assign_set_rhs1 (stmt, new_rhs1); 4995 gimple_assign_set_rhs2 (stmt, oe->op); 4996 update_stmt (stmt); 4997 } 4998 4999 if (dump_file && (dump_flags & TDF_DETAILS)) 5000 { 5001 fprintf (dump_file, " into "); 5002 print_gimple_stmt (dump_file, stmt, 0); 5003 } 5004 } 5005 return lhs; 5006 } 5007 5008 /* Find out how many cycles we need to compute statements chain. 5009 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a 5010 maximum number of independent statements we may execute per cycle. 
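For instance, with OPS_NUM == 16 and CPU_WIDTH == 4 the first phase takes 16 / (2 * 4) == 2 cycles and leaves 16 - 2 * 4 == 8 operands, which are then halved in log2 (8) == 3 further cycles, giving 5 cycles in total.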
*/ 5011 5012 static int 5013 get_required_cycles (int ops_num, int cpu_width) 5014 { 5015 int res; 5016 int elog; 5017 unsigned int rest; 5018 5019 /* While we have more than 2 * cpu_width operands 5020 we may reduce number of operands by cpu_width 5021 per cycle. */ 5022 res = ops_num / (2 * cpu_width); 5023 5024 /* Remained operands count may be reduced twice per cycle 5025 until we have only one operand. */ 5026 rest = (unsigned)(ops_num - res * cpu_width); 5027 elog = exact_log2 (rest); 5028 if (elog >= 0) 5029 res += elog; 5030 else 5031 res += floor_log2 (rest) + 1; 5032 5033 return res; 5034 } 5035 5036 /* Returns an optimal number of registers to use for computation of 5037 given statements. */ 5038 5039 static int 5040 get_reassociation_width (int ops_num, enum tree_code opc, 5041 machine_mode mode) 5042 { 5043 int param_width = param_tree_reassoc_width; 5044 int width; 5045 int width_min; 5046 int cycles_best; 5047 5048 if (param_width > 0) 5049 width = param_width; 5050 else 5051 width = targetm.sched.reassociation_width (opc, mode); 5052 5053 if (width == 1) 5054 return width; 5055 5056 /* Get the minimal time required for sequence computation. */ 5057 cycles_best = get_required_cycles (ops_num, width); 5058 5059 /* Check if we may use less width and still compute sequence for 5060 the same time. It will allow us to reduce registers usage. 5061 get_required_cycles is monotonically increasing with lower width 5062 so we can perform a binary search for the minimal width that still 5063 results in the optimal cycle count. */ 5064 width_min = 1; 5065 while (width > width_min) 5066 { 5067 int width_mid = (width + width_min) / 2; 5068 5069 if (get_required_cycles (ops_num, width_mid) == cycles_best) 5070 width = width_mid; 5071 else if (width_min < width_mid) 5072 width_min = width_mid; 5073 else 5074 break; 5075 } 5076 5077 return width; 5078 } 5079 5080 /* Recursively rewrite our linearized statements so that the operators 5081 match those in OPS[OPINDEX], putting the computation in rank 5082 order and trying to allow operations to be executed in 5083 parallel. */ 5084 5085 static void 5086 rewrite_expr_tree_parallel (gassign *stmt, int width, 5087 vec<operand_entry *> ops) 5088 { 5089 enum tree_code opcode = gimple_assign_rhs_code (stmt); 5090 int op_num = ops.length (); 5091 gcc_assert (op_num > 0); 5092 int stmt_num = op_num - 1; 5093 gimple **stmts = XALLOCAVEC (gimple *, stmt_num); 5094 int op_index = op_num - 1; 5095 int stmt_index = 0; 5096 int ready_stmts_end = 0; 5097 int i = 0; 5098 gimple *stmt1 = NULL, *stmt2 = NULL; 5099 tree last_rhs1 = gimple_assign_rhs1 (stmt); 5100 5101 /* We start expression rewriting from the top statements. 5102 So, in this loop we create a full list of statements 5103 we will work with. */ 5104 stmts[stmt_num - 1] = stmt; 5105 for (i = stmt_num - 2; i >= 0; i--) 5106 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1])); 5107 5108 for (i = 0; i < stmt_num; i++) 5109 { 5110 tree op1, op2; 5111 5112 /* Determine whether we should use results of 5113 already handled statements or not. */ 5114 if (ready_stmts_end == 0 5115 && (i - stmt_index >= width || op_index < 1)) 5116 ready_stmts_end = i; 5117 5118 /* Now we choose operands for the next statement. Non zero 5119 value in ready_stmts_end means here that we should use 5120 the result of already generated statements as new operand. 
*/ 5121 if (ready_stmts_end > 0) 5122 { 5123 op1 = gimple_assign_lhs (stmts[stmt_index++]); 5124 if (ready_stmts_end > stmt_index) 5125 op2 = gimple_assign_lhs (stmts[stmt_index++]); 5126 else if (op_index >= 0) 5127 { 5128 operand_entry *oe = ops[op_index--]; 5129 stmt2 = oe->stmt_to_insert; 5130 op2 = oe->op; 5131 } 5132 else 5133 { 5134 gcc_assert (stmt_index < i); 5135 op2 = gimple_assign_lhs (stmts[stmt_index++]); 5136 } 5137 5138 if (stmt_index >= ready_stmts_end) 5139 ready_stmts_end = 0; 5140 } 5141 else 5142 { 5143 if (op_index > 1) 5144 swap_ops_for_binary_stmt (ops, op_index - 2, NULL); 5145 operand_entry *oe2 = ops[op_index--]; 5146 operand_entry *oe1 = ops[op_index--]; 5147 op2 = oe2->op; 5148 stmt2 = oe2->stmt_to_insert; 5149 op1 = oe1->op; 5150 stmt1 = oe1->stmt_to_insert; 5151 } 5152 5153 /* If we emit the last statement then we should put 5154 operands into the last statement. It will also 5155 break the loop. */ 5156 if (op_index < 0 && stmt_index == i) 5157 i = stmt_num - 1; 5158 5159 if (dump_file && (dump_flags & TDF_DETAILS)) 5160 { 5161 fprintf (dump_file, "Transforming "); 5162 print_gimple_stmt (dump_file, stmts[i], 0); 5163 } 5164 5165 /* If the stmt that defines operand has to be inserted, insert it 5166 before the use. */ 5167 if (stmt1) 5168 insert_stmt_before_use (stmts[i], stmt1); 5169 if (stmt2) 5170 insert_stmt_before_use (stmts[i], stmt2); 5171 stmt1 = stmt2 = NULL; 5172 5173 /* We keep original statement only for the last one. All 5174 others are recreated. */ 5175 if (i == stmt_num - 1) 5176 { 5177 gimple_assign_set_rhs1 (stmts[i], op1); 5178 gimple_assign_set_rhs2 (stmts[i], op2); 5179 update_stmt (stmts[i]); 5180 } 5181 else 5182 { 5183 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); 5184 gimple_set_visited (stmts[i], true); 5185 } 5186 if (dump_file && (dump_flags & TDF_DETAILS)) 5187 { 5188 fprintf (dump_file, " into "); 5189 print_gimple_stmt (dump_file, stmts[i], 0); 5190 } 5191 } 5192 5193 remove_visited_stmt_chain (last_rhs1); 5194 } 5195 5196 /* Transform STMT, which is really (A +B) + (C + D) into the left 5197 linear form, ((A+B)+C)+D. 5198 Recurse on D if necessary. 
*/ 5199 5200 static void 5201 linearize_expr (gimple *stmt) 5202 { 5203 gimple_stmt_iterator gsi; 5204 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); 5205 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 5206 gimple *oldbinrhs = binrhs; 5207 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 5208 gimple *newbinrhs = NULL; 5209 class loop *loop = loop_containing_stmt (stmt); 5210 tree lhs = gimple_assign_lhs (stmt); 5211 5212 gcc_assert (is_reassociable_op (binlhs, rhscode, loop) 5213 && is_reassociable_op (binrhs, rhscode, loop)); 5214 5215 gsi = gsi_for_stmt (stmt); 5216 5217 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs)); 5218 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)), 5219 gimple_assign_rhs_code (binrhs), 5220 gimple_assign_lhs (binlhs), 5221 gimple_assign_rhs2 (binrhs)); 5222 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs)); 5223 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT); 5224 gimple_set_uid (binrhs, gimple_uid (stmt)); 5225 5226 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) 5227 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 5228 5229 if (dump_file && (dump_flags & TDF_DETAILS)) 5230 { 5231 fprintf (dump_file, "Linearized: "); 5232 print_gimple_stmt (dump_file, stmt, 0); 5233 } 5234 5235 reassociate_stats.linearized++; 5236 update_stmt (stmt); 5237 5238 gsi = gsi_for_stmt (oldbinrhs); 5239 reassoc_remove_stmt (&gsi); 5240 release_defs (oldbinrhs); 5241 5242 gimple_set_visited (stmt, true); 5243 gimple_set_visited (binlhs, true); 5244 gimple_set_visited (binrhs, true); 5245 5246 /* Tail recurse on the new rhs if it still needs reassociation. */ 5247 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop)) 5248 /* ??? This should probably be linearize_expr (newbinrhs) but I don't 5249 want to change the algorithm while converting to tuples. */ 5250 linearize_expr (stmt); 5251 } 5252 5253 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return 5254 it. Otherwise, return NULL. */ 5255 5256 static gimple * 5257 get_single_immediate_use (tree lhs) 5258 { 5259 use_operand_p immuse; 5260 gimple *immusestmt; 5261 5262 if (TREE_CODE (lhs) == SSA_NAME 5263 && single_imm_use (lhs, &immuse, &immusestmt) 5264 && is_gimple_assign (immusestmt)) 5265 return immusestmt; 5266 5267 return NULL; 5268 } 5269 5270 /* Recursively negate the value of TONEGATE, and return the SSA_NAME 5271 representing the negated value. Insertions of any necessary 5272 instructions go before GSI. 5273 This function is recursive in that, if you hand it "a_5" as the 5274 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will 5275 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */ 5276 5277 static tree 5278 negate_value (tree tonegate, gimple_stmt_iterator *gsip) 5279 { 5280 gimple *negatedefstmt = NULL; 5281 tree resultofnegate; 5282 gimple_stmt_iterator gsi; 5283 unsigned int uid; 5284 5285 /* If we are trying to negate a name, defined by an add, negate the 5286 add operands instead. 
*/ 5287 if (TREE_CODE (tonegate) == SSA_NAME) 5288 negatedefstmt = SSA_NAME_DEF_STMT (tonegate); 5289 if (TREE_CODE (tonegate) == SSA_NAME 5290 && is_gimple_assign (negatedefstmt) 5291 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME 5292 && has_single_use (gimple_assign_lhs (negatedefstmt)) 5293 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR) 5294 { 5295 tree rhs1 = gimple_assign_rhs1 (negatedefstmt); 5296 tree rhs2 = gimple_assign_rhs2 (negatedefstmt); 5297 tree lhs = gimple_assign_lhs (negatedefstmt); 5298 gimple *g; 5299 5300 gsi = gsi_for_stmt (negatedefstmt); 5301 rhs1 = negate_value (rhs1, &gsi); 5302 5303 gsi = gsi_for_stmt (negatedefstmt); 5304 rhs2 = negate_value (rhs2, &gsi); 5305 5306 gsi = gsi_for_stmt (negatedefstmt); 5307 lhs = make_ssa_name (TREE_TYPE (lhs)); 5308 gimple_set_visited (negatedefstmt, true); 5309 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2); 5310 gimple_set_uid (g, gimple_uid (negatedefstmt)); 5311 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 5312 return lhs; 5313 } 5314 5315 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate); 5316 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true, 5317 NULL_TREE, true, GSI_SAME_STMT); 5318 gsi = *gsip; 5319 uid = gimple_uid (gsi_stmt (gsi)); 5320 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi)) 5321 { 5322 gimple *stmt = gsi_stmt (gsi); 5323 if (gimple_uid (stmt) != 0) 5324 break; 5325 gimple_set_uid (stmt, uid); 5326 } 5327 return resultofnegate; 5328 } 5329 5330 /* Return true if we should break up the subtract in STMT into an add 5331 with negate. This is true when the subtract operands are really 5332 adds, or the subtract itself is used in an add expression. In 5333 either case, breaking up the subtract into an add with negate 5334 exposes the adds to reassociation. */ 5335 5336 static bool 5337 should_break_up_subtract (gimple *stmt) 5338 { 5339 tree lhs = gimple_assign_lhs (stmt); 5340 tree binlhs = gimple_assign_rhs1 (stmt); 5341 tree binrhs = gimple_assign_rhs2 (stmt); 5342 gimple *immusestmt; 5343 class loop *loop = loop_containing_stmt (stmt); 5344 5345 if (TREE_CODE (binlhs) == SSA_NAME 5346 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) 5347 return true; 5348 5349 if (TREE_CODE (binrhs) == SSA_NAME 5350 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop)) 5351 return true; 5352 5353 if (TREE_CODE (lhs) == SSA_NAME 5354 && (immusestmt = get_single_immediate_use (lhs)) 5355 && is_gimple_assign (immusestmt) 5356 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR 5357 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR 5358 && gimple_assign_rhs1 (immusestmt) == lhs) 5359 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR)) 5360 return true; 5361 return false; 5362 } 5363 5364 /* Transform STMT from A - B into A + -B. */ 5365 5366 static void 5367 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip) 5368 { 5369 tree rhs1 = gimple_assign_rhs1 (stmt); 5370 tree rhs2 = gimple_assign_rhs2 (stmt); 5371 5372 if (dump_file && (dump_flags & TDF_DETAILS)) 5373 { 5374 fprintf (dump_file, "Breaking up subtract "); 5375 print_gimple_stmt (dump_file, stmt, 0); 5376 } 5377 5378 rhs2 = negate_value (rhs2, gsip); 5379 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2); 5380 update_stmt (stmt); 5381 } 5382 5383 /* Determine whether STMT is a builtin call that raises an SSA name 5384 to an integer power and has only one use.
If so, and this is early 5385 reassociation and unsafe math optimizations are permitted, place 5386 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE. 5387 If any of these conditions does not hold, return FALSE. */ 5388 5389 static bool 5390 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent) 5391 { 5392 tree arg1; 5393 REAL_VALUE_TYPE c, cint; 5394 5395 switch (gimple_call_combined_fn (stmt)) 5396 { 5397 CASE_CFN_POW: 5398 if (flag_errno_math) 5399 return false; 5400 5401 *base = gimple_call_arg (stmt, 0); 5402 arg1 = gimple_call_arg (stmt, 1); 5403 5404 if (TREE_CODE (arg1) != REAL_CST) 5405 return false; 5406 5407 c = TREE_REAL_CST (arg1); 5408 5409 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT) 5410 return false; 5411 5412 *exponent = real_to_integer (&c); 5413 real_from_integer (&cint, VOIDmode, *exponent, SIGNED); 5414 if (!real_identical (&c, &cint)) 5415 return false; 5416 5417 break; 5418 5419 CASE_CFN_POWI: 5420 *base = gimple_call_arg (stmt, 0); 5421 arg1 = gimple_call_arg (stmt, 1); 5422 5423 if (!tree_fits_shwi_p (arg1)) 5424 return false; 5425 5426 *exponent = tree_to_shwi (arg1); 5427 break; 5428 5429 default: 5430 return false; 5431 } 5432 5433 /* Expanding negative exponents is generally unproductive, so we don't 5434 complicate matters with those. Exponents of zero and one should 5435 have been handled by expression folding. */ 5436 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME) 5437 return false; 5438 5439 return true; 5440 } 5441 5442 /* Try to derive and add operand entry for OP to *OPS. Return false if 5443 unsuccessful. */ 5444 5445 static bool 5446 try_special_add_to_ops (vec<operand_entry *> *ops, 5447 enum tree_code code, 5448 tree op, gimple* def_stmt) 5449 { 5450 tree base = NULL_TREE; 5451 HOST_WIDE_INT exponent = 0; 5452 5453 if (TREE_CODE (op) != SSA_NAME 5454 || ! has_single_use (op)) 5455 return false; 5456 5457 if (code == MULT_EXPR 5458 && reassoc_insert_powi_p 5459 && flag_unsafe_math_optimizations 5460 && is_gimple_call (def_stmt) 5461 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent)) 5462 { 5463 add_repeat_to_ops_vec (ops, base, exponent); 5464 gimple_set_visited (def_stmt, true); 5465 return true; 5466 } 5467 else if (code == MULT_EXPR 5468 && is_gimple_assign (def_stmt) 5469 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR 5470 && !HONOR_SNANS (TREE_TYPE (op)) 5471 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op)) 5472 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))) 5473 { 5474 tree rhs1 = gimple_assign_rhs1 (def_stmt); 5475 tree cst = build_minus_one_cst (TREE_TYPE (op)); 5476 add_to_ops_vec (ops, rhs1); 5477 add_to_ops_vec (ops, cst); 5478 gimple_set_visited (def_stmt, true); 5479 return true; 5480 } 5481 5482 return false; 5483 } 5484 5485 /* Recursively linearize a binary expression that is the RHS of STMT. 5486 Place the operands of the expression tree in the vector named OPS. 
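For instance, for _9 = _7 + c_3 where _7 = a_1 + b_2 is itself reassociable, the recursion ends up with entries for a_1, b_2 and c_3 in OPS (assuming none of them triggers the special powi or negate handling).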
*/ 5487 5488 static void 5489 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt, 5490 bool is_associative, bool set_visited) 5491 { 5492 tree binlhs = gimple_assign_rhs1 (stmt); 5493 tree binrhs = gimple_assign_rhs2 (stmt); 5494 gimple *binlhsdef = NULL, *binrhsdef = NULL; 5495 bool binlhsisreassoc = false; 5496 bool binrhsisreassoc = false; 5497 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 5498 class loop *loop = loop_containing_stmt (stmt); 5499 5500 if (set_visited) 5501 gimple_set_visited (stmt, true); 5502 5503 if (TREE_CODE (binlhs) == SSA_NAME) 5504 { 5505 binlhsdef = SSA_NAME_DEF_STMT (binlhs); 5506 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop) 5507 && !stmt_could_throw_p (cfun, binlhsdef)); 5508 } 5509 5510 if (TREE_CODE (binrhs) == SSA_NAME) 5511 { 5512 binrhsdef = SSA_NAME_DEF_STMT (binrhs); 5513 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop) 5514 && !stmt_could_throw_p (cfun, binrhsdef)); 5515 } 5516 5517 /* If the LHS is not reassociable, but the RHS is, we need to swap 5518 them. If neither is reassociable, there is nothing we can do, so 5519 just put them in the ops vector. If the LHS is reassociable, 5520 linearize it. If both are reassociable, then linearize the RHS 5521 and the LHS. */ 5522 5523 if (!binlhsisreassoc) 5524 { 5525 /* If this is not an associative operation like division, give up. */ 5526 if (!is_associative) 5527 { 5528 add_to_ops_vec (ops, binrhs); 5529 return; 5530 } 5531 5532 if (!binrhsisreassoc) 5533 { 5534 bool swap = false; 5535 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef)) 5536 /* If we add ops for the rhs we expect to be able to recurse 5537 to it via the lhs during expression rewrite so swap 5538 operands. */ 5539 swap = true; 5540 else 5541 add_to_ops_vec (ops, binrhs); 5542 5543 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef)) 5544 add_to_ops_vec (ops, binlhs); 5545 5546 if (!swap) 5547 return; 5548 } 5549 5550 if (dump_file && (dump_flags & TDF_DETAILS)) 5551 { 5552 fprintf (dump_file, "swapping operands of "); 5553 print_gimple_stmt (dump_file, stmt, 0); 5554 } 5555 5556 swap_ssa_operands (stmt, 5557 gimple_assign_rhs1_ptr (stmt), 5558 gimple_assign_rhs2_ptr (stmt)); 5559 update_stmt (stmt); 5560 5561 if (dump_file && (dump_flags & TDF_DETAILS)) 5562 { 5563 fprintf (dump_file, " is now "); 5564 print_gimple_stmt (dump_file, stmt, 0); 5565 } 5566 if (!binrhsisreassoc) 5567 return; 5568 5569 /* We want to make it so the lhs is always the reassociative op, 5570 so swap. */ 5571 std::swap (binlhs, binrhs); 5572 } 5573 else if (binrhsisreassoc) 5574 { 5575 linearize_expr (stmt); 5576 binlhs = gimple_assign_rhs1 (stmt); 5577 binrhs = gimple_assign_rhs2 (stmt); 5578 } 5579 5580 gcc_assert (TREE_CODE (binrhs) != SSA_NAME 5581 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), 5582 rhscode, loop)); 5583 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs), 5584 is_associative, set_visited); 5585 5586 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef)) 5587 add_to_ops_vec (ops, binrhs); 5588 } 5589 5590 /* Repropagate the negates back into subtracts, since no other pass 5591 currently does it.
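For instance, t_5 = -x_3; _7 = y_2 + t_5; is rewritten into _7 = y_2 - x_3; a negate feeding the first operand of a subtraction is instead pushed further down, as described in the code below.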
*/ 5592 5593 static void 5594 repropagate_negates (void) 5595 { 5596 unsigned int i = 0; 5597 tree negate; 5598 5599 FOR_EACH_VEC_ELT (plus_negates, i, negate) 5600 { 5601 gimple *user = get_single_immediate_use (negate); 5602 if (!user || !is_gimple_assign (user)) 5603 continue; 5604 5605 tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate)); 5606 if (TREE_CODE (negateop) == SSA_NAME 5607 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop)) 5608 continue; 5609 5610 /* The negate operand can be either operand of a PLUS_EXPR 5611 (it can be the LHS if the RHS is a constant for example). 5612 5613 Force the negate operand to the RHS of the PLUS_EXPR, then 5614 transform the PLUS_EXPR into a MINUS_EXPR. */ 5615 if (gimple_assign_rhs_code (user) == PLUS_EXPR) 5616 { 5617 /* If the negated operand appears on the LHS of the 5618 PLUS_EXPR, exchange the operands of the PLUS_EXPR 5619 to force the negated operand to the RHS of the PLUS_EXPR. */ 5620 if (gimple_assign_rhs1 (user) == negate) 5621 { 5622 swap_ssa_operands (user, 5623 gimple_assign_rhs1_ptr (user), 5624 gimple_assign_rhs2_ptr (user)); 5625 } 5626 5627 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace 5628 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */ 5629 if (gimple_assign_rhs2 (user) == negate) 5630 { 5631 tree rhs1 = gimple_assign_rhs1 (user); 5632 gimple_stmt_iterator gsi = gsi_for_stmt (user); 5633 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, 5634 negateop); 5635 update_stmt (user); 5636 } 5637 } 5638 else if (gimple_assign_rhs_code (user) == MINUS_EXPR) 5639 { 5640 if (gimple_assign_rhs1 (user) == negate) 5641 { 5642 /* We have 5643 x = -negateop 5644 y = x - b 5645 which we transform into 5646 x = negateop + b 5647 y = -x . 5648 This pushes down the negate which we possibly can merge 5649 into some other operation, hence insert it into the 5650 plus_negates vector. */ 5651 gimple *feed = SSA_NAME_DEF_STMT (negate); 5652 tree b = gimple_assign_rhs2 (user); 5653 gimple_stmt_iterator gsi = gsi_for_stmt (feed); 5654 gimple_stmt_iterator gsi2 = gsi_for_stmt (user); 5655 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed))); 5656 gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b); 5657 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); 5658 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x); 5659 user = gsi_stmt (gsi2); 5660 update_stmt (user); 5661 reassoc_remove_stmt (&gsi); 5662 release_defs (feed); 5663 plus_negates.safe_push (gimple_assign_lhs (user)); 5664 } 5665 else 5666 { 5667 /* Transform "x = -negateop; y = b - x" into "y = b + negateop", 5668 getting rid of one operation. */ 5669 tree rhs1 = gimple_assign_rhs1 (user); 5670 gimple_stmt_iterator gsi = gsi_for_stmt (user); 5671 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop); 5672 update_stmt (gsi_stmt (gsi)); 5673 } 5674 } 5675 } 5676 } 5677 5678 /* Returns true if OP is of a type for which we can do reassociation. 5679 That is for integral or non-saturating fixed-point types, and for 5680 floating point type when associative-math is enabled. */ 5681 5682 static bool 5683 can_reassociate_p (tree op) 5684 { 5685 tree type = TREE_TYPE (op); 5686 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op)) 5687 return false; 5688 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)) 5689 || NON_SAT_FIXED_POINT_TYPE_P (type) 5690 || (flag_associative_math && FLOAT_TYPE_P (type))) 5691 return true; 5692 return false; 5693 } 5694 5695 /* Break up subtract operations in block BB. 
5696 5697 We do this top down because we don't know whether the subtract is 5698 part of a possible chain of reassociation except at the top. 5699 5700 IE given 5701 d = f + g 5702 c = a + e 5703 b = c - d 5704 q = b - r 5705 k = t - q 5706 5707 we want to break up k = t - q, but we won't until we've transformed q 5708 = b - r, which won't be broken up until we transform b = c - d. 5709 5710 En passant, clear the GIMPLE visited flag on every statement 5711 and set UIDs within each basic block. */ 5712 5713 static void 5714 break_up_subtract_bb (basic_block bb) 5715 { 5716 gimple_stmt_iterator gsi; 5717 basic_block son; 5718 unsigned int uid = 1; 5719 5720 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 5721 { 5722 gimple *stmt = gsi_stmt (gsi); 5723 gimple_set_visited (stmt, false); 5724 gimple_set_uid (stmt, uid++); 5725 5726 if (!is_gimple_assign (stmt) 5727 || !can_reassociate_p (gimple_assign_lhs (stmt))) 5728 continue; 5729 5730 /* Look for simple gimple subtract operations. */ 5731 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR) 5732 { 5733 if (!can_reassociate_p (gimple_assign_rhs1 (stmt)) 5734 || !can_reassociate_p (gimple_assign_rhs2 (stmt))) 5735 continue; 5736 5737 /* Check for a subtract used only in an addition. If this 5738 is the case, transform it into add of a negate for better 5739 reassociation. IE transform C = A-B into C = A + -B if C 5740 is only used in an addition. */ 5741 if (should_break_up_subtract (stmt)) 5742 break_up_subtract (stmt, &gsi); 5743 } 5744 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR 5745 && can_reassociate_p (gimple_assign_rhs1 (stmt))) 5746 plus_negates.safe_push (gimple_assign_lhs (stmt)); 5747 } 5748 for (son = first_dom_son (CDI_DOMINATORS, bb); 5749 son; 5750 son = next_dom_son (CDI_DOMINATORS, son)) 5751 break_up_subtract_bb (son); 5752 } 5753 5754 /* Used for repeated factor analysis. */ 5755 struct repeat_factor 5756 { 5757 /* An SSA name that occurs in a multiply chain. */ 5758 tree factor; 5759 5760 /* Cached rank of the factor. */ 5761 unsigned rank; 5762 5763 /* Number of occurrences of the factor in the chain. */ 5764 HOST_WIDE_INT count; 5765 5766 /* An SSA name representing the product of this factor and 5767 all factors appearing later in the repeated factor vector. */ 5768 tree repr; 5769 }; 5770 5771 5772 static vec<repeat_factor> repeat_factor_vec; 5773 5774 /* Used for sorting the repeat factor vector. Sort primarily by 5775 ascending occurrence count, secondarily by descending rank. */ 5776 5777 static int 5778 compare_repeat_factors (const void *x1, const void *x2) 5779 { 5780 const repeat_factor *rf1 = (const repeat_factor *) x1; 5781 const repeat_factor *rf2 = (const repeat_factor *) x2; 5782 5783 if (rf1->count != rf2->count) 5784 return rf1->count - rf2->count; 5785 5786 return rf2->rank - rf1->rank; 5787 } 5788 5789 /* Look for repeated operands in OPS in the multiply tree rooted at 5790 STMT. Replace them with an optimal sequence of multiplies and powi 5791 builtin calls, and remove the used operands from OPS. Return an 5792 SSA name representing the value of the replacement sequence. 
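
   For illustration only (hypothetical operand and temporary names): if
   STMT is the root of t = x * x * x * y, OPS holds { x, x, x, y }; the
   three copies of x are removed from OPS and replaced by a call

     powtmp = __builtin_powi (x, 3);

   whose result (powtmp) is returned, while { y } stays in OPS for the
   normal reassociation rewrite.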
*/ 5793 5794 static tree 5795 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops) 5796 { 5797 unsigned i, j, vec_len; 5798 int ii; 5799 operand_entry *oe; 5800 repeat_factor *rf1, *rf2; 5801 repeat_factor rfnew; 5802 tree result = NULL_TREE; 5803 tree target_ssa, iter_result; 5804 tree type = TREE_TYPE (gimple_get_lhs (stmt)); 5805 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI); 5806 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 5807 gimple *mul_stmt, *pow_stmt; 5808 5809 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and 5810 target. */ 5811 if (!powi_fndecl) 5812 return NULL_TREE; 5813 5814 /* Allocate the repeated factor vector. */ 5815 repeat_factor_vec.create (10); 5816 5817 /* Scan the OPS vector for all SSA names in the product and build 5818 up a vector of occurrence counts for each factor. */ 5819 FOR_EACH_VEC_ELT (*ops, i, oe) 5820 { 5821 if (TREE_CODE (oe->op) == SSA_NAME) 5822 { 5823 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 5824 { 5825 if (rf1->factor == oe->op) 5826 { 5827 rf1->count += oe->count; 5828 break; 5829 } 5830 } 5831 5832 if (j >= repeat_factor_vec.length ()) 5833 { 5834 rfnew.factor = oe->op; 5835 rfnew.rank = oe->rank; 5836 rfnew.count = oe->count; 5837 rfnew.repr = NULL_TREE; 5838 repeat_factor_vec.safe_push (rfnew); 5839 } 5840 } 5841 } 5842 5843 /* Sort the repeated factor vector by (a) increasing occurrence count, 5844 and (b) decreasing rank. */ 5845 repeat_factor_vec.qsort (compare_repeat_factors); 5846 5847 /* It is generally best to combine as many base factors as possible 5848 into a product before applying __builtin_powi to the result. 5849 However, the sort order chosen for the repeated factor vector 5850 allows us to cache partial results for the product of the base 5851 factors for subsequent use. When we already have a cached partial 5852 result from a previous iteration, it is best to make use of it 5853 before looking for another __builtin_pow opportunity. 5854 5855 As an example, consider x * x * y * y * y * z * z * z * z. 5856 We want to first compose the product x * y * z, raise it to the 5857 second power, then multiply this by y * z, and finally multiply 5858 by z. This can be done in 5 multiplies provided we cache y * z 5859 for use in both expressions: 5860 5861 t1 = y * z 5862 t2 = t1 * x 5863 t3 = t2 * t2 5864 t4 = t1 * t3 5865 result = t4 * z 5866 5867 If we instead ignored the cached y * z and first multiplied by 5868 the __builtin_pow opportunity z * z, we would get the inferior: 5869 5870 t1 = y * z 5871 t2 = t1 * x 5872 t3 = t2 * t2 5873 t4 = z * z 5874 t5 = t3 * t4 5875 result = t5 * y */ 5876 5877 vec_len = repeat_factor_vec.length (); 5878 5879 /* Repeatedly look for opportunities to create a builtin_powi call. */ 5880 while (true) 5881 { 5882 HOST_WIDE_INT power; 5883 5884 /* First look for the largest cached product of factors from 5885 preceding iterations. If found, create a builtin_powi for 5886 it if the minimum occurrence count for its factors is at 5887 least 2, or just use this cached product as our next 5888 multiplicand if the minimum occurrence count is 1. 
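
         (In the x * x * y * y * y * z * z * z * z example above, once the
         __builtin_powi call for (x * y * z)^2 has been emitted, the cached
         products y * z and then z each have a remaining count of 1 and are
         simply multiplied into the result on subsequent iterations.)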
         */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
        {
          if (rf1->repr && rf1->count > 0)
            break;
        }

      if (j < vec_len)
        {
          power = rf1->count;

          if (power == 1)
            {
              iter_result = rf1->repr;

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor *rf;
                  fputs ("Multiplying by cached product ", dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fputs ("\n", dump_file);
                }
            }
          else
            {
              iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
              pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                            build_int_cst (integer_type_node,
                                                           power));
              gimple_call_set_lhs (pow_stmt, iter_result);
              gimple_set_location (pow_stmt, gimple_location (stmt));
              gimple_set_uid (pow_stmt, gimple_uid (stmt));
              gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor *rf;
                  fputs ("Building __builtin_pow call for cached product (",
                         dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
                           power);
                }
            }
        }
      else
        {
          /* Otherwise, find the first factor in the repeated factor
             vector whose occurrence count is at least 2.  If no such
             factor exists, there are no builtin_powi opportunities
             remaining.  */
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->count >= 2)
                break;
            }

          if (j >= vec_len)
            break;

          power = rf1->count;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              unsigned elt;
              repeat_factor *rf;
              fputs ("Building __builtin_pow call for (", dump_file);
              for (elt = j; elt < vec_len; elt++)
                {
                  rf = &repeat_factor_vec[elt];
                  print_generic_expr (dump_file, rf->factor);
                  if (elt < vec_len - 1)
                    fputs (" * ", dump_file);
                }
              fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
            }

          reassociate_stats.pows_created++;

          /* Visit each element of the vector in reverse order (so that
             high-occurrence elements are visited first, and within the
             same occurrence count, lower-ranked elements are visited
             first).  Form a linear product of all elements in this order
             whose occurrence count is at least that of element J.
             Record the SSA name representing the product of each element
             with all subsequent elements in the vector.  */
          if (j == vec_len - 1)
            rf1->repr = rf1->factor;
          else
            {
              for (ii = vec_len - 2; ii >= (int)j; ii--)
                {
                  tree op1, op2;

                  rf1 = &repeat_factor_vec[ii];
                  rf2 = &repeat_factor_vec[ii + 1];

                  /* Init the last factor's representative to be itself.
*/ 6000 if (!rf2->repr) 6001 rf2->repr = rf2->factor; 6002 6003 op1 = rf1->factor; 6004 op2 = rf2->repr; 6005 6006 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow"); 6007 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR, 6008 op1, op2); 6009 gimple_set_location (mul_stmt, gimple_location (stmt)); 6010 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 6011 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); 6012 rf1->repr = target_ssa; 6013 6014 /* Don't reprocess the multiply we just introduced. */ 6015 gimple_set_visited (mul_stmt, true); 6016 } 6017 } 6018 6019 /* Form a call to __builtin_powi for the maximum product 6020 just formed, raised to the power obtained earlier. */ 6021 rf1 = &repeat_factor_vec[j]; 6022 iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); 6023 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 6024 build_int_cst (integer_type_node, 6025 power)); 6026 gimple_call_set_lhs (pow_stmt, iter_result); 6027 gimple_set_location (pow_stmt, gimple_location (stmt)); 6028 gimple_set_uid (pow_stmt, gimple_uid (stmt)); 6029 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); 6030 } 6031 6032 /* If we previously formed at least one other builtin_powi call, 6033 form the product of this one and those others. */ 6034 if (result) 6035 { 6036 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow"); 6037 mul_stmt = gimple_build_assign (new_result, MULT_EXPR, 6038 result, iter_result); 6039 gimple_set_location (mul_stmt, gimple_location (stmt)); 6040 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 6041 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); 6042 gimple_set_visited (mul_stmt, true); 6043 result = new_result; 6044 } 6045 else 6046 result = iter_result; 6047 6048 /* Decrement the occurrence count of each element in the product 6049 by the count found above, and remove this many copies of each 6050 factor from OPS. */ 6051 for (i = j; i < vec_len; i++) 6052 { 6053 unsigned k = power; 6054 unsigned n; 6055 6056 rf1 = &repeat_factor_vec[i]; 6057 rf1->count -= power; 6058 6059 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe) 6060 { 6061 if (oe->op == rf1->factor) 6062 { 6063 if (oe->count <= k) 6064 { 6065 ops->ordered_remove (n); 6066 k -= oe->count; 6067 6068 if (k == 0) 6069 break; 6070 } 6071 else 6072 { 6073 oe->count -= k; 6074 break; 6075 } 6076 } 6077 } 6078 } 6079 } 6080 6081 /* At this point all elements in the repeated factor vector have a 6082 remaining occurrence count of 0 or 1, and those with a count of 1 6083 don't have cached representatives. Re-sort the ops vector and 6084 clean up. */ 6085 ops->qsort (sort_by_operand_rank); 6086 repeat_factor_vec.release (); 6087 6088 /* Return the final product computed herein. Note that there may 6089 still be some elements with single occurrence count left in OPS; 6090 those will be handled by the normal reassociation logic. */ 6091 return result; 6092 } 6093 6094 /* Attempt to optimize 6095 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or 6096 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. 
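
   For example, 4.0 * copysign (2.0, y) becomes copysign (8.0, y); with
   -4.0 in place of 4.0, the folded copysign result is additionally
   negated.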
*/

static void
attempt_builtin_copysign (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int length = ops->length ();
  tree cst = ops->last ()->op;

  if (length == 1 || TREE_CODE (cst) != REAL_CST)
    return;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME
          && has_single_use (oe->op))
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
          if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
            {
              tree arg0, arg1;
              switch (gimple_call_combined_fn (old_call))
                {
                CASE_CFN_COPYSIGN:
                CASE_CFN_COPYSIGN_FN:
                  arg0 = gimple_call_arg (old_call, 0);
                  arg1 = gimple_call_arg (old_call, 1);
                  /* The first argument of copysign must be a constant,
                     otherwise there's nothing to do.  */
                  if (TREE_CODE (arg0) == REAL_CST)
                    {
                      tree type = TREE_TYPE (arg0);
                      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
                      /* If we couldn't fold to a single constant, skip it.
                         That happens e.g. for inexact multiplication when
                         -frounding-math is in effect.  */
                      if (mul == NULL_TREE)
                        break;
                      /* Instead of adjusting OLD_CALL, build a new call so
                         that we neither leak the LHS nor keep bogus debug
                         statements around.  DCE will clean up the old
                         call.  */
                      gcall *new_call;
                      if (gimple_call_internal_p (old_call))
                        new_call = gimple_build_call_internal
                          (IFN_COPYSIGN, 2, mul, arg1);
                      else
                        new_call = gimple_build_call
                          (gimple_call_fndecl (old_call), 2, mul, arg1);
                      tree lhs = make_ssa_name (type);
                      gimple_call_set_lhs (new_call, lhs);
                      gimple_set_location (new_call,
                                           gimple_location (old_call));
                      insert_stmt_after (new_call, old_call);
                      /* We've used the constant, get rid of it.  */
                      ops->pop ();
                      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
                      /* Handle the CST1 < 0 case by negating the result.  */
                      if (cst1_neg)
                        {
                          tree negrhs = make_ssa_name (TREE_TYPE (lhs));
                          gimple *negate_stmt
                            = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
                          insert_stmt_after (negate_stmt, new_call);
                          oe->op = negrhs;
                        }
                      else
                        oe->op = lhs;
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        {
                          fprintf (dump_file, "Optimizing copysign: ");
                          print_generic_expr (dump_file, cst);
                          fprintf (dump_file, " * COPYSIGN (");
                          print_generic_expr (dump_file, arg0);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1);
                          fprintf (dump_file, ") into %sCOPYSIGN (",
                                   cst1_neg ? "-" : "");
                          print_generic_expr (dump_file, mul);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1);
                          fprintf (dump_file, ")\n");
                        }
                      return;
                    }
                  break;
                default:
                  break;
                }
            }
        }
    }
}

/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.
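
   For instance (hypothetical SSA names), x_1 = a_2 * b_3 becomes the copy
   x_1 = NEW_RHS; any already-visited statements that fed the old RHS1 and
   are now unused are removed via remove_visited_stmt_chain.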
*/ 6191 6192 static void 6193 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs) 6194 { 6195 tree rhs1; 6196 6197 if (dump_file && (dump_flags & TDF_DETAILS)) 6198 { 6199 fprintf (dump_file, "Transforming "); 6200 print_gimple_stmt (dump_file, stmt, 0); 6201 } 6202 6203 rhs1 = gimple_assign_rhs1 (stmt); 6204 gimple_assign_set_rhs_from_tree (gsi, new_rhs); 6205 update_stmt (stmt); 6206 remove_visited_stmt_chain (rhs1); 6207 6208 if (dump_file && (dump_flags & TDF_DETAILS)) 6209 { 6210 fprintf (dump_file, " into "); 6211 print_gimple_stmt (dump_file, stmt, 0); 6212 } 6213 } 6214 6215 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */ 6216 6217 static void 6218 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt, 6219 tree rhs1, tree rhs2) 6220 { 6221 if (dump_file && (dump_flags & TDF_DETAILS)) 6222 { 6223 fprintf (dump_file, "Transforming "); 6224 print_gimple_stmt (dump_file, stmt, 0); 6225 } 6226 6227 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2); 6228 update_stmt (gsi_stmt (*gsi)); 6229 remove_visited_stmt_chain (rhs1); 6230 6231 if (dump_file && (dump_flags & TDF_DETAILS)) 6232 { 6233 fprintf (dump_file, " into "); 6234 print_gimple_stmt (dump_file, stmt, 0); 6235 } 6236 } 6237 6238 /* Reassociate expressions in basic block BB and its post-dominator as 6239 children. 6240 6241 Bubble up return status from maybe_optimize_range_tests. */ 6242 6243 static bool 6244 reassociate_bb (basic_block bb) 6245 { 6246 gimple_stmt_iterator gsi; 6247 basic_block son; 6248 gimple *stmt = last_stmt (bb); 6249 bool cfg_cleanup_needed = false; 6250 6251 if (stmt && !gimple_visited_p (stmt)) 6252 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt); 6253 6254 bool do_prev = false; 6255 for (gsi = gsi_last_bb (bb); 6256 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0) 6257 { 6258 do_prev = true; 6259 stmt = gsi_stmt (gsi); 6260 6261 if (is_gimple_assign (stmt) 6262 && !stmt_could_throw_p (cfun, stmt)) 6263 { 6264 tree lhs, rhs1, rhs2; 6265 enum tree_code rhs_code = gimple_assign_rhs_code (stmt); 6266 6267 /* If this was part of an already processed statement, 6268 we don't need to touch it again. */ 6269 if (gimple_visited_p (stmt)) 6270 { 6271 /* This statement might have become dead because of previous 6272 reassociations. */ 6273 if (has_zero_uses (gimple_get_lhs (stmt))) 6274 { 6275 reassoc_remove_stmt (&gsi); 6276 release_defs (stmt); 6277 /* We might end up removing the last stmt above which 6278 places the iterator to the end of the sequence. 6279 Reset it to the last stmt in this case and make sure 6280 we don't do gsi_prev in that case. */ 6281 if (gsi_end_p (gsi)) 6282 { 6283 gsi = gsi_last_bb (bb); 6284 do_prev = false; 6285 } 6286 } 6287 continue; 6288 } 6289 6290 /* If this is not a gimple binary expression, there is 6291 nothing for us to do with it. */ 6292 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) 6293 continue; 6294 6295 lhs = gimple_assign_lhs (stmt); 6296 rhs1 = gimple_assign_rhs1 (stmt); 6297 rhs2 = gimple_assign_rhs2 (stmt); 6298 6299 /* For non-bit or min/max operations we can't associate 6300 all types. Verify that here. 
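
             For example, a signed integer PLUS_EXPR whose type has
             undefined overflow fails the can_reassociate_p test and the
             statement is skipped, while the bit and MIN/MAX codes bypass
             that test entirely.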
*/ 6301 if (rhs_code != BIT_IOR_EXPR 6302 && rhs_code != BIT_AND_EXPR 6303 && rhs_code != BIT_XOR_EXPR 6304 && rhs_code != MIN_EXPR 6305 && rhs_code != MAX_EXPR 6306 && (!can_reassociate_p (lhs) 6307 || !can_reassociate_p (rhs1) 6308 || !can_reassociate_p (rhs2))) 6309 continue; 6310 6311 if (associative_tree_code (rhs_code)) 6312 { 6313 auto_vec<operand_entry *> ops; 6314 tree powi_result = NULL_TREE; 6315 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs)); 6316 6317 /* There may be no immediate uses left by the time we 6318 get here because we may have eliminated them all. */ 6319 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs)) 6320 continue; 6321 6322 gimple_set_visited (stmt, true); 6323 linearize_expr_tree (&ops, stmt, true, true); 6324 ops.qsort (sort_by_operand_rank); 6325 int orig_len = ops.length (); 6326 optimize_ops_list (rhs_code, &ops); 6327 if (undistribute_ops_list (rhs_code, &ops, 6328 loop_containing_stmt (stmt))) 6329 { 6330 ops.qsort (sort_by_operand_rank); 6331 optimize_ops_list (rhs_code, &ops); 6332 } 6333 if (undistribute_bitref_for_vector (rhs_code, &ops, 6334 loop_containing_stmt (stmt))) 6335 { 6336 ops.qsort (sort_by_operand_rank); 6337 optimize_ops_list (rhs_code, &ops); 6338 } 6339 if (rhs_code == PLUS_EXPR 6340 && transform_add_to_multiply (&ops)) 6341 ops.qsort (sort_by_operand_rank); 6342 6343 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR) 6344 { 6345 if (is_vector) 6346 optimize_vec_cond_expr (rhs_code, &ops); 6347 else 6348 optimize_range_tests (rhs_code, &ops, NULL); 6349 } 6350 6351 if (rhs_code == MULT_EXPR && !is_vector) 6352 { 6353 attempt_builtin_copysign (&ops); 6354 6355 if (reassoc_insert_powi_p 6356 && flag_unsafe_math_optimizations) 6357 powi_result = attempt_builtin_powi (stmt, &ops); 6358 } 6359 6360 operand_entry *last; 6361 bool negate_result = false; 6362 if (ops.length () > 1 6363 && rhs_code == MULT_EXPR) 6364 { 6365 last = ops.last (); 6366 if ((integer_minus_onep (last->op) 6367 || real_minus_onep (last->op)) 6368 && !HONOR_SNANS (TREE_TYPE (lhs)) 6369 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs)) 6370 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs)))) 6371 { 6372 ops.pop (); 6373 negate_result = true; 6374 } 6375 } 6376 6377 tree new_lhs = lhs; 6378 /* If the operand vector is now empty, all operands were 6379 consumed by the __builtin_powi optimization. */ 6380 if (ops.length () == 0) 6381 transform_stmt_to_copy (&gsi, stmt, powi_result); 6382 else if (ops.length () == 1) 6383 { 6384 tree last_op = ops.last ()->op; 6385 6386 /* If the stmt that defines operand has to be inserted, insert it 6387 before the use. */ 6388 if (ops.last ()->stmt_to_insert) 6389 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert); 6390 if (powi_result) 6391 transform_stmt_to_multiply (&gsi, stmt, last_op, 6392 powi_result); 6393 else 6394 transform_stmt_to_copy (&gsi, stmt, last_op); 6395 } 6396 else 6397 { 6398 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); 6399 int ops_num = ops.length (); 6400 int width; 6401 6402 /* For binary bit operations, if there are at least 3 6403 operands and the last operand in OPS is a constant, 6404 move it to the front. This helps ensure that we generate 6405 (X & Y) & C rather than (X & C) & Y. The former will 6406 often match a canonical bit test when we get to RTL. 
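
                 As a sketch, with OPS = { X, Y, C } the swap below yields
                 { C, Y, X }, so that, as described above, the rewritten
                 tree becomes (X & Y) & C rather than (X & C) & Y.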
*/ 6407 if (ops.length () > 2 6408 && (rhs_code == BIT_AND_EXPR 6409 || rhs_code == BIT_IOR_EXPR 6410 || rhs_code == BIT_XOR_EXPR) 6411 && TREE_CODE (ops.last ()->op) == INTEGER_CST) 6412 std::swap (*ops[0], *ops[ops_num - 1]); 6413 6414 /* Only rewrite the expression tree to parallel in the 6415 last reassoc pass to avoid useless work back-and-forth 6416 with initial linearization. */ 6417 if (!reassoc_insert_powi_p 6418 && ops.length () > 3 6419 && (width = get_reassociation_width (ops_num, rhs_code, 6420 mode)) > 1) 6421 { 6422 if (dump_file && (dump_flags & TDF_DETAILS)) 6423 fprintf (dump_file, 6424 "Width = %d was chosen for reassociation\n", 6425 width); 6426 rewrite_expr_tree_parallel (as_a <gassign *> (stmt), 6427 width, ops); 6428 } 6429 else 6430 { 6431 /* When there are three operands left, we want 6432 to make sure the ones that get the double 6433 binary op are chosen wisely. */ 6434 int len = ops.length (); 6435 if (len >= 3) 6436 swap_ops_for_binary_stmt (ops, len - 3, stmt); 6437 6438 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops, 6439 powi_result != NULL 6440 || negate_result, 6441 len != orig_len); 6442 } 6443 6444 /* If we combined some repeated factors into a 6445 __builtin_powi call, multiply that result by the 6446 reassociated operands. */ 6447 if (powi_result) 6448 { 6449 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs); 6450 tree type = TREE_TYPE (lhs); 6451 tree target_ssa = make_temp_ssa_name (type, NULL, 6452 "reassocpow"); 6453 gimple_set_lhs (lhs_stmt, target_ssa); 6454 update_stmt (lhs_stmt); 6455 if (lhs != new_lhs) 6456 { 6457 target_ssa = new_lhs; 6458 new_lhs = lhs; 6459 } 6460 mul_stmt = gimple_build_assign (lhs, MULT_EXPR, 6461 powi_result, target_ssa); 6462 gimple_set_location (mul_stmt, gimple_location (stmt)); 6463 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 6464 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT); 6465 } 6466 } 6467 6468 if (negate_result) 6469 { 6470 stmt = SSA_NAME_DEF_STMT (lhs); 6471 tree tmp = make_ssa_name (TREE_TYPE (lhs)); 6472 gimple_set_lhs (stmt, tmp); 6473 if (lhs != new_lhs) 6474 tmp = new_lhs; 6475 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR, 6476 tmp); 6477 gimple_set_uid (neg_stmt, gimple_uid (stmt)); 6478 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT); 6479 update_stmt (stmt); 6480 } 6481 } 6482 } 6483 } 6484 for (son = first_dom_son (CDI_POST_DOMINATORS, bb); 6485 son; 6486 son = next_dom_son (CDI_POST_DOMINATORS, son)) 6487 cfg_cleanup_needed |= reassociate_bb (son); 6488 6489 return cfg_cleanup_needed; 6490 } 6491 6492 /* Add jumps around shifts for range tests turned into bit tests. 
6493 For each SSA_NAME VAR we have code like: 6494 VAR = ...; // final stmt of range comparison 6495 // bit test here...; 6496 OTHERVAR = ...; // final stmt of the bit test sequence 6497 RES = VAR | OTHERVAR; 6498 Turn the above into: 6499 VAR = ...; 6500 if (VAR != 0) 6501 goto <l3>; 6502 else 6503 goto <l2>; 6504 <l2>: 6505 // bit test here...; 6506 OTHERVAR = ...; 6507 <l3>: 6508 # RES = PHI<1(l1), OTHERVAR(l2)>; */ 6509 6510 static void 6511 branch_fixup (void) 6512 { 6513 tree var; 6514 unsigned int i; 6515 6516 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var) 6517 { 6518 gimple *def_stmt = SSA_NAME_DEF_STMT (var); 6519 gimple *use_stmt; 6520 use_operand_p use; 6521 bool ok = single_imm_use (var, &use, &use_stmt); 6522 gcc_assert (ok 6523 && is_gimple_assign (use_stmt) 6524 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR 6525 && gimple_bb (def_stmt) == gimple_bb (use_stmt)); 6526 6527 basic_block cond_bb = gimple_bb (def_stmt); 6528 basic_block then_bb = split_block (cond_bb, def_stmt)->dest; 6529 basic_block merge_bb = split_block (then_bb, use_stmt)->dest; 6530 6531 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt); 6532 gimple *g = gimple_build_cond (NE_EXPR, var, 6533 build_zero_cst (TREE_TYPE (var)), 6534 NULL_TREE, NULL_TREE); 6535 location_t loc = gimple_location (use_stmt); 6536 gimple_set_location (g, loc); 6537 gsi_insert_after (&gsi, g, GSI_NEW_STMT); 6538 6539 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE); 6540 etrue->probability = profile_probability::even (); 6541 edge efalse = find_edge (cond_bb, then_bb); 6542 efalse->flags = EDGE_FALSE_VALUE; 6543 efalse->probability -= etrue->probability; 6544 then_bb->count -= etrue->count (); 6545 6546 tree othervar = NULL_TREE; 6547 if (gimple_assign_rhs1 (use_stmt) == var) 6548 othervar = gimple_assign_rhs2 (use_stmt); 6549 else if (gimple_assign_rhs2 (use_stmt) == var) 6550 othervar = gimple_assign_rhs1 (use_stmt); 6551 else 6552 gcc_unreachable (); 6553 tree lhs = gimple_assign_lhs (use_stmt); 6554 gphi *phi = create_phi_node (lhs, merge_bb); 6555 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc); 6556 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc); 6557 gsi = gsi_for_stmt (use_stmt); 6558 gsi_remove (&gsi, true); 6559 6560 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb); 6561 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb); 6562 } 6563 reassoc_branch_fixups.release (); 6564 } 6565 6566 void dump_ops_vector (FILE *file, vec<operand_entry *> ops); 6567 void debug_ops_vector (vec<operand_entry *> ops); 6568 6569 /* Dump the operand entry vector OPS to FILE. */ 6570 6571 void 6572 dump_ops_vector (FILE *file, vec<operand_entry *> ops) 6573 { 6574 operand_entry *oe; 6575 unsigned int i; 6576 6577 FOR_EACH_VEC_ELT (ops, i, oe) 6578 { 6579 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank); 6580 print_generic_expr (file, oe->op); 6581 fprintf (file, "\n"); 6582 } 6583 } 6584 6585 /* Dump the operand entry vector OPS to STDERR. */ 6586 6587 DEBUG_FUNCTION void 6588 debug_ops_vector (vec<operand_entry *> ops) 6589 { 6590 dump_ops_vector (stderr, ops); 6591 } 6592 6593 /* Bubble up return status from reassociate_bb. */ 6594 6595 static bool 6596 do_reassoc (void) 6597 { 6598 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); 6599 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)); 6600 } 6601 6602 /* Initialize the reassociation pass. 
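
   A small worked sketch of the rank seeding done below: RANK starts at 2;
   with two SSA default definitions (e.g. two parameters) they receive
   ranks 3 and 4, and the basic blocks then receive ranks 5 << 16,
   6 << 16, ... following the reverse post order computed here.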
*/ 6603 6604 static void 6605 init_reassoc (void) 6606 { 6607 int i; 6608 int64_t rank = 2; 6609 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); 6610 6611 /* Find the loops, so that we can prevent moving calculations in 6612 them. */ 6613 loop_optimizer_init (AVOID_CFG_MODIFICATIONS); 6614 6615 memset (&reassociate_stats, 0, sizeof (reassociate_stats)); 6616 6617 next_operand_entry_id = 0; 6618 6619 /* Reverse RPO (Reverse Post Order) will give us something where 6620 deeper loops come later. */ 6621 pre_and_rev_post_order_compute (NULL, bbs, false); 6622 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun)); 6623 operand_rank = new hash_map<tree, int64_t>; 6624 6625 /* Give each default definition a distinct rank. This includes 6626 parameters and the static chain. Walk backwards over all 6627 SSA names so that we get proper rank ordering according 6628 to tree_swap_operands_p. */ 6629 for (i = num_ssa_names - 1; i > 0; --i) 6630 { 6631 tree name = ssa_name (i); 6632 if (name && SSA_NAME_IS_DEFAULT_DEF (name)) 6633 insert_operand_rank (name, ++rank); 6634 } 6635 6636 /* Set up rank for each BB */ 6637 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++) 6638 bb_rank[bbs[i]] = ++rank << 16; 6639 6640 free (bbs); 6641 calculate_dominance_info (CDI_POST_DOMINATORS); 6642 plus_negates = vNULL; 6643 } 6644 6645 /* Cleanup after the reassociation pass, and print stats if 6646 requested. */ 6647 6648 static void 6649 fini_reassoc (void) 6650 { 6651 statistics_counter_event (cfun, "Linearized", 6652 reassociate_stats.linearized); 6653 statistics_counter_event (cfun, "Constants eliminated", 6654 reassociate_stats.constants_eliminated); 6655 statistics_counter_event (cfun, "Ops eliminated", 6656 reassociate_stats.ops_eliminated); 6657 statistics_counter_event (cfun, "Statements rewritten", 6658 reassociate_stats.rewritten); 6659 statistics_counter_event (cfun, "Built-in pow[i] calls encountered", 6660 reassociate_stats.pows_encountered); 6661 statistics_counter_event (cfun, "Built-in powi calls created", 6662 reassociate_stats.pows_created); 6663 6664 delete operand_rank; 6665 operand_entry_pool.release (); 6666 free (bb_rank); 6667 plus_negates.release (); 6668 free_dominance_info (CDI_POST_DOMINATORS); 6669 loop_optimizer_finalize (); 6670 } 6671 6672 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable 6673 insertion of __builtin_powi calls. 6674 6675 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to 6676 optimization of a gimple conditional. Otherwise returns zero. */ 6677 6678 static unsigned int 6679 execute_reassoc (bool insert_powi_p) 6680 { 6681 reassoc_insert_powi_p = insert_powi_p; 6682 6683 init_reassoc (); 6684 6685 bool cfg_cleanup_needed; 6686 cfg_cleanup_needed = do_reassoc (); 6687 repropagate_negates (); 6688 branch_fixup (); 6689 6690 fini_reassoc (); 6691 return cfg_cleanup_needed ? 
TODO_cleanup_cfg : 0; 6692 } 6693 6694 namespace { 6695 6696 const pass_data pass_data_reassoc = 6697 { 6698 GIMPLE_PASS, /* type */ 6699 "reassoc", /* name */ 6700 OPTGROUP_NONE, /* optinfo_flags */ 6701 TV_TREE_REASSOC, /* tv_id */ 6702 ( PROP_cfg | PROP_ssa ), /* properties_required */ 6703 0, /* properties_provided */ 6704 0, /* properties_destroyed */ 6705 0, /* todo_flags_start */ 6706 TODO_update_ssa_only_virtuals, /* todo_flags_finish */ 6707 }; 6708 6709 class pass_reassoc : public gimple_opt_pass 6710 { 6711 public: 6712 pass_reassoc (gcc::context *ctxt) 6713 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false) 6714 {} 6715 6716 /* opt_pass methods: */ 6717 opt_pass * clone () { return new pass_reassoc (m_ctxt); } 6718 void set_pass_param (unsigned int n, bool param) 6719 { 6720 gcc_assert (n == 0); 6721 insert_powi_p = param; 6722 } 6723 virtual bool gate (function *) { return flag_tree_reassoc != 0; } 6724 virtual unsigned int execute (function *) 6725 { return execute_reassoc (insert_powi_p); } 6726 6727 private: 6728 /* Enable insertion of __builtin_powi calls during execute_reassoc. See 6729 point 3a in the pass header comment. */ 6730 bool insert_powi_p; 6731 }; // class pass_reassoc 6732 6733 } // anon namespace 6734 6735 gimple_opt_pass * 6736 make_pass_reassoc (gcc::context *ctxt) 6737 { 6738 return new pass_reassoc (ctxt); 6739 } 6740