/* Reassociation for trees.
   Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop.h"
#include "flags.h"
#include "tree-ssa.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "builtins.h"
#include "gimplify.h"
#include "case-cfn-macros.h"
#include "tree-ssa-reassoc.h"
#include "tree-ssa-math-opts.h"
#include "gimple-range.h"

/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (They do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_*

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   3a. Combine repeated factors with the same occurrence counts
   into a __builtin_powi call that will later be optimized into
   an optimal number of multiplies.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.
   IE if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1), b (1), c (1), d (2), e (2)


   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op.  So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or less ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */

/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
   point 3a in the pass header comment.  */
static bool reassoc_insert_powi_p;

/* Enable biasing ranks of loop accumulators.  We don't want this before
   vectorization, since it interferes with reduction chains.  */
static bool reassoc_bias_loop_carried_phi_ranks_p;

/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;


static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.
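   (qsort is not a stable sort, so entries that compare equal by rank
   need a deterministic tie-breaker that does not depend on pointer
   values or allocation order.)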
   */
static unsigned int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static int64_t *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, int64_t> *operand_rank;

/* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
   biased.  */
static auto_bitmap biased_names;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static int64_t get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);

/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}

/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)

/* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
   operands to the STMT's left-hand side.  The goal is to preserve bias in code
   like this:

     x_1 = phi(x_0, x_2)
     a = x_1 | 1
     b = a ^ 2
     .MEM = b
     c = b + d
     x_2 = c + e

   That is, we need to preserve bias along single-use chains originating from
   loop-carried phis.  Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
   uses, because only they participate in rank propagation.  */
static bool
propagate_bias_p (gimple *stmt)
{
  use_operand_p use;
  imm_use_iterator use_iter;
  gimple *single_use_stmt = NULL;

  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
    return false;

  FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
    {
      gimple *current_use_stmt = USE_STMT (use);

      if (is_gimple_assign (current_use_stmt)
          && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
        {
          if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
            return false;
          single_use_stmt = current_use_stmt;
        }
    }

  if (single_use_stmt == NULL)
    return false;

  if (gimple_bb (stmt)->loop_father
      != gimple_bb (single_use_stmt)->loop_father)
    return false;

  return true;
}

/* Rank assigned to a phi statement.
   If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static int64_t
phi_rank (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);
  class loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple *use_stmt;

  if (!reassoc_bias_loop_carried_phi_ranks_p)
    return bb_rank[bb->index];

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (arg))
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
          if (gimple_bb (def_stmt)->loop_father == father)
            return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
        }
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}

/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */
static int64_t
propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
{
  int64_t op_rank;

  op_rank = get_rank (op);

  /* Check whether op is biased after the get_rank () call, since it might have
     updated biased_names.  */
  if (TREE_CODE (op) == SSA_NAME
      && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
    {
      if (maybe_biased_p == NULL)
        return rank;
      *maybe_biased_p = true;
    }

  return MAX (rank, op_rank);
}

/* Look up the operand rank structure for expression E.  */

static inline int64_t
find_operand_rank (tree e)
{
  int64_t *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, int64_t rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}

/* Given an expression E, return the rank of the expression.  */

static int64_t
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      int64_t rank;
      tree op;

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          rank = phi_rank (stmt);
          if (rank != bb_rank[gimple_bb (stmt)->index])
            bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
        }

      else if (!is_gimple_assign (stmt))
        rank = bb_rank[gimple_bb (stmt)->index];

      else
        {
          bool biased_p = false;
          bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;

          /* Otherwise, find the maximum rank for the operands.  As an
             exception, remove the bias from loop-carried phis when propagating
             the rank so that dependent operations are not also biased.  */
          /* Simply walk over all SSA uses - this takes advantage of the
             fact that non-SSA operands are is_gimple_min_invariant and
             thus have rank 0.  */
          rank = 0;
          FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
            rank = propagate_rank (rank, op, maybe_biased_p);

          rank += 1;
          if (biased_p)
            bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e);
          fprintf (dump_file, " is %" PRId64 "\n", rank);
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, rank);
      return rank;
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}


/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 4
#define FLOAT_ONE_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    {
      /* Sort -1.0 and 1.0 constants last: while in some cases
         const_binop can't optimize some inexact operations, multiplication
         by -1.0 or 1.0 can always be merged with others.
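         For example, x * 1.0 always folds away, while folding two other
         inexact constants into one may be rejected; keeping 1.0 and -1.0
         at the end lets them merge with whatever the rest folded to.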
         */
      if (real_onep (t) || real_minus_onep (t))
        return FLOAT_ONE_CONST_TYPE;
      return FLOAT_CONST_TYPE;
    }
  else
    return OTHER_CONST_TYPE;
}

/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  if (oeb->rank != oea->rank)
    return oeb->rank > oea->rank ? 1 : -1;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
        return constant_type (oea->op) - constant_type (oeb->op);
      else
        /* To make sorting result stable, we use unique IDs to determine
           order.  */
        return oeb->id > oea->id ? 1 : -1;
    }

  if (TREE_CODE (oea->op) != SSA_NAME)
    {
      if (TREE_CODE (oeb->op) != SSA_NAME)
        return oeb->id > oea->id ? 1 : -1;
      else
        return 1;
    }
  else if (TREE_CODE (oeb->op) != SSA_NAME)
    return -1;

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
    {
      /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
         versions of removed SSA_NAMEs, so if possible, prefer to sort
         based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
         See PR60418.  */
      gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
      gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
      basic_block bba = gimple_bb (stmta);
      basic_block bbb = gimple_bb (stmtb);
      if (bbb != bba)
        {
          /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
             but the other might not.  */
          if (!bba)
            return 1;
          if (!bbb)
            return -1;
          /* If neither is, compare bb_rank.  */
          if (bb_rank[bbb->index] != bb_rank[bba->index])
            return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
        }

      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
      if (da != db)
        return da ? 1 : -1;

      return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
    }

  return oeb->id > oea->id ? 1 : -1;
}

/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  oe->stmt_to_insert = stmt_to_insert;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
                       HOST_WIDE_INT repeat)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  oe->stmt_to_insert = NULL;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}

/* Returns true if we can associate the SSA def OP.
   */

static bool
can_reassociate_op_p (tree op)
{
  if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
    return false;
  /* Make sure asm goto outputs do not participate in reassociation since
     we have no way to find an insertion place after asm goto.  */
  if (TREE_CODE (op) == SSA_NAME
      && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
      && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
    return false;
  return true;
}

/* Returns true if we can reassociate operations of TYPE.
   That is for integral or non-saturating fixed-point types, and for
   floating point type when associative-math is enabled.  */

static bool
can_reassociate_type_p (tree type)
{
  if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))
    return true;
  return false;
}

/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      tree rhs2 = gimple_assign_rhs2 (stmt);
      if (!can_reassociate_op_p (rhs1)
          || (rhs2 && !can_reassociate_op_p (rhs2)))
        return false;
      return true;
    }

  return false;
}


/* Return true if STMT is a nop-conversion.  */

static bool
gimple_nop_conversion_p (gimple *stmt)
{
  if (gassign *ass = dyn_cast <gassign *> (stmt))
    {
      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
          && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
                                    TREE_TYPE (gimple_assign_rhs1 (ass))))
        return true;
    }
  return false;
}

/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of the negate operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple *stmt = SSA_NAME_DEF_STMT (name);

  /* Look through nop conversions (sign changes).  */
  if (gimple_nop_conversion_p (stmt)
      && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}

/* Return true if OP1 and OP2 have the same value if cast to either type.
   */

static bool
ops_equal_values_p (tree op1, tree op2)
{
  if (op1 == op2)
    return true;

  tree orig_op1 = op1;
  if (TREE_CODE (op1) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op1);
      if (gimple_nop_conversion_p (stmt))
        {
          op1 = gimple_assign_rhs1 (stmt);
          if (op1 == op2)
            return true;
        }
    }

  if (TREE_CODE (op2) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op2);
      if (gimple_nop_conversion_p (stmt))
        {
          op2 = gimple_assign_rhs1 (stmt);
          if (op1 == op2
              || orig_op1 == op2)
            return true;
        }
    }

  return false;
}


/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
                          vec<operand_entry *> *ops,
                          bool *all_done,
                          unsigned int i,
                          operand_entry *curr,
                          operand_entry *last)
{

  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
        {
        case MAX_EXPR:
        case MIN_EXPR:
        case BIT_IOR_EXPR:
        case BIT_AND_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op);
              fprintf (dump_file, " [&|minmax] ");
              print_generic_expr (dump_file, last->op);
              fprintf (dump_file, " -> ");
              print_generic_stmt (dump_file, last->op);
            }

          ops->ordered_remove (i);
          reassociate_stats.ops_eliminated ++;

          return true;

        case BIT_XOR_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op);
              fprintf (dump_file, " ^ ");
              print_generic_expr (dump_file, last->op);
              fprintf (dump_file, " -> nothing\n");
            }

          reassociate_stats.ops_eliminated += 2;

          if (ops->length () == 2)
            {
              ops->truncate (0);
              add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
              *all_done = true;
            }
          else
            {
              ops->ordered_remove (i-1);
              ops->ordered_remove (i-1);
            }

          return true;

        default:
          break;
        }
    }
  return false;
}

static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
                           vec<operand_entry *> *ops,
                           unsigned int currindex,
                           operand_entry *curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.
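     For instance, if CURR->OP is t_3 = -x_2, then x_2 was assigned rank
     curr->rank - 1, and since OPS is sorted by decreasing rank no match
     can appear once the scan reaches entries of lower rank.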
     */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (negateop
          && ops_equal_values_p (oe->op, negateop))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, negateop);
              fprintf (dump_file, " + -");
              print_generic_expr (dump_file, oe->op);
              fprintf (dump_file, " -> 0\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated ++;

          return true;
        }
      else if (notop
               && ops_equal_values_p (oe->op, notop))
        {
          tree op_type = TREE_TYPE (oe->op);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop);
              fprintf (dump_file, " + ~");
              print_generic_expr (dump_file, oe->op);
              fprintf (dump_file, " -> -1\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_all_ones_cst (op_type));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated ++;

          return true;
        }
    }

  /* If CURR->OP is a negate expr without nop conversion in a plus expr:
     save it for later inspection in repropagate_negates().  */
  if (negateop != NULL_TREE
      && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
    plus_negates.safe_push (curr->op);

  return false;
}

/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
                     vec<operand_entry *> *ops,
                     unsigned int currindex,
                     operand_entry *curr)
{
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " & ~");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " | ~");
              print_generic_expr (dump_file, oe->op);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " -> 0\n");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " -> -1\n");
            }

          if (opcode == BIT_AND_EXPR)
            oe->op = build_zero_cst (TREE_TYPE (oe->op));
          else if (opcode == BIT_IOR_EXPR)
            oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

          reassociate_stats.ops_eliminated += ops->length () - 1;
          ops->truncate (0);
          ops->quick_push (oe);
          return true;
        }
    }

  return false;
}

/* Use constant value that may be present in OPS to try to eliminate
   operands.
   Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
                           vec<operand_entry *> *ops)
{
  operand_entry *oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
        {
        case BIT_AND_EXPR:
          if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & -1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case BIT_IOR_EXPR:
          if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | -1, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case MULT_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && !HONOR_NANS (type)
                  && !HONOR_SIGNED_ZEROS (type)
                  && real_zerop (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;
                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_onep (oelast->op)
                   || (FLOAT_TYPE_P (type)
                       && !HONOR_SNANS (type)
                       && real_onep (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        case BIT_XOR_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
                  && fold_real_zero_addition_p (type, 0, oelast->op,
                                                opcode == MINUS_EXPR)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found [|^+] 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        default:
          break;
        }
    }
}


static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
                                 bool, bool);

/* Structure for tracking and counting operands.
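   CNT is the number of times the (OECODE, OP) pair was seen in the
   candidate chains, ID is a unique number used to stabilize the qsort
   of oecount elements, OECODE is the operation code and OP the operand
   tree.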
   */
struct oecount {
  unsigned int cnt;
  unsigned int id;
  enum tree_code oecode;
  tree op;
};


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;


/* Oecount hashtable helpers.  */

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};

/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (int p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (int p1, int p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return c1->oecode == c2->oecode && c1->op == c2->op;
}

/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt > c2->cnt ? 1 : -1;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id > c2->id ? 1 : -1;
}

/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple *stmt, tree op)
{
  if (!is_gimple_call (stmt))
    return false;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}

/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple *stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_CFN_POWI:
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}

/* Replace SSA defined by STMT and replace all its uses with new
   SSA.  Also return the new SSA.  */

static tree
make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
{
  gimple *use_stmt;
  use_operand_p use;
  imm_use_iterator iter;
  tree new_lhs, new_debug_lhs = NULL_TREE;
  tree lhs = gimple_get_lhs (stmt);

  new_lhs = make_ssa_name (TREE_TYPE (lhs));
  gimple_set_lhs (stmt, new_lhs);

  /* Also need to update GIMPLE_DEBUGs.
     */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      tree repl = new_lhs;
      if (is_gimple_debug (use_stmt))
        {
          if (new_debug_lhs == NULL_TREE)
            {
              new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
              gdebug *def_temp
                = gimple_build_debug_bind (new_debug_lhs,
                                           build2 (opcode, TREE_TYPE (lhs),
                                                   new_lhs, op),
                                           stmt);
              gimple_set_uid (def_temp, gimple_uid (stmt));
              gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
              gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
            }
          repl = new_debug_lhs;
        }
      FOR_EACH_IMM_USE_ON_STMT (use, iter)
        SET_USE (use, repl);
      update_stmt (use_stmt);
    }
  return new_lhs;
}

/* Replace all SSAs defined in STMTS_TO_FIX and replace their
   uses with new SSAs.  Also do this for the stmt that defines DEF
   if *DEF is not OP.  */

static void
make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
                           vec<gimple *> &stmts_to_fix)
{
  unsigned i;
  gimple *stmt;

  if (*def != op
      && TREE_CODE (*def) == SSA_NAME
      && (stmt = SSA_NAME_DEF_STMT (*def))
      && gimple_code (stmt) != GIMPLE_NOP)
    *def = make_new_ssa_for_def (stmt, opcode, op);

  FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
    make_new_ssa_for_def (stmt, opcode, op);
}

/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}

/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  tree orig_def = *def;
  gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.
     */
  auto_vec<gimple *, 64> stmts_to_fix;

  do
    {
      tree name;

      if (opcode == MULT_EXPR)
        {
          if (stmt_is_power_of_op (stmt, op))
            {
              if (decrement_power (stmt) == 1)
                {
                  if (stmts_to_fix.length () > 0)
                    stmts_to_fix.pop ();
                  propagate_op_to_single_use (op, stmt, def);
                }
              break;
            }
          else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
            {
              if (gimple_assign_rhs1 (stmt) == op)
                {
                  tree cst = build_minus_one_cst (TREE_TYPE (op));
                  if (stmts_to_fix.length () > 0)
                    stmts_to_fix.pop ();
                  propagate_op_to_single_use (cst, stmt, def);
                  break;
                }
              else if (integer_minus_onep (op)
                       || real_minus_onep (op))
                {
                  gimple_assign_set_rhs_code
                    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
                  break;
                }
            }
        }

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
         is ours simply propagate the other operand into the stmts
         single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
          && (name == op
              || gimple_assign_rhs2 (stmt) == op))
        {
          if (name == op)
            name = gimple_assign_rhs2 (stmt);
          if (stmts_to_fix.length () > 0)
            stmts_to_fix.pop ();
          propagate_op_to_single_use (name, stmt, def);
          break;
        }

      /* We might have a multiply of two __builtin_pow* calls, and
         the operand might be hiding in the rightmost one.  Likewise
         this can happen for a negate.  */
      if (opcode == MULT_EXPR
          && gimple_assign_rhs_code (stmt) == opcode
          && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
          && has_single_use (gimple_assign_rhs2 (stmt)))
        {
          gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
          if (stmt_is_power_of_op (stmt2, op))
            {
              if (decrement_power (stmt2) == 1)
                propagate_op_to_single_use (op, stmt2, def);
              else
                stmts_to_fix.safe_push (stmt2);
              break;
            }
          else if (is_gimple_assign (stmt2)
                   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
            {
              if (gimple_assign_rhs1 (stmt2) == op)
                {
                  tree cst = build_minus_one_cst (TREE_TYPE (op));
                  propagate_op_to_single_use (cst, stmt2, def);
                  break;
                }
              else if (integer_minus_onep (op)
                       || real_minus_onep (op))
                {
                  stmts_to_fix.safe_push (stmt2);
                  gimple_assign_set_rhs_code
                    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
                  break;
                }
            }
        }

      /* Continue walking the chain.  */
      gcc_assert (name != op
                  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
      stmts_to_fix.safe_push (stmt);
    }
  while (1);

  if (stmts_to_fix.length () > 0 || *def == orig_def)
    make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
}

/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.
     */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
         executed all in parallel, if only one stmt is a PHI,
         it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
        return true;

      if (gimple_code (s2) == GIMPLE_PHI)
        return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
        return true;

      if (gimple_uid (s1) > gimple_uid (s2))
        return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *s = gsi_stmt (gsi);
          if (gimple_uid (s) != uid)
            break;
          if (s == s2)
            return true;
        }

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}

/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple *stmt, gimple *insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else if (gimple_code (insert_point) == GIMPLE_ASM
           && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
    /* We have no idea where to insert - it depends on where the
       uses will be placed.  */
    gcc_unreachable ();
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
                      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}

/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
   the result.  Places the statement after the definition of either
   OP1 or OP2.  Returns the new statement.  */

static gimple *
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple *op1def = NULL, *op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.
     */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (!gsi_end_p (gsi)
          && is_gimple_call (gsi_stmt (gsi))
          && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
        {
          /* Don't add statements before a returns_twice call at the start
             of a function.  */
          split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
          gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
        }
      if (gsi_end_p (gsi))
        {
          gimple_stmt_iterator gsi2
            = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
          gimple_set_uid (sum,
                          gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
        }
      else
        gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple *insert_point;
      if ((!op1def || gimple_nop_p (op1def))
          || (op2def && !gimple_nop_p (op2def)
              && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
        insert_point = op2def;
      else
        insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  update_stmt (sum);

  return sum;
}

/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
        that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
        multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b ) * x * y in one invocation of the reassociation pass.  */

static bool
undistribute_ops_list (enum tree_code opcode,
                       vec<operand_entry *> *ops, class loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry *oe1;
  unsigned i, j;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry *> *subops;
  bool changed = false;
  unsigned int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.
     */
  auto_sbitmap candidates (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple *oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
        continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
        continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
           && dcode != RDIV_EXPR)
          || !is_reassociable_op (oe1def, dcode, loop))
        continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
                          (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);

  hash_table<oecount_hasher> ctable (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to workaround that limitation.  */
  typedef vec<operand_entry *> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple *oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
                           associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (subops[i], j, oe1)
        {
          oecount c;
          int *slot;
          int idx;
          c.oecode = oecode;
          c.cnt = 1;
          c.id = next_oecount_id++;
          c.op = oe1->op;
          cvec.safe_push (c);
          idx = cvec.length () + 41;
          slot = ctable.find_slot (idx, INSERT);
          if (!*slot)
            {
              *slot = idx;
            }
          else
            {
              cvec.pop ();
              cvec[*slot - 42].cnt++;
            }
        }
    }

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
        {
          fprintf (dump_file, "  %u %s: ", c->cnt,
                   c->oecode == MULT_EXPR
                   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
          print_generic_expr (dump_file, c->op);
          fprintf (dump_file, "\n");
        }
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  auto_sbitmap candidates2 (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
        break;

      /* Now collect the operands in the outer chain that contain
         the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
        {
          gimple *oedef;
          enum tree_code oecode;
          unsigned j;
          tree op = (*ops)[i]->op;

          /* If we undistributed in this chain already this may be
             a constant.
             */
          if (TREE_CODE (op) != SSA_NAME)
            continue;

          oedef = SSA_NAME_DEF_STMT (op);
          oecode = gimple_assign_rhs_code (oedef);
          if (oecode != c->oecode)
            continue;

          FOR_EACH_VEC_ELT (subops[i], j, oe1)
            {
              if (oe1->op == c->op)
                {
                  bitmap_set_bit (candidates2, i);
                  ++nr_candidates2;
                  break;
                }
            }
        }

      if (nr_candidates2 >= 2)
        {
          operand_entry *oe1, *oe2;
          gimple *prod;
          int first = bitmap_first_set_bit (candidates2);

          /* Build the new addition chain.  */
          oe1 = (*ops)[first];
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Building (");
              print_generic_expr (dump_file, oe1->op);
            }
          zero_one_operation (&oe1->op, c->oecode, c->op);
          EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
            {
              gimple *sum;
              oe2 = (*ops)[i];
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, " + ");
                  print_generic_expr (dump_file, oe2->op);
                }
              zero_one_operation (&oe2->op, c->oecode, c->op);
              sum = build_and_add_sum (TREE_TYPE (oe1->op),
                                       oe1->op, oe2->op, opcode);
              oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
              oe2->rank = 0;
              oe1->op = gimple_get_lhs (sum);
            }

          /* Apply the multiplication/division.  */
          prod = build_and_add_sum (TREE_TYPE (oe1->op),
                                    oe1->op, c->op, c->oecode);
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
              print_generic_expr (dump_file, c->op);
              fprintf (dump_file, "\n");
            }

          /* Record it in the addition chain and disable further
             undistribution with this op.  */
          oe1->op = gimple_assign_lhs (prod);
          oe1->rank = get_rank (oe1->op);
          subops[first].release ();

          changed = true;
        }

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();

  return changed;
}

/* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
   first: element index for each relevant BIT_FIELD_REF.
   second: the index of vec ops* for each relevant BIT_FIELD_REF.  */
typedef std::pair<unsigned, unsigned> v_info_elem;
struct v_info {
  tree vec_type;
  auto_vec<v_info_elem, 32> vec;
};
typedef v_info *v_info_ptr;

/* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode.  */
static int
sort_by_mach_mode (const void *p_i, const void *p_j)
{
  const tree tr1 = *((const tree *) p_i);
  const tree tr2 = *((const tree *) p_j);
  unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
  unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
  if (mode1 > mode2)
    return 1;
  else if (mode1 < mode2)
    return -1;
  if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
    return -1;
  else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
    return 1;
  return 0;
}

/* Cleanup hash map for VECTOR information.  */
static void
cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
{
  for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
       it != info_map.end (); ++it)
    {
      v_info_ptr info = (*it).second;
      delete info;
      (*it).second = NULL;
    }
}

/* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
     V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ...
+ V2[k] + ... Vn[k]
1896 is transformed to
1897 Vs = (V1 + V2 + ... + Vn)
1898 Vs[0] + Vs[1] + ... + Vs[k]
1899
1900 The basic steps are listed below:
1901
1902 1) Check the addition chain *OPS, looking for summands that come from
1903 a BIT_FIELD_REF on a VECTOR-typed SSA_NAME. Record the information in
1904 v_info_map for each qualifying summand, keyed by the VECTOR SSA_NAME.
1905
1906 2) For each key (VECTOR SSA_NAME), verify that its BIT_FIELD_REFs are
1907 contiguous and cover the whole VECTOR without any holes.
1908 Collect the VECTORs that qualify into a list of candidates to be transformed.
1909
1910 3) Sort the candidate list by the machine mode of the VECTOR type; for each
1911 group of candidates with the same mode, build the addition statements and
1912 generate the BIT_FIELD_REFs accordingly.
1913
1914 TODO:
1915 The current implementation requires that each VECTOR be fully
1916 covered, but it could be extended to support partial coverage
1917 (adjacent elements that do not fill the whole VECTOR); that would need
1918 some cost model to decide when the transformation is worthwhile.
1919 */
1920 static bool
1921 undistribute_bitref_for_vector (enum tree_code opcode,
1922 vec<operand_entry *> *ops, struct loop *loop)
1923 {
1924 if (ops->length () <= 1)
1925 return false;
1926
1927 if (opcode != PLUS_EXPR
1928 && opcode != MULT_EXPR
1929 && opcode != BIT_XOR_EXPR
1930 && opcode != BIT_IOR_EXPR
1931 && opcode != BIT_AND_EXPR)
1932 return false;
1933
1934 hash_map<tree, v_info_ptr> v_info_map;
1935 operand_entry *oe1;
1936 unsigned i;
1937
1938 /* Find the summands in the addition chain that come from a VECTOR
1939 BIT_FIELD_REF and record them in the map. */
1940 FOR_EACH_VEC_ELT (*ops, i, oe1)
1941 {
1942 enum tree_code dcode;
1943 gimple *oe1def;
1944
1945 if (TREE_CODE (oe1->op) != SSA_NAME)
1946 continue;
1947 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1948 if (!is_gimple_assign (oe1def))
1949 continue;
1950 dcode = gimple_assign_rhs_code (oe1def);
1951 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1952 continue;
1953
1954 tree rhs = gimple_assign_rhs1 (oe1def);
1955 tree vec = TREE_OPERAND (rhs, 0);
1956 tree vec_type = TREE_TYPE (vec);
1957
1958 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1959 continue;
1960
1961 /* Ignore it if the target machine can't support this VECTOR type. */
1962 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1963 continue;
1964
1965 /* Require a constant number of vector elements; this constrains the BIT_FIELD_REF offset and size. */
1966 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1967 continue;
1968
1969 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1970 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1971 continue;
1972
1973 /* The type of the BIT_FIELD_REF might not be equal to the element type of
1974 the vector. We want to use a vector type with element type the
1975 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec).
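For illustration (a hypothetical case, not taken from any testcase): if VEC is a 128-bit vector of four floats while the BIT_FIELD_REFs read 32-bit integers, the code below looks for a four-element 32-bit integer vector mode of the same 128-bit size and later VIEW_CONVERT_EXPRs the accumulated sums to that type.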
*/ 1976 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type))) 1977 { 1978 machine_mode simd_mode; 1979 unsigned HOST_WIDE_INT size, nunits; 1980 unsigned HOST_WIDE_INT elem_size 1981 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs))); 1982 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size)) 1983 continue; 1984 if (size <= elem_size || (size % elem_size) != 0) 1985 continue; 1986 nunits = size / elem_size; 1987 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)), 1988 nunits).exists (&simd_mode)) 1989 continue; 1990 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode); 1991 1992 /* Ignore it if target machine can't support this VECTOR type. */ 1993 if (!VECTOR_MODE_P (TYPE_MODE (vec_type))) 1994 continue; 1995 1996 /* Check const vector type, constrain BIT_FIELD_REF offset and 1997 size. */ 1998 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ()) 1999 continue; 2000 2001 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)), 2002 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec))))) 2003 continue; 2004 } 2005 2006 tree elem_type = TREE_TYPE (vec_type); 2007 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type)); 2008 if (maybe_ne (bit_field_size (rhs), elem_size)) 2009 continue; 2010 2011 unsigned idx; 2012 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx)) 2013 continue; 2014 2015 /* Ignore it if target machine can't support this type of VECTOR 2016 operation. */ 2017 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector); 2018 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing) 2019 continue; 2020 2021 bool existed; 2022 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed); 2023 if (!existed) 2024 { 2025 info = new v_info; 2026 info->vec_type = vec_type; 2027 } 2028 else if (!types_compatible_p (vec_type, info->vec_type)) 2029 continue; 2030 info->vec.safe_push (std::make_pair (idx, i)); 2031 } 2032 2033 /* At least two VECTOR to combine. */ 2034 if (v_info_map.elements () <= 1) 2035 { 2036 cleanup_vinfo_map (v_info_map); 2037 return false; 2038 } 2039 2040 /* Verify all VECTOR candidates by checking two conditions: 2041 1) sorted offsets are adjacent, no holes. 2042 2) can fill the whole VECTOR perfectly. 2043 And add the valid candidates to a vector for further handling. */ 2044 auto_vec<tree> valid_vecs (v_info_map.elements ()); 2045 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin (); 2046 it != v_info_map.end (); ++it) 2047 { 2048 tree cand_vec = (*it).first; 2049 v_info_ptr cand_info = (*it).second; 2050 unsigned int num_elems 2051 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant (); 2052 if (cand_info->vec.length () != num_elems) 2053 continue; 2054 sbitmap holes = sbitmap_alloc (num_elems); 2055 bitmap_ones (holes); 2056 bool valid = true; 2057 v_info_elem *curr; 2058 FOR_EACH_VEC_ELT (cand_info->vec, i, curr) 2059 { 2060 if (!bitmap_bit_p (holes, curr->first)) 2061 { 2062 valid = false; 2063 break; 2064 } 2065 else 2066 bitmap_clear_bit (holes, curr->first); 2067 } 2068 if (valid && bitmap_empty_p (holes)) 2069 valid_vecs.quick_push (cand_vec); 2070 sbitmap_free (holes); 2071 } 2072 2073 /* At least two VECTOR to combine. */ 2074 if (valid_vecs.length () <= 1) 2075 { 2076 cleanup_vinfo_map (v_info_map); 2077 return false; 2078 } 2079 2080 valid_vecs.qsort (sort_by_mach_mode); 2081 /* Go through all candidates by machine mode order, query the mode_to_total 2082 to get the total number for each mode and skip the single one. 
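As a purely illustrative example: if valid_vecs after sorting were { V1, V2, V3 } with V1 and V2 sharing one vector mode and V3 being the only candidate of its mode, the loop below would sum V1 and V2 and leave V3 alone.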
*/ 2083 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i) 2084 { 2085 tree tvec = valid_vecs[i]; 2086 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec)); 2087 2088 /* Skip modes with only a single candidate. */ 2089 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode) 2090 continue; 2091 2092 unsigned int idx, j; 2093 gimple *sum = NULL; 2094 tree sum_vec = tvec; 2095 v_info_ptr info_ptr = *(v_info_map.get (tvec)); 2096 v_info_elem *elem; 2097 tree vec_type = info_ptr->vec_type; 2098 2099 /* Build the sum for all candidates with same mode. */ 2100 do 2101 { 2102 sum = build_and_add_sum (vec_type, sum_vec, 2103 valid_vecs[i + 1], opcode); 2104 /* Update the operands only after build_and_add_sum, 2105 so that we don't have to repeat the placement algorithm 2106 of build_and_add_sum. */ 2107 if (sum_vec == tvec 2108 && !useless_type_conversion_p (vec_type, TREE_TYPE (sum_vec))) 2109 { 2110 gimple_stmt_iterator gsi = gsi_for_stmt (sum); 2111 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec); 2112 tree lhs = make_ssa_name (vec_type); 2113 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce); 2114 gimple_set_uid (g, gimple_uid (sum)); 2115 gsi_insert_before (&gsi, g, GSI_NEW_STMT); 2116 gimple_assign_set_rhs1 (sum, lhs); 2117 update_stmt (sum); 2118 } 2119 if (!useless_type_conversion_p (vec_type, 2120 TREE_TYPE (valid_vecs[i + 1]))) 2121 { 2122 gimple_stmt_iterator gsi = gsi_for_stmt (sum); 2123 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, 2124 valid_vecs[i + 1]); 2125 tree lhs = make_ssa_name (vec_type); 2126 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce); 2127 gimple_set_uid (g, gimple_uid (sum)); 2128 gsi_insert_before (&gsi, g, GSI_NEW_STMT); 2129 gimple_assign_set_rhs2 (sum, lhs); 2130 update_stmt (sum); 2131 } 2132 sum_vec = gimple_get_lhs (sum); 2133 info_ptr = *(v_info_map.get (valid_vecs[i + 1])); 2134 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type)); 2135 /* Update those related ops of current candidate VECTOR. */ 2136 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem) 2137 { 2138 idx = elem->second; 2139 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op); 2140 /* Set this then op definition will get DCEd later. */ 2141 gimple_set_visited (def, true); 2142 if (opcode == PLUS_EXPR 2143 || opcode == BIT_XOR_EXPR 2144 || opcode == BIT_IOR_EXPR) 2145 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op)); 2146 else if (opcode == MULT_EXPR) 2147 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op)); 2148 else 2149 { 2150 gcc_assert (opcode == BIT_AND_EXPR); 2151 (*ops)[idx]->op 2152 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op)); 2153 } 2154 (*ops)[idx]->rank = 0; 2155 } 2156 if (dump_file && (dump_flags & TDF_DETAILS)) 2157 { 2158 fprintf (dump_file, "Generating addition -> "); 2159 print_gimple_stmt (dump_file, sum, 0); 2160 } 2161 i++; 2162 } 2163 while ((i < valid_vecs.length () - 1) 2164 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode); 2165 2166 /* Referring to first valid VECTOR with this mode, generate the 2167 BIT_FIELD_REF statements accordingly. 
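For a hypothetical four-element sum_vec with 32-bit elements this emits, right after the last sum statement, assignments of the form
 _d0 = BIT_FIELD_REF <sum_vec, 32, 0>;
 _d1 = BIT_FIELD_REF <sum_vec, 32, 32>;
 ...
and rewrites the corresponding entries in *ops to use those new scalars.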
*/ 2168 info_ptr = *(v_info_map.get (tvec)); 2169 gcc_assert (sum); 2170 tree elem_type = TREE_TYPE (vec_type); 2171 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem) 2172 { 2173 idx = elem->second; 2174 tree dst = make_ssa_name (elem_type); 2175 tree pos = bitsize_int (elem->first 2176 * tree_to_uhwi (TYPE_SIZE (elem_type))); 2177 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec, 2178 TYPE_SIZE (elem_type), pos); 2179 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr); 2180 insert_stmt_after (gs, sum); 2181 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op); 2182 /* Set this then op definition will get DCEd later. */ 2183 gimple_set_visited (def, true); 2184 (*ops)[idx]->op = gimple_assign_lhs (gs); 2185 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op); 2186 if (dump_file && (dump_flags & TDF_DETAILS)) 2187 { 2188 fprintf (dump_file, "Generating bit_field_ref -> "); 2189 print_gimple_stmt (dump_file, gs, 0); 2190 } 2191 } 2192 } 2193 2194 if (dump_file && (dump_flags & TDF_DETAILS)) 2195 fprintf (dump_file, "undistributiong bit_field_ref for vector done.\n"); 2196 2197 cleanup_vinfo_map (v_info_map); 2198 2199 return true; 2200 } 2201 2202 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison 2203 expression, examine the other OPS to see if any of them are comparisons 2204 of the same values, which we may be able to combine or eliminate. 2205 For example, we can rewrite (a < b) | (a == b) as (a <= b). */ 2206 2207 static bool 2208 eliminate_redundant_comparison (enum tree_code opcode, 2209 vec<operand_entry *> *ops, 2210 unsigned int currindex, 2211 operand_entry *curr) 2212 { 2213 tree op1, op2; 2214 enum tree_code lcode, rcode; 2215 gimple *def1, *def2; 2216 int i; 2217 operand_entry *oe; 2218 2219 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) 2220 return false; 2221 2222 /* Check that CURR is a comparison. */ 2223 if (TREE_CODE (curr->op) != SSA_NAME) 2224 return false; 2225 def1 = SSA_NAME_DEF_STMT (curr->op); 2226 if (!is_gimple_assign (def1)) 2227 return false; 2228 lcode = gimple_assign_rhs_code (def1); 2229 if (TREE_CODE_CLASS (lcode) != tcc_comparison) 2230 return false; 2231 op1 = gimple_assign_rhs1 (def1); 2232 op2 = gimple_assign_rhs2 (def1); 2233 2234 /* Now look for a similar comparison in the remaining OPS. */ 2235 for (i = currindex + 1; ops->iterate (i, &oe); i++) 2236 { 2237 tree t; 2238 2239 if (TREE_CODE (oe->op) != SSA_NAME) 2240 continue; 2241 def2 = SSA_NAME_DEF_STMT (oe->op); 2242 if (!is_gimple_assign (def2)) 2243 continue; 2244 rcode = gimple_assign_rhs_code (def2); 2245 if (TREE_CODE_CLASS (rcode) != tcc_comparison) 2246 continue; 2247 2248 /* If we got here, we have a match. See if we can combine the 2249 two comparisons. */ 2250 tree type = TREE_TYPE (gimple_assign_lhs (def1)); 2251 if (opcode == BIT_IOR_EXPR) 2252 t = maybe_fold_or_comparisons (type, 2253 lcode, op1, op2, 2254 rcode, gimple_assign_rhs1 (def2), 2255 gimple_assign_rhs2 (def2)); 2256 else 2257 t = maybe_fold_and_comparisons (type, 2258 lcode, op1, op2, 2259 rcode, gimple_assign_rhs1 (def2), 2260 gimple_assign_rhs2 (def2)); 2261 if (!t) 2262 continue; 2263 2264 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons 2265 always give us a boolean_type_node value back. If the original 2266 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type, 2267 we need to convert. 
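For example (hypothetical), if the original chain operated on int-typed comparison results, the boolean value computed here is converted back to int with fold_convert below before it replaces curr->op.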
*/ 2268 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t))) 2269 { 2270 if (!fold_convertible_p (TREE_TYPE (curr->op), t)) 2271 continue; 2272 t = fold_convert (TREE_TYPE (curr->op), t); 2273 } 2274 2275 if (TREE_CODE (t) != INTEGER_CST 2276 && !operand_equal_p (t, curr->op, 0)) 2277 { 2278 enum tree_code subcode; 2279 tree newop1, newop2; 2280 if (!COMPARISON_CLASS_P (t)) 2281 continue; 2282 extract_ops_from_tree (t, &subcode, &newop1, &newop2); 2283 STRIP_USELESS_TYPE_CONVERSION (newop1); 2284 STRIP_USELESS_TYPE_CONVERSION (newop2); 2285 if (!is_gimple_val (newop1) || !is_gimple_val (newop2)) 2286 continue; 2287 } 2288 2289 if (dump_file && (dump_flags & TDF_DETAILS)) 2290 { 2291 fprintf (dump_file, "Equivalence: "); 2292 print_generic_expr (dump_file, curr->op); 2293 fprintf (dump_file, " %s ", op_symbol_code (opcode)); 2294 print_generic_expr (dump_file, oe->op); 2295 fprintf (dump_file, " -> "); 2296 print_generic_expr (dump_file, t); 2297 fprintf (dump_file, "\n"); 2298 } 2299 2300 /* Now we can delete oe, as it has been subsumed by the new combined 2301 expression t. */ 2302 ops->ordered_remove (i); 2303 reassociate_stats.ops_eliminated ++; 2304 2305 /* If t is the same as curr->op, we're done. Otherwise we must 2306 replace curr->op with t. Special case is if we got a constant 2307 back, in which case we add it to the end instead of in place of 2308 the current entry. */ 2309 if (TREE_CODE (t) == INTEGER_CST) 2310 { 2311 ops->ordered_remove (currindex); 2312 add_to_ops_vec (ops, t); 2313 } 2314 else if (!operand_equal_p (t, curr->op, 0)) 2315 { 2316 gimple *sum; 2317 enum tree_code subcode; 2318 tree newop1; 2319 tree newop2; 2320 gcc_assert (COMPARISON_CLASS_P (t)); 2321 extract_ops_from_tree (t, &subcode, &newop1, &newop2); 2322 STRIP_USELESS_TYPE_CONVERSION (newop1); 2323 STRIP_USELESS_TYPE_CONVERSION (newop2); 2324 gcc_checking_assert (is_gimple_val (newop1) 2325 && is_gimple_val (newop2)); 2326 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode); 2327 curr->op = gimple_get_lhs (sum); 2328 } 2329 return true; 2330 } 2331 2332 return false; 2333 } 2334 2335 2336 /* Transform repeated addition of same values into multiply with 2337 constant. */ 2338 static bool 2339 transform_add_to_multiply (vec<operand_entry *> *ops) 2340 { 2341 operand_entry *oe; 2342 tree op = NULL_TREE; 2343 int j; 2344 int i, start = -1, end = 0, count = 0; 2345 auto_vec<std::pair <int, int> > indxs; 2346 bool changed = false; 2347 2348 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op)) 2349 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op)) 2350 || !flag_unsafe_math_optimizations)) 2351 return false; 2352 2353 /* Look for repeated operands. */ 2354 FOR_EACH_VEC_ELT (*ops, i, oe) 2355 { 2356 if (start == -1) 2357 { 2358 count = 1; 2359 op = oe->op; 2360 start = i; 2361 } 2362 else if (operand_equal_p (oe->op, op, 0)) 2363 { 2364 count++; 2365 end = i; 2366 } 2367 else 2368 { 2369 if (count > 1) 2370 indxs.safe_push (std::make_pair (start, end)); 2371 count = 1; 2372 op = oe->op; 2373 start = i; 2374 } 2375 } 2376 2377 if (count > 1) 2378 indxs.safe_push (std::make_pair (start, end)); 2379 2380 for (j = indxs.length () - 1; j >= 0; --j) 2381 { 2382 /* Convert repeated operand addition to multiplication. 
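E.g. for a hypothetical chain x + x + x, the three matching entries are removed and replaced by a single new entry tmp, where tmp = x * 3 is built below.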
*/ 2383 start = indxs[j].first; 2384 end = indxs[j].second; 2385 op = (*ops)[start]->op; 2386 count = end - start + 1; 2387 for (i = end; i >= start; --i) 2388 ops->unordered_remove (i); 2389 tree tmp = make_ssa_name (TREE_TYPE (op)); 2390 tree cst = build_int_cst (integer_type_node, count); 2391 gassign *mul_stmt 2392 = gimple_build_assign (tmp, MULT_EXPR, 2393 op, fold_convert (TREE_TYPE (op), cst)); 2394 gimple_set_visited (mul_stmt, true); 2395 add_to_ops_vec (ops, tmp, mul_stmt); 2396 changed = true; 2397 } 2398 2399 return changed; 2400 } 2401 2402 2403 /* Perform various identities and other optimizations on the list of 2404 operand entries, stored in OPS. The tree code for the binary 2405 operation between all the operands is OPCODE. */ 2406 2407 static void 2408 optimize_ops_list (enum tree_code opcode, 2409 vec<operand_entry *> *ops) 2410 { 2411 unsigned int length = ops->length (); 2412 unsigned int i; 2413 operand_entry *oe; 2414 operand_entry *oelast = NULL; 2415 bool iterate = false; 2416 2417 if (length == 1) 2418 return; 2419 2420 oelast = ops->last (); 2421 2422 /* If the last two are constants, pop the constants off, merge them 2423 and try the next two. */ 2424 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) 2425 { 2426 operand_entry *oelm1 = (*ops)[length - 2]; 2427 2428 if (oelm1->rank == 0 2429 && is_gimple_min_invariant (oelm1->op) 2430 && useless_type_conversion_p (TREE_TYPE (oelm1->op), 2431 TREE_TYPE (oelast->op))) 2432 { 2433 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op), 2434 oelm1->op, oelast->op); 2435 2436 if (folded && is_gimple_min_invariant (folded)) 2437 { 2438 if (dump_file && (dump_flags & TDF_DETAILS)) 2439 fprintf (dump_file, "Merging constants\n"); 2440 2441 ops->pop (); 2442 ops->pop (); 2443 2444 add_to_ops_vec (ops, folded); 2445 reassociate_stats.constants_eliminated++; 2446 2447 optimize_ops_list (opcode, ops); 2448 return; 2449 } 2450 } 2451 } 2452 2453 eliminate_using_constants (opcode, ops); 2454 oelast = NULL; 2455 2456 for (i = 0; ops->iterate (i, &oe);) 2457 { 2458 bool done = false; 2459 2460 if (eliminate_not_pairs (opcode, ops, i, oe)) 2461 return; 2462 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast) 2463 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)) 2464 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe))) 2465 { 2466 if (done) 2467 return; 2468 iterate = true; 2469 oelast = NULL; 2470 continue; 2471 } 2472 oelast = oe; 2473 i++; 2474 } 2475 2476 if (iterate) 2477 optimize_ops_list (opcode, ops); 2478 } 2479 2480 /* The following functions are subroutines to optimize_range_tests and allow 2481 it to try to change a logical combination of comparisons into a range 2482 test. 2483 2484 For example, both 2485 X == 2 || X == 5 || X == 3 || X == 4 2486 and 2487 X >= 2 && X <= 5 2488 are converted to 2489 (unsigned) (X - 2) <= 3 2490 2491 For more information see comments above fold_test_range in fold-const.cc, 2492 this implementation is for GIMPLE. */ 2493 2494 2495 2496 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */ 2497 2498 void 2499 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp) 2500 { 2501 if (!skip_exp) 2502 print_generic_expr (file, r->exp); 2503 fprintf (file, " %c[", r->in_p ? '+' : '-'); 2504 print_generic_expr (file, r->low); 2505 fputs (", ", file); 2506 print_generic_expr (file, r->high); 2507 fputc (']', file); 2508 } 2509 2510 /* Dump the range entry R to STDERR. 
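For instance, a range meaning x_1 in [2, 5] is printed as: x_1 +[2, 5] (illustrative SSA name).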
*/ 2511 2512 DEBUG_FUNCTION void 2513 debug_range_entry (struct range_entry *r) 2514 { 2515 dump_range_entry (stderr, r, false); 2516 fputc ('\n', stderr); 2517 } 2518 2519 /* This is similar to make_range in fold-const.cc, but on top of 2520 GIMPLE instead of trees. If EXP is non-NULL, it should be 2521 an SSA_NAME and STMT argument is ignored, otherwise STMT 2522 argument should be a GIMPLE_COND. */ 2523 2524 void 2525 init_range_entry (struct range_entry *r, tree exp, gimple *stmt) 2526 { 2527 int in_p; 2528 tree low, high; 2529 bool is_bool, strict_overflow_p; 2530 2531 r->exp = NULL_TREE; 2532 r->in_p = false; 2533 r->strict_overflow_p = false; 2534 r->low = NULL_TREE; 2535 r->high = NULL_TREE; 2536 if (exp != NULL_TREE 2537 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))) 2538 return; 2539 2540 /* Start with simply saying "EXP != 0" and then look at the code of EXP 2541 and see if we can refine the range. Some of the cases below may not 2542 happen, but it doesn't seem worth worrying about this. We "continue" 2543 the outer loop when we've changed something; otherwise we "break" 2544 the switch, which will "break" the while. */ 2545 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node; 2546 high = low; 2547 in_p = 0; 2548 strict_overflow_p = false; 2549 is_bool = false; 2550 if (exp == NULL_TREE) 2551 is_bool = true; 2552 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) 2553 { 2554 if (TYPE_UNSIGNED (TREE_TYPE (exp))) 2555 is_bool = true; 2556 else 2557 return; 2558 } 2559 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) 2560 is_bool = true; 2561 2562 while (1) 2563 { 2564 enum tree_code code; 2565 tree arg0, arg1, exp_type; 2566 tree nexp; 2567 location_t loc; 2568 2569 if (exp != NULL_TREE) 2570 { 2571 if (TREE_CODE (exp) != SSA_NAME 2572 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp)) 2573 break; 2574 2575 stmt = SSA_NAME_DEF_STMT (exp); 2576 if (!is_gimple_assign (stmt)) 2577 break; 2578 2579 code = gimple_assign_rhs_code (stmt); 2580 arg0 = gimple_assign_rhs1 (stmt); 2581 arg1 = gimple_assign_rhs2 (stmt); 2582 exp_type = TREE_TYPE (exp); 2583 } 2584 else 2585 { 2586 code = gimple_cond_code (stmt); 2587 arg0 = gimple_cond_lhs (stmt); 2588 arg1 = gimple_cond_rhs (stmt); 2589 exp_type = boolean_type_node; 2590 } 2591 2592 if (TREE_CODE (arg0) != SSA_NAME 2593 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0)) 2594 break; 2595 loc = gimple_location (stmt); 2596 switch (code) 2597 { 2598 case BIT_NOT_EXPR: 2599 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE 2600 /* Ensure the range is either +[-,0], +[0,0], 2601 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or 2602 -[1,1]. If it is e.g. +[-,-] or -[-,-] 2603 or similar expression of unconditional true or 2604 false, it should not be negated. 
*/ 2605 && ((high && integer_zerop (high)) 2606 || (low && integer_onep (low)))) 2607 { 2608 in_p = !in_p; 2609 exp = arg0; 2610 continue; 2611 } 2612 break; 2613 case SSA_NAME: 2614 exp = arg0; 2615 continue; 2616 CASE_CONVERT: 2617 if (is_bool) 2618 { 2619 if ((TYPE_PRECISION (exp_type) == 1 2620 || TREE_CODE (exp_type) == BOOLEAN_TYPE) 2621 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1) 2622 return; 2623 } 2624 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) 2625 { 2626 if (TYPE_UNSIGNED (TREE_TYPE (arg0))) 2627 is_bool = true; 2628 else 2629 return; 2630 } 2631 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) 2632 is_bool = true; 2633 goto do_default; 2634 case EQ_EXPR: 2635 case NE_EXPR: 2636 case LT_EXPR: 2637 case LE_EXPR: 2638 case GE_EXPR: 2639 case GT_EXPR: 2640 is_bool = true; 2641 /* FALLTHRU */ 2642 default: 2643 if (!is_bool) 2644 return; 2645 do_default: 2646 nexp = make_range_step (loc, code, arg0, arg1, exp_type, 2647 &low, &high, &in_p, 2648 &strict_overflow_p); 2649 if (nexp != NULL_TREE) 2650 { 2651 exp = nexp; 2652 gcc_assert (TREE_CODE (exp) == SSA_NAME); 2653 continue; 2654 } 2655 break; 2656 } 2657 break; 2658 } 2659 if (is_bool) 2660 { 2661 r->exp = exp; 2662 r->in_p = in_p; 2663 r->low = low; 2664 r->high = high; 2665 r->strict_overflow_p = strict_overflow_p; 2666 } 2667 } 2668 2669 /* Comparison function for qsort. Sort entries 2670 without SSA_NAME exp first, then with SSA_NAMEs sorted 2671 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs 2672 by increasing ->low and if ->low is the same, by increasing 2673 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE 2674 maximum. */ 2675 2676 static int 2677 range_entry_cmp (const void *a, const void *b) 2678 { 2679 const struct range_entry *p = (const struct range_entry *) a; 2680 const struct range_entry *q = (const struct range_entry *) b; 2681 2682 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) 2683 { 2684 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 2685 { 2686 /* Group range_entries for the same SSA_NAME together. */ 2687 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) 2688 return -1; 2689 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) 2690 return 1; 2691 /* If ->low is different, NULL low goes first, then by 2692 ascending low. */ 2693 if (p->low != NULL_TREE) 2694 { 2695 if (q->low != NULL_TREE) 2696 { 2697 tree tem = fold_binary (LT_EXPR, boolean_type_node, 2698 p->low, q->low); 2699 if (tem && integer_onep (tem)) 2700 return -1; 2701 tem = fold_binary (GT_EXPR, boolean_type_node, 2702 p->low, q->low); 2703 if (tem && integer_onep (tem)) 2704 return 1; 2705 } 2706 else 2707 return 1; 2708 } 2709 else if (q->low != NULL_TREE) 2710 return -1; 2711 /* If ->high is different, NULL high goes last, before that by 2712 ascending high. */ 2713 if (p->high != NULL_TREE) 2714 { 2715 if (q->high != NULL_TREE) 2716 { 2717 tree tem = fold_binary (LT_EXPR, boolean_type_node, 2718 p->high, q->high); 2719 if (tem && integer_onep (tem)) 2720 return -1; 2721 tem = fold_binary (GT_EXPR, boolean_type_node, 2722 p->high, q->high); 2723 if (tem && integer_onep (tem)) 2724 return 1; 2725 } 2726 else 2727 return -1; 2728 } 2729 else if (q->high != NULL_TREE) 2730 return 1; 2731 /* If both ranges are the same, sort below by ascending idx. 
*/ 2732 } 2733 else 2734 return 1; 2735 } 2736 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 2737 return -1; 2738 2739 if (p->idx < q->idx) 2740 return -1; 2741 else 2742 { 2743 gcc_checking_assert (p->idx > q->idx); 2744 return 1; 2745 } 2746 } 2747 2748 /* Helper function for update_range_test. Force EXPR into an SSA_NAME, 2749 insert needed statements BEFORE or after GSI. */ 2750 2751 static tree 2752 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before) 2753 { 2754 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING; 2755 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m); 2756 if (TREE_CODE (ret) != SSA_NAME) 2757 { 2758 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret); 2759 if (before) 2760 gsi_insert_before (gsi, g, GSI_SAME_STMT); 2761 else 2762 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING); 2763 ret = gimple_assign_lhs (g); 2764 } 2765 return ret; 2766 } 2767 2768 /* Helper routine of optimize_range_test. 2769 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for 2770 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, 2771 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE 2772 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to 2773 an array of COUNT pointers to other ranges. Return 2774 true if the range merge has been successful. 2775 If OPCODE is ERROR_MARK, this is called from within 2776 maybe_optimize_range_tests and is performing inter-bb range optimization. 2777 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in 2778 oe->rank. */ 2779 2780 static bool 2781 update_range_test (struct range_entry *range, struct range_entry *otherrange, 2782 struct range_entry **otherrangep, 2783 unsigned int count, enum tree_code opcode, 2784 vec<operand_entry *> *ops, tree exp, gimple_seq seq, 2785 bool in_p, tree low, tree high, bool strict_overflow_p) 2786 { 2787 unsigned int idx = range->idx; 2788 struct range_entry *swap_with = NULL; 2789 basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL; 2790 if (opcode == ERROR_MARK) 2791 { 2792 /* For inter-bb range test optimization, pick from the range tests 2793 the one which is tested in the earliest condition (one dominating 2794 the others), because otherwise there could be some UB (e.g. signed 2795 overflow) in following bbs that we'd expose which wasn't there in 2796 the original program. See PR104196. */ 2797 basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id); 2798 basic_block range_bb = orig_range_bb; 2799 for (unsigned int i = 0; i < count; i++) 2800 { 2801 struct range_entry *this_range; 2802 if (otherrange) 2803 this_range = otherrange + i; 2804 else 2805 this_range = otherrangep[i]; 2806 operand_entry *oe = (*ops)[this_range->idx]; 2807 basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id); 2808 if (range_bb != this_bb 2809 && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb)) 2810 { 2811 swap_with = this_range; 2812 range_bb = this_bb; 2813 idx = this_range->idx; 2814 } 2815 } 2816 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs 2817 only defined in later blocks. In this case we can't move the 2818 merged comparison earlier, so instead check if there are any stmts 2819 that might trigger signed integer overflow in between and rewrite 2820 them. But only after we check if the optimization is possible. 
*/ 2821 if (seq && swap_with) 2822 { 2823 rewrite_bb_first = range_bb; 2824 rewrite_bb_last = orig_range_bb; 2825 idx = range->idx; 2826 swap_with = NULL; 2827 } 2828 } 2829 operand_entry *oe = (*ops)[idx]; 2830 tree op = oe->op; 2831 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) 2832 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 2833 location_t loc = gimple_location (stmt); 2834 tree optype = op ? TREE_TYPE (op) : boolean_type_node; 2835 tree tem = build_range_check (loc, optype, unshare_expr (exp), 2836 in_p, low, high); 2837 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 2838 gimple_stmt_iterator gsi; 2839 unsigned int i, uid; 2840 2841 if (tem == NULL_TREE) 2842 return false; 2843 2844 /* If op is default def SSA_NAME, there is no place to insert the 2845 new comparison. Give up, unless we can use OP itself as the 2846 range test. */ 2847 if (op && SSA_NAME_IS_DEFAULT_DEF (op)) 2848 { 2849 if (op == range->exp 2850 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype)) 2851 || TREE_CODE (optype) == BOOLEAN_TYPE) 2852 && (op == tem 2853 || (TREE_CODE (tem) == EQ_EXPR 2854 && TREE_OPERAND (tem, 0) == op 2855 && integer_onep (TREE_OPERAND (tem, 1)))) 2856 && opcode != BIT_IOR_EXPR 2857 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR)) 2858 { 2859 stmt = NULL; 2860 tem = op; 2861 } 2862 else 2863 return false; 2864 } 2865 2866 if (swap_with) 2867 std::swap (range->idx, swap_with->idx); 2868 2869 if (strict_overflow_p && issue_strict_overflow_warning (wc)) 2870 warning_at (loc, OPT_Wstrict_overflow, 2871 "assuming signed overflow does not occur " 2872 "when simplifying range test"); 2873 2874 if (dump_file && (dump_flags & TDF_DETAILS)) 2875 { 2876 struct range_entry *r; 2877 fprintf (dump_file, "Optimizing range tests "); 2878 dump_range_entry (dump_file, range, false); 2879 for (i = 0; i < count; i++) 2880 { 2881 if (otherrange) 2882 r = otherrange + i; 2883 else 2884 r = otherrangep[i]; 2885 if (r->exp 2886 && r->exp != range->exp 2887 && TREE_CODE (r->exp) == SSA_NAME) 2888 { 2889 fprintf (dump_file, " and "); 2890 dump_range_entry (dump_file, r, false); 2891 } 2892 else 2893 { 2894 fprintf (dump_file, " and"); 2895 dump_range_entry (dump_file, r, true); 2896 } 2897 } 2898 fprintf (dump_file, "\n into "); 2899 print_generic_expr (dump_file, tem); 2900 fprintf (dump_file, "\n"); 2901 } 2902 2903 /* In inter-bb range optimization mode, if we have a seq, we can't 2904 move the merged comparison to the earliest bb from the comparisons 2905 being replaced, so instead rewrite stmts that could trigger signed 2906 integer overflow. 
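Concretely (an illustrative case), a signed t_5 = a_4 + 1 sitting between those blocks is rewritten via rewrite_to_defined_overflow to compute in the corresponding unsigned type, so executing it unconditionally cannot introduce undefined behavior.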
*/ 2907 for (basic_block bb = rewrite_bb_last; 2908 bb != rewrite_bb_first; bb = single_pred (bb)) 2909 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); 2910 !gsi_end_p (gsi); gsi_next (&gsi)) 2911 { 2912 gimple *stmt = gsi_stmt (gsi); 2913 if (is_gimple_assign (stmt)) 2914 if (tree lhs = gimple_assign_lhs (stmt)) 2915 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 2916 || POINTER_TYPE_P (TREE_TYPE (lhs))) 2917 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))) 2918 { 2919 enum tree_code code = gimple_assign_rhs_code (stmt); 2920 if (arith_code_with_undefined_signed_overflow (code)) 2921 { 2922 gimple_stmt_iterator gsip = gsi; 2923 gimple_stmt_iterator gsin = gsi; 2924 gsi_prev (&gsip); 2925 gsi_next (&gsin); 2926 rewrite_to_defined_overflow (stmt, true); 2927 unsigned uid = gimple_uid (stmt); 2928 if (gsi_end_p (gsip)) 2929 gsip = gsi_after_labels (bb); 2930 else 2931 gsi_next (&gsip); 2932 for (; gsi_stmt (gsip) != gsi_stmt (gsin); 2933 gsi_next (&gsip)) 2934 gimple_set_uid (gsi_stmt (gsip), uid); 2935 } 2936 } 2937 } 2938 2939 if (opcode == BIT_IOR_EXPR 2940 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 2941 tem = invert_truthvalue_loc (loc, tem); 2942 2943 tem = fold_convert_loc (loc, optype, tem); 2944 if (stmt) 2945 { 2946 gsi = gsi_for_stmt (stmt); 2947 uid = gimple_uid (stmt); 2948 } 2949 else 2950 { 2951 gsi = gsi_none (); 2952 uid = 0; 2953 } 2954 if (stmt == NULL) 2955 gcc_checking_assert (tem == op); 2956 /* In rare cases range->exp can be equal to lhs of stmt. 2957 In that case we have to insert after the stmt rather then before 2958 it. If stmt is a PHI, insert it at the start of the basic block. */ 2959 else if (op != range->exp) 2960 { 2961 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 2962 tem = force_into_ssa_name (&gsi, tem, true); 2963 gsi_prev (&gsi); 2964 } 2965 else if (gimple_code (stmt) != GIMPLE_PHI) 2966 { 2967 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); 2968 tem = force_into_ssa_name (&gsi, tem, false); 2969 } 2970 else 2971 { 2972 gsi = gsi_after_labels (gimple_bb (stmt)); 2973 if (!gsi_end_p (gsi)) 2974 uid = gimple_uid (gsi_stmt (gsi)); 2975 else 2976 { 2977 gsi = gsi_start_bb (gimple_bb (stmt)); 2978 uid = 1; 2979 while (!gsi_end_p (gsi)) 2980 { 2981 uid = gimple_uid (gsi_stmt (gsi)); 2982 gsi_next (&gsi); 2983 } 2984 } 2985 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 2986 tem = force_into_ssa_name (&gsi, tem, true); 2987 if (gsi_end_p (gsi)) 2988 gsi = gsi_last_bb (gimple_bb (stmt)); 2989 else 2990 gsi_prev (&gsi); 2991 } 2992 for (; !gsi_end_p (gsi); gsi_prev (&gsi)) 2993 if (gimple_uid (gsi_stmt (gsi))) 2994 break; 2995 else 2996 gimple_set_uid (gsi_stmt (gsi), uid); 2997 2998 oe->op = tem; 2999 range->exp = exp; 3000 range->low = low; 3001 range->high = high; 3002 range->in_p = in_p; 3003 range->strict_overflow_p = false; 3004 3005 for (i = 0; i < count; i++) 3006 { 3007 if (otherrange) 3008 range = otherrange + i; 3009 else 3010 range = otherrangep[i]; 3011 oe = (*ops)[range->idx]; 3012 /* Now change all the other range test immediate uses, so that 3013 those tests will be optimized away. */ 3014 if (opcode == ERROR_MARK) 3015 { 3016 if (oe->op) 3017 oe->op = build_int_cst (TREE_TYPE (oe->op), 3018 oe->rank == BIT_IOR_EXPR ? 0 : 1); 3019 else 3020 oe->op = (oe->rank == BIT_IOR_EXPR 3021 ? 
boolean_false_node : boolean_true_node); 3022 } 3023 else 3024 oe->op = error_mark_node; 3025 range->exp = NULL_TREE; 3026 range->low = NULL_TREE; 3027 range->high = NULL_TREE; 3028 } 3029 return true; 3030 } 3031 3032 /* Optimize X == CST1 || X == CST2 3033 if popcount (CST1 ^ CST2) == 1 into 3034 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). 3035 Similarly for ranges. E.g. 3036 X != 2 && X != 3 && X != 10 && X != 11 3037 will be transformed by the previous optimization into 3038 !((X - 2U) <= 1U || (X - 10U) <= 1U) 3039 and this loop can transform that into 3040 !(((X & ~8) - 2U) <= 1U). */ 3041 3042 static bool 3043 optimize_range_tests_xor (enum tree_code opcode, tree type, 3044 tree lowi, tree lowj, tree highi, tree highj, 3045 vec<operand_entry *> *ops, 3046 struct range_entry *rangei, 3047 struct range_entry *rangej) 3048 { 3049 tree lowxor, highxor, tem, exp; 3050 /* Check lowi ^ lowj == highi ^ highj and 3051 popcount (lowi ^ lowj) == 1. */ 3052 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); 3053 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) 3054 return false; 3055 if (!integer_pow2p (lowxor)) 3056 return false; 3057 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); 3058 if (!tree_int_cst_equal (lowxor, highxor)) 3059 return false; 3060 3061 exp = rangei->exp; 3062 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type)); 3063 int prec = GET_MODE_PRECISION (mode); 3064 if (TYPE_PRECISION (type) < prec 3065 || (wi::to_wide (TYPE_MIN_VALUE (type)) 3066 != wi::min_value (prec, TYPE_SIGN (type))) 3067 || (wi::to_wide (TYPE_MAX_VALUE (type)) 3068 != wi::max_value (prec, TYPE_SIGN (type)))) 3069 { 3070 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type)); 3071 exp = fold_convert (type, exp); 3072 lowxor = fold_convert (type, lowxor); 3073 lowi = fold_convert (type, lowi); 3074 highi = fold_convert (type, highi); 3075 } 3076 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); 3077 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem); 3078 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); 3079 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); 3080 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp, 3081 NULL, rangei->in_p, lowj, highj, 3082 rangei->strict_overflow_p 3083 || rangej->strict_overflow_p)) 3084 return true; 3085 return false; 3086 } 3087 3088 /* Optimize X == CST1 || X == CST2 3089 if popcount (CST2 - CST1) == 1 into 3090 ((X - CST1) & ~(CST2 - CST1)) == 0. 3091 Similarly for ranges. E.g. 3092 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46 3093 || X == 75 || X == 45 3094 will be transformed by the previous optimization into 3095 (X - 43U) <= 3U || (X - 75U) <= 3U 3096 and this loop can transform that into 3097 ((X - 43U) & ~(75U - 43U)) <= 3U. */ 3098 static bool 3099 optimize_range_tests_diff (enum tree_code opcode, tree type, 3100 tree lowi, tree lowj, tree highi, tree highj, 3101 vec<operand_entry *> *ops, 3102 struct range_entry *rangei, 3103 struct range_entry *rangej) 3104 { 3105 tree tem1, tem2, mask; 3106 /* Check highi - lowi == highj - lowj. */ 3107 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi); 3108 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 3109 return false; 3110 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj); 3111 if (!tree_int_cst_equal (tem1, tem2)) 3112 return false; 3113 /* Check popcount (lowj - lowi) == 1. 
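E.g. with the ranges from the comment above, lowi == 43 and lowj == 75, so lowj - lowi == 32, which has exactly one bit set.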
*/ 3114 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi); 3115 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 3116 return false; 3117 if (!integer_pow2p (tem1)) 3118 return false; 3119 3120 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type)); 3121 int prec = GET_MODE_PRECISION (mode); 3122 if (TYPE_PRECISION (type) < prec 3123 || (wi::to_wide (TYPE_MIN_VALUE (type)) 3124 != wi::min_value (prec, TYPE_SIGN (type))) 3125 || (wi::to_wide (TYPE_MAX_VALUE (type)) 3126 != wi::max_value (prec, TYPE_SIGN (type)))) 3127 type = build_nonstandard_integer_type (prec, 1); 3128 else 3129 type = unsigned_type_for (type); 3130 tem1 = fold_convert (type, tem1); 3131 tem2 = fold_convert (type, tem2); 3132 lowi = fold_convert (type, lowi); 3133 mask = fold_build1 (BIT_NOT_EXPR, type, tem1); 3134 tem1 = fold_build2 (MINUS_EXPR, type, 3135 fold_convert (type, rangei->exp), lowi); 3136 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask); 3137 lowj = build_int_cst (type, 0); 3138 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1, 3139 NULL, rangei->in_p, lowj, tem2, 3140 rangei->strict_overflow_p 3141 || rangej->strict_overflow_p)) 3142 return true; 3143 return false; 3144 } 3145 3146 /* It does some common checks for function optimize_range_tests_xor and 3147 optimize_range_tests_diff. 3148 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor. 3149 Else it calls optimize_range_tests_diff. */ 3150 3151 static bool 3152 optimize_range_tests_1 (enum tree_code opcode, int first, int length, 3153 bool optimize_xor, vec<operand_entry *> *ops, 3154 struct range_entry *ranges) 3155 { 3156 int i, j; 3157 bool any_changes = false; 3158 for (i = first; i < length; i++) 3159 { 3160 tree lowi, highi, lowj, highj, type, tem; 3161 3162 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 3163 continue; 3164 type = TREE_TYPE (ranges[i].exp); 3165 if (!INTEGRAL_TYPE_P (type)) 3166 continue; 3167 lowi = ranges[i].low; 3168 if (lowi == NULL_TREE) 3169 lowi = TYPE_MIN_VALUE (type); 3170 highi = ranges[i].high; 3171 if (highi == NULL_TREE) 3172 continue; 3173 for (j = i + 1; j < length && j < i + 64; j++) 3174 { 3175 bool changes; 3176 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p) 3177 continue; 3178 lowj = ranges[j].low; 3179 if (lowj == NULL_TREE) 3180 continue; 3181 highj = ranges[j].high; 3182 if (highj == NULL_TREE) 3183 highj = TYPE_MAX_VALUE (type); 3184 /* Check lowj > highi. */ 3185 tem = fold_binary (GT_EXPR, boolean_type_node, 3186 lowj, highi); 3187 if (tem == NULL_TREE || !integer_onep (tem)) 3188 continue; 3189 if (optimize_xor) 3190 changes = optimize_range_tests_xor (opcode, type, lowi, lowj, 3191 highi, highj, ops, 3192 ranges + i, ranges + j); 3193 else 3194 changes = optimize_range_tests_diff (opcode, type, lowi, lowj, 3195 highi, highj, ops, 3196 ranges + i, ranges + j); 3197 if (changes) 3198 { 3199 any_changes = true; 3200 break; 3201 } 3202 } 3203 } 3204 return any_changes; 3205 } 3206 3207 /* Helper function of optimize_range_tests_to_bit_test. Handle a single 3208 range, EXP, LOW, HIGH, compute bit mask of bits to test and return 3209 EXP on success, NULL otherwise. 
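As a worked illustration with made-up values: for PREC == 64, TOTALLOW == LOW == 43 and HIGH == 46 the computed mask is 0xf; for a second range with LOW == 75, HIGH == 78 and the same TOTALLOW == 43 the mask is 0xf shifted left by 32.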
*/ 3210 3211 static tree 3212 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high, 3213 wide_int *mask, tree *totallowp) 3214 { 3215 tree tem = int_const_binop (MINUS_EXPR, high, low); 3216 if (tem == NULL_TREE 3217 || TREE_CODE (tem) != INTEGER_CST 3218 || TREE_OVERFLOW (tem) 3219 || tree_int_cst_sgn (tem) == -1 3220 || compare_tree_int (tem, prec) != -1) 3221 return NULL_TREE; 3222 3223 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1; 3224 *mask = wi::shifted_mask (0, max, false, prec); 3225 if (TREE_CODE (exp) == BIT_AND_EXPR 3226 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) 3227 { 3228 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1)); 3229 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp))); 3230 if (wi::popcount (msk) == 1 3231 && wi::ltu_p (msk, prec - max)) 3232 { 3233 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec); 3234 max += msk.to_uhwi (); 3235 exp = TREE_OPERAND (exp, 0); 3236 if (integer_zerop (low) 3237 && TREE_CODE (exp) == PLUS_EXPR 3238 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) 3239 { 3240 tree ret = TREE_OPERAND (exp, 0); 3241 STRIP_NOPS (ret); 3242 widest_int bias 3243 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)), 3244 TYPE_PRECISION (TREE_TYPE (low)))); 3245 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias); 3246 if (totallowp) 3247 { 3248 *totallowp = tbias; 3249 return ret; 3250 } 3251 else if (!tree_int_cst_lt (totallow, tbias)) 3252 return NULL_TREE; 3253 bias = wi::to_widest (tbias); 3254 bias -= wi::to_widest (totallow); 3255 if (bias >= 0 && bias < prec - max) 3256 { 3257 *mask = wi::lshift (*mask, bias); 3258 return ret; 3259 } 3260 } 3261 } 3262 } 3263 if (totallowp) 3264 return exp; 3265 if (!tree_int_cst_lt (totallow, low)) 3266 return exp; 3267 tem = int_const_binop (MINUS_EXPR, low, totallow); 3268 if (tem == NULL_TREE 3269 || TREE_CODE (tem) != INTEGER_CST 3270 || TREE_OVERFLOW (tem) 3271 || compare_tree_int (tem, prec - max) == 1) 3272 return NULL_TREE; 3273 3274 *mask = wi::lshift (*mask, wi::to_widest (tem)); 3275 return exp; 3276 } 3277 3278 /* Attempt to optimize small range tests using bit test. 3279 E.g. 
3280 X != 43 && X != 76 && X != 44 && X != 78 && X != 49 3281 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82 3282 has been by earlier optimizations optimized into: 3283 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82 3284 As all the 43 through 82 range is less than 64 numbers, 3285 for 64-bit word targets optimize that into: 3286 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */ 3287 3288 static bool 3289 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length, 3290 vec<operand_entry *> *ops, 3291 struct range_entry *ranges) 3292 { 3293 int i, j; 3294 bool any_changes = false; 3295 int prec = GET_MODE_BITSIZE (word_mode); 3296 auto_vec<struct range_entry *, 64> candidates; 3297 3298 for (i = first; i < length - 1; i++) 3299 { 3300 tree lowi, highi, lowj, highj, type; 3301 3302 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 3303 continue; 3304 type = TREE_TYPE (ranges[i].exp); 3305 if (!INTEGRAL_TYPE_P (type)) 3306 continue; 3307 lowi = ranges[i].low; 3308 if (lowi == NULL_TREE) 3309 lowi = TYPE_MIN_VALUE (type); 3310 highi = ranges[i].high; 3311 if (highi == NULL_TREE) 3312 continue; 3313 wide_int mask; 3314 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi, 3315 highi, &mask, &lowi); 3316 if (exp == NULL_TREE) 3317 continue; 3318 bool strict_overflow_p = ranges[i].strict_overflow_p; 3319 candidates.truncate (0); 3320 int end = MIN (i + 64, length); 3321 for (j = i + 1; j < end; j++) 3322 { 3323 tree exp2; 3324 if (ranges[j].exp == NULL_TREE || ranges[j].in_p) 3325 continue; 3326 if (ranges[j].exp == exp) 3327 ; 3328 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR) 3329 { 3330 exp2 = TREE_OPERAND (ranges[j].exp, 0); 3331 if (exp2 == exp) 3332 ; 3333 else if (TREE_CODE (exp2) == PLUS_EXPR) 3334 { 3335 exp2 = TREE_OPERAND (exp2, 0); 3336 STRIP_NOPS (exp2); 3337 if (exp2 != exp) 3338 continue; 3339 } 3340 else 3341 continue; 3342 } 3343 else 3344 continue; 3345 lowj = ranges[j].low; 3346 if (lowj == NULL_TREE) 3347 continue; 3348 highj = ranges[j].high; 3349 if (highj == NULL_TREE) 3350 highj = TYPE_MAX_VALUE (type); 3351 wide_int mask2; 3352 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj, 3353 highj, &mask2, NULL); 3354 if (exp2 != exp) 3355 continue; 3356 mask |= mask2; 3357 strict_overflow_p |= ranges[j].strict_overflow_p; 3358 candidates.safe_push (&ranges[j]); 3359 } 3360 3361 /* If every possible relative value of the expression is a valid shift 3362 amount, then we can merge the entry test in the bit test. In this 3363 case, if we would need otherwise 2 or more comparisons, then use 3364 the bit test; in the other cases, the threshold is 3 comparisons. */ 3365 bool entry_test_needed; 3366 value_range r; 3367 if (TREE_CODE (exp) == SSA_NAME 3368 && get_range_query (cfun)->range_of_expr (r, exp) 3369 && r.kind () == VR_RANGE 3370 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1)) 3371 { 3372 wide_int min = r.lower_bound (); 3373 wide_int ilowi = wi::to_wide (lowi); 3374 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi)))) 3375 { 3376 lowi = wide_int_to_tree (TREE_TYPE (lowi), min); 3377 mask = wi::lshift (mask, ilowi - min); 3378 } 3379 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi)))) 3380 { 3381 lowi = wide_int_to_tree (TREE_TYPE (lowi), min); 3382 mask = wi::lrshift (mask, min - ilowi); 3383 } 3384 entry_test_needed = false; 3385 } 3386 else 3387 entry_test_needed = true; 3388 if (candidates.length () >= (entry_test_needed ? 
2 : 1)) 3389 { 3390 tree high = wide_int_to_tree (TREE_TYPE (lowi), 3391 wi::to_widest (lowi) 3392 + prec - 1 - wi::clz (mask)); 3393 operand_entry *oe = (*ops)[ranges[i].idx]; 3394 tree op = oe->op; 3395 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) 3396 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 3397 location_t loc = gimple_location (stmt); 3398 tree optype = op ? TREE_TYPE (op) : boolean_type_node; 3399 3400 /* See if it isn't cheaper to pretend the minimum value of the 3401 range is 0, if maximum value is small enough. 3402 We can avoid then subtraction of the minimum value, but the 3403 mask constant could be perhaps more expensive. */ 3404 if (compare_tree_int (lowi, 0) > 0 3405 && compare_tree_int (high, prec) < 0) 3406 { 3407 int cost_diff; 3408 HOST_WIDE_INT m = tree_to_uhwi (lowi); 3409 rtx reg = gen_raw_REG (word_mode, 10000); 3410 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt)); 3411 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg, 3412 GEN_INT (-m)), 3413 word_mode, speed_p); 3414 rtx r = immed_wide_int_const (mask, word_mode); 3415 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r), 3416 word_mode, speed_p); 3417 r = immed_wide_int_const (wi::lshift (mask, m), word_mode); 3418 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r), 3419 word_mode, speed_p); 3420 if (cost_diff > 0) 3421 { 3422 mask = wi::lshift (mask, m); 3423 lowi = build_zero_cst (TREE_TYPE (lowi)); 3424 } 3425 } 3426 3427 tree tem; 3428 if (entry_test_needed) 3429 { 3430 tem = build_range_check (loc, optype, unshare_expr (exp), 3431 false, lowi, high); 3432 if (tem == NULL_TREE || is_gimple_val (tem)) 3433 continue; 3434 } 3435 else 3436 tem = NULL_TREE; 3437 tree etype = unsigned_type_for (TREE_TYPE (exp)); 3438 exp = fold_build2_loc (loc, MINUS_EXPR, etype, 3439 fold_convert_loc (loc, etype, exp), 3440 fold_convert_loc (loc, etype, lowi)); 3441 exp = fold_convert_loc (loc, integer_type_node, exp); 3442 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1); 3443 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type, 3444 build_int_cst (word_type, 1), exp); 3445 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp, 3446 wide_int_to_tree (word_type, mask)); 3447 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp, 3448 build_zero_cst (word_type)); 3449 if (is_gimple_val (exp)) 3450 continue; 3451 3452 /* The shift might have undefined behavior if TEM is true, 3453 but reassociate_bb isn't prepared to have basic blocks 3454 split when it is running. So, temporarily emit a code 3455 with BIT_IOR_EXPR instead of &&, and fix it up in 3456 branch_fixup. 
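That is, we emit roughly tmp = tem | bit_test_result here and push TEM onto reassoc_branch_fixups; branch_fixup later restores the short-circuit so the shift is only evaluated when TEM is false, i.e. when EXP is known to lie in [LOWI, HIGH].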
*/ 3457 gimple_seq seq = NULL; 3458 if (tem) 3459 { 3460 tem = force_gimple_operand (tem, &seq, true, NULL_TREE); 3461 gcc_assert (TREE_CODE (tem) == SSA_NAME); 3462 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true); 3463 } 3464 gimple_seq seq2; 3465 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE); 3466 gimple_seq_add_seq_without_update (&seq, seq2); 3467 gcc_assert (TREE_CODE (exp) == SSA_NAME); 3468 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true); 3469 if (tem) 3470 { 3471 gimple *g = gimple_build_assign (make_ssa_name (optype), 3472 BIT_IOR_EXPR, tem, exp); 3473 gimple_set_location (g, loc); 3474 gimple_seq_add_stmt_without_update (&seq, g); 3475 exp = gimple_assign_lhs (g); 3476 } 3477 tree val = build_zero_cst (optype); 3478 if (update_range_test (&ranges[i], NULL, candidates.address (), 3479 candidates.length (), opcode, ops, exp, 3480 seq, false, val, val, strict_overflow_p)) 3481 { 3482 any_changes = true; 3483 if (tem) 3484 reassoc_branch_fixups.safe_push (tem); 3485 } 3486 else 3487 gimple_seq_discard (seq); 3488 } 3489 } 3490 return any_changes; 3491 } 3492 3493 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0 3494 and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1. 3495 Also, handle x < C && y < C && z < C where C is power of two as 3496 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0 3497 as (x | y | z) < 0. */ 3498 3499 static bool 3500 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length, 3501 vec<operand_entry *> *ops, 3502 struct range_entry *ranges) 3503 { 3504 int i; 3505 unsigned int b; 3506 bool any_changes = false; 3507 auto_vec<int, 128> buckets; 3508 auto_vec<int, 32> chains; 3509 auto_vec<struct range_entry *, 32> candidates; 3510 3511 for (i = first; i < length; i++) 3512 { 3513 int idx; 3514 3515 if (ranges[i].exp == NULL_TREE 3516 || TREE_CODE (ranges[i].exp) != SSA_NAME 3517 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1 3518 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE) 3519 continue; 3520 3521 if (ranges[i].low != NULL_TREE 3522 && ranges[i].high != NULL_TREE 3523 && ranges[i].in_p 3524 && tree_int_cst_equal (ranges[i].low, ranges[i].high)) 3525 { 3526 idx = !integer_zerop (ranges[i].low); 3527 if (idx && !integer_all_onesp (ranges[i].low)) 3528 continue; 3529 } 3530 else if (ranges[i].high != NULL_TREE 3531 && TREE_CODE (ranges[i].high) == INTEGER_CST 3532 && ranges[i].in_p) 3533 { 3534 wide_int w = wi::to_wide (ranges[i].high); 3535 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)); 3536 int l = wi::clz (w); 3537 idx = 2; 3538 if (l <= 0 3539 || l >= prec 3540 || w != wi::mask (prec - l, false, prec)) 3541 continue; 3542 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp)) 3543 && ranges[i].low == NULL_TREE) 3544 || (ranges[i].low 3545 && integer_zerop (ranges[i].low)))) 3546 continue; 3547 } 3548 else if (ranges[i].high == NULL_TREE 3549 && ranges[i].low != NULL_TREE 3550 /* Perform this optimization only in the last 3551 reassoc pass, as it interferes with the reassociation 3552 itself or could also with VRP etc. which might not 3553 be able to virtually undo the optimization. 
*/ 3554 && !reassoc_insert_powi_p 3555 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp)) 3556 && integer_zerop (ranges[i].low)) 3557 idx = 3; 3558 else 3559 continue; 3560 3561 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx; 3562 if (buckets.length () <= b) 3563 buckets.safe_grow_cleared (b + 1, true); 3564 if (chains.length () <= (unsigned) i) 3565 chains.safe_grow (i + 1, true); 3566 chains[i] = buckets[b]; 3567 buckets[b] = i + 1; 3568 } 3569 3570 FOR_EACH_VEC_ELT (buckets, b, i) 3571 if (i && chains[i - 1]) 3572 { 3573 int j, k = i; 3574 if ((b % 4) == 2) 3575 { 3576 /* When ranges[X - 1].high + 1 is a power of two, 3577 we need to process the same bucket up to 3578 precision - 1 times, each time split the entries 3579 with the same high bound into one chain and the 3580 rest into another one to be processed later. */ 3581 int this_prev = i; 3582 int other_prev = 0; 3583 for (j = chains[i - 1]; j; j = chains[j - 1]) 3584 { 3585 if (tree_int_cst_equal (ranges[i - 1].high, 3586 ranges[j - 1].high)) 3587 { 3588 chains[this_prev - 1] = j; 3589 this_prev = j; 3590 } 3591 else if (other_prev == 0) 3592 { 3593 buckets[b] = j; 3594 other_prev = j; 3595 } 3596 else 3597 { 3598 chains[other_prev - 1] = j; 3599 other_prev = j; 3600 } 3601 } 3602 chains[this_prev - 1] = 0; 3603 if (other_prev) 3604 chains[other_prev - 1] = 0; 3605 if (chains[i - 1] == 0) 3606 { 3607 if (other_prev) 3608 b--; 3609 continue; 3610 } 3611 } 3612 for (j = chains[i - 1]; j; j = chains[j - 1]) 3613 { 3614 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp); 3615 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp); 3616 if (reassoc_stmt_dominates_stmt_p (gk, gj)) 3617 k = j; 3618 } 3619 tree type1 = TREE_TYPE (ranges[k - 1].exp); 3620 tree type2 = NULL_TREE; 3621 bool strict_overflow_p = false; 3622 candidates.truncate (0); 3623 for (j = i; j; j = chains[j - 1]) 3624 { 3625 tree type = TREE_TYPE (ranges[j - 1].exp); 3626 strict_overflow_p |= ranges[j - 1].strict_overflow_p; 3627 if ((b % 4) == 3) 3628 { 3629 /* For the signed < 0 cases, the types should be 3630 really compatible (all signed with the same precision, 3631 instead put ranges that have different in_p from 3632 k first. 
*/ 3633 if (!useless_type_conversion_p (type1, type)) 3634 continue; 3635 if (ranges[j - 1].in_p != ranges[k - 1].in_p) 3636 candidates.safe_push (&ranges[j - 1]); 3637 type2 = type1; 3638 continue; 3639 } 3640 if (j == k 3641 || useless_type_conversion_p (type1, type)) 3642 ; 3643 else if (type2 == NULL_TREE 3644 || useless_type_conversion_p (type2, type)) 3645 { 3646 if (type2 == NULL_TREE) 3647 type2 = type; 3648 candidates.safe_push (&ranges[j - 1]); 3649 } 3650 } 3651 unsigned l = candidates.length (); 3652 for (j = i; j; j = chains[j - 1]) 3653 { 3654 tree type = TREE_TYPE (ranges[j - 1].exp); 3655 if (j == k) 3656 continue; 3657 if ((b % 4) == 3) 3658 { 3659 if (!useless_type_conversion_p (type1, type)) 3660 continue; 3661 if (ranges[j - 1].in_p == ranges[k - 1].in_p) 3662 candidates.safe_push (&ranges[j - 1]); 3663 continue; 3664 } 3665 if (useless_type_conversion_p (type1, type)) 3666 ; 3667 else if (type2 == NULL_TREE 3668 || useless_type_conversion_p (type2, type)) 3669 continue; 3670 candidates.safe_push (&ranges[j - 1]); 3671 } 3672 gimple_seq seq = NULL; 3673 tree op = NULL_TREE; 3674 unsigned int id; 3675 struct range_entry *r; 3676 candidates.safe_push (&ranges[k - 1]); 3677 FOR_EACH_VEC_ELT (candidates, id, r) 3678 { 3679 gimple *g; 3680 enum tree_code code; 3681 if (id == 0) 3682 { 3683 op = r->exp; 3684 continue; 3685 } 3686 if (id == l) 3687 { 3688 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR; 3689 g = gimple_build_assign (make_ssa_name (type1), code, op); 3690 gimple_seq_add_stmt_without_update (&seq, g); 3691 op = gimple_assign_lhs (g); 3692 } 3693 tree type = TREE_TYPE (r->exp); 3694 tree exp = r->exp; 3695 if (id >= l && !useless_type_conversion_p (type1, type)) 3696 { 3697 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp); 3698 gimple_seq_add_stmt_without_update (&seq, g); 3699 exp = gimple_assign_lhs (g); 3700 } 3701 if ((b % 4) == 3) 3702 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR; 3703 else 3704 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR; 3705 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2), 3706 code, op, exp); 3707 gimple_seq_add_stmt_without_update (&seq, g); 3708 op = gimple_assign_lhs (g); 3709 } 3710 candidates.pop (); 3711 if (update_range_test (&ranges[k - 1], NULL, candidates.address (), 3712 candidates.length (), opcode, ops, op, 3713 seq, ranges[k - 1].in_p, ranges[k - 1].low, 3714 ranges[k - 1].high, strict_overflow_p)) 3715 any_changes = true; 3716 else 3717 gimple_seq_discard (seq); 3718 if ((b % 4) == 2 && buckets[b] != i) 3719 /* There is more work to do for this bucket. 
*/ 3720 b--; 3721 } 3722 3723 return any_changes; 3724 } 3725 3726 /* Attempt to optimize for signed a and b where b is known to be >= 0: 3727 a >= 0 && a < b into (unsigned) a < (unsigned) b 3728 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */ 3729 3730 static bool 3731 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length, 3732 vec<operand_entry *> *ops, 3733 struct range_entry *ranges, 3734 basic_block first_bb) 3735 { 3736 int i; 3737 bool any_changes = false; 3738 hash_map<tree, int> *map = NULL; 3739 3740 for (i = first; i < length; i++) 3741 { 3742 if (ranges[i].exp == NULL_TREE 3743 || TREE_CODE (ranges[i].exp) != SSA_NAME 3744 || !ranges[i].in_p) 3745 continue; 3746 3747 tree type = TREE_TYPE (ranges[i].exp); 3748 if (!INTEGRAL_TYPE_P (type) 3749 || TYPE_UNSIGNED (type) 3750 || ranges[i].low == NULL_TREE 3751 || !integer_zerop (ranges[i].low) 3752 || ranges[i].high != NULL_TREE) 3753 continue; 3754 /* EXP >= 0 here. */ 3755 if (map == NULL) 3756 map = new hash_map <tree, int>; 3757 map->put (ranges[i].exp, i); 3758 } 3759 3760 if (map == NULL) 3761 return false; 3762 3763 for (i = 0; i < length; i++) 3764 { 3765 bool in_p = ranges[i].in_p; 3766 if (ranges[i].low == NULL_TREE 3767 || ranges[i].high == NULL_TREE) 3768 continue; 3769 if (!integer_zerop (ranges[i].low) 3770 || !integer_zerop (ranges[i].high)) 3771 { 3772 if (ranges[i].exp 3773 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1 3774 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp)) 3775 && integer_onep (ranges[i].low) 3776 && integer_onep (ranges[i].high)) 3777 in_p = !in_p; 3778 else 3779 continue; 3780 } 3781 3782 gimple *stmt; 3783 tree_code ccode; 3784 tree rhs1, rhs2; 3785 if (ranges[i].exp) 3786 { 3787 if (TREE_CODE (ranges[i].exp) != SSA_NAME) 3788 continue; 3789 stmt = SSA_NAME_DEF_STMT (ranges[i].exp); 3790 if (!is_gimple_assign (stmt)) 3791 continue; 3792 ccode = gimple_assign_rhs_code (stmt); 3793 rhs1 = gimple_assign_rhs1 (stmt); 3794 rhs2 = gimple_assign_rhs2 (stmt); 3795 } 3796 else 3797 { 3798 operand_entry *oe = (*ops)[ranges[i].idx]; 3799 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 3800 if (gimple_code (stmt) != GIMPLE_COND) 3801 continue; 3802 ccode = gimple_cond_code (stmt); 3803 rhs1 = gimple_cond_lhs (stmt); 3804 rhs2 = gimple_cond_rhs (stmt); 3805 } 3806 3807 if (TREE_CODE (rhs1) != SSA_NAME 3808 || rhs2 == NULL_TREE 3809 || TREE_CODE (rhs2) != SSA_NAME) 3810 continue; 3811 3812 switch (ccode) 3813 { 3814 case GT_EXPR: 3815 case GE_EXPR: 3816 case LT_EXPR: 3817 case LE_EXPR: 3818 break; 3819 default: 3820 continue; 3821 } 3822 if (in_p) 3823 ccode = invert_tree_comparison (ccode, false); 3824 switch (ccode) 3825 { 3826 case GT_EXPR: 3827 case GE_EXPR: 3828 std::swap (rhs1, rhs2); 3829 ccode = swap_tree_comparison (ccode); 3830 break; 3831 case LT_EXPR: 3832 case LE_EXPR: 3833 break; 3834 default: 3835 gcc_unreachable (); 3836 } 3837 3838 int *idx = map->get (rhs1); 3839 if (idx == NULL) 3840 continue; 3841 3842 /* maybe_optimize_range_tests allows statements without side-effects 3843 in the basic blocks as long as they are consumed in the same bb. 3844 Make sure rhs2's def stmt is not among them, otherwise we can't 3845 use safely get_nonzero_bits on it. E.g. in: 3846 # RANGE [-83, 1] NONZERO 173 3847 # k_32 = PHI <k_47(13), k_12(9)> 3848 ... 
3849 if (k_32 >= 0) 3850 goto <bb 5>; [26.46%] 3851 else 3852 goto <bb 9>; [73.54%] 3853 3854 <bb 5> [local count: 140323371]: 3855 # RANGE [0, 1] NONZERO 1 3856 _5 = (int) k_32; 3857 # RANGE [0, 4] NONZERO 4 3858 _21 = _5 << 2; 3859 # RANGE [0, 4] NONZERO 4 3860 iftmp.0_44 = (char) _21; 3861 if (k_32 < iftmp.0_44) 3862 goto <bb 6>; [84.48%] 3863 else 3864 goto <bb 9>; [15.52%] 3865 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that 3866 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44 3867 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute 3868 those stmts even for negative k_32 and the value ranges would be no 3869 longer guaranteed and so the optimization would be invalid. */ 3870 while (opcode == ERROR_MARK) 3871 { 3872 gimple *g = SSA_NAME_DEF_STMT (rhs2); 3873 basic_block bb2 = gimple_bb (g); 3874 if (bb2 3875 && bb2 != first_bb 3876 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb)) 3877 { 3878 /* As an exception, handle a few common cases. */ 3879 if (gimple_assign_cast_p (g) 3880 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g)))) 3881 { 3882 tree op0 = gimple_assign_rhs1 (g); 3883 if (TYPE_UNSIGNED (TREE_TYPE (op0)) 3884 && (TYPE_PRECISION (TREE_TYPE (rhs2)) 3885 > TYPE_PRECISION (TREE_TYPE (op0)))) 3886 /* Zero-extension is always ok. */ 3887 break; 3888 else if (TYPE_PRECISION (TREE_TYPE (rhs2)) 3889 == TYPE_PRECISION (TREE_TYPE (op0)) 3890 && TREE_CODE (op0) == SSA_NAME) 3891 { 3892 /* Cast from signed to unsigned or vice versa. Retry 3893 with the op0 as new rhs2. */ 3894 rhs2 = op0; 3895 continue; 3896 } 3897 } 3898 else if (is_gimple_assign (g) 3899 && gimple_assign_rhs_code (g) == BIT_AND_EXPR 3900 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST 3901 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g)))) 3902 /* Masking with INTEGER_CST with MSB clear is always ok 3903 too. */ 3904 break; 3905 rhs2 = NULL_TREE; 3906 } 3907 break; 3908 } 3909 if (rhs2 == NULL_TREE) 3910 continue; 3911 3912 wide_int nz = get_nonzero_bits (rhs2); 3913 if (wi::neg_p (nz)) 3914 continue; 3915 3916 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0 3917 and RHS2 is known to be RHS2 >= 0. 
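	 The rewrite below relies on the usual sign-bit argument: when both
	 values are non-negative, casting to unsigned changes neither of
	 them, and the cast also subsumes the EXP >= 0 test, because a
	 negative EXP would have its sign bit set after the cast (e.g. for
	 32-bit int, EXP = -1 becomes 0xffffffff) and so would compare above
	 any RHS2 whose sign bit is known to be clear, making the unsigned
	 comparison false exactly when the original conjunction is.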
*/ 3918 tree utype = unsigned_type_for (TREE_TYPE (rhs1)); 3919 3920 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 3921 if ((ranges[*idx].strict_overflow_p 3922 || ranges[i].strict_overflow_p) 3923 && issue_strict_overflow_warning (wc)) 3924 warning_at (gimple_location (stmt), OPT_Wstrict_overflow, 3925 "assuming signed overflow does not occur " 3926 "when simplifying range test"); 3927 3928 if (dump_file && (dump_flags & TDF_DETAILS)) 3929 { 3930 struct range_entry *r = &ranges[*idx]; 3931 fprintf (dump_file, "Optimizing range test "); 3932 print_generic_expr (dump_file, r->exp); 3933 fprintf (dump_file, " +["); 3934 print_generic_expr (dump_file, r->low); 3935 fprintf (dump_file, ", "); 3936 print_generic_expr (dump_file, r->high); 3937 fprintf (dump_file, "] and comparison "); 3938 print_generic_expr (dump_file, rhs1); 3939 fprintf (dump_file, " %s ", op_symbol_code (ccode)); 3940 print_generic_expr (dump_file, rhs2); 3941 fprintf (dump_file, "\n into ("); 3942 print_generic_expr (dump_file, utype); 3943 fprintf (dump_file, ") "); 3944 print_generic_expr (dump_file, rhs1); 3945 fprintf (dump_file, " %s (", op_symbol_code (ccode)); 3946 print_generic_expr (dump_file, utype); 3947 fprintf (dump_file, ") "); 3948 print_generic_expr (dump_file, rhs2); 3949 fprintf (dump_file, "\n"); 3950 } 3951 3952 operand_entry *oe = (*ops)[ranges[i].idx]; 3953 ranges[i].in_p = 0; 3954 if (opcode == BIT_IOR_EXPR 3955 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 3956 { 3957 ranges[i].in_p = 1; 3958 ccode = invert_tree_comparison (ccode, false); 3959 } 3960 3961 unsigned int uid = gimple_uid (stmt); 3962 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 3963 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1); 3964 gimple_set_uid (g, uid); 3965 rhs1 = gimple_assign_lhs (g); 3966 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3967 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2))) 3968 { 3969 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2); 3970 gimple_set_uid (g, uid); 3971 rhs2 = gimple_assign_lhs (g); 3972 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3973 } 3974 if (tree_swap_operands_p (rhs1, rhs2)) 3975 { 3976 std::swap (rhs1, rhs2); 3977 ccode = swap_tree_comparison (ccode); 3978 } 3979 if (gimple_code (stmt) == GIMPLE_COND) 3980 { 3981 gcond *c = as_a <gcond *> (stmt); 3982 gimple_cond_set_code (c, ccode); 3983 gimple_cond_set_lhs (c, rhs1); 3984 gimple_cond_set_rhs (c, rhs2); 3985 update_stmt (stmt); 3986 } 3987 else 3988 { 3989 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node; 3990 if (!INTEGRAL_TYPE_P (ctype) 3991 || (TREE_CODE (ctype) != BOOLEAN_TYPE 3992 && TYPE_PRECISION (ctype) != 1)) 3993 ctype = boolean_type_node; 3994 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2); 3995 gimple_set_uid (g, uid); 3996 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3997 if (oe->op && ctype != TREE_TYPE (oe->op)) 3998 { 3999 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)), 4000 NOP_EXPR, gimple_assign_lhs (g)); 4001 gimple_set_uid (g, uid); 4002 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4003 } 4004 ranges[i].exp = gimple_assign_lhs (g); 4005 oe->op = ranges[i].exp; 4006 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp)); 4007 ranges[i].high = ranges[i].low; 4008 } 4009 ranges[i].strict_overflow_p = false; 4010 oe = (*ops)[ranges[*idx].idx]; 4011 /* Now change all the other range test immediate uses, so that 4012 those tests will be optimized away. 
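	 Concretely, the entry recorded at ranges[*idx] (the EXP >= 0 test)
	 is neutralized below: in the inter-bb case (opcode == ERROR_MARK)
	 its operand becomes the neutral constant of the combining operation
	 (0 for ||, 1 for &&), otherwise it is marked error_mark_node so that
	 it is dropped from the ops vector afterwards.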
*/ 4013 if (opcode == ERROR_MARK) 4014 { 4015 if (oe->op) 4016 oe->op = build_int_cst (TREE_TYPE (oe->op), 4017 oe->rank == BIT_IOR_EXPR ? 0 : 1); 4018 else 4019 oe->op = (oe->rank == BIT_IOR_EXPR 4020 ? boolean_false_node : boolean_true_node); 4021 } 4022 else 4023 oe->op = error_mark_node; 4024 ranges[*idx].exp = NULL_TREE; 4025 ranges[*idx].low = NULL_TREE; 4026 ranges[*idx].high = NULL_TREE; 4027 any_changes = true; 4028 } 4029 4030 delete map; 4031 return any_changes; 4032 } 4033 4034 /* Optimize range tests, similarly how fold_range_test optimizes 4035 it on trees. The tree code for the binary 4036 operation between all the operands is OPCODE. 4037 If OPCODE is ERROR_MARK, optimize_range_tests is called from within 4038 maybe_optimize_range_tests for inter-bb range optimization. 4039 In that case if oe->op is NULL, oe->id is bb->index whose 4040 GIMPLE_COND is && or ||ed into the test, and oe->rank says 4041 the actual opcode. 4042 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */ 4043 4044 static bool 4045 optimize_range_tests (enum tree_code opcode, 4046 vec<operand_entry *> *ops, basic_block first_bb) 4047 { 4048 unsigned int length = ops->length (), i, j, first; 4049 operand_entry *oe; 4050 struct range_entry *ranges; 4051 bool any_changes = false; 4052 4053 if (length == 1) 4054 return false; 4055 4056 ranges = XNEWVEC (struct range_entry, length); 4057 for (i = 0; i < length; i++) 4058 { 4059 oe = (*ops)[i]; 4060 ranges[i].idx = i; 4061 init_range_entry (ranges + i, oe->op, 4062 oe->op 4063 ? NULL 4064 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id))); 4065 /* For | invert it now, we will invert it again before emitting 4066 the optimized expression. */ 4067 if (opcode == BIT_IOR_EXPR 4068 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 4069 ranges[i].in_p = !ranges[i].in_p; 4070 } 4071 4072 qsort (ranges, length, sizeof (*ranges), range_entry_cmp); 4073 for (i = 0; i < length; i++) 4074 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) 4075 break; 4076 4077 /* Try to merge ranges. */ 4078 for (first = i; i < length; i++) 4079 { 4080 tree low = ranges[i].low; 4081 tree high = ranges[i].high; 4082 int in_p = ranges[i].in_p; 4083 bool strict_overflow_p = ranges[i].strict_overflow_p; 4084 int update_fail_count = 0; 4085 4086 for (j = i + 1; j < length; j++) 4087 { 4088 if (ranges[i].exp != ranges[j].exp) 4089 break; 4090 if (!merge_ranges (&in_p, &low, &high, in_p, low, high, 4091 ranges[j].in_p, ranges[j].low, ranges[j].high)) 4092 break; 4093 strict_overflow_p |= ranges[j].strict_overflow_p; 4094 } 4095 4096 if (j == i + 1) 4097 continue; 4098 4099 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1, 4100 opcode, ops, ranges[i].exp, NULL, in_p, 4101 low, high, strict_overflow_p)) 4102 { 4103 i = j - 1; 4104 any_changes = true; 4105 } 4106 /* Avoid quadratic complexity if all merge_ranges calls would succeed, 4107 while update_range_test would fail. 
*/ 4108 else if (update_fail_count == 64) 4109 i = j - 1; 4110 else 4111 ++update_fail_count; 4112 } 4113 4114 any_changes |= optimize_range_tests_1 (opcode, first, length, true, 4115 ops, ranges); 4116 4117 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2) 4118 any_changes |= optimize_range_tests_1 (opcode, first, length, false, 4119 ops, ranges); 4120 if (lshift_cheap_p (optimize_function_for_speed_p (cfun))) 4121 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length, 4122 ops, ranges); 4123 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops, 4124 ranges, first_bb); 4125 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length, 4126 ops, ranges); 4127 4128 if (any_changes && opcode != ERROR_MARK) 4129 { 4130 j = 0; 4131 FOR_EACH_VEC_ELT (*ops, i, oe) 4132 { 4133 if (oe->op == error_mark_node) 4134 continue; 4135 else if (i != j) 4136 (*ops)[j] = oe; 4137 j++; 4138 } 4139 ops->truncate (j); 4140 } 4141 4142 XDELETEVEC (ranges); 4143 return any_changes; 4144 } 4145 4146 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize 4147 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure, 4148 otherwise the comparison code. TYPE is a return value that is set 4149 to type of comparison. */ 4150 4151 static tree_code 4152 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type, 4153 tree *lhs, tree *rhs, gassign **vcond) 4154 { 4155 if (TREE_CODE (var) != SSA_NAME) 4156 return ERROR_MARK; 4157 4158 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var)); 4159 if (stmt == NULL) 4160 return ERROR_MARK; 4161 if (vcond) 4162 *vcond = stmt; 4163 4164 /* ??? If we start creating more COND_EXPR, we could perform 4165 this same optimization with them. For now, simplify. */ 4166 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR) 4167 return ERROR_MARK; 4168 4169 tree cond = gimple_assign_rhs1 (stmt); 4170 tree_code cmp = TREE_CODE (cond); 4171 if (cmp != SSA_NAME) 4172 return ERROR_MARK; 4173 4174 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond)); 4175 if (assign == NULL 4176 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison) 4177 return ERROR_MARK; 4178 4179 cmp = gimple_assign_rhs_code (assign); 4180 if (lhs) 4181 *lhs = gimple_assign_rhs1 (assign); 4182 if (rhs) 4183 *rhs = gimple_assign_rhs2 (assign); 4184 4185 /* ??? For now, allow only canonical true and false result vectors. 4186 We could expand this to other constants should the need arise, 4187 but at the moment we don't create them. */ 4188 tree t = gimple_assign_rhs2 (stmt); 4189 tree f = gimple_assign_rhs3 (stmt); 4190 bool inv; 4191 if (integer_all_onesp (t)) 4192 inv = false; 4193 else if (integer_all_onesp (f)) 4194 { 4195 cmp = invert_tree_comparison (cmp, false); 4196 inv = true; 4197 } 4198 else 4199 return ERROR_MARK; 4200 if (!integer_zerop (f)) 4201 return ERROR_MARK; 4202 4203 /* Success! */ 4204 if (rets) 4205 *rets = assign; 4206 if (reti) 4207 *reti = inv; 4208 if (type) 4209 *type = TREE_TYPE (cond); 4210 return cmp; 4211 } 4212 4213 /* Optimize the condition of VEC_COND_EXPRs which have been combined 4214 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). 
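     As an illustrative sketch (SSA names made up for this comment), given
       _5 = x_1 < y_2;
       c_6 = VEC_COND_EXPR <_5, { -1, ... }, { 0, ... }>;
       _7 = x_1 < z_3;
       c_8 = VEC_COND_EXPR <_7, { -1, ... }, { 0, ... }>;
       r_9 = c_6 & c_8;
     if maybe_fold_and_comparisons can combine the two comparisons, the
     first VEC_COND_EXPR is rewritten to use the combined condition and the
     second one is dropped from the ops list.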
*/ 4215 4216 static bool 4217 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops) 4218 { 4219 unsigned int length = ops->length (), i, j; 4220 bool any_changes = false; 4221 4222 if (length == 1) 4223 return false; 4224 4225 for (i = 0; i < length; ++i) 4226 { 4227 tree elt0 = (*ops)[i]->op; 4228 4229 gassign *stmt0, *vcond0; 4230 bool invert; 4231 tree type, lhs0, rhs0; 4232 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0, 4233 &rhs0, &vcond0); 4234 if (cmp0 == ERROR_MARK) 4235 continue; 4236 4237 for (j = i + 1; j < length; ++j) 4238 { 4239 tree &elt1 = (*ops)[j]->op; 4240 4241 gassign *stmt1, *vcond1; 4242 tree lhs1, rhs1; 4243 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1, 4244 &rhs1, &vcond1); 4245 if (cmp1 == ERROR_MARK) 4246 continue; 4247 4248 tree comb; 4249 if (opcode == BIT_AND_EXPR) 4250 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0, 4251 cmp1, lhs1, rhs1); 4252 else if (opcode == BIT_IOR_EXPR) 4253 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0, 4254 cmp1, lhs1, rhs1); 4255 else 4256 gcc_unreachable (); 4257 if (comb == NULL) 4258 continue; 4259 4260 /* Success! */ 4261 if (dump_file && (dump_flags & TDF_DETAILS)) 4262 { 4263 fprintf (dump_file, "Transforming "); 4264 print_generic_expr (dump_file, gimple_assign_lhs (stmt0)); 4265 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|'); 4266 print_generic_expr (dump_file, gimple_assign_lhs (stmt1)); 4267 fprintf (dump_file, " into "); 4268 print_generic_expr (dump_file, comb); 4269 fputc ('\n', dump_file); 4270 } 4271 4272 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0); 4273 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE, 4274 true, GSI_SAME_STMT); 4275 if (invert) 4276 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0), 4277 gimple_assign_rhs3_ptr (vcond0)); 4278 gimple_assign_set_rhs1 (vcond0, exp); 4279 update_stmt (vcond0); 4280 4281 elt1 = error_mark_node; 4282 any_changes = true; 4283 } 4284 } 4285 4286 if (any_changes) 4287 { 4288 operand_entry *oe; 4289 j = 0; 4290 FOR_EACH_VEC_ELT (*ops, i, oe) 4291 { 4292 if (oe->op == error_mark_node) 4293 continue; 4294 else if (i != j) 4295 (*ops)[j] = oe; 4296 j++; 4297 } 4298 ops->truncate (j); 4299 } 4300 4301 return any_changes; 4302 } 4303 4304 /* Return true if STMT is a cast like: 4305 <bb N>: 4306 ... 4307 _123 = (int) _234; 4308 4309 <bb M>: 4310 # _345 = PHI <_123(N), 1(...), 1(...)> 4311 where _234 has bool type, _123 has single use and 4312 bb N has a single successor M. This is commonly used in 4313 the last block of a range test. 4314 4315 Also Return true if STMT is tcc_compare like: 4316 <bb N>: 4317 ... 4318 _234 = a_2(D) == 2; 4319 4320 <bb M>: 4321 # _345 = PHI <_234(N), 1(...), 1(...)> 4322 _346 = (int) _345; 4323 where _234 has booltype, single use and 4324 bb N has a single successor M. This is commonly used in 4325 the last block of a range test. 
*/ 4326 4327 static bool 4328 final_range_test_p (gimple *stmt) 4329 { 4330 basic_block bb, rhs_bb, lhs_bb; 4331 edge e; 4332 tree lhs, rhs; 4333 use_operand_p use_p; 4334 gimple *use_stmt; 4335 4336 if (!gimple_assign_cast_p (stmt) 4337 && (!is_gimple_assign (stmt) 4338 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 4339 != tcc_comparison))) 4340 return false; 4341 bb = gimple_bb (stmt); 4342 if (!single_succ_p (bb)) 4343 return false; 4344 e = single_succ_edge (bb); 4345 if (e->flags & EDGE_COMPLEX) 4346 return false; 4347 4348 lhs = gimple_assign_lhs (stmt); 4349 rhs = gimple_assign_rhs1 (stmt); 4350 if (gimple_assign_cast_p (stmt) 4351 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 4352 || TREE_CODE (rhs) != SSA_NAME 4353 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)) 4354 return false; 4355 4356 if (!gimple_assign_cast_p (stmt) 4357 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)) 4358 return false; 4359 4360 /* Test whether lhs is consumed only by a PHI in the only successor bb. */ 4361 if (!single_imm_use (lhs, &use_p, &use_stmt)) 4362 return false; 4363 4364 if (gimple_code (use_stmt) != GIMPLE_PHI 4365 || gimple_bb (use_stmt) != e->dest) 4366 return false; 4367 4368 /* And that the rhs is defined in the same loop. */ 4369 if (gimple_assign_cast_p (stmt)) 4370 { 4371 if (TREE_CODE (rhs) != SSA_NAME 4372 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs))) 4373 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb)) 4374 return false; 4375 } 4376 else 4377 { 4378 if (TREE_CODE (lhs) != SSA_NAME 4379 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs))) 4380 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb)) 4381 return false; 4382 } 4383 4384 return true; 4385 } 4386 4387 /* Return true if BB is suitable basic block for inter-bb range test 4388 optimization. If BACKWARD is true, BB should be the only predecessor 4389 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine, 4390 or compared with to find a common basic block to which all conditions 4391 branch to if true resp. false. If BACKWARD is false, TEST_BB should 4392 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if 4393 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB 4394 block points to an empty block that falls through into *OTHER_BB and 4395 the phi args match that path. */ 4396 4397 static bool 4398 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb, 4399 bool *test_swapped_p, bool backward) 4400 { 4401 edge_iterator ei, ei2; 4402 edge e, e2; 4403 gimple *stmt; 4404 gphi_iterator gsi; 4405 bool other_edge_seen = false; 4406 bool is_cond; 4407 4408 if (test_bb == bb) 4409 return false; 4410 /* Check last stmt first. */ 4411 stmt = last_stmt (bb); 4412 if (stmt == NULL 4413 || (gimple_code (stmt) != GIMPLE_COND 4414 && (backward || !final_range_test_p (stmt))) 4415 || gimple_visited_p (stmt) 4416 || stmt_could_throw_p (cfun, stmt) 4417 || *other_bb == bb) 4418 return false; 4419 is_cond = gimple_code (stmt) == GIMPLE_COND; 4420 if (is_cond) 4421 { 4422 /* If last stmt is GIMPLE_COND, verify that one of the succ edges 4423 goes to the next bb (if BACKWARD, it is TEST_BB), and the other 4424 to *OTHER_BB (if not set yet, try to find it out). 
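	 For instance (block numbers purely illustrative), with BACKWARD set:
	   <bb 3>: if (a_1 > 0) goto <bb 4>; else goto <bb 7>;
	   <bb 4>: if (b_2 > 0) goto <bb 5>; else goto <bb 7>;
	 here BB is bb 3, TEST_BB is bb 4, and bb 7, the destination the two
	 conditions share, is what ends up in *OTHER_BB.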
*/ 4425 if (EDGE_COUNT (bb->succs) != 2) 4426 return false; 4427 FOR_EACH_EDGE (e, ei, bb->succs) 4428 { 4429 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 4430 return false; 4431 if (e->dest == test_bb) 4432 { 4433 if (backward) 4434 continue; 4435 else 4436 return false; 4437 } 4438 if (e->dest == bb) 4439 return false; 4440 if (*other_bb == NULL) 4441 { 4442 FOR_EACH_EDGE (e2, ei2, test_bb->succs) 4443 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 4444 return false; 4445 else if (e->dest == e2->dest) 4446 *other_bb = e->dest; 4447 if (*other_bb == NULL) 4448 return false; 4449 } 4450 if (e->dest == *other_bb) 4451 other_edge_seen = true; 4452 else if (backward) 4453 return false; 4454 } 4455 if (*other_bb == NULL || !other_edge_seen) 4456 return false; 4457 } 4458 else if (single_succ (bb) != *other_bb) 4459 return false; 4460 4461 /* Now check all PHIs of *OTHER_BB. */ 4462 e = find_edge (bb, *other_bb); 4463 e2 = find_edge (test_bb, *other_bb); 4464 retry:; 4465 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) 4466 { 4467 gphi *phi = gsi.phi (); 4468 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments 4469 corresponding to BB and TEST_BB predecessor must be the same. */ 4470 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx), 4471 gimple_phi_arg_def (phi, e2->dest_idx), 0)) 4472 { 4473 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND, 4474 one of the PHIs should have the lhs of the last stmt in 4475 that block as PHI arg and that PHI should have 0 or 1 4476 corresponding to it in all other range test basic blocks 4477 considered. */ 4478 if (!is_cond) 4479 { 4480 if (gimple_phi_arg_def (phi, e->dest_idx) 4481 == gimple_assign_lhs (stmt) 4482 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx)) 4483 || integer_onep (gimple_phi_arg_def (phi, 4484 e2->dest_idx)))) 4485 continue; 4486 } 4487 else 4488 { 4489 gimple *test_last = last_stmt (test_bb); 4490 if (gimple_code (test_last) == GIMPLE_COND) 4491 { 4492 if (backward ? e2->src != test_bb : e->src != bb) 4493 return false; 4494 4495 /* For last_bb, handle also: 4496 if (x_3(D) == 3) 4497 goto <bb 6>; [34.00%] 4498 else 4499 goto <bb 7>; [66.00%] 4500 4501 <bb 6> [local count: 79512730]: 4502 4503 <bb 7> [local count: 1073741824]: 4504 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)> 4505 where bb 7 is *OTHER_BB, but the PHI values from the 4506 earlier bbs match the path through the empty bb 4507 in between. */ 4508 edge e3; 4509 if (backward) 4510 e3 = EDGE_SUCC (test_bb, 4511 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0); 4512 else 4513 e3 = EDGE_SUCC (bb, 4514 e == EDGE_SUCC (bb, 0) ? 
1 : 0); 4515 if (empty_block_p (e3->dest) 4516 && single_succ_p (e3->dest) 4517 && single_succ (e3->dest) == *other_bb 4518 && single_pred_p (e3->dest) 4519 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU) 4520 { 4521 if (backward) 4522 e2 = single_succ_edge (e3->dest); 4523 else 4524 e = single_succ_edge (e3->dest); 4525 if (test_swapped_p) 4526 *test_swapped_p = true; 4527 goto retry; 4528 } 4529 } 4530 else if (gimple_phi_arg_def (phi, e2->dest_idx) 4531 == gimple_assign_lhs (test_last) 4532 && (integer_zerop (gimple_phi_arg_def (phi, 4533 e->dest_idx)) 4534 || integer_onep (gimple_phi_arg_def (phi, 4535 e->dest_idx)))) 4536 continue; 4537 } 4538 4539 return false; 4540 } 4541 } 4542 return true; 4543 } 4544 4545 /* Return true if BB doesn't have side-effects that would disallow 4546 range test optimization, all SSA_NAMEs set in the bb are consumed 4547 in the bb and there are no PHIs. */ 4548 4549 bool 4550 no_side_effect_bb (basic_block bb) 4551 { 4552 gimple_stmt_iterator gsi; 4553 gimple *last; 4554 4555 if (!gimple_seq_empty_p (phi_nodes (bb))) 4556 return false; 4557 last = last_stmt (bb); 4558 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 4559 { 4560 gimple *stmt = gsi_stmt (gsi); 4561 tree lhs; 4562 imm_use_iterator imm_iter; 4563 use_operand_p use_p; 4564 4565 if (is_gimple_debug (stmt)) 4566 continue; 4567 if (gimple_has_side_effects (stmt)) 4568 return false; 4569 if (stmt == last) 4570 return true; 4571 if (!is_gimple_assign (stmt)) 4572 return false; 4573 lhs = gimple_assign_lhs (stmt); 4574 if (TREE_CODE (lhs) != SSA_NAME) 4575 return false; 4576 if (gimple_assign_rhs_could_trap_p (stmt)) 4577 return false; 4578 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) 4579 { 4580 gimple *use_stmt = USE_STMT (use_p); 4581 if (is_gimple_debug (use_stmt)) 4582 continue; 4583 if (gimple_bb (use_stmt) != bb) 4584 return false; 4585 } 4586 } 4587 return false; 4588 } 4589 4590 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, 4591 return true and fill in *OPS recursively. */ 4592 4593 static bool 4594 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops, 4595 class loop *loop) 4596 { 4597 gimple *stmt = SSA_NAME_DEF_STMT (var); 4598 tree rhs[2]; 4599 int i; 4600 4601 if (!is_reassociable_op (stmt, code, loop)) 4602 return false; 4603 4604 rhs[0] = gimple_assign_rhs1 (stmt); 4605 rhs[1] = gimple_assign_rhs2 (stmt); 4606 gimple_set_visited (stmt, true); 4607 for (i = 0; i < 2; i++) 4608 if (TREE_CODE (rhs[i]) == SSA_NAME 4609 && !get_ops (rhs[i], code, ops, loop) 4610 && has_single_use (rhs[i])) 4611 { 4612 operand_entry *oe = operand_entry_pool.allocate (); 4613 4614 oe->op = rhs[i]; 4615 oe->rank = code; 4616 oe->id = 0; 4617 oe->count = 1; 4618 oe->stmt_to_insert = NULL; 4619 ops->safe_push (oe); 4620 } 4621 return true; 4622 } 4623 4624 /* Find the ops that were added by get_ops starting from VAR, see if 4625 they were changed during update_range_test and if yes, create new 4626 stmts. 
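   Roughly: if VAR is, say, _12 = _10 | _11 and the range test feeding _10
   was rewritten, the replacement operand is taken from OPS at *PIDX and a
   fresh statement is built next to the old one; if nothing below VAR
   changed, VAR itself is returned unchanged.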
*/ 4627 4628 static tree 4629 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops, 4630 unsigned int *pidx, class loop *loop) 4631 { 4632 gimple *stmt = SSA_NAME_DEF_STMT (var); 4633 tree rhs[4]; 4634 int i; 4635 4636 if (!is_reassociable_op (stmt, code, loop)) 4637 return NULL; 4638 4639 rhs[0] = gimple_assign_rhs1 (stmt); 4640 rhs[1] = gimple_assign_rhs2 (stmt); 4641 rhs[2] = rhs[0]; 4642 rhs[3] = rhs[1]; 4643 for (i = 0; i < 2; i++) 4644 if (TREE_CODE (rhs[i]) == SSA_NAME) 4645 { 4646 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop); 4647 if (rhs[2 + i] == NULL_TREE) 4648 { 4649 if (has_single_use (rhs[i])) 4650 rhs[2 + i] = ops[(*pidx)++]->op; 4651 else 4652 rhs[2 + i] = rhs[i]; 4653 } 4654 } 4655 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1]) 4656 && (rhs[2] != rhs[1] || rhs[3] != rhs[0])) 4657 { 4658 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 4659 var = make_ssa_name (TREE_TYPE (var)); 4660 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt), 4661 rhs[2], rhs[3]); 4662 gimple_set_uid (g, gimple_uid (stmt)); 4663 gimple_set_visited (g, true); 4664 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4665 gimple_stmt_iterator gsi2 = gsi_for_stmt (g); 4666 if (fold_stmt_inplace (&gsi2)) 4667 update_stmt (g); 4668 } 4669 return var; 4670 } 4671 4672 /* Structure to track the initial value passed to get_ops and 4673 the range in the ops vector for each basic block. */ 4674 4675 struct inter_bb_range_test_entry 4676 { 4677 tree op; 4678 unsigned int first_idx, last_idx; 4679 }; 4680 4681 /* Inter-bb range test optimization. 4682 4683 Returns TRUE if a gimple conditional is optimized to a true/false, 4684 otherwise return FALSE. 4685 4686 This indicates to the caller that it should run a CFG cleanup pass 4687 once reassociation is completed. */ 4688 4689 static bool 4690 maybe_optimize_range_tests (gimple *stmt) 4691 { 4692 basic_block first_bb = gimple_bb (stmt); 4693 basic_block last_bb = first_bb; 4694 basic_block other_bb = NULL; 4695 basic_block bb; 4696 edge_iterator ei; 4697 edge e; 4698 auto_vec<operand_entry *> ops; 4699 auto_vec<inter_bb_range_test_entry> bbinfo; 4700 bool any_changes = false; 4701 bool cfg_cleanup_needed = false; 4702 4703 /* Consider only basic blocks that end with GIMPLE_COND or 4704 a cast statement satisfying final_range_test_p. All 4705 but the last bb in the first_bb .. last_bb range 4706 should end with GIMPLE_COND. */ 4707 if (gimple_code (stmt) == GIMPLE_COND) 4708 { 4709 if (EDGE_COUNT (first_bb->succs) != 2) 4710 return cfg_cleanup_needed; 4711 } 4712 else if (final_range_test_p (stmt)) 4713 other_bb = single_succ (first_bb); 4714 else 4715 return cfg_cleanup_needed; 4716 4717 if (stmt_could_throw_p (cfun, stmt)) 4718 return cfg_cleanup_needed; 4719 4720 /* As relative ordering of post-dominator sons isn't fixed, 4721 maybe_optimize_range_tests can be called first on any 4722 bb in the range we want to optimize. So, start searching 4723 backwards, if first_bb can be set to a predecessor. */ 4724 while (single_pred_p (first_bb)) 4725 { 4726 basic_block pred_bb = single_pred (first_bb); 4727 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true)) 4728 break; 4729 if (!no_side_effect_bb (first_bb)) 4730 break; 4731 first_bb = pred_bb; 4732 } 4733 /* If first_bb is last_bb, other_bb hasn't been computed yet. 4734 Before starting forward search in last_bb successors, find 4735 out the other_bb. 
*/ 4736 if (first_bb == last_bb) 4737 { 4738 other_bb = NULL; 4739 /* As non-GIMPLE_COND last stmt always terminates the range, 4740 if forward search didn't discover anything, just give up. */ 4741 if (gimple_code (stmt) != GIMPLE_COND) 4742 return cfg_cleanup_needed; 4743 /* Look at both successors. Either it ends with a GIMPLE_COND 4744 and satisfies suitable_cond_bb, or ends with a cast and 4745 other_bb is that cast's successor. */ 4746 FOR_EACH_EDGE (e, ei, first_bb->succs) 4747 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)) 4748 || e->dest == first_bb) 4749 return cfg_cleanup_needed; 4750 else if (single_pred_p (e->dest)) 4751 { 4752 stmt = last_stmt (e->dest); 4753 if (stmt 4754 && gimple_code (stmt) == GIMPLE_COND 4755 && EDGE_COUNT (e->dest->succs) == 2) 4756 { 4757 if (suitable_cond_bb (first_bb, e->dest, &other_bb, 4758 NULL, true)) 4759 break; 4760 else 4761 other_bb = NULL; 4762 } 4763 else if (stmt 4764 && final_range_test_p (stmt) 4765 && find_edge (first_bb, single_succ (e->dest))) 4766 { 4767 other_bb = single_succ (e->dest); 4768 if (other_bb == first_bb) 4769 other_bb = NULL; 4770 } 4771 } 4772 if (other_bb == NULL) 4773 return cfg_cleanup_needed; 4774 } 4775 /* Now do the forward search, moving last_bb to successor bbs 4776 that aren't other_bb. */ 4777 while (EDGE_COUNT (last_bb->succs) == 2) 4778 { 4779 FOR_EACH_EDGE (e, ei, last_bb->succs) 4780 if (e->dest != other_bb) 4781 break; 4782 if (e == NULL) 4783 break; 4784 if (!single_pred_p (e->dest)) 4785 break; 4786 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false)) 4787 break; 4788 if (!no_side_effect_bb (e->dest)) 4789 break; 4790 last_bb = e->dest; 4791 } 4792 if (first_bb == last_bb) 4793 return cfg_cleanup_needed; 4794 /* Here basic blocks first_bb through last_bb's predecessor 4795 end with GIMPLE_COND, all of them have one of the edges to 4796 other_bb and another to another block in the range, 4797 all blocks except first_bb don't have side-effects and 4798 last_bb ends with either GIMPLE_COND, or cast satisfying 4799 final_range_test_p. */ 4800 for (bb = last_bb; ; bb = single_pred (bb)) 4801 { 4802 enum tree_code code; 4803 tree lhs, rhs; 4804 inter_bb_range_test_entry bb_ent; 4805 4806 bb_ent.op = NULL_TREE; 4807 bb_ent.first_idx = ops.length (); 4808 bb_ent.last_idx = bb_ent.first_idx; 4809 e = find_edge (bb, other_bb); 4810 stmt = last_stmt (bb); 4811 gimple_set_visited (stmt, true); 4812 if (gimple_code (stmt) != GIMPLE_COND) 4813 { 4814 use_operand_p use_p; 4815 gimple *phi; 4816 edge e2; 4817 unsigned int d; 4818 4819 lhs = gimple_assign_lhs (stmt); 4820 rhs = gimple_assign_rhs1 (stmt); 4821 gcc_assert (bb == last_bb); 4822 4823 /* stmt is 4824 _123 = (int) _234; 4825 OR 4826 _234 = a_2(D) == 2; 4827 4828 followed by: 4829 <bb M>: 4830 # _345 = PHI <_123(N), 1(...), 1(...)> 4831 4832 or 0 instead of 1. If it is 0, the _234 4833 range test is anded together with all the 4834 other range tests, if it is 1, it is ored with 4835 them. 
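	     The intuition: a 0 argument on the edges from the earlier
	     condition blocks means a failing earlier test short-circuits
	     the whole value to false, i.e. the tests are combined with &&,
	     while a 1 argument means an early success short-circuits to
	     true, i.e. they are combined with ||.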
*/ 4836 single_imm_use (lhs, &use_p, &phi); 4837 gcc_assert (gimple_code (phi) == GIMPLE_PHI); 4838 e2 = find_edge (first_bb, other_bb); 4839 d = e2->dest_idx; 4840 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs); 4841 if (integer_zerop (gimple_phi_arg_def (phi, d))) 4842 code = BIT_AND_EXPR; 4843 else 4844 { 4845 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d))); 4846 code = BIT_IOR_EXPR; 4847 } 4848 4849 /* If _234 SSA_NAME_DEF_STMT is 4850 _234 = _567 | _789; 4851 (or &, corresponding to 1/0 in the phi arguments, 4852 push into ops the individual range test arguments 4853 of the bitwise or resp. and, recursively. */ 4854 if (TREE_CODE (rhs) == SSA_NAME 4855 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 4856 != tcc_comparison) 4857 && !get_ops (rhs, code, &ops, 4858 loop_containing_stmt (stmt)) 4859 && has_single_use (rhs)) 4860 { 4861 /* Otherwise, push the _234 range test itself. */ 4862 operand_entry *oe = operand_entry_pool.allocate (); 4863 4864 oe->op = rhs; 4865 oe->rank = code; 4866 oe->id = 0; 4867 oe->count = 1; 4868 oe->stmt_to_insert = NULL; 4869 ops.safe_push (oe); 4870 bb_ent.last_idx++; 4871 bb_ent.op = rhs; 4872 } 4873 else if (is_gimple_assign (stmt) 4874 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 4875 == tcc_comparison) 4876 && !get_ops (lhs, code, &ops, 4877 loop_containing_stmt (stmt)) 4878 && has_single_use (lhs)) 4879 { 4880 operand_entry *oe = operand_entry_pool.allocate (); 4881 oe->op = lhs; 4882 oe->rank = code; 4883 oe->id = 0; 4884 oe->count = 1; 4885 ops.safe_push (oe); 4886 bb_ent.last_idx++; 4887 bb_ent.op = lhs; 4888 } 4889 else 4890 { 4891 bb_ent.last_idx = ops.length (); 4892 bb_ent.op = rhs; 4893 } 4894 bbinfo.safe_push (bb_ent); 4895 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i) 4896 ops[i]->id = bb->index; 4897 continue; 4898 } 4899 else if (bb == last_bb) 4900 { 4901 /* For last_bb, handle also: 4902 if (x_3(D) == 3) 4903 goto <bb 6>; [34.00%] 4904 else 4905 goto <bb 7>; [66.00%] 4906 4907 <bb 6> [local count: 79512730]: 4908 4909 <bb 7> [local count: 1073741824]: 4910 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)> 4911 where bb 7 is OTHER_BB, but the PHI values from the 4912 earlier bbs match the path through the empty bb 4913 in between. */ 4914 bool test_swapped_p = false; 4915 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb, 4916 &other_bb, &test_swapped_p, true); 4917 gcc_assert (ok); 4918 if (test_swapped_p) 4919 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0); 4920 } 4921 /* Otherwise stmt is GIMPLE_COND. */ 4922 code = gimple_cond_code (stmt); 4923 lhs = gimple_cond_lhs (stmt); 4924 rhs = gimple_cond_rhs (stmt); 4925 if (TREE_CODE (lhs) == SSA_NAME 4926 && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 4927 && ((code != EQ_EXPR && code != NE_EXPR) 4928 || rhs != boolean_false_node 4929 /* Either push into ops the individual bitwise 4930 or resp. and operands, depending on which 4931 edge is other_bb. */ 4932 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0) 4933 ^ (code == EQ_EXPR)) 4934 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops, 4935 loop_containing_stmt (stmt)))) 4936 { 4937 /* Or push the GIMPLE_COND stmt itself. */ 4938 operand_entry *oe = operand_entry_pool.allocate (); 4939 4940 oe->op = NULL; 4941 oe->rank = (e->flags & EDGE_TRUE_VALUE) 4942 ? BIT_IOR_EXPR : BIT_AND_EXPR; 4943 /* oe->op = NULL signs that there is no SSA_NAME 4944 for the range test, and oe->id instead is the 4945 basic block number, at which's end the GIMPLE_COND 4946 is. 
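	     (Later consumers such as optimize_range_tests recover that
	     statement again via last_stmt (BASIC_BLOCK_FOR_FN (cfun,
	     oe->id)).)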
*/ 4947 oe->id = bb->index; 4948 oe->count = 1; 4949 oe->stmt_to_insert = NULL; 4950 ops.safe_push (oe); 4951 bb_ent.op = NULL; 4952 bb_ent.last_idx++; 4953 } 4954 else if (ops.length () > bb_ent.first_idx) 4955 { 4956 bb_ent.op = lhs; 4957 bb_ent.last_idx = ops.length (); 4958 } 4959 bbinfo.safe_push (bb_ent); 4960 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i) 4961 ops[i]->id = bb->index; 4962 if (bb == first_bb) 4963 break; 4964 } 4965 if (ops.length () > 1) 4966 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb); 4967 if (any_changes) 4968 { 4969 unsigned int idx, max_idx = 0; 4970 /* update_ops relies on has_single_use predicates returning the 4971 same values as it did during get_ops earlier. Additionally it 4972 never removes statements, only adds new ones and it should walk 4973 from the single imm use and check the predicate already before 4974 making those changes. 4975 On the other side, the handling of GIMPLE_COND directly can turn 4976 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so 4977 it needs to be done in a separate loop afterwards. */ 4978 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) 4979 { 4980 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx 4981 && bbinfo[idx].op != NULL_TREE) 4982 { 4983 tree new_op; 4984 4985 max_idx = idx; 4986 stmt = last_stmt (bb); 4987 new_op = update_ops (bbinfo[idx].op, 4988 (enum tree_code) 4989 ops[bbinfo[idx].first_idx]->rank, 4990 ops, &bbinfo[idx].first_idx, 4991 loop_containing_stmt (stmt)); 4992 if (new_op == NULL_TREE) 4993 { 4994 gcc_assert (bb == last_bb); 4995 new_op = ops[bbinfo[idx].first_idx++]->op; 4996 } 4997 if (bbinfo[idx].op != new_op) 4998 { 4999 imm_use_iterator iter; 5000 use_operand_p use_p; 5001 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL; 5002 5003 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op) 5004 if (is_gimple_debug (use_stmt)) 5005 continue; 5006 else if (gimple_code (use_stmt) == GIMPLE_COND 5007 || gimple_code (use_stmt) == GIMPLE_PHI) 5008 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 5009 SET_USE (use_p, new_op); 5010 else if ((is_gimple_assign (use_stmt) 5011 && (TREE_CODE_CLASS 5012 (gimple_assign_rhs_code (use_stmt)) 5013 == tcc_comparison))) 5014 cast_or_tcc_cmp_stmt = use_stmt; 5015 else if (gimple_assign_cast_p (use_stmt)) 5016 cast_or_tcc_cmp_stmt = use_stmt; 5017 else 5018 gcc_unreachable (); 5019 5020 if (cast_or_tcc_cmp_stmt) 5021 { 5022 gcc_assert (bb == last_bb); 5023 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt); 5024 tree new_lhs = make_ssa_name (TREE_TYPE (lhs)); 5025 enum tree_code rhs_code 5026 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt) 5027 ? 
gimple_assign_rhs_code (cast_or_tcc_cmp_stmt) 5028 : CONVERT_EXPR; 5029 gassign *g; 5030 if (is_gimple_min_invariant (new_op)) 5031 { 5032 new_op = fold_convert (TREE_TYPE (lhs), new_op); 5033 g = gimple_build_assign (new_lhs, new_op); 5034 } 5035 else 5036 g = gimple_build_assign (new_lhs, rhs_code, new_op); 5037 gimple_stmt_iterator gsi 5038 = gsi_for_stmt (cast_or_tcc_cmp_stmt); 5039 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt)); 5040 gimple_set_visited (g, true); 5041 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 5042 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) 5043 if (is_gimple_debug (use_stmt)) 5044 continue; 5045 else if (gimple_code (use_stmt) == GIMPLE_COND 5046 || gimple_code (use_stmt) == GIMPLE_PHI) 5047 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 5048 SET_USE (use_p, new_lhs); 5049 else 5050 gcc_unreachable (); 5051 } 5052 } 5053 } 5054 if (bb == first_bb) 5055 break; 5056 } 5057 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) 5058 { 5059 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx 5060 && bbinfo[idx].op == NULL_TREE 5061 && ops[bbinfo[idx].first_idx]->op != NULL_TREE) 5062 { 5063 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb)); 5064 5065 if (idx > max_idx) 5066 max_idx = idx; 5067 5068 /* If we collapse the conditional to a true/false 5069 condition, then bubble that knowledge up to our caller. */ 5070 if (integer_zerop (ops[bbinfo[idx].first_idx]->op)) 5071 { 5072 gimple_cond_make_false (cond_stmt); 5073 cfg_cleanup_needed = true; 5074 } 5075 else if (integer_onep (ops[bbinfo[idx].first_idx]->op)) 5076 { 5077 gimple_cond_make_true (cond_stmt); 5078 cfg_cleanup_needed = true; 5079 } 5080 else 5081 { 5082 gimple_cond_set_code (cond_stmt, NE_EXPR); 5083 gimple_cond_set_lhs (cond_stmt, 5084 ops[bbinfo[idx].first_idx]->op); 5085 gimple_cond_set_rhs (cond_stmt, boolean_false_node); 5086 } 5087 update_stmt (cond_stmt); 5088 } 5089 if (bb == first_bb) 5090 break; 5091 } 5092 5093 /* The above changes could result in basic blocks after the first 5094 modified one, up to and including last_bb, to be executed even if 5095 they would not be in the original program. If the value ranges of 5096 assignment lhs' in those bbs were dependent on the conditions 5097 guarding those basic blocks which now can change, the VRs might 5098 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs 5099 are only used within the same bb, it should be not a big deal if 5100 we just reset all the VRs in those bbs. See PR68671. */ 5101 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++) 5102 reset_flow_sensitive_info_in_bb (bb); 5103 } 5104 return cfg_cleanup_needed; 5105 } 5106 5107 /* Return true if OPERAND is defined by a PHI node which uses the LHS 5108 of STMT in it's operands. This is also known as a "destructive 5109 update" operation. */ 5110 5111 static bool 5112 is_phi_for_stmt (gimple *stmt, tree operand) 5113 { 5114 gimple *def_stmt; 5115 gphi *def_phi; 5116 tree lhs; 5117 use_operand_p arg_p; 5118 ssa_op_iter i; 5119 5120 if (TREE_CODE (operand) != SSA_NAME) 5121 return false; 5122 5123 lhs = gimple_assign_lhs (stmt); 5124 5125 def_stmt = SSA_NAME_DEF_STMT (operand); 5126 def_phi = dyn_cast <gphi *> (def_stmt); 5127 if (!def_phi) 5128 return false; 5129 5130 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE) 5131 if (lhs == USE_FROM_PTR (arg_p)) 5132 return true; 5133 return false; 5134 } 5135 5136 /* Remove def stmt of VAR if VAR has zero uses and recurse 5137 on rhs1 operand if so. 
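   E.g. (made-up SSA names) once the rewrite has left _9 = _7 + _8 with no
   uses, the walk deletes that definition, then follows its rhs1 (_7) and
   keeps deleting for as long as each visited definition has become dead.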
*/ 5138 5139 static void 5140 remove_visited_stmt_chain (tree var) 5141 { 5142 gimple *stmt; 5143 gimple_stmt_iterator gsi; 5144 5145 while (1) 5146 { 5147 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) 5148 return; 5149 stmt = SSA_NAME_DEF_STMT (var); 5150 if (is_gimple_assign (stmt) && gimple_visited_p (stmt)) 5151 { 5152 var = gimple_assign_rhs1 (stmt); 5153 gsi = gsi_for_stmt (stmt); 5154 reassoc_remove_stmt (&gsi); 5155 release_defs (stmt); 5156 } 5157 else 5158 return; 5159 } 5160 } 5161 5162 /* This function checks three consecutive operands in 5163 passed operands vector OPS starting from OPINDEX and 5164 swaps two operands if it is profitable for the binary operation 5165 consuming the OPINDEX + 1 and OPINDEX + 2 operands. 5166 5167 We pair ops with the same rank if possible. 5168 5169 The alternative we try is to see if STMT is a destructive 5170 update style statement, which is like: 5171 b = phi (a, ...) 5172 a = c + b; 5173 In that case, we want to use the destructive update form to 5174 expose the possible vectorizer sum reduction opportunity. 5175 In that case, the third operand will be the phi node. This 5176 check is not performed if STMT is null. 5177 5178 We could, of course, try to be better as noted above, and do a 5179 lot of work to try to find these opportunities in >3 operand 5180 cases, but it is unlikely to be worth it. */ 5181 5182 static void 5183 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops, 5184 unsigned int opindex, gimple *stmt) 5185 { 5186 operand_entry *oe1, *oe2, *oe3; 5187 5188 oe1 = ops[opindex]; 5189 oe2 = ops[opindex + 1]; 5190 oe3 = ops[opindex + 2]; 5191 5192 if ((oe1->rank == oe2->rank 5193 && oe2->rank != oe3->rank) 5194 || (stmt && is_phi_for_stmt (stmt, oe3->op) 5195 && !is_phi_for_stmt (stmt, oe1->op) 5196 && !is_phi_for_stmt (stmt, oe2->op))) 5197 std::swap (*oe1, *oe3); 5198 else if ((oe1->rank == oe3->rank 5199 && oe2->rank != oe3->rank) 5200 || (stmt && is_phi_for_stmt (stmt, oe2->op) 5201 && !is_phi_for_stmt (stmt, oe1->op) 5202 && !is_phi_for_stmt (stmt, oe3->op))) 5203 std::swap (*oe1, *oe2); 5204 } 5205 5206 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those 5207 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate 5208 whether RHS1 op RHS2 can be inserted before or needs to be inserted 5209 after the returned stmt. */ 5210 5211 static inline gimple * 5212 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before) 5213 { 5214 insert_before = true; 5215 if (TREE_CODE (rhs1) == SSA_NAME 5216 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1))) 5217 { 5218 stmt = SSA_NAME_DEF_STMT (rhs1); 5219 insert_before = false; 5220 } 5221 if (TREE_CODE (rhs2) == SSA_NAME 5222 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2))) 5223 { 5224 stmt = SSA_NAME_DEF_STMT (rhs2); 5225 insert_before = false; 5226 } 5227 return stmt; 5228 } 5229 5230 /* If the stmt that defines operand has to be inserted, insert it 5231 before the use.
*/ 5232 static void 5233 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert) 5234 { 5235 gcc_assert (is_gimple_assign (stmt_to_insert)); 5236 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert); 5237 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert); 5238 bool insert_before; 5239 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before); 5240 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point); 5241 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point)); 5242 5243 /* If the insert point is not stmt, then insert_point would be 5244 the point where operand rhs1 or rhs2 is defined. In this case, 5245 stmt_to_insert has to be inserted afterwards. This would 5246 only happen when the stmt insertion point is flexible. */ 5247 if (insert_before) 5248 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT); 5249 else 5250 insert_stmt_after (stmt_to_insert, insert_point); 5251 } 5252 5253 5254 /* Recursively rewrite our linearized statements so that the operators 5255 match those in OPS[OPINDEX], putting the computation in rank 5256 order. Return new lhs. 5257 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in 5258 the current stmt and during recursive invocations. 5259 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in 5260 recursive invocations. */ 5261 5262 static tree 5263 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex, 5264 const vec<operand_entry *> &ops, bool changed, 5265 bool next_changed) 5266 { 5267 tree rhs1 = gimple_assign_rhs1 (stmt); 5268 tree rhs2 = gimple_assign_rhs2 (stmt); 5269 tree lhs = gimple_assign_lhs (stmt); 5270 operand_entry *oe; 5271 5272 /* The final recursion case for this function is that you have 5273 exactly two operations left. 5274 If we had exactly one op in the entire list to start with, we 5275 would have never called this function, and the tail recursion 5276 rewrites them one at a time. */ 5277 if (opindex + 2 == ops.length ()) 5278 { 5279 operand_entry *oe1, *oe2; 5280 5281 oe1 = ops[opindex]; 5282 oe2 = ops[opindex + 1]; 5283 5284 if (rhs1 != oe1->op || rhs2 != oe2->op) 5285 { 5286 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 5287 unsigned int uid = gimple_uid (stmt); 5288 5289 if (dump_file && (dump_flags & TDF_DETAILS)) 5290 { 5291 fprintf (dump_file, "Transforming "); 5292 print_gimple_stmt (dump_file, stmt, 0); 5293 } 5294 5295 /* If the stmt that defines operand has to be inserted, insert it 5296 before the use. */ 5297 if (oe1->stmt_to_insert) 5298 insert_stmt_before_use (stmt, oe1->stmt_to_insert); 5299 if (oe2->stmt_to_insert) 5300 insert_stmt_before_use (stmt, oe2->stmt_to_insert); 5301 /* Even when changed is false, reassociation could have e.g. removed 5302 some redundant operations, so unless we are just swapping the 5303 arguments or unless there is no change at all (then we just 5304 return lhs), force creation of a new SSA_NAME. 
*/ 5305 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex)) 5306 { 5307 bool insert_before; 5308 gimple *insert_point 5309 = find_insert_point (stmt, oe1->op, oe2->op, insert_before); 5310 lhs = make_ssa_name (TREE_TYPE (lhs)); 5311 stmt 5312 = gimple_build_assign (lhs, rhs_code, 5313 oe1->op, oe2->op); 5314 gimple_set_uid (stmt, uid); 5315 gimple_set_visited (stmt, true); 5316 if (insert_before) 5317 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 5318 else 5319 insert_stmt_after (stmt, insert_point); 5320 } 5321 else 5322 { 5323 bool insert_before; 5324 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op, 5325 insert_before) 5326 == stmt); 5327 gimple_assign_set_rhs1 (stmt, oe1->op); 5328 gimple_assign_set_rhs2 (stmt, oe2->op); 5329 update_stmt (stmt); 5330 } 5331 5332 if (rhs1 != oe1->op && rhs1 != oe2->op) 5333 remove_visited_stmt_chain (rhs1); 5334 5335 if (dump_file && (dump_flags & TDF_DETAILS)) 5336 { 5337 fprintf (dump_file, " into "); 5338 print_gimple_stmt (dump_file, stmt, 0); 5339 } 5340 } 5341 return lhs; 5342 } 5343 5344 /* If we hit here, we should have 3 or more ops left. */ 5345 gcc_assert (opindex + 2 < ops.length ()); 5346 5347 /* Rewrite the next operator. */ 5348 oe = ops[opindex]; 5349 5350 /* If the stmt that defines operand has to be inserted, insert it 5351 before the use. */ 5352 if (oe->stmt_to_insert) 5353 insert_stmt_before_use (stmt, oe->stmt_to_insert); 5354 5355 /* Recurse on the LHS of the binary operator, which is guaranteed to 5356 be the non-leaf side. */ 5357 tree new_rhs1 5358 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops, 5359 changed || oe->op != rhs2 || next_changed, 5360 false); 5361 5362 if (oe->op != rhs2 || new_rhs1 != rhs1) 5363 { 5364 if (dump_file && (dump_flags & TDF_DETAILS)) 5365 { 5366 fprintf (dump_file, "Transforming "); 5367 print_gimple_stmt (dump_file, stmt, 0); 5368 } 5369 5370 /* If changed is false, this is either opindex == 0 5371 or all outer rhs2's were equal to corresponding oe->op, 5372 and powi_result is NULL. 5373 That means lhs is equivalent before and after reassociation. 5374 Otherwise ensure the old lhs SSA_NAME is not reused and 5375 create a new stmt as well, so that any debug stmts will be 5376 properly adjusted. */ 5377 if (changed) 5378 { 5379 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 5380 unsigned int uid = gimple_uid (stmt); 5381 bool insert_before; 5382 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op, 5383 insert_before); 5384 5385 lhs = make_ssa_name (TREE_TYPE (lhs)); 5386 stmt = gimple_build_assign (lhs, rhs_code, 5387 new_rhs1, oe->op); 5388 gimple_set_uid (stmt, uid); 5389 gimple_set_visited (stmt, true); 5390 if (insert_before) 5391 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 5392 else 5393 insert_stmt_after (stmt, insert_point); 5394 } 5395 else 5396 { 5397 bool insert_before; 5398 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op, 5399 insert_before) 5400 == stmt); 5401 gimple_assign_set_rhs1 (stmt, new_rhs1); 5402 gimple_assign_set_rhs2 (stmt, oe->op); 5403 update_stmt (stmt); 5404 } 5405 5406 if (dump_file && (dump_flags & TDF_DETAILS)) 5407 { 5408 fprintf (dump_file, " into "); 5409 print_gimple_stmt (dump_file, stmt, 0); 5410 } 5411 } 5412 return lhs; 5413 } 5414 5415 /* Find out how many cycles we need to compute statements chain. 5416 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a 5417 maximum number of independent statements we may execute per cycle. 
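   A worked example, with numbers chosen only for illustration: for
   OPS_NUM = 16 and CPU_WIDTH = 4 the first phase runs 16 / (2 * 4) = 2
   cycles, each retiring 4 operands and leaving 16 - 2 * 4 = 8; the
   remaining operands are then halved every cycle, log2 (8) = 3 more
   cycles, for 5 cycles in total.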
*/ 5418 5419 static int 5420 get_required_cycles (int ops_num, int cpu_width) 5421 { 5422 int res; 5423 int elog; 5424 unsigned int rest; 5425 5426 /* While we have more than 2 * cpu_width operands 5427 we may reduce number of operands by cpu_width 5428 per cycle. */ 5429 res = ops_num / (2 * cpu_width); 5430 5431 /* Remained operands count may be reduced twice per cycle 5432 until we have only one operand. */ 5433 rest = (unsigned)(ops_num - res * cpu_width); 5434 elog = exact_log2 (rest); 5435 if (elog >= 0) 5436 res += elog; 5437 else 5438 res += floor_log2 (rest) + 1; 5439 5440 return res; 5441 } 5442 5443 /* Returns an optimal number of registers to use for computation of 5444 given statements. */ 5445 5446 static int 5447 get_reassociation_width (int ops_num, enum tree_code opc, 5448 machine_mode mode) 5449 { 5450 int param_width = param_tree_reassoc_width; 5451 int width; 5452 int width_min; 5453 int cycles_best; 5454 5455 if (param_width > 0) 5456 width = param_width; 5457 else 5458 width = targetm.sched.reassociation_width (opc, mode); 5459 5460 if (width == 1) 5461 return width; 5462 5463 /* Get the minimal time required for sequence computation. */ 5464 cycles_best = get_required_cycles (ops_num, width); 5465 5466 /* Check if we may use less width and still compute sequence for 5467 the same time. It will allow us to reduce registers usage. 5468 get_required_cycles is monotonically increasing with lower width 5469 so we can perform a binary search for the minimal width that still 5470 results in the optimal cycle count. */ 5471 width_min = 1; 5472 while (width > width_min) 5473 { 5474 int width_mid = (width + width_min) / 2; 5475 5476 if (get_required_cycles (ops_num, width_mid) == cycles_best) 5477 width = width_mid; 5478 else if (width_min < width_mid) 5479 width_min = width_mid; 5480 else 5481 break; 5482 } 5483 5484 return width; 5485 } 5486 5487 /* Recursively rewrite our linearized statements so that the operators 5488 match those in OPS[OPINDEX], putting the computation in rank 5489 order and trying to allow operations to be executed in 5490 parallel. */ 5491 5492 static void 5493 rewrite_expr_tree_parallel (gassign *stmt, int width, 5494 const vec<operand_entry *> &ops) 5495 { 5496 enum tree_code opcode = gimple_assign_rhs_code (stmt); 5497 int op_num = ops.length (); 5498 gcc_assert (op_num > 0); 5499 int stmt_num = op_num - 1; 5500 gimple **stmts = XALLOCAVEC (gimple *, stmt_num); 5501 int op_index = op_num - 1; 5502 int stmt_index = 0; 5503 int ready_stmts_end = 0; 5504 int i = 0; 5505 gimple *stmt1 = NULL, *stmt2 = NULL; 5506 tree last_rhs1 = gimple_assign_rhs1 (stmt); 5507 5508 /* We start expression rewriting from the top statements. 5509 So, in this loop we create a full list of statements 5510 we will work with. */ 5511 stmts[stmt_num - 1] = stmt; 5512 for (i = stmt_num - 2; i >= 0; i--) 5513 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1])); 5514 5515 for (i = 0; i < stmt_num; i++) 5516 { 5517 tree op1, op2; 5518 5519 /* Determine whether we should use results of 5520 already handled statements or not. */ 5521 if (ready_stmts_end == 0 5522 && (i - stmt_index >= width || op_index < 1)) 5523 ready_stmts_end = i; 5524 5525 /* Now we choose operands for the next statement. Non zero 5526 value in ready_stmts_end means here that we should use 5527 the result of already generated statements as new operand. 
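	 For example (a sketch, not traced from a real dump), with WIDTH 2
	 and four operands a, b, c, d the loop emits something like
	   t1 = c + d;  t2 = a + b;  result = t1 + t2;
	 so the two leaf additions do not depend on each other and can issue
	 in the same cycle.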
*/ 5528 if (ready_stmts_end > 0) 5529 { 5530 op1 = gimple_assign_lhs (stmts[stmt_index++]); 5531 if (ready_stmts_end > stmt_index) 5532 op2 = gimple_assign_lhs (stmts[stmt_index++]); 5533 else if (op_index >= 0) 5534 { 5535 operand_entry *oe = ops[op_index--]; 5536 stmt2 = oe->stmt_to_insert; 5537 op2 = oe->op; 5538 } 5539 else 5540 { 5541 gcc_assert (stmt_index < i); 5542 op2 = gimple_assign_lhs (stmts[stmt_index++]); 5543 } 5544 5545 if (stmt_index >= ready_stmts_end) 5546 ready_stmts_end = 0; 5547 } 5548 else 5549 { 5550 if (op_index > 1) 5551 swap_ops_for_binary_stmt (ops, op_index - 2, NULL); 5552 operand_entry *oe2 = ops[op_index--]; 5553 operand_entry *oe1 = ops[op_index--]; 5554 op2 = oe2->op; 5555 stmt2 = oe2->stmt_to_insert; 5556 op1 = oe1->op; 5557 stmt1 = oe1->stmt_to_insert; 5558 } 5559 5560 /* If we emit the last statement then we should put 5561 operands into the last statement. It will also 5562 break the loop. */ 5563 if (op_index < 0 && stmt_index == i) 5564 i = stmt_num - 1; 5565 5566 if (dump_file && (dump_flags & TDF_DETAILS)) 5567 { 5568 fprintf (dump_file, "Transforming "); 5569 print_gimple_stmt (dump_file, stmts[i], 0); 5570 } 5571 5572 /* If the stmt that defines operand has to be inserted, insert it 5573 before the use. */ 5574 if (stmt1) 5575 insert_stmt_before_use (stmts[i], stmt1); 5576 if (stmt2) 5577 insert_stmt_before_use (stmts[i], stmt2); 5578 stmt1 = stmt2 = NULL; 5579 5580 /* We keep original statement only for the last one. All 5581 others are recreated. */ 5582 if (i == stmt_num - 1) 5583 { 5584 gimple_assign_set_rhs1 (stmts[i], op1); 5585 gimple_assign_set_rhs2 (stmts[i], op2); 5586 update_stmt (stmts[i]); 5587 } 5588 else 5589 { 5590 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); 5591 gimple_set_visited (stmts[i], true); 5592 } 5593 if (dump_file && (dump_flags & TDF_DETAILS)) 5594 { 5595 fprintf (dump_file, " into "); 5596 print_gimple_stmt (dump_file, stmts[i], 0); 5597 } 5598 } 5599 5600 remove_visited_stmt_chain (last_rhs1); 5601 } 5602 5603 /* Transform STMT, which is really (A +B) + (C + D) into the left 5604 linear form, ((A+B)+C)+D. 5605 Recurse on D if necessary. 
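   In GIMPLE terms (SSA names invented for the example), with
     x_5 = a_1 + b_2;  y_6 = c_3 + d_4;  z_7 = x_5 + y_6;
   the code below rewrites z_7 into a left-linear chain,
     t_8 = x_5 + d_4;  z_7 = t_8 + c_3;
   and removes y_6's definition; the operand order at this point is not
   final, the later rank-based rewrite reorders the leaves anyway.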
*/ 5606 5607 static void 5608 linearize_expr (gimple *stmt) 5609 { 5610 gimple_stmt_iterator gsi; 5611 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); 5612 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 5613 gimple *oldbinrhs = binrhs; 5614 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 5615 gimple *newbinrhs = NULL; 5616 class loop *loop = loop_containing_stmt (stmt); 5617 tree lhs = gimple_assign_lhs (stmt); 5618 5619 gcc_assert (is_reassociable_op (binlhs, rhscode, loop) 5620 && is_reassociable_op (binrhs, rhscode, loop)); 5621 5622 gsi = gsi_for_stmt (stmt); 5623 5624 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs)); 5625 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)), 5626 gimple_assign_rhs_code (binrhs), 5627 gimple_assign_lhs (binlhs), 5628 gimple_assign_rhs2 (binrhs)); 5629 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs)); 5630 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT); 5631 gimple_set_uid (binrhs, gimple_uid (stmt)); 5632 5633 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) 5634 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 5635 5636 if (dump_file && (dump_flags & TDF_DETAILS)) 5637 { 5638 fprintf (dump_file, "Linearized: "); 5639 print_gimple_stmt (dump_file, stmt, 0); 5640 } 5641 5642 reassociate_stats.linearized++; 5643 update_stmt (stmt); 5644 5645 gsi = gsi_for_stmt (oldbinrhs); 5646 reassoc_remove_stmt (&gsi); 5647 release_defs (oldbinrhs); 5648 5649 gimple_set_visited (stmt, true); 5650 gimple_set_visited (binlhs, true); 5651 gimple_set_visited (binrhs, true); 5652 5653 /* Tail recurse on the new rhs if it still needs reassociation. */ 5654 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop)) 5655 /* ??? This should probably be linearize_expr (newbinrhs) but I don't 5656 want to change the algorithm while converting to tuples. */ 5657 linearize_expr (stmt); 5658 } 5659 5660 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return 5661 it. Otherwise, return NULL. */ 5662 5663 static gimple * 5664 get_single_immediate_use (tree lhs) 5665 { 5666 use_operand_p immuse; 5667 gimple *immusestmt; 5668 5669 if (TREE_CODE (lhs) == SSA_NAME 5670 && single_imm_use (lhs, &immuse, &immusestmt) 5671 && is_gimple_assign (immusestmt)) 5672 return immusestmt; 5673 5674 return NULL; 5675 } 5676 5677 /* Recursively negate the value of TONEGATE, and return the SSA_NAME 5678 representing the negated value. Insertions of any necessary 5679 instructions go before GSI. 5680 This function is recursive in that, if you hand it "a_5" as the 5681 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will 5682 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */ 5683 5684 static tree 5685 negate_value (tree tonegate, gimple_stmt_iterator *gsip) 5686 { 5687 gimple *negatedefstmt = NULL; 5688 tree resultofnegate; 5689 gimple_stmt_iterator gsi; 5690 unsigned int uid; 5691 5692 /* If we are trying to negate a name, defined by an add, negate the 5693 add operands instead. 
*/ 5694 if (TREE_CODE (tonegate) == SSA_NAME) 5695 negatedefstmt = SSA_NAME_DEF_STMT (tonegate); 5696 if (TREE_CODE (tonegate) == SSA_NAME 5697 && is_gimple_assign (negatedefstmt) 5698 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME 5699 && has_single_use (gimple_assign_lhs (negatedefstmt)) 5700 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR) 5701 { 5702 tree rhs1 = gimple_assign_rhs1 (negatedefstmt); 5703 tree rhs2 = gimple_assign_rhs2 (negatedefstmt); 5704 tree lhs = gimple_assign_lhs (negatedefstmt); 5705 gimple *g; 5706 5707 gsi = gsi_for_stmt (negatedefstmt); 5708 rhs1 = negate_value (rhs1, &gsi); 5709 5710 gsi = gsi_for_stmt (negatedefstmt); 5711 rhs2 = negate_value (rhs2, &gsi); 5712 5713 gsi = gsi_for_stmt (negatedefstmt); 5714 lhs = make_ssa_name (TREE_TYPE (lhs)); 5715 gimple_set_visited (negatedefstmt, true); 5716 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2); 5717 gimple_set_uid (g, gimple_uid (negatedefstmt)); 5718 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 5719 return lhs; 5720 } 5721 5722 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate); 5723 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true, 5724 NULL_TREE, true, GSI_SAME_STMT); 5725 gsi = *gsip; 5726 uid = gimple_uid (gsi_stmt (gsi)); 5727 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi)) 5728 { 5729 gimple *stmt = gsi_stmt (gsi); 5730 if (gimple_uid (stmt) != 0) 5731 break; 5732 gimple_set_uid (stmt, uid); 5733 } 5734 return resultofnegate; 5735 } 5736 5737 /* Return true if we should break up the subtract in STMT into an add 5738 with negate. This is true when we the subtract operands are really 5739 adds, or the subtract itself is used in an add expression. In 5740 either case, breaking up the subtract into an add with negate 5741 exposes the adds to reassociation. */ 5742 5743 static bool 5744 should_break_up_subtract (gimple *stmt) 5745 { 5746 tree lhs = gimple_assign_lhs (stmt); 5747 tree binlhs = gimple_assign_rhs1 (stmt); 5748 tree binrhs = gimple_assign_rhs2 (stmt); 5749 gimple *immusestmt; 5750 class loop *loop = loop_containing_stmt (stmt); 5751 5752 if (TREE_CODE (binlhs) == SSA_NAME 5753 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) 5754 return true; 5755 5756 if (TREE_CODE (binrhs) == SSA_NAME 5757 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop)) 5758 return true; 5759 5760 if (TREE_CODE (lhs) == SSA_NAME 5761 && (immusestmt = get_single_immediate_use (lhs)) 5762 && is_gimple_assign (immusestmt) 5763 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR 5764 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR 5765 && gimple_assign_rhs1 (immusestmt) == lhs) 5766 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR)) 5767 return true; 5768 return false; 5769 } 5770 5771 /* Transform STMT from A - B into A + -B. */ 5772 5773 static void 5774 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip) 5775 { 5776 tree rhs1 = gimple_assign_rhs1 (stmt); 5777 tree rhs2 = gimple_assign_rhs2 (stmt); 5778 5779 if (dump_file && (dump_flags & TDF_DETAILS)) 5780 { 5781 fprintf (dump_file, "Breaking up subtract "); 5782 print_gimple_stmt (dump_file, stmt, 0); 5783 } 5784 5785 rhs2 = negate_value (rhs2, gsip); 5786 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2); 5787 update_stmt (stmt); 5788 } 5789 5790 /* Determine whether STMT is a builtin call that raises an SSA name 5791 to an integer power and has only one use. 
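   (For instance, illustrative calls such as t_5 = __builtin_pow (x_1, 4.0),
   where the exponent is a REAL_CST with an exact integer value, or
   t_5 = __builtin_powi (x_1, 4).)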
If so, and this is early 5792 reassociation and unsafe math optimizations are permitted, place 5793 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE. 5794 If any of these conditions does not hold, return FALSE. */ 5795 5796 static bool 5797 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent) 5798 { 5799 tree arg1; 5800 REAL_VALUE_TYPE c, cint; 5801 5802 switch (gimple_call_combined_fn (stmt)) 5803 { 5804 CASE_CFN_POW: 5805 if (flag_errno_math) 5806 return false; 5807 5808 *base = gimple_call_arg (stmt, 0); 5809 arg1 = gimple_call_arg (stmt, 1); 5810 5811 if (TREE_CODE (arg1) != REAL_CST) 5812 return false; 5813 5814 c = TREE_REAL_CST (arg1); 5815 5816 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT) 5817 return false; 5818 5819 *exponent = real_to_integer (&c); 5820 real_from_integer (&cint, VOIDmode, *exponent, SIGNED); 5821 if (!real_identical (&c, &cint)) 5822 return false; 5823 5824 break; 5825 5826 CASE_CFN_POWI: 5827 *base = gimple_call_arg (stmt, 0); 5828 arg1 = gimple_call_arg (stmt, 1); 5829 5830 if (!tree_fits_shwi_p (arg1)) 5831 return false; 5832 5833 *exponent = tree_to_shwi (arg1); 5834 break; 5835 5836 default: 5837 return false; 5838 } 5839 5840 /* Expanding negative exponents is generally unproductive, so we don't 5841 complicate matters with those. Exponents of zero and one should 5842 have been handled by expression folding. */ 5843 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME) 5844 return false; 5845 5846 return true; 5847 } 5848 5849 /* Try to derive and add operand entry for OP to *OPS. Return false if 5850 unsuccessful. */ 5851 5852 static bool 5853 try_special_add_to_ops (vec<operand_entry *> *ops, 5854 enum tree_code code, 5855 tree op, gimple* def_stmt) 5856 { 5857 tree base = NULL_TREE; 5858 HOST_WIDE_INT exponent = 0; 5859 5860 if (TREE_CODE (op) != SSA_NAME 5861 || ! has_single_use (op)) 5862 return false; 5863 5864 if (code == MULT_EXPR 5865 && reassoc_insert_powi_p 5866 && flag_unsafe_math_optimizations 5867 && is_gimple_call (def_stmt) 5868 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent)) 5869 { 5870 add_repeat_to_ops_vec (ops, base, exponent); 5871 gimple_set_visited (def_stmt, true); 5872 return true; 5873 } 5874 else if (code == MULT_EXPR 5875 && is_gimple_assign (def_stmt) 5876 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR 5877 && !HONOR_SNANS (TREE_TYPE (op)) 5878 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op)) 5879 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))) 5880 && (!FLOAT_TYPE_P (TREE_TYPE (op)) 5881 || !DECIMAL_FLOAT_MODE_P (element_mode (op)))) 5882 { 5883 tree rhs1 = gimple_assign_rhs1 (def_stmt); 5884 tree cst = build_minus_one_cst (TREE_TYPE (op)); 5885 add_to_ops_vec (ops, rhs1); 5886 add_to_ops_vec (ops, cst); 5887 gimple_set_visited (def_stmt, true); 5888 return true; 5889 } 5890 5891 return false; 5892 } 5893 5894 /* Recursively linearize a binary expression that is the RHS of STMT. 5895 Place the operands of the expression tree in the vector named OPS. 
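   For instance (illustrative), linearizing the PLUS_EXPR tree rooted at

     t_9 = ((a_1 + b_2) + c_3) + d_4

   leaves OPS holding entries for a_1, b_2, c_3 and d_4.  For MULT_EXPR
   chains, try_special_add_to_ops may instead record repeated factors
   from __builtin_pow calls or fold a negation into a -1 factor.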
*/ 5896 5897 static void 5898 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt, 5899 bool is_associative, bool set_visited) 5900 { 5901 tree binlhs = gimple_assign_rhs1 (stmt); 5902 tree binrhs = gimple_assign_rhs2 (stmt); 5903 gimple *binlhsdef = NULL, *binrhsdef = NULL; 5904 bool binlhsisreassoc = false; 5905 bool binrhsisreassoc = false; 5906 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 5907 class loop *loop = loop_containing_stmt (stmt); 5908 5909 if (set_visited) 5910 gimple_set_visited (stmt, true); 5911 5912 if (TREE_CODE (binlhs) == SSA_NAME) 5913 { 5914 binlhsdef = SSA_NAME_DEF_STMT (binlhs); 5915 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop) 5916 && !stmt_could_throw_p (cfun, binlhsdef)); 5917 } 5918 5919 if (TREE_CODE (binrhs) == SSA_NAME) 5920 { 5921 binrhsdef = SSA_NAME_DEF_STMT (binrhs); 5922 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop) 5923 && !stmt_could_throw_p (cfun, binrhsdef)); 5924 } 5925 5926 /* If the LHS is not reassociable, but the RHS is, we need to swap 5927 them. If neither is reassociable, there is nothing we can do, so 5928 just put them in the ops vector. If the LHS is reassociable, 5929 linearize it. If both are reassociable, then linearize the RHS 5930 and the LHS. */ 5931 5932 if (!binlhsisreassoc) 5933 { 5934 /* If this is not a associative operation like division, give up. */ 5935 if (!is_associative) 5936 { 5937 add_to_ops_vec (ops, binrhs); 5938 return; 5939 } 5940 5941 if (!binrhsisreassoc) 5942 { 5943 bool swap = false; 5944 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef)) 5945 /* If we add ops for the rhs we expect to be able to recurse 5946 to it via the lhs during expression rewrite so swap 5947 operands. */ 5948 swap = true; 5949 else 5950 add_to_ops_vec (ops, binrhs); 5951 5952 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef)) 5953 add_to_ops_vec (ops, binlhs); 5954 5955 if (!swap) 5956 return; 5957 } 5958 5959 if (dump_file && (dump_flags & TDF_DETAILS)) 5960 { 5961 fprintf (dump_file, "swapping operands of "); 5962 print_gimple_stmt (dump_file, stmt, 0); 5963 } 5964 5965 swap_ssa_operands (stmt, 5966 gimple_assign_rhs1_ptr (stmt), 5967 gimple_assign_rhs2_ptr (stmt)); 5968 update_stmt (stmt); 5969 5970 if (dump_file && (dump_flags & TDF_DETAILS)) 5971 { 5972 fprintf (dump_file, " is now "); 5973 print_gimple_stmt (dump_file, stmt, 0); 5974 } 5975 if (!binrhsisreassoc) 5976 return; 5977 5978 /* We want to make it so the lhs is always the reassociative op, 5979 so swap. */ 5980 std::swap (binlhs, binrhs); 5981 } 5982 else if (binrhsisreassoc) 5983 { 5984 linearize_expr (stmt); 5985 binlhs = gimple_assign_rhs1 (stmt); 5986 binrhs = gimple_assign_rhs2 (stmt); 5987 } 5988 5989 gcc_assert (TREE_CODE (binrhs) != SSA_NAME 5990 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), 5991 rhscode, loop)); 5992 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs), 5993 is_associative, set_visited); 5994 5995 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef)) 5996 add_to_ops_vec (ops, binrhs); 5997 } 5998 5999 /* Repropagate the negates back into subtracts, since no other pass 6000 currently does it. 
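   For instance (illustrative):

     t_1 = -a_2
     x_3 = b_4 + t_1

   becomes

     x_3 = b_4 - a_2

   while the MINUS_EXPR cases below either push the negate further down
   or eliminate it entirely.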
*/ 6001 6002 static void 6003 repropagate_negates (void) 6004 { 6005 unsigned int i = 0; 6006 tree negate; 6007 6008 FOR_EACH_VEC_ELT (plus_negates, i, negate) 6009 { 6010 gimple *user = get_single_immediate_use (negate); 6011 if (!user || !is_gimple_assign (user)) 6012 continue; 6013 6014 tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate)); 6015 if (TREE_CODE (negateop) == SSA_NAME 6016 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop)) 6017 continue; 6018 6019 /* The negate operand can be either operand of a PLUS_EXPR 6020 (it can be the LHS if the RHS is a constant for example). 6021 6022 Force the negate operand to the RHS of the PLUS_EXPR, then 6023 transform the PLUS_EXPR into a MINUS_EXPR. */ 6024 if (gimple_assign_rhs_code (user) == PLUS_EXPR) 6025 { 6026 /* If the negated operand appears on the LHS of the 6027 PLUS_EXPR, exchange the operands of the PLUS_EXPR 6028 to force the negated operand to the RHS of the PLUS_EXPR. */ 6029 if (gimple_assign_rhs1 (user) == negate) 6030 { 6031 swap_ssa_operands (user, 6032 gimple_assign_rhs1_ptr (user), 6033 gimple_assign_rhs2_ptr (user)); 6034 } 6035 6036 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace 6037 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */ 6038 if (gimple_assign_rhs2 (user) == negate) 6039 { 6040 tree rhs1 = gimple_assign_rhs1 (user); 6041 gimple_stmt_iterator gsi = gsi_for_stmt (user); 6042 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, 6043 negateop); 6044 update_stmt (user); 6045 } 6046 } 6047 else if (gimple_assign_rhs_code (user) == MINUS_EXPR) 6048 { 6049 if (gimple_assign_rhs1 (user) == negate) 6050 { 6051 /* We have 6052 x = -negateop 6053 y = x - b 6054 which we transform into 6055 x = negateop + b 6056 y = -x . 6057 This pushes down the negate which we possibly can merge 6058 into some other operation, hence insert it into the 6059 plus_negates vector. */ 6060 gimple *feed = SSA_NAME_DEF_STMT (negate); 6061 tree b = gimple_assign_rhs2 (user); 6062 gimple_stmt_iterator gsi = gsi_for_stmt (feed); 6063 gimple_stmt_iterator gsi2 = gsi_for_stmt (user); 6064 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed))); 6065 gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b); 6066 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); 6067 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x); 6068 user = gsi_stmt (gsi2); 6069 update_stmt (user); 6070 reassoc_remove_stmt (&gsi); 6071 release_defs (feed); 6072 plus_negates.safe_push (gimple_assign_lhs (user)); 6073 } 6074 else 6075 { 6076 /* Transform "x = -negateop; y = b - x" into "y = b + negateop", 6077 getting rid of one operation. */ 6078 tree rhs1 = gimple_assign_rhs1 (user); 6079 gimple_stmt_iterator gsi = gsi_for_stmt (user); 6080 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop); 6081 update_stmt (gsi_stmt (gsi)); 6082 } 6083 } 6084 } 6085 } 6086 6087 /* Break up subtract operations in block BB. 6088 6089 We do this top down because we don't know whether the subtract is 6090 part of a possible chain of reassociation except at the top. 6091 6092 IE given 6093 d = f + g 6094 c = a + e 6095 b = c - d 6096 q = b - r 6097 k = t - q 6098 6099 we want to break up k = t - q, but we won't until we've transformed q 6100 = b - r, which won't be broken up until we transform b = c - d. 6101 6102 En passant, clear the GIMPLE visited flag on every statement 6103 and set UIDs within each basic block. 
*/ 6104 6105 static void 6106 break_up_subtract_bb (basic_block bb) 6107 { 6108 gimple_stmt_iterator gsi; 6109 basic_block son; 6110 unsigned int uid = 1; 6111 6112 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 6113 { 6114 gimple *stmt = gsi_stmt (gsi); 6115 gimple_set_visited (stmt, false); 6116 gimple_set_uid (stmt, uid++); 6117 6118 if (!is_gimple_assign (stmt) 6119 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt))) 6120 || !can_reassociate_op_p (gimple_assign_lhs (stmt))) 6121 continue; 6122 6123 /* Look for simple gimple subtract operations. */ 6124 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR) 6125 { 6126 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt)) 6127 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt))) 6128 continue; 6129 6130 /* Check for a subtract used only in an addition. If this 6131 is the case, transform it into add of a negate for better 6132 reassociation. IE transform C = A-B into C = A + -B if C 6133 is only used in an addition. */ 6134 if (should_break_up_subtract (stmt)) 6135 break_up_subtract (stmt, &gsi); 6136 } 6137 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR 6138 && can_reassociate_op_p (gimple_assign_rhs1 (stmt))) 6139 plus_negates.safe_push (gimple_assign_lhs (stmt)); 6140 } 6141 for (son = first_dom_son (CDI_DOMINATORS, bb); 6142 son; 6143 son = next_dom_son (CDI_DOMINATORS, son)) 6144 break_up_subtract_bb (son); 6145 } 6146 6147 /* Used for repeated factor analysis. */ 6148 struct repeat_factor 6149 { 6150 /* An SSA name that occurs in a multiply chain. */ 6151 tree factor; 6152 6153 /* Cached rank of the factor. */ 6154 unsigned rank; 6155 6156 /* Number of occurrences of the factor in the chain. */ 6157 HOST_WIDE_INT count; 6158 6159 /* An SSA name representing the product of this factor and 6160 all factors appearing later in the repeated factor vector. */ 6161 tree repr; 6162 }; 6163 6164 6165 static vec<repeat_factor> repeat_factor_vec; 6166 6167 /* Used for sorting the repeat factor vector. Sort primarily by 6168 ascending occurrence count, secondarily by descending rank. */ 6169 6170 static int 6171 compare_repeat_factors (const void *x1, const void *x2) 6172 { 6173 const repeat_factor *rf1 = (const repeat_factor *) x1; 6174 const repeat_factor *rf2 = (const repeat_factor *) x2; 6175 6176 if (rf1->count != rf2->count) 6177 return rf1->count - rf2->count; 6178 6179 return rf2->rank - rf1->rank; 6180 } 6181 6182 /* Look for repeated operands in OPS in the multiply tree rooted at 6183 STMT. Replace them with an optimal sequence of multiplies and powi 6184 builtin calls, and remove the used operands from OPS. Return an 6185 SSA name representing the value of the replacement sequence. */ 6186 6187 static tree 6188 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops) 6189 { 6190 unsigned i, j, vec_len; 6191 int ii; 6192 operand_entry *oe; 6193 repeat_factor *rf1, *rf2; 6194 repeat_factor rfnew; 6195 tree result = NULL_TREE; 6196 tree target_ssa, iter_result; 6197 tree type = TREE_TYPE (gimple_get_lhs (stmt)); 6198 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI); 6199 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 6200 gimple *mul_stmt, *pow_stmt; 6201 6202 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and 6203 target, unless type is integral. */ 6204 if (!powi_fndecl && !INTEGRAL_TYPE_P (type)) 6205 return NULL_TREE; 6206 6207 /* Allocate the repeated factor vector. 
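   (For an illustrative product such as x * x * y * y * y, the scan
   below records the factor x with occurrence count 2 and the factor y
   with occurrence count 3.)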
*/ 6208 repeat_factor_vec.create (10); 6209 6210 /* Scan the OPS vector for all SSA names in the product and build 6211 up a vector of occurrence counts for each factor. */ 6212 FOR_EACH_VEC_ELT (*ops, i, oe) 6213 { 6214 if (TREE_CODE (oe->op) == SSA_NAME) 6215 { 6216 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 6217 { 6218 if (rf1->factor == oe->op) 6219 { 6220 rf1->count += oe->count; 6221 break; 6222 } 6223 } 6224 6225 if (j >= repeat_factor_vec.length ()) 6226 { 6227 rfnew.factor = oe->op; 6228 rfnew.rank = oe->rank; 6229 rfnew.count = oe->count; 6230 rfnew.repr = NULL_TREE; 6231 repeat_factor_vec.safe_push (rfnew); 6232 } 6233 } 6234 } 6235 6236 /* Sort the repeated factor vector by (a) increasing occurrence count, 6237 and (b) decreasing rank. */ 6238 repeat_factor_vec.qsort (compare_repeat_factors); 6239 6240 /* It is generally best to combine as many base factors as possible 6241 into a product before applying __builtin_powi to the result. 6242 However, the sort order chosen for the repeated factor vector 6243 allows us to cache partial results for the product of the base 6244 factors for subsequent use. When we already have a cached partial 6245 result from a previous iteration, it is best to make use of it 6246 before looking for another __builtin_pow opportunity. 6247 6248 As an example, consider x * x * y * y * y * z * z * z * z. 6249 We want to first compose the product x * y * z, raise it to the 6250 second power, then multiply this by y * z, and finally multiply 6251 by z. This can be done in 5 multiplies provided we cache y * z 6252 for use in both expressions: 6253 6254 t1 = y * z 6255 t2 = t1 * x 6256 t3 = t2 * t2 6257 t4 = t1 * t3 6258 result = t4 * z 6259 6260 If we instead ignored the cached y * z and first multiplied by 6261 the __builtin_pow opportunity z * z, we would get the inferior: 6262 6263 t1 = y * z 6264 t2 = t1 * x 6265 t3 = t2 * t2 6266 t4 = z * z 6267 t5 = t3 * t4 6268 result = t5 * y */ 6269 6270 vec_len = repeat_factor_vec.length (); 6271 6272 /* Repeatedly look for opportunities to create a builtin_powi call. */ 6273 while (true) 6274 { 6275 HOST_WIDE_INT power; 6276 6277 /* First look for the largest cached product of factors from 6278 preceding iterations. If found, create a builtin_powi for 6279 it if the minimum occurrence count for its factors is at 6280 least 2, or just use this cached product as our next 6281 multiplicand if the minimum occurrence count is 1. 
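   In the x * x * y * y * y * z * z * z * z example above, once
   (x * y * z)^2 has been formed the cached product y * z is left with
   occurrence count 1 and is simply used as the next multiplicand, and
   finally z (also left with count 1) is multiplied in.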
*/ 6282 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 6283 { 6284 if (rf1->repr && rf1->count > 0) 6285 break; 6286 } 6287 6288 if (j < vec_len) 6289 { 6290 power = rf1->count; 6291 6292 if (power == 1) 6293 { 6294 iter_result = rf1->repr; 6295 6296 if (dump_file && (dump_flags & TDF_DETAILS)) 6297 { 6298 unsigned elt; 6299 repeat_factor *rf; 6300 fputs ("Multiplying by cached product ", dump_file); 6301 for (elt = j; elt < vec_len; elt++) 6302 { 6303 rf = &repeat_factor_vec[elt]; 6304 print_generic_expr (dump_file, rf->factor); 6305 if (elt < vec_len - 1) 6306 fputs (" * ", dump_file); 6307 } 6308 fputs ("\n", dump_file); 6309 } 6310 } 6311 else 6312 { 6313 if (INTEGRAL_TYPE_P (type)) 6314 { 6315 gcc_assert (power > 1); 6316 gimple_stmt_iterator gsip = gsi; 6317 gsi_prev (&gsip); 6318 iter_result = powi_as_mults (&gsi, gimple_location (stmt), 6319 rf1->repr, power); 6320 gimple_stmt_iterator gsic = gsi; 6321 while (gsi_stmt (gsic) != gsi_stmt (gsip)) 6322 { 6323 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt)); 6324 gimple_set_visited (gsi_stmt (gsic), true); 6325 gsi_prev (&gsic); 6326 } 6327 } 6328 else 6329 { 6330 iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); 6331 pow_stmt 6332 = gimple_build_call (powi_fndecl, 2, rf1->repr, 6333 build_int_cst (integer_type_node, 6334 power)); 6335 gimple_call_set_lhs (pow_stmt, iter_result); 6336 gimple_set_location (pow_stmt, gimple_location (stmt)); 6337 gimple_set_uid (pow_stmt, gimple_uid (stmt)); 6338 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); 6339 } 6340 6341 if (dump_file && (dump_flags & TDF_DETAILS)) 6342 { 6343 unsigned elt; 6344 repeat_factor *rf; 6345 fputs ("Building __builtin_pow call for cached product (", 6346 dump_file); 6347 for (elt = j; elt < vec_len; elt++) 6348 { 6349 rf = &repeat_factor_vec[elt]; 6350 print_generic_expr (dump_file, rf->factor); 6351 if (elt < vec_len - 1) 6352 fputs (" * ", dump_file); 6353 } 6354 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", 6355 power); 6356 } 6357 } 6358 } 6359 else 6360 { 6361 /* Otherwise, find the first factor in the repeated factor 6362 vector whose occurrence count is at least 2. If no such 6363 factor exists, there are no builtin_powi opportunities 6364 remaining. */ 6365 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 6366 { 6367 if (rf1->count >= 2) 6368 break; 6369 } 6370 6371 if (j >= vec_len) 6372 break; 6373 6374 power = rf1->count; 6375 6376 if (dump_file && (dump_flags & TDF_DETAILS)) 6377 { 6378 unsigned elt; 6379 repeat_factor *rf; 6380 fputs ("Building __builtin_pow call for (", dump_file); 6381 for (elt = j; elt < vec_len; elt++) 6382 { 6383 rf = &repeat_factor_vec[elt]; 6384 print_generic_expr (dump_file, rf->factor); 6385 if (elt < vec_len - 1) 6386 fputs (" * ", dump_file); 6387 } 6388 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power); 6389 } 6390 6391 reassociate_stats.pows_created++; 6392 6393 /* Visit each element of the vector in reverse order (so that 6394 high-occurrence elements are visited first, and within the 6395 same occurrence count, lower-ranked elements are visited 6396 first). Form a linear product of all elements in this order 6397 whose occurrencce count is at least that of element J. 6398 Record the SSA name representing the product of each element 6399 with all subsequent elements in the vector. 
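   In the running example, with element J being x (count 2), this sets
   z->repr = z, y->repr = y * z and x->repr = x * (y * z); it is
   x->repr that is then handed to __builtin_powi below.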
*/ 6400 if (j == vec_len - 1) 6401 rf1->repr = rf1->factor; 6402 else 6403 { 6404 for (ii = vec_len - 2; ii >= (int)j; ii--) 6405 { 6406 tree op1, op2; 6407 6408 rf1 = &repeat_factor_vec[ii]; 6409 rf2 = &repeat_factor_vec[ii + 1]; 6410 6411 /* Init the last factor's representative to be itself. */ 6412 if (!rf2->repr) 6413 rf2->repr = rf2->factor; 6414 6415 op1 = rf1->factor; 6416 op2 = rf2->repr; 6417 6418 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow"); 6419 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR, 6420 op1, op2); 6421 gimple_set_location (mul_stmt, gimple_location (stmt)); 6422 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 6423 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); 6424 rf1->repr = target_ssa; 6425 6426 /* Don't reprocess the multiply we just introduced. */ 6427 gimple_set_visited (mul_stmt, true); 6428 } 6429 } 6430 6431 /* Form a call to __builtin_powi for the maximum product 6432 just formed, raised to the power obtained earlier. */ 6433 rf1 = &repeat_factor_vec[j]; 6434 if (INTEGRAL_TYPE_P (type)) 6435 { 6436 gcc_assert (power > 1); 6437 gimple_stmt_iterator gsip = gsi; 6438 gsi_prev (&gsip); 6439 iter_result = powi_as_mults (&gsi, gimple_location (stmt), 6440 rf1->repr, power); 6441 gimple_stmt_iterator gsic = gsi; 6442 while (gsi_stmt (gsic) != gsi_stmt (gsip)) 6443 { 6444 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt)); 6445 gimple_set_visited (gsi_stmt (gsic), true); 6446 gsi_prev (&gsic); 6447 } 6448 } 6449 else 6450 { 6451 iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); 6452 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 6453 build_int_cst (integer_type_node, 6454 power)); 6455 gimple_call_set_lhs (pow_stmt, iter_result); 6456 gimple_set_location (pow_stmt, gimple_location (stmt)); 6457 gimple_set_uid (pow_stmt, gimple_uid (stmt)); 6458 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); 6459 } 6460 } 6461 6462 /* If we previously formed at least one other builtin_powi call, 6463 form the product of this one and those others. */ 6464 if (result) 6465 { 6466 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow"); 6467 mul_stmt = gimple_build_assign (new_result, MULT_EXPR, 6468 result, iter_result); 6469 gimple_set_location (mul_stmt, gimple_location (stmt)); 6470 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 6471 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); 6472 gimple_set_visited (mul_stmt, true); 6473 result = new_result; 6474 } 6475 else 6476 result = iter_result; 6477 6478 /* Decrement the occurrence count of each element in the product 6479 by the count found above, and remove this many copies of each 6480 factor from OPS. */ 6481 for (i = j; i < vec_len; i++) 6482 { 6483 unsigned k = power; 6484 unsigned n; 6485 6486 rf1 = &repeat_factor_vec[i]; 6487 rf1->count -= power; 6488 6489 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe) 6490 { 6491 if (oe->op == rf1->factor) 6492 { 6493 if (oe->count <= k) 6494 { 6495 ops->ordered_remove (n); 6496 k -= oe->count; 6497 6498 if (k == 0) 6499 break; 6500 } 6501 else 6502 { 6503 oe->count -= k; 6504 break; 6505 } 6506 } 6507 } 6508 } 6509 } 6510 6511 /* At this point all elements in the repeated factor vector have a 6512 remaining occurrence count of 0 or 1, and those with a count of 1 6513 don't have cached representatives. Re-sort the ops vector and 6514 clean up. */ 6515 ops->qsort (sort_by_operand_rank); 6516 repeat_factor_vec.release (); 6517 6518 /* Return the final product computed herein. 
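   In the running example this is the SSA name holding
   ((x * y * z)^2) * (y * z) * z, i.e. x^2 * y^3 * z^4.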
Note that there may 6519 still be some elements with single occurrence count left in OPS; 6520 those will be handled by the normal reassociation logic. */ 6521 return result; 6522 } 6523 6524 /* Attempt to optimize 6525 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or 6526 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */ 6527 6528 static void 6529 attempt_builtin_copysign (vec<operand_entry *> *ops) 6530 { 6531 operand_entry *oe; 6532 unsigned int i; 6533 unsigned int length = ops->length (); 6534 tree cst = ops->last ()->op; 6535 6536 if (length == 1 || TREE_CODE (cst) != REAL_CST) 6537 return; 6538 6539 FOR_EACH_VEC_ELT (*ops, i, oe) 6540 { 6541 if (TREE_CODE (oe->op) == SSA_NAME 6542 && has_single_use (oe->op)) 6543 { 6544 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op); 6545 if (gcall *old_call = dyn_cast <gcall *> (def_stmt)) 6546 { 6547 tree arg0, arg1; 6548 switch (gimple_call_combined_fn (old_call)) 6549 { 6550 CASE_CFN_COPYSIGN: 6551 CASE_CFN_COPYSIGN_FN: 6552 arg0 = gimple_call_arg (old_call, 0); 6553 arg1 = gimple_call_arg (old_call, 1); 6554 /* The first argument of copysign must be a constant, 6555 otherwise there's nothing to do. */ 6556 if (TREE_CODE (arg0) == REAL_CST) 6557 { 6558 tree type = TREE_TYPE (arg0); 6559 tree mul = const_binop (MULT_EXPR, type, cst, arg0); 6560 /* If we couldn't fold to a single constant, skip it. 6561 That happens e.g. for inexact multiplication when 6562 -frounding-math. */ 6563 if (mul == NULL_TREE) 6564 break; 6565 /* Instead of adjusting OLD_CALL, let's build a new 6566 call to not leak the LHS and prevent keeping bogus 6567 debug statements. DCE will clean up the old call. */ 6568 gcall *new_call; 6569 if (gimple_call_internal_p (old_call)) 6570 new_call = gimple_build_call_internal 6571 (IFN_COPYSIGN, 2, mul, arg1); 6572 else 6573 new_call = gimple_build_call 6574 (gimple_call_fndecl (old_call), 2, mul, arg1); 6575 tree lhs = make_ssa_name (type); 6576 gimple_call_set_lhs (new_call, lhs); 6577 gimple_set_location (new_call, 6578 gimple_location (old_call)); 6579 insert_stmt_after (new_call, old_call); 6580 /* We've used the constant, get rid of it. */ 6581 ops->pop (); 6582 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst)); 6583 /* Handle the CST1 < 0 case by negating the result. */ 6584 if (cst1_neg) 6585 { 6586 tree negrhs = make_ssa_name (TREE_TYPE (lhs)); 6587 gimple *negate_stmt 6588 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs); 6589 insert_stmt_after (negate_stmt, new_call); 6590 oe->op = negrhs; 6591 } 6592 else 6593 oe->op = lhs; 6594 if (dump_file && (dump_flags & TDF_DETAILS)) 6595 { 6596 fprintf (dump_file, "Optimizing copysign: "); 6597 print_generic_expr (dump_file, cst); 6598 fprintf (dump_file, " * COPYSIGN ("); 6599 print_generic_expr (dump_file, arg0); 6600 fprintf (dump_file, ", "); 6601 print_generic_expr (dump_file, arg1); 6602 fprintf (dump_file, ") into %sCOPYSIGN (", 6603 cst1_neg ? "-" : ""); 6604 print_generic_expr (dump_file, mul); 6605 fprintf (dump_file, ", "); 6606 print_generic_expr (dump_file, arg1); 6607 fprintf (dump_file, "\n"); 6608 } 6609 return; 6610 } 6611 break; 6612 default: 6613 break; 6614 } 6615 } 6616 } 6617 } 6618 } 6619 6620 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. 
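   For instance (illustrative), if every operand of a multiply chain
   rooted at x_1 was consumed by a __builtin_powi call whose result is
   powi_3, the root statement simply becomes the copy x_1 = powi_3.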
*/ 6621 6622 static void 6623 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs) 6624 { 6625 tree rhs1; 6626 6627 if (dump_file && (dump_flags & TDF_DETAILS)) 6628 { 6629 fprintf (dump_file, "Transforming "); 6630 print_gimple_stmt (dump_file, stmt, 0); 6631 } 6632 6633 rhs1 = gimple_assign_rhs1 (stmt); 6634 gimple_assign_set_rhs_from_tree (gsi, new_rhs); 6635 update_stmt (stmt); 6636 remove_visited_stmt_chain (rhs1); 6637 6638 if (dump_file && (dump_flags & TDF_DETAILS)) 6639 { 6640 fprintf (dump_file, " into "); 6641 print_gimple_stmt (dump_file, stmt, 0); 6642 } 6643 } 6644 6645 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */ 6646 6647 static void 6648 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt, 6649 tree rhs1, tree rhs2) 6650 { 6651 if (dump_file && (dump_flags & TDF_DETAILS)) 6652 { 6653 fprintf (dump_file, "Transforming "); 6654 print_gimple_stmt (dump_file, stmt, 0); 6655 } 6656 6657 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2); 6658 update_stmt (gsi_stmt (*gsi)); 6659 remove_visited_stmt_chain (rhs1); 6660 6661 if (dump_file && (dump_flags & TDF_DETAILS)) 6662 { 6663 fprintf (dump_file, " into "); 6664 print_gimple_stmt (dump_file, stmt, 0); 6665 } 6666 } 6667 6668 /* Reassociate expressions in basic block BB and its post-dominator as 6669 children. 6670 6671 Bubble up return status from maybe_optimize_range_tests. */ 6672 6673 static bool 6674 reassociate_bb (basic_block bb) 6675 { 6676 gimple_stmt_iterator gsi; 6677 basic_block son; 6678 gimple *stmt = last_stmt (bb); 6679 bool cfg_cleanup_needed = false; 6680 6681 if (stmt && !gimple_visited_p (stmt)) 6682 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt); 6683 6684 bool do_prev = false; 6685 for (gsi = gsi_last_bb (bb); 6686 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0) 6687 { 6688 do_prev = true; 6689 stmt = gsi_stmt (gsi); 6690 6691 if (is_gimple_assign (stmt) 6692 && !stmt_could_throw_p (cfun, stmt)) 6693 { 6694 tree lhs, rhs1, rhs2; 6695 enum tree_code rhs_code = gimple_assign_rhs_code (stmt); 6696 6697 /* If this was part of an already processed statement, 6698 we don't need to touch it again. */ 6699 if (gimple_visited_p (stmt)) 6700 { 6701 /* This statement might have become dead because of previous 6702 reassociations. */ 6703 if (has_zero_uses (gimple_get_lhs (stmt))) 6704 { 6705 reassoc_remove_stmt (&gsi); 6706 release_defs (stmt); 6707 /* We might end up removing the last stmt above which 6708 places the iterator to the end of the sequence. 6709 Reset it to the last stmt in this case and make sure 6710 we don't do gsi_prev in that case. */ 6711 if (gsi_end_p (gsi)) 6712 { 6713 gsi = gsi_last_bb (bb); 6714 do_prev = false; 6715 } 6716 } 6717 continue; 6718 } 6719 6720 /* If this is not a gimple binary expression, there is 6721 nothing for us to do with it. */ 6722 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) 6723 continue; 6724 6725 lhs = gimple_assign_lhs (stmt); 6726 rhs1 = gimple_assign_rhs1 (stmt); 6727 rhs2 = gimple_assign_rhs2 (stmt); 6728 6729 /* For non-bit or min/max operations we can't associate 6730 all types. Verify that here. 
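   Roughly: a signed-integer PLUS_EXPR chain is only reassociated when
   overflow wraps, and floating-point chains require -fassociative-math,
   whereas the bitwise and MIN/MAX codes listed below are exempt from
   the type check (the per-operand checks still apply).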
*/ 6731 if ((rhs_code != BIT_IOR_EXPR 6732 && rhs_code != BIT_AND_EXPR 6733 && rhs_code != BIT_XOR_EXPR 6734 && rhs_code != MIN_EXPR 6735 && rhs_code != MAX_EXPR 6736 && !can_reassociate_type_p (TREE_TYPE (lhs))) 6737 || !can_reassociate_op_p (rhs1) 6738 || !can_reassociate_op_p (rhs2)) 6739 continue; 6740 6741 if (associative_tree_code (rhs_code)) 6742 { 6743 auto_vec<operand_entry *> ops; 6744 tree powi_result = NULL_TREE; 6745 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs)); 6746 6747 /* There may be no immediate uses left by the time we 6748 get here because we may have eliminated them all. */ 6749 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs)) 6750 continue; 6751 6752 gimple_set_visited (stmt, true); 6753 linearize_expr_tree (&ops, stmt, true, true); 6754 ops.qsort (sort_by_operand_rank); 6755 int orig_len = ops.length (); 6756 optimize_ops_list (rhs_code, &ops); 6757 if (undistribute_ops_list (rhs_code, &ops, 6758 loop_containing_stmt (stmt))) 6759 { 6760 ops.qsort (sort_by_operand_rank); 6761 optimize_ops_list (rhs_code, &ops); 6762 } 6763 if (undistribute_bitref_for_vector (rhs_code, &ops, 6764 loop_containing_stmt (stmt))) 6765 { 6766 ops.qsort (sort_by_operand_rank); 6767 optimize_ops_list (rhs_code, &ops); 6768 } 6769 if (rhs_code == PLUS_EXPR 6770 && transform_add_to_multiply (&ops)) 6771 ops.qsort (sort_by_operand_rank); 6772 6773 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR) 6774 { 6775 if (is_vector) 6776 optimize_vec_cond_expr (rhs_code, &ops); 6777 else 6778 optimize_range_tests (rhs_code, &ops, NULL); 6779 } 6780 6781 if (rhs_code == MULT_EXPR && !is_vector) 6782 { 6783 attempt_builtin_copysign (&ops); 6784 6785 if (reassoc_insert_powi_p 6786 && (flag_unsafe_math_optimizations 6787 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs))))) 6788 powi_result = attempt_builtin_powi (stmt, &ops); 6789 } 6790 6791 operand_entry *last; 6792 bool negate_result = false; 6793 if (ops.length () > 1 6794 && rhs_code == MULT_EXPR) 6795 { 6796 last = ops.last (); 6797 if ((integer_minus_onep (last->op) 6798 || real_minus_onep (last->op)) 6799 && !HONOR_SNANS (TREE_TYPE (lhs)) 6800 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs)) 6801 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs)))) 6802 { 6803 ops.pop (); 6804 negate_result = true; 6805 } 6806 } 6807 6808 tree new_lhs = lhs; 6809 /* If the operand vector is now empty, all operands were 6810 consumed by the __builtin_powi optimization. */ 6811 if (ops.length () == 0) 6812 transform_stmt_to_copy (&gsi, stmt, powi_result); 6813 else if (ops.length () == 1) 6814 { 6815 tree last_op = ops.last ()->op; 6816 6817 /* If the stmt that defines operand has to be inserted, insert it 6818 before the use. */ 6819 if (ops.last ()->stmt_to_insert) 6820 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert); 6821 if (powi_result) 6822 transform_stmt_to_multiply (&gsi, stmt, last_op, 6823 powi_result); 6824 else 6825 transform_stmt_to_copy (&gsi, stmt, last_op); 6826 } 6827 else 6828 { 6829 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); 6830 int ops_num = ops.length (); 6831 int width; 6832 6833 /* For binary bit operations, if there are at least 3 6834 operands and the last operand in OPS is a constant, 6835 move it to the front. This helps ensure that we generate 6836 (X & Y) & C rather than (X & C) & Y. The former will 6837 often match a canonical bit test when we get to RTL. 
*/ 6838 if (ops.length () > 2 6839 && (rhs_code == BIT_AND_EXPR 6840 || rhs_code == BIT_IOR_EXPR 6841 || rhs_code == BIT_XOR_EXPR) 6842 && TREE_CODE (ops.last ()->op) == INTEGER_CST) 6843 std::swap (*ops[0], *ops[ops_num - 1]); 6844 6845 /* Only rewrite the expression tree to parallel in the 6846 last reassoc pass to avoid useless work back-and-forth 6847 with initial linearization. */ 6848 if (!reassoc_insert_powi_p 6849 && ops.length () > 3 6850 && (width = get_reassociation_width (ops_num, rhs_code, 6851 mode)) > 1) 6852 { 6853 if (dump_file && (dump_flags & TDF_DETAILS)) 6854 fprintf (dump_file, 6855 "Width = %d was chosen for reassociation\n", 6856 width); 6857 rewrite_expr_tree_parallel (as_a <gassign *> (stmt), 6858 width, ops); 6859 } 6860 else 6861 { 6862 /* When there are three operands left, we want 6863 to make sure the ones that get the double 6864 binary op are chosen wisely. */ 6865 int len = ops.length (); 6866 if (len >= 3) 6867 swap_ops_for_binary_stmt (ops, len - 3, stmt); 6868 6869 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops, 6870 powi_result != NULL 6871 || negate_result, 6872 len != orig_len); 6873 } 6874 6875 /* If we combined some repeated factors into a 6876 __builtin_powi call, multiply that result by the 6877 reassociated operands. */ 6878 if (powi_result) 6879 { 6880 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs); 6881 tree type = TREE_TYPE (lhs); 6882 tree target_ssa = make_temp_ssa_name (type, NULL, 6883 "reassocpow"); 6884 gimple_set_lhs (lhs_stmt, target_ssa); 6885 update_stmt (lhs_stmt); 6886 if (lhs != new_lhs) 6887 { 6888 target_ssa = new_lhs; 6889 new_lhs = lhs; 6890 } 6891 mul_stmt = gimple_build_assign (lhs, MULT_EXPR, 6892 powi_result, target_ssa); 6893 gimple_set_location (mul_stmt, gimple_location (stmt)); 6894 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 6895 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT); 6896 } 6897 } 6898 6899 if (negate_result) 6900 { 6901 stmt = SSA_NAME_DEF_STMT (lhs); 6902 tree tmp = make_ssa_name (TREE_TYPE (lhs)); 6903 gimple_set_lhs (stmt, tmp); 6904 if (lhs != new_lhs) 6905 tmp = new_lhs; 6906 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR, 6907 tmp); 6908 gimple_set_uid (neg_stmt, gimple_uid (stmt)); 6909 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT); 6910 update_stmt (stmt); 6911 } 6912 } 6913 } 6914 } 6915 for (son = first_dom_son (CDI_POST_DOMINATORS, bb); 6916 son; 6917 son = next_dom_son (CDI_POST_DOMINATORS, son)) 6918 cfg_cleanup_needed |= reassociate_bb (son); 6919 6920 return cfg_cleanup_needed; 6921 } 6922 6923 /* Add jumps around shifts for range tests turned into bit tests. 
6924 For each SSA_NAME VAR we have code like: 6925 VAR = ...; // final stmt of range comparison 6926 // bit test here...; 6927 OTHERVAR = ...; // final stmt of the bit test sequence 6928 RES = VAR | OTHERVAR; 6929 Turn the above into: 6930 VAR = ...; 6931 if (VAR != 0) 6932 goto <l3>; 6933 else 6934 goto <l2>; 6935 <l2>: 6936 // bit test here...; 6937 OTHERVAR = ...; 6938 <l3>: 6939 # RES = PHI<1(l1), OTHERVAR(l2)>; */ 6940 6941 static void 6942 branch_fixup (void) 6943 { 6944 tree var; 6945 unsigned int i; 6946 6947 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var) 6948 { 6949 gimple *def_stmt = SSA_NAME_DEF_STMT (var); 6950 gimple *use_stmt; 6951 use_operand_p use; 6952 bool ok = single_imm_use (var, &use, &use_stmt); 6953 gcc_assert (ok 6954 && is_gimple_assign (use_stmt) 6955 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR 6956 && gimple_bb (def_stmt) == gimple_bb (use_stmt)); 6957 6958 basic_block cond_bb = gimple_bb (def_stmt); 6959 basic_block then_bb = split_block (cond_bb, def_stmt)->dest; 6960 basic_block merge_bb = split_block (then_bb, use_stmt)->dest; 6961 6962 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt); 6963 gimple *g = gimple_build_cond (NE_EXPR, var, 6964 build_zero_cst (TREE_TYPE (var)), 6965 NULL_TREE, NULL_TREE); 6966 location_t loc = gimple_location (use_stmt); 6967 gimple_set_location (g, loc); 6968 gsi_insert_after (&gsi, g, GSI_NEW_STMT); 6969 6970 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE); 6971 etrue->probability = profile_probability::even (); 6972 edge efalse = find_edge (cond_bb, then_bb); 6973 efalse->flags = EDGE_FALSE_VALUE; 6974 efalse->probability -= etrue->probability; 6975 then_bb->count -= etrue->count (); 6976 6977 tree othervar = NULL_TREE; 6978 if (gimple_assign_rhs1 (use_stmt) == var) 6979 othervar = gimple_assign_rhs2 (use_stmt); 6980 else if (gimple_assign_rhs2 (use_stmt) == var) 6981 othervar = gimple_assign_rhs1 (use_stmt); 6982 else 6983 gcc_unreachable (); 6984 tree lhs = gimple_assign_lhs (use_stmt); 6985 gphi *phi = create_phi_node (lhs, merge_bb); 6986 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc); 6987 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc); 6988 gsi = gsi_for_stmt (use_stmt); 6989 gsi_remove (&gsi, true); 6990 6991 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb); 6992 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb); 6993 } 6994 reassoc_branch_fixups.release (); 6995 } 6996 6997 void dump_ops_vector (FILE *file, vec<operand_entry *> ops); 6998 void debug_ops_vector (vec<operand_entry *> ops); 6999 7000 /* Dump the operand entry vector OPS to FILE. */ 7001 7002 void 7003 dump_ops_vector (FILE *file, vec<operand_entry *> ops) 7004 { 7005 operand_entry *oe; 7006 unsigned int i; 7007 7008 FOR_EACH_VEC_ELT (ops, i, oe) 7009 { 7010 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank); 7011 print_generic_expr (file, oe->op); 7012 fprintf (file, "\n"); 7013 } 7014 } 7015 7016 /* Dump the operand entry vector OPS to STDERR. */ 7017 7018 DEBUG_FUNCTION void 7019 debug_ops_vector (vec<operand_entry *> ops) 7020 { 7021 dump_ops_vector (stderr, ops); 7022 } 7023 7024 /* Bubble up return status from reassociate_bb. */ 7025 7026 static bool 7027 do_reassoc (void) 7028 { 7029 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); 7030 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)); 7031 } 7032 7033 /* Initialize the reassociation pass. 
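   As an illustrative sketch of the rank seeding done below: with two
   default definitions (say the parameters a_1(D) and b_2(D)) and three
   basic blocks, the parameters might receive ranks 3 and 4 and the
   blocks base ranks 5 << 16, 6 << 16 and 7 << 16, so names defined in
   later blocks always outrank anything defined earlier.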
*/ 7034 7035 static void 7036 init_reassoc (void) 7037 { 7038 int i; 7039 int64_t rank = 2; 7040 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); 7041 7042 /* Find the loops, so that we can prevent moving calculations in 7043 them. */ 7044 loop_optimizer_init (AVOID_CFG_MODIFICATIONS); 7045 7046 memset (&reassociate_stats, 0, sizeof (reassociate_stats)); 7047 7048 next_operand_entry_id = 0; 7049 7050 /* Reverse RPO (Reverse Post Order) will give us something where 7051 deeper loops come later. */ 7052 pre_and_rev_post_order_compute (NULL, bbs, false); 7053 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun)); 7054 operand_rank = new hash_map<tree, int64_t>; 7055 7056 /* Give each default definition a distinct rank. This includes 7057 parameters and the static chain. Walk backwards over all 7058 SSA names so that we get proper rank ordering according 7059 to tree_swap_operands_p. */ 7060 for (i = num_ssa_names - 1; i > 0; --i) 7061 { 7062 tree name = ssa_name (i); 7063 if (name && SSA_NAME_IS_DEFAULT_DEF (name)) 7064 insert_operand_rank (name, ++rank); 7065 } 7066 7067 /* Set up rank for each BB */ 7068 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++) 7069 bb_rank[bbs[i]] = ++rank << 16; 7070 7071 free (bbs); 7072 calculate_dominance_info (CDI_POST_DOMINATORS); 7073 plus_negates = vNULL; 7074 } 7075 7076 /* Cleanup after the reassociation pass, and print stats if 7077 requested. */ 7078 7079 static void 7080 fini_reassoc (void) 7081 { 7082 statistics_counter_event (cfun, "Linearized", 7083 reassociate_stats.linearized); 7084 statistics_counter_event (cfun, "Constants eliminated", 7085 reassociate_stats.constants_eliminated); 7086 statistics_counter_event (cfun, "Ops eliminated", 7087 reassociate_stats.ops_eliminated); 7088 statistics_counter_event (cfun, "Statements rewritten", 7089 reassociate_stats.rewritten); 7090 statistics_counter_event (cfun, "Built-in pow[i] calls encountered", 7091 reassociate_stats.pows_encountered); 7092 statistics_counter_event (cfun, "Built-in powi calls created", 7093 reassociate_stats.pows_created); 7094 7095 delete operand_rank; 7096 bitmap_clear (biased_names); 7097 operand_entry_pool.release (); 7098 free (bb_rank); 7099 plus_negates.release (); 7100 free_dominance_info (CDI_POST_DOMINATORS); 7101 loop_optimizer_finalize (); 7102 } 7103 7104 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable 7105 insertion of __builtin_powi calls. 7106 7107 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to 7108 optimization of a gimple conditional. Otherwise returns zero. */ 7109 7110 static unsigned int 7111 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p) 7112 { 7113 reassoc_insert_powi_p = insert_powi_p; 7114 reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p; 7115 7116 init_reassoc (); 7117 7118 bool cfg_cleanup_needed; 7119 cfg_cleanup_needed = do_reassoc (); 7120 repropagate_negates (); 7121 branch_fixup (); 7122 7123 fini_reassoc (); 7124 return cfg_cleanup_needed ? 
TODO_cleanup_cfg : 0; 7125 } 7126 7127 namespace { 7128 7129 const pass_data pass_data_reassoc = 7130 { 7131 GIMPLE_PASS, /* type */ 7132 "reassoc", /* name */ 7133 OPTGROUP_NONE, /* optinfo_flags */ 7134 TV_TREE_REASSOC, /* tv_id */ 7135 ( PROP_cfg | PROP_ssa ), /* properties_required */ 7136 0, /* properties_provided */ 7137 0, /* properties_destroyed */ 7138 0, /* todo_flags_start */ 7139 TODO_update_ssa_only_virtuals, /* todo_flags_finish */ 7140 }; 7141 7142 class pass_reassoc : public gimple_opt_pass 7143 { 7144 public: 7145 pass_reassoc (gcc::context *ctxt) 7146 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false) 7147 {} 7148 7149 /* opt_pass methods: */ 7150 opt_pass * clone () { return new pass_reassoc (m_ctxt); } 7151 void set_pass_param (unsigned int n, bool param) 7152 { 7153 gcc_assert (n == 0); 7154 insert_powi_p = param; 7155 bias_loop_carried_phi_ranks_p = !param; 7156 } 7157 virtual bool gate (function *) { return flag_tree_reassoc != 0; } 7158 virtual unsigned int execute (function *) 7159 { 7160 return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p); 7161 } 7162 7163 private: 7164 /* Enable insertion of __builtin_powi calls during execute_reassoc. See 7165 point 3a in the pass header comment. */ 7166 bool insert_powi_p; 7167 bool bias_loop_carried_phi_ranks_p; 7168 }; // class pass_reassoc 7169 7170 } // anon namespace 7171 7172 gimple_opt_pass * 7173 make_pass_reassoc (gcc::context *ctxt) 7174 { 7175 return new pass_reassoc (ctxt); 7176 } 7177