1 /* Reassociation for trees. 2 Copyright (C) 2005-2022 Free Software Foundation, Inc. 3 Contributed by Daniel Berlin <dan@dberlin.org> 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 GCC is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #include "config.h" 22 #include "system.h" 23 #include "coretypes.h" 24 #include "backend.h" 25 #include "target.h" 26 #include "rtl.h" 27 #include "tree.h" 28 #include "gimple.h" 29 #include "cfghooks.h" 30 #include "alloc-pool.h" 31 #include "tree-pass.h" 32 #include "memmodel.h" 33 #include "tm_p.h" 34 #include "ssa.h" 35 #include "optabs-tree.h" 36 #include "gimple-pretty-print.h" 37 #include "diagnostic-core.h" 38 #include "fold-const.h" 39 #include "stor-layout.h" 40 #include "cfganal.h" 41 #include "gimple-fold.h" 42 #include "tree-eh.h" 43 #include "gimple-iterator.h" 44 #include "gimplify-me.h" 45 #include "tree-cfg.h" 46 #include "tree-ssa-loop.h" 47 #include "flags.h" 48 #include "tree-ssa.h" 49 #include "langhooks.h" 50 #include "cfgloop.h" 51 #include "builtins.h" 52 #include "gimplify.h" 53 #include "case-cfn-macros.h" 54 #include "tree-ssa-reassoc.h" 55 #include "tree-ssa-math-opts.h" 56 #include "gimple-range.h" 57 58 /* This is a simple global reassociation pass. It is, in part, based 59 on the LLVM pass of the same name (They do some things more/less 60 than we do, in different orders, etc). 61 62 It consists of five steps: 63 64 1. Breaking up subtract operations into addition + negate, where 65 it would promote the reassociation of adds. 66 67 2. Left linearization of the expression trees, so that (A+B)+(C+D) 68 becomes (((A+B)+C)+D), which is easier for us to rewrite later. 69 During linearization, we place the operands of the binary 70 expressions into a vector of operand_entry_* 71 72 3. Optimization of the operand lists, eliminating things like a + 73 -a, a & a, etc. 74 75 3a. Combine repeated factors with the same occurrence counts 76 into a __builtin_powi call that will later be optimized into 77 an optimal number of multiplies. 78 79 4. Rewrite the expression trees we linearized and optimized so 80 they are in proper rank order. 81 82 5. Repropagate negates, as nothing else will clean it up ATM. 83 84 A bit of theory on #4, since nobody seems to write anything down 85 about why it makes sense to do it the way they do it: 86 87 We could do this much nicer theoretically, but don't (for reasons 88 explained after how to do it theoretically nice :P). 89 90 In order to promote the most redundancy elimination, you want 91 binary expressions whose operands are the same rank (or 92 preferably, the same value) exposed to the redundancy eliminator, 93 for possible elimination. 94 95 So the way to do this if we really cared, is to build the new op 96 tree from the leaves to the roots, merging as you go, and putting the 97 new op on the end of the worklist, until you are left with one 98 thing on the worklist. 
99 100 IE if you have to rewrite the following set of operands (listed with 101 rank in parentheses), with opcode PLUS_EXPR: 102 103 a (1), b (1), c (1), d (2), e (2) 104 105 106 We start with our merge worklist empty, and the ops list with all of 107 those on it. 108 109 You want to first merge all leaves of the same rank, as much as 110 possible. 111 112 So first build a binary op of 113 114 mergetmp = a + b, and put "mergetmp" on the merge worklist. 115 116 Because there is no three operand form of PLUS_EXPR, c is not going to 117 be exposed to redundancy elimination as a rank 1 operand. 118 119 So you might as well throw it on the merge worklist (you could also 120 consider it to now be a rank two operand, and merge it with d and e, 121 but in this case, you then have evicted e from a binary op. So at 122 least in this situation, you can't win.) 123 124 Then build a binary op of d + e 125 mergetmp2 = d + e 126 127 and put mergetmp2 on the merge worklist. 128 129 so merge worklist = {mergetmp, c, mergetmp2} 130 131 Continue building binary ops of these operations until you have only 132 one operation left on the worklist. 133 134 So we have 135 136 build binary op 137 mergetmp3 = mergetmp + c 138 139 worklist = {mergetmp2, mergetmp3} 140 141 mergetmp4 = mergetmp2 + mergetmp3 142 143 worklist = {mergetmp4} 144 145 because we have one operation left, we can now just set the original 146 statement equal to the result of that operation. 147 148 This will at least expose a + b and d + e to redundancy elimination 149 as binary operations. 150 151 For extra points, you can reuse the old statements to build the 152 mergetmps, since you shouldn't run out. 153 154 So why don't we do this? 155 156 Because it's expensive, and rarely will help. Most trees we are 157 reassociating have 3 or less ops. If they have 2 ops, they already 158 will be written into a nice single binary op. If you have 3 ops, a 159 single simple check suffices to tell you whether the first two are of the 160 same rank. If so, you know to order it 161 162 mergetmp = op1 + op2 163 newstmt = mergetmp + op3 164 165 instead of 166 mergetmp = op2 + op3 167 newstmt = mergetmp + op1 168 169 If all three are of the same rank, you can't expose them all in a 170 single binary operator anyway, so the above is *still* the best you 171 can do. 172 173 Thus, this is what we do. When we have three ops left, we check to see 174 what order to put them in, and call it a day. As a nod to vector sum 175 reduction, we check if any of the ops are really a phi node that is a 176 destructive update for the associating op, and keep the destructive 177 update together for vector sum reduction recognition. */ 178 179 /* Enable insertion of __builtin_powi calls during execute_reassoc. See 180 point 3a in the pass header comment. */ 181 static bool reassoc_insert_powi_p; 182 183 /* Enable biasing ranks of loop accumulators. We don't want this before 184 vectorization, since it interferes with reduction chains. */ 185 static bool reassoc_bias_loop_carried_phi_ranks_p; 186 187 /* Statistics */ 188 static struct 189 { 190 int linearized; 191 int constants_eliminated; 192 int ops_eliminated; 193 int rewritten; 194 int pows_encountered; 195 int pows_created; 196 } reassociate_stats; 197 198 199 static object_allocator<operand_entry> operand_entry_pool 200 ("operand entry pool"); 201 202 /* This is used to assign a unique ID to each struct operand_entry 203 so that qsort results are identical on different hosts. 
*/ 204 static unsigned int next_operand_entry_id; 205 206 /* Starting rank number for a given basic block, so that we can rank 207 operations using unmovable instructions in that BB based on the bb 208 depth. */ 209 static int64_t *bb_rank; 210 211 /* Operand->rank hashtable. */ 212 static hash_map<tree, int64_t> *operand_rank; 213 214 /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be 215 biased. */ 216 static auto_bitmap biased_names; 217 218 /* Vector of SSA_NAMEs on which after reassociate_bb is done with 219 all basic blocks the CFG should be adjusted - basic blocks 220 split right after that SSA_NAME's definition statement and before 221 the only use, which must be a bit ior. */ 222 static vec<tree> reassoc_branch_fixups; 223 224 /* Forward decls. */ 225 static int64_t get_rank (tree); 226 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *); 227 228 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts 229 possibly added by gsi_remove. */ 230 231 static bool 232 reassoc_remove_stmt (gimple_stmt_iterator *gsi) 233 { 234 gimple *stmt = gsi_stmt (*gsi); 235 236 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI) 237 return gsi_remove (gsi, true); 238 239 gimple_stmt_iterator prev = *gsi; 240 gsi_prev (&prev); 241 unsigned uid = gimple_uid (stmt); 242 basic_block bb = gimple_bb (stmt); 243 bool ret = gsi_remove (gsi, true); 244 if (!gsi_end_p (prev)) 245 gsi_next (&prev); 246 else 247 prev = gsi_start_bb (bb); 248 gimple *end_stmt = gsi_stmt (*gsi); 249 while ((stmt = gsi_stmt (prev)) != end_stmt) 250 { 251 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0); 252 gimple_set_uid (stmt, uid); 253 gsi_next (&prev); 254 } 255 return ret; 256 } 257 258 /* Bias amount for loop-carried phis. We want this to be larger than 259 the depth of any reassociation tree we can see, but not larger than 260 the rank difference between two blocks. */ 261 #define PHI_LOOP_BIAS (1 << 15) 262 263 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's 264 operands to the STMT's left-hand side. The goal is to preserve bias in code 265 like this: 266 267 x_1 = phi(x_0, x_2) 268 a = x_1 | 1 269 b = a ^ 2 270 .MEM = b 271 c = b + d 272 x_2 = c + e 273 274 That is, we need to preserve bias along single-use chains originating from 275 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be 276 uses, because only they participate in rank propagation. */ 277 static bool 278 propagate_bias_p (gimple *stmt) 279 { 280 use_operand_p use; 281 imm_use_iterator use_iter; 282 gimple *single_use_stmt = NULL; 283 284 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference) 285 return false; 286 287 FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt)) 288 { 289 gimple *current_use_stmt = USE_STMT (use); 290 291 if (is_gimple_assign (current_use_stmt) 292 && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME) 293 { 294 if (single_use_stmt != NULL && single_use_stmt != current_use_stmt) 295 return false; 296 single_use_stmt = current_use_stmt; 297 } 298 } 299 300 if (single_use_stmt == NULL) 301 return false; 302 303 if (gimple_bb (stmt)->loop_father 304 != gimple_bb (single_use_stmt)->loop_father) 305 return false; 306 307 return true; 308 } 309 310 /* Rank assigned to a phi statement. 
If STMT is a loop-carried phi of 311 an innermost loop, and the phi has only a single use which is inside 312 the loop, then the rank is the block rank of the loop latch plus an 313 extra bias for the loop-carried dependence. This causes expressions 314 calculated into an accumulator variable to be independent for each 315 iteration of the loop. If STMT is some other phi, the rank is the 316 block rank of its containing block. */ 317 static int64_t 318 phi_rank (gimple *stmt) 319 { 320 basic_block bb = gimple_bb (stmt); 321 class loop *father = bb->loop_father; 322 tree res; 323 unsigned i; 324 use_operand_p use; 325 gimple *use_stmt; 326 327 if (!reassoc_bias_loop_carried_phi_ranks_p) 328 return bb_rank[bb->index]; 329 330 /* We only care about real loops (those with a latch). */ 331 if (!father->latch) 332 return bb_rank[bb->index]; 333 334 /* Interesting phis must be in headers of innermost loops. */ 335 if (bb != father->header 336 || father->inner) 337 return bb_rank[bb->index]; 338 339 /* Ignore virtual SSA_NAMEs. */ 340 res = gimple_phi_result (stmt); 341 if (virtual_operand_p (res)) 342 return bb_rank[bb->index]; 343 344 /* The phi definition must have a single use, and that use must be 345 within the loop. Otherwise this isn't an accumulator pattern. */ 346 if (!single_imm_use (res, &use, &use_stmt) 347 || gimple_bb (use_stmt)->loop_father != father) 348 return bb_rank[bb->index]; 349 350 /* Look for phi arguments from within the loop. If found, bias this phi. */ 351 for (i = 0; i < gimple_phi_num_args (stmt); i++) 352 { 353 tree arg = gimple_phi_arg_def (stmt, i); 354 if (TREE_CODE (arg) == SSA_NAME 355 && !SSA_NAME_IS_DEFAULT_DEF (arg)) 356 { 357 gimple *def_stmt = SSA_NAME_DEF_STMT (arg); 358 if (gimple_bb (def_stmt)->loop_father == father) 359 return bb_rank[father->latch->index] + PHI_LOOP_BIAS; 360 } 361 } 362 363 /* Must be an uninteresting phi. */ 364 return bb_rank[bb->index]; 365 } 366 367 /* Return the maximum of RANK and the rank that should be propagated 368 from expression OP. For most operands, this is just the rank of OP. 369 For loop-carried phis, the value is zero to avoid undoing the bias 370 in favor of the phi. */ 371 static int64_t 372 propagate_rank (int64_t rank, tree op, bool *maybe_biased_p) 373 { 374 int64_t op_rank; 375 376 op_rank = get_rank (op); 377 378 /* Check whether op is biased after the get_rank () call, since it might have 379 updated biased_names. */ 380 if (TREE_CODE (op) == SSA_NAME 381 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op))) 382 { 383 if (maybe_biased_p == NULL) 384 return rank; 385 *maybe_biased_p = true; 386 } 387 388 return MAX (rank, op_rank); 389 } 390 391 /* Look up the operand rank structure for expression E. */ 392 393 static inline int64_t 394 find_operand_rank (tree e) 395 { 396 int64_t *slot = operand_rank->get (e); 397 return slot ? *slot : -1; 398 } 399 400 /* Insert {E,RANK} into the operand rank hashtable. */ 401 402 static inline void 403 insert_operand_rank (tree e, int64_t rank) 404 { 405 gcc_assert (rank > 0); 406 gcc_assert (!operand_rank->put (e, rank)); 407 } 408 409 /* Given an expression E, return the rank of the expression. */ 410 411 static int64_t 412 get_rank (tree e) 413 { 414 /* SSA_NAME's have the rank of the expression they are the result 415 of. 416 For globals and uninitialized values, the rank is 0. 417 For function arguments, use the pre-setup rank. 418 For PHI nodes, stores, asm statements, etc, we use the rank of 419 the BB. 
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal; however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      int64_t rank;
      tree op;

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          rank = phi_rank (stmt);
          if (rank != bb_rank[gimple_bb (stmt)->index])
            bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
        }

      else if (!is_gimple_assign (stmt))
        rank = bb_rank[gimple_bb (stmt)->index];

      else
        {
          bool biased_p = false;
          bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;

          /* Otherwise, find the maximum rank for the operands.  As an
             exception, remove the bias from loop-carried phis when propagating
             the rank so that dependent operations are not also biased.  */
          /* Simply walk over all SSA uses - this takes advantage of the
             fact that non-SSA operands are is_gimple_min_invariant and
             thus have rank 0.  */
          rank = 0;
          FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
            rank = propagate_rank (rank, op, maybe_biased_p);

          rank += 1;
          if (biased_p)
            bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e);
          fprintf (dump_file, " is %" PRId64 "\n", rank);
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, rank);
      return rank;
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}


/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 4
#define FLOAT_ONE_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    {
      /* Sort -1.0 and 1.0 constants last: while in some cases
         const_binop can't optimize some inexact operations, multiplication
         by -1.0 or 1.0 can always be merged with others.
*/ 530 if (real_onep (t) || real_minus_onep (t)) 531 return FLOAT_ONE_CONST_TYPE; 532 return FLOAT_CONST_TYPE; 533 } 534 else 535 return OTHER_CONST_TYPE; 536 } 537 538 /* qsort comparison function to sort operand entries PA and PB by rank 539 so that the sorted array is ordered by rank in decreasing order. */ 540 static int 541 sort_by_operand_rank (const void *pa, const void *pb) 542 { 543 const operand_entry *oea = *(const operand_entry *const *)pa; 544 const operand_entry *oeb = *(const operand_entry *const *)pb; 545 546 if (oeb->rank != oea->rank) 547 return oeb->rank > oea->rank ? 1 : -1; 548 549 /* It's nicer for optimize_expression if constants that are likely 550 to fold when added/multiplied/whatever are put next to each 551 other. Since all constants have rank 0, order them by type. */ 552 if (oea->rank == 0) 553 { 554 if (constant_type (oeb->op) != constant_type (oea->op)) 555 return constant_type (oea->op) - constant_type (oeb->op); 556 else 557 /* To make sorting result stable, we use unique IDs to determine 558 order. */ 559 return oeb->id > oea->id ? 1 : -1; 560 } 561 562 if (TREE_CODE (oea->op) != SSA_NAME) 563 { 564 if (TREE_CODE (oeb->op) != SSA_NAME) 565 return oeb->id > oea->id ? 1 : -1; 566 else 567 return 1; 568 } 569 else if (TREE_CODE (oeb->op) != SSA_NAME) 570 return -1; 571 572 /* Lastly, make sure the versions that are the same go next to each 573 other. */ 574 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op)) 575 { 576 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse 577 versions of removed SSA_NAMEs, so if possible, prefer to sort 578 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT. 579 See PR60418. */ 580 gimple *stmta = SSA_NAME_DEF_STMT (oea->op); 581 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op); 582 basic_block bba = gimple_bb (stmta); 583 basic_block bbb = gimple_bb (stmtb); 584 if (bbb != bba) 585 { 586 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert 587 but the other might not. */ 588 if (!bba) 589 return 1; 590 if (!bbb) 591 return -1; 592 /* If neither is, compare bb_rank. */ 593 if (bb_rank[bbb->index] != bb_rank[bba->index]) 594 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16); 595 } 596 597 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb); 598 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta); 599 if (da != db) 600 return da ? 1 : -1; 601 602 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1; 603 } 604 605 return oeb->id > oea->id ? 1 : -1; 606 } 607 608 /* Add an operand entry to *OPS for the tree operand OP. */ 609 610 static void 611 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL) 612 { 613 operand_entry *oe = operand_entry_pool.allocate (); 614 615 oe->op = op; 616 oe->rank = get_rank (op); 617 oe->id = next_operand_entry_id++; 618 oe->count = 1; 619 oe->stmt_to_insert = stmt_to_insert; 620 ops->safe_push (oe); 621 } 622 623 /* Add an operand entry to *OPS for the tree operand OP with repeat 624 count REPEAT. */ 625 626 static void 627 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op, 628 HOST_WIDE_INT repeat) 629 { 630 operand_entry *oe = operand_entry_pool.allocate (); 631 632 oe->op = op; 633 oe->rank = get_rank (op); 634 oe->id = next_operand_entry_id++; 635 oe->count = repeat; 636 oe->stmt_to_insert = NULL; 637 ops->safe_push (oe); 638 639 reassociate_stats.pows_encountered++; 640 } 641 642 /* Returns true if we can associate the SSA def OP. 
*/ 643 644 static bool 645 can_reassociate_op_p (tree op) 646 { 647 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op)) 648 return false; 649 /* Make sure asm goto outputs do not participate in reassociation since 650 we have no way to find an insertion place after asm goto. */ 651 if (TREE_CODE (op) == SSA_NAME 652 && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM 653 && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0) 654 return false; 655 return true; 656 } 657 658 /* Returns true if we can reassociate operations of TYPE. 659 That is for integral or non-saturating fixed-point types, and for 660 floating point type when associative-math is enabled. */ 661 662 static bool 663 can_reassociate_type_p (tree type) 664 { 665 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)) 666 || NON_SAT_FIXED_POINT_TYPE_P (type) 667 || (flag_associative_math && FLOAT_TYPE_P (type))) 668 return true; 669 return false; 670 } 671 672 /* Return true if STMT is reassociable operation containing a binary 673 operation with tree code CODE, and is inside LOOP. */ 674 675 static bool 676 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop) 677 { 678 basic_block bb = gimple_bb (stmt); 679 680 if (gimple_bb (stmt) == NULL) 681 return false; 682 683 if (!flow_bb_inside_loop_p (loop, bb)) 684 return false; 685 686 if (is_gimple_assign (stmt) 687 && gimple_assign_rhs_code (stmt) == code 688 && has_single_use (gimple_assign_lhs (stmt))) 689 { 690 tree rhs1 = gimple_assign_rhs1 (stmt); 691 tree rhs2 = gimple_assign_rhs2 (stmt); 692 if (!can_reassociate_op_p (rhs1) 693 || (rhs2 && !can_reassociate_op_p (rhs2))) 694 return false; 695 return true; 696 } 697 698 return false; 699 } 700 701 702 /* Return true if STMT is a nop-conversion. */ 703 704 static bool 705 gimple_nop_conversion_p (gimple *stmt) 706 { 707 if (gassign *ass = dyn_cast <gassign *> (stmt)) 708 { 709 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass)) 710 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)), 711 TREE_TYPE (gimple_assign_rhs1 (ass)))) 712 return true; 713 } 714 return false; 715 } 716 717 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the 718 operand of the negate operation. Otherwise, return NULL. */ 719 720 static tree 721 get_unary_op (tree name, enum tree_code opcode) 722 { 723 gimple *stmt = SSA_NAME_DEF_STMT (name); 724 725 /* Look through nop conversions (sign changes). */ 726 if (gimple_nop_conversion_p (stmt) 727 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME) 728 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); 729 730 if (!is_gimple_assign (stmt)) 731 return NULL_TREE; 732 733 if (gimple_assign_rhs_code (stmt) == opcode) 734 return gimple_assign_rhs1 (stmt); 735 return NULL_TREE; 736 } 737 738 /* Return true if OP1 and OP2 have the same value if casted to either type. 
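   (The comparison looks through a nop conversion, as recognized by
   gimple_nop_conversion_p, on the definition of either operand.)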
*/ 739 740 static bool 741 ops_equal_values_p (tree op1, tree op2) 742 { 743 if (op1 == op2) 744 return true; 745 746 tree orig_op1 = op1; 747 if (TREE_CODE (op1) == SSA_NAME) 748 { 749 gimple *stmt = SSA_NAME_DEF_STMT (op1); 750 if (gimple_nop_conversion_p (stmt)) 751 { 752 op1 = gimple_assign_rhs1 (stmt); 753 if (op1 == op2) 754 return true; 755 } 756 } 757 758 if (TREE_CODE (op2) == SSA_NAME) 759 { 760 gimple *stmt = SSA_NAME_DEF_STMT (op2); 761 if (gimple_nop_conversion_p (stmt)) 762 { 763 op2 = gimple_assign_rhs1 (stmt); 764 if (op1 == op2 765 || orig_op1 == op2) 766 return true; 767 } 768 } 769 770 return false; 771 } 772 773 774 /* If CURR and LAST are a pair of ops that OPCODE allows us to 775 eliminate through equivalences, do so, remove them from OPS, and 776 return true. Otherwise, return false. */ 777 778 static bool 779 eliminate_duplicate_pair (enum tree_code opcode, 780 vec<operand_entry *> *ops, 781 bool *all_done, 782 unsigned int i, 783 operand_entry *curr, 784 operand_entry *last) 785 { 786 787 /* If we have two of the same op, and the opcode is & |, min, or max, 788 we can eliminate one of them. 789 If we have two of the same op, and the opcode is ^, we can 790 eliminate both of them. */ 791 792 if (last && last->op == curr->op) 793 { 794 switch (opcode) 795 { 796 case MAX_EXPR: 797 case MIN_EXPR: 798 case BIT_IOR_EXPR: 799 case BIT_AND_EXPR: 800 if (dump_file && (dump_flags & TDF_DETAILS)) 801 { 802 fprintf (dump_file, "Equivalence: "); 803 print_generic_expr (dump_file, curr->op); 804 fprintf (dump_file, " [&|minmax] "); 805 print_generic_expr (dump_file, last->op); 806 fprintf (dump_file, " -> "); 807 print_generic_stmt (dump_file, last->op); 808 } 809 810 ops->ordered_remove (i); 811 reassociate_stats.ops_eliminated ++; 812 813 return true; 814 815 case BIT_XOR_EXPR: 816 if (dump_file && (dump_flags & TDF_DETAILS)) 817 { 818 fprintf (dump_file, "Equivalence: "); 819 print_generic_expr (dump_file, curr->op); 820 fprintf (dump_file, " ^ "); 821 print_generic_expr (dump_file, last->op); 822 fprintf (dump_file, " -> nothing\n"); 823 } 824 825 reassociate_stats.ops_eliminated += 2; 826 827 if (ops->length () == 2) 828 { 829 ops->truncate (0); 830 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op))); 831 *all_done = true; 832 } 833 else 834 { 835 ops->ordered_remove (i-1); 836 ops->ordered_remove (i-1); 837 } 838 839 return true; 840 841 default: 842 break; 843 } 844 } 845 return false; 846 } 847 848 static vec<tree> plus_negates; 849 850 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not 851 expression, look in OPS for a corresponding positive operation to cancel 852 it out. If we find one, remove the other from OPS, replace 853 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise, 854 return false. */ 855 856 static bool 857 eliminate_plus_minus_pair (enum tree_code opcode, 858 vec<operand_entry *> *ops, 859 unsigned int currindex, 860 operand_entry *curr) 861 { 862 tree negateop; 863 tree notop; 864 unsigned int i; 865 operand_entry *oe; 866 867 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME) 868 return false; 869 870 negateop = get_unary_op (curr->op, NEGATE_EXPR); 871 notop = get_unary_op (curr->op, BIT_NOT_EXPR); 872 if (negateop == NULL_TREE && notop == NULL_TREE) 873 return false; 874 875 /* Any non-negated version will have a rank that is one less than 876 the current rank. So once we hit those ranks, if we don't find 877 one, we can stop. 
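     (This follows from how get_rank works: CURR->OP is defined as the
     negation or bitwise-not of that version, and a statement's result is
     ranked one higher than the maximum rank of its operands.)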
*/ 878 879 for (i = currindex + 1; 880 ops->iterate (i, &oe) 881 && oe->rank >= curr->rank - 1 ; 882 i++) 883 { 884 if (negateop 885 && ops_equal_values_p (oe->op, negateop)) 886 { 887 if (dump_file && (dump_flags & TDF_DETAILS)) 888 { 889 fprintf (dump_file, "Equivalence: "); 890 print_generic_expr (dump_file, negateop); 891 fprintf (dump_file, " + -"); 892 print_generic_expr (dump_file, oe->op); 893 fprintf (dump_file, " -> 0\n"); 894 } 895 896 ops->ordered_remove (i); 897 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op))); 898 ops->ordered_remove (currindex); 899 reassociate_stats.ops_eliminated ++; 900 901 return true; 902 } 903 else if (notop 904 && ops_equal_values_p (oe->op, notop)) 905 { 906 tree op_type = TREE_TYPE (oe->op); 907 908 if (dump_file && (dump_flags & TDF_DETAILS)) 909 { 910 fprintf (dump_file, "Equivalence: "); 911 print_generic_expr (dump_file, notop); 912 fprintf (dump_file, " + ~"); 913 print_generic_expr (dump_file, oe->op); 914 fprintf (dump_file, " -> -1\n"); 915 } 916 917 ops->ordered_remove (i); 918 add_to_ops_vec (ops, build_all_ones_cst (op_type)); 919 ops->ordered_remove (currindex); 920 reassociate_stats.ops_eliminated ++; 921 922 return true; 923 } 924 } 925 926 /* If CURR->OP is a negate expr without nop conversion in a plus expr: 927 save it for later inspection in repropagate_negates(). */ 928 if (negateop != NULL_TREE 929 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR) 930 plus_negates.safe_push (curr->op); 931 932 return false; 933 } 934 935 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a 936 bitwise not expression, look in OPS for a corresponding operand to 937 cancel it out. If we find one, remove the other from OPS, replace 938 OPS[CURRINDEX] with 0, and return true. Otherwise, return 939 false. */ 940 941 static bool 942 eliminate_not_pairs (enum tree_code opcode, 943 vec<operand_entry *> *ops, 944 unsigned int currindex, 945 operand_entry *curr) 946 { 947 tree notop; 948 unsigned int i; 949 operand_entry *oe; 950 951 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) 952 || TREE_CODE (curr->op) != SSA_NAME) 953 return false; 954 955 notop = get_unary_op (curr->op, BIT_NOT_EXPR); 956 if (notop == NULL_TREE) 957 return false; 958 959 /* Any non-not version will have a rank that is one less than 960 the current rank. So once we hit those ranks, if we don't find 961 one, we can stop. */ 962 963 for (i = currindex + 1; 964 ops->iterate (i, &oe) 965 && oe->rank >= curr->rank - 1; 966 i++) 967 { 968 if (oe->op == notop) 969 { 970 if (dump_file && (dump_flags & TDF_DETAILS)) 971 { 972 fprintf (dump_file, "Equivalence: "); 973 print_generic_expr (dump_file, notop); 974 if (opcode == BIT_AND_EXPR) 975 fprintf (dump_file, " & ~"); 976 else if (opcode == BIT_IOR_EXPR) 977 fprintf (dump_file, " | ~"); 978 print_generic_expr (dump_file, oe->op); 979 if (opcode == BIT_AND_EXPR) 980 fprintf (dump_file, " -> 0\n"); 981 else if (opcode == BIT_IOR_EXPR) 982 fprintf (dump_file, " -> -1\n"); 983 } 984 985 if (opcode == BIT_AND_EXPR) 986 oe->op = build_zero_cst (TREE_TYPE (oe->op)); 987 else if (opcode == BIT_IOR_EXPR) 988 oe->op = build_all_ones_cst (TREE_TYPE (oe->op)); 989 990 reassociate_stats.ops_eliminated += ops->length () - 1; 991 ops->truncate (0); 992 ops->quick_push (oe); 993 return true; 994 } 995 } 996 997 return false; 998 } 999 1000 /* Use constant value that may be present in OPS to try to eliminate 1001 operands. 
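   For example, a chain ANDed with 0 collapses to just the 0, while a
   multiplication by 1 simply drops the 1.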
Note that this function is only really used when we've 1002 eliminated ops for other reasons, or merged constants. Across 1003 single statements, fold already does all of this, plus more. There 1004 is little point in duplicating logic, so I've only included the 1005 identities that I could ever construct testcases to trigger. */ 1006 1007 static void 1008 eliminate_using_constants (enum tree_code opcode, 1009 vec<operand_entry *> *ops) 1010 { 1011 operand_entry *oelast = ops->last (); 1012 tree type = TREE_TYPE (oelast->op); 1013 1014 if (oelast->rank == 0 1015 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type))) 1016 { 1017 switch (opcode) 1018 { 1019 case BIT_AND_EXPR: 1020 if (integer_zerop (oelast->op)) 1021 { 1022 if (ops->length () != 1) 1023 { 1024 if (dump_file && (dump_flags & TDF_DETAILS)) 1025 fprintf (dump_file, "Found & 0, removing all other ops\n"); 1026 1027 reassociate_stats.ops_eliminated += ops->length () - 1; 1028 1029 ops->truncate (0); 1030 ops->quick_push (oelast); 1031 return; 1032 } 1033 } 1034 else if (integer_all_onesp (oelast->op)) 1035 { 1036 if (ops->length () != 1) 1037 { 1038 if (dump_file && (dump_flags & TDF_DETAILS)) 1039 fprintf (dump_file, "Found & -1, removing\n"); 1040 ops->pop (); 1041 reassociate_stats.ops_eliminated++; 1042 } 1043 } 1044 break; 1045 case BIT_IOR_EXPR: 1046 if (integer_all_onesp (oelast->op)) 1047 { 1048 if (ops->length () != 1) 1049 { 1050 if (dump_file && (dump_flags & TDF_DETAILS)) 1051 fprintf (dump_file, "Found | -1, removing all other ops\n"); 1052 1053 reassociate_stats.ops_eliminated += ops->length () - 1; 1054 1055 ops->truncate (0); 1056 ops->quick_push (oelast); 1057 return; 1058 } 1059 } 1060 else if (integer_zerop (oelast->op)) 1061 { 1062 if (ops->length () != 1) 1063 { 1064 if (dump_file && (dump_flags & TDF_DETAILS)) 1065 fprintf (dump_file, "Found | 0, removing\n"); 1066 ops->pop (); 1067 reassociate_stats.ops_eliminated++; 1068 } 1069 } 1070 break; 1071 case MULT_EXPR: 1072 if (integer_zerop (oelast->op) 1073 || (FLOAT_TYPE_P (type) 1074 && !HONOR_NANS (type) 1075 && !HONOR_SIGNED_ZEROS (type) 1076 && real_zerop (oelast->op))) 1077 { 1078 if (ops->length () != 1) 1079 { 1080 if (dump_file && (dump_flags & TDF_DETAILS)) 1081 fprintf (dump_file, "Found * 0, removing all other ops\n"); 1082 1083 reassociate_stats.ops_eliminated += ops->length () - 1; 1084 ops->truncate (0); 1085 ops->quick_push (oelast); 1086 return; 1087 } 1088 } 1089 else if (integer_onep (oelast->op) 1090 || (FLOAT_TYPE_P (type) 1091 && !HONOR_SNANS (type) 1092 && real_onep (oelast->op))) 1093 { 1094 if (ops->length () != 1) 1095 { 1096 if (dump_file && (dump_flags & TDF_DETAILS)) 1097 fprintf (dump_file, "Found * 1, removing\n"); 1098 ops->pop (); 1099 reassociate_stats.ops_eliminated++; 1100 return; 1101 } 1102 } 1103 break; 1104 case BIT_XOR_EXPR: 1105 case PLUS_EXPR: 1106 case MINUS_EXPR: 1107 if (integer_zerop (oelast->op) 1108 || (FLOAT_TYPE_P (type) 1109 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR) 1110 && fold_real_zero_addition_p (type, 0, oelast->op, 1111 opcode == MINUS_EXPR))) 1112 { 1113 if (ops->length () != 1) 1114 { 1115 if (dump_file && (dump_flags & TDF_DETAILS)) 1116 fprintf (dump_file, "Found [|^+] 0, removing\n"); 1117 ops->pop (); 1118 reassociate_stats.ops_eliminated++; 1119 return; 1120 } 1121 } 1122 break; 1123 default: 1124 break; 1125 } 1126 } 1127 } 1128 1129 1130 static void linearize_expr_tree (vec<operand_entry *> *, gimple *, 1131 bool, bool); 1132 1133 /* Structure for tracking and counting operands. 
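   CNT is the number of times the (OP, OECODE) pair occurs across the
   candidate chains, ID is a unique number used only to keep the qsort
   order stable, OECODE is the operation's tree code and OP the operand
   itself.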
*/ 1134 struct oecount { 1135 unsigned int cnt; 1136 unsigned int id; 1137 enum tree_code oecode; 1138 tree op; 1139 }; 1140 1141 1142 /* The heap for the oecount hashtable and the sorted list of operands. */ 1143 static vec<oecount> cvec; 1144 1145 1146 /* Oecount hashtable helpers. */ 1147 1148 struct oecount_hasher : int_hash <int, 0, 1> 1149 { 1150 static inline hashval_t hash (int); 1151 static inline bool equal (int, int); 1152 }; 1153 1154 /* Hash function for oecount. */ 1155 1156 inline hashval_t 1157 oecount_hasher::hash (int p) 1158 { 1159 const oecount *c = &cvec[p - 42]; 1160 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode; 1161 } 1162 1163 /* Comparison function for oecount. */ 1164 1165 inline bool 1166 oecount_hasher::equal (int p1, int p2) 1167 { 1168 const oecount *c1 = &cvec[p1 - 42]; 1169 const oecount *c2 = &cvec[p2 - 42]; 1170 return c1->oecode == c2->oecode && c1->op == c2->op; 1171 } 1172 1173 /* Comparison function for qsort sorting oecount elements by count. */ 1174 1175 static int 1176 oecount_cmp (const void *p1, const void *p2) 1177 { 1178 const oecount *c1 = (const oecount *)p1; 1179 const oecount *c2 = (const oecount *)p2; 1180 if (c1->cnt != c2->cnt) 1181 return c1->cnt > c2->cnt ? 1 : -1; 1182 else 1183 /* If counts are identical, use unique IDs to stabilize qsort. */ 1184 return c1->id > c2->id ? 1 : -1; 1185 } 1186 1187 /* Return TRUE iff STMT represents a builtin call that raises OP 1188 to some exponent. */ 1189 1190 static bool 1191 stmt_is_power_of_op (gimple *stmt, tree op) 1192 { 1193 if (!is_gimple_call (stmt)) 1194 return false; 1195 1196 switch (gimple_call_combined_fn (stmt)) 1197 { 1198 CASE_CFN_POW: 1199 CASE_CFN_POWI: 1200 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0)); 1201 1202 default: 1203 return false; 1204 } 1205 } 1206 1207 /* Given STMT which is a __builtin_pow* call, decrement its exponent 1208 in place and return the result. Assumes that stmt_is_power_of_op 1209 was previously called for STMT and returned TRUE. */ 1210 1211 static HOST_WIDE_INT 1212 decrement_power (gimple *stmt) 1213 { 1214 REAL_VALUE_TYPE c, cint; 1215 HOST_WIDE_INT power; 1216 tree arg1; 1217 1218 switch (gimple_call_combined_fn (stmt)) 1219 { 1220 CASE_CFN_POW: 1221 arg1 = gimple_call_arg (stmt, 1); 1222 c = TREE_REAL_CST (arg1); 1223 power = real_to_integer (&c) - 1; 1224 real_from_integer (&cint, VOIDmode, power, SIGNED); 1225 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint)); 1226 return power; 1227 1228 CASE_CFN_POWI: 1229 arg1 = gimple_call_arg (stmt, 1); 1230 power = TREE_INT_CST_LOW (arg1) - 1; 1231 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power)); 1232 return power; 1233 1234 default: 1235 gcc_unreachable (); 1236 } 1237 } 1238 1239 /* Replace SSA defined by STMT and replace all its uses with new 1240 SSA. Also return the new SSA. */ 1241 1242 static tree 1243 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op) 1244 { 1245 gimple *use_stmt; 1246 use_operand_p use; 1247 imm_use_iterator iter; 1248 tree new_lhs, new_debug_lhs = NULL_TREE; 1249 tree lhs = gimple_get_lhs (stmt); 1250 1251 new_lhs = make_ssa_name (TREE_TYPE (lhs)); 1252 gimple_set_lhs (stmt, new_lhs); 1253 1254 /* Also need to update GIMPLE_DEBUGs. 
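   Debug uses of the old LHS are all redirected to a single
   DEBUG_EXPR_DECL bound to NEW_LHS OPCODE OP, which reconstructs the
   value the old LHS used to compute.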
 */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      tree repl = new_lhs;
      if (is_gimple_debug (use_stmt))
        {
          if (new_debug_lhs == NULL_TREE)
            {
              new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
              gdebug *def_temp
                = gimple_build_debug_bind (new_debug_lhs,
                                           build2 (opcode, TREE_TYPE (lhs),
                                                   new_lhs, op),
                                           stmt);
              gimple_set_uid (def_temp, gimple_uid (stmt));
              gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
              gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
            }
          repl = new_debug_lhs;
        }
      FOR_EACH_IMM_USE_ON_STMT (use, iter)
        SET_USE (use, repl);
      update_stmt (use_stmt);
    }
  return new_lhs;
}

/* Replace all SSAs defined in STMTS_TO_FIX and replace their
   uses with new SSAs.  Also do this for the stmt that defines DEF
   if *DEF is not OP.  */

static void
make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
                           vec<gimple *> &stmts_to_fix)
{
  unsigned i;
  gimple *stmt;

  if (*def != op
      && TREE_CODE (*def) == SSA_NAME
      && (stmt = SSA_NAME_DEF_STMT (*def))
      && gimple_code (stmt) != GIMPLE_NOP)
    *def = make_new_ssa_for_def (stmt, opcode, op);

  FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
    make_new_ssa_for_def (stmt, opcode, op);
}

/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}

/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE, removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  tree orig_def = *def;
  gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.
*/ 1343 auto_vec<gimple *, 64> stmts_to_fix; 1344 1345 do 1346 { 1347 tree name; 1348 1349 if (opcode == MULT_EXPR) 1350 { 1351 if (stmt_is_power_of_op (stmt, op)) 1352 { 1353 if (decrement_power (stmt) == 1) 1354 { 1355 if (stmts_to_fix.length () > 0) 1356 stmts_to_fix.pop (); 1357 propagate_op_to_single_use (op, stmt, def); 1358 } 1359 break; 1360 } 1361 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR) 1362 { 1363 if (gimple_assign_rhs1 (stmt) == op) 1364 { 1365 tree cst = build_minus_one_cst (TREE_TYPE (op)); 1366 if (stmts_to_fix.length () > 0) 1367 stmts_to_fix.pop (); 1368 propagate_op_to_single_use (cst, stmt, def); 1369 break; 1370 } 1371 else if (integer_minus_onep (op) 1372 || real_minus_onep (op)) 1373 { 1374 gimple_assign_set_rhs_code 1375 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt))); 1376 break; 1377 } 1378 } 1379 } 1380 1381 name = gimple_assign_rhs1 (stmt); 1382 1383 /* If this is the operation we look for and one of the operands 1384 is ours simply propagate the other operand into the stmts 1385 single use. */ 1386 if (gimple_assign_rhs_code (stmt) == opcode 1387 && (name == op 1388 || gimple_assign_rhs2 (stmt) == op)) 1389 { 1390 if (name == op) 1391 name = gimple_assign_rhs2 (stmt); 1392 if (stmts_to_fix.length () > 0) 1393 stmts_to_fix.pop (); 1394 propagate_op_to_single_use (name, stmt, def); 1395 break; 1396 } 1397 1398 /* We might have a multiply of two __builtin_pow* calls, and 1399 the operand might be hiding in the rightmost one. Likewise 1400 this can happen for a negate. */ 1401 if (opcode == MULT_EXPR 1402 && gimple_assign_rhs_code (stmt) == opcode 1403 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME 1404 && has_single_use (gimple_assign_rhs2 (stmt))) 1405 { 1406 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 1407 if (stmt_is_power_of_op (stmt2, op)) 1408 { 1409 if (decrement_power (stmt2) == 1) 1410 propagate_op_to_single_use (op, stmt2, def); 1411 else 1412 stmts_to_fix.safe_push (stmt2); 1413 break; 1414 } 1415 else if (is_gimple_assign (stmt2) 1416 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR) 1417 { 1418 if (gimple_assign_rhs1 (stmt2) == op) 1419 { 1420 tree cst = build_minus_one_cst (TREE_TYPE (op)); 1421 propagate_op_to_single_use (cst, stmt2, def); 1422 break; 1423 } 1424 else if (integer_minus_onep (op) 1425 || real_minus_onep (op)) 1426 { 1427 stmts_to_fix.safe_push (stmt2); 1428 gimple_assign_set_rhs_code 1429 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2))); 1430 break; 1431 } 1432 } 1433 } 1434 1435 /* Continue walking the chain. */ 1436 gcc_assert (name != op 1437 && TREE_CODE (name) == SSA_NAME); 1438 stmt = SSA_NAME_DEF_STMT (name); 1439 stmts_to_fix.safe_push (stmt); 1440 } 1441 while (1); 1442 1443 if (stmts_to_fix.length () > 0 || *def == orig_def) 1444 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix); 1445 } 1446 1447 /* Returns true if statement S1 dominates statement S2. Like 1448 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */ 1449 1450 static bool 1451 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2) 1452 { 1453 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2); 1454 1455 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D) 1456 SSA_NAME. Assume it lives at the beginning of function and 1457 thus dominates everything. */ 1458 if (!bb1 || s1 == s2) 1459 return true; 1460 1461 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. 
*/ 1462 if (!bb2) 1463 return false; 1464 1465 if (bb1 == bb2) 1466 { 1467 /* PHIs in the same basic block are assumed to be 1468 executed all in parallel, if only one stmt is a PHI, 1469 it dominates the other stmt in the same basic block. */ 1470 if (gimple_code (s1) == GIMPLE_PHI) 1471 return true; 1472 1473 if (gimple_code (s2) == GIMPLE_PHI) 1474 return false; 1475 1476 gcc_assert (gimple_uid (s1) && gimple_uid (s2)); 1477 1478 if (gimple_uid (s1) < gimple_uid (s2)) 1479 return true; 1480 1481 if (gimple_uid (s1) > gimple_uid (s2)) 1482 return false; 1483 1484 gimple_stmt_iterator gsi = gsi_for_stmt (s1); 1485 unsigned int uid = gimple_uid (s1); 1486 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi)) 1487 { 1488 gimple *s = gsi_stmt (gsi); 1489 if (gimple_uid (s) != uid) 1490 break; 1491 if (s == s2) 1492 return true; 1493 } 1494 1495 return false; 1496 } 1497 1498 return dominated_by_p (CDI_DOMINATORS, bb2, bb1); 1499 } 1500 1501 /* Insert STMT after INSERT_POINT. */ 1502 1503 static void 1504 insert_stmt_after (gimple *stmt, gimple *insert_point) 1505 { 1506 gimple_stmt_iterator gsi; 1507 basic_block bb; 1508 1509 if (gimple_code (insert_point) == GIMPLE_PHI) 1510 bb = gimple_bb (insert_point); 1511 else if (!stmt_ends_bb_p (insert_point)) 1512 { 1513 gsi = gsi_for_stmt (insert_point); 1514 gimple_set_uid (stmt, gimple_uid (insert_point)); 1515 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); 1516 return; 1517 } 1518 else if (gimple_code (insert_point) == GIMPLE_ASM 1519 && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0) 1520 /* We have no idea where to insert - it depends on where the 1521 uses will be placed. */ 1522 gcc_unreachable (); 1523 else 1524 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME, 1525 thus if it must end a basic block, it should be a call that can 1526 throw, or some assignment that can throw. If it throws, the LHS 1527 of it will not be initialized though, so only valid places using 1528 the SSA_NAME should be dominated by the fallthru edge. */ 1529 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest; 1530 gsi = gsi_after_labels (bb); 1531 if (gsi_end_p (gsi)) 1532 { 1533 gimple_stmt_iterator gsi2 = gsi_last_bb (bb); 1534 gimple_set_uid (stmt, 1535 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2))); 1536 } 1537 else 1538 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi))); 1539 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 1540 } 1541 1542 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for 1543 the result. Places the statement after the definition of either 1544 OP1 or OP2. Returns the new statement. */ 1545 1546 static gimple * 1547 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode) 1548 { 1549 gimple *op1def = NULL, *op2def = NULL; 1550 gimple_stmt_iterator gsi; 1551 tree op; 1552 gassign *sum; 1553 1554 /* Create the addition statement. */ 1555 op = make_ssa_name (type); 1556 sum = gimple_build_assign (op, opcode, op1, op2); 1557 1558 /* Find an insertion place and insert. 
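     The statement is placed after whichever operand definition is
     dominated by the other (i.e. comes later), as determined by
     reassoc_stmt_dominates_stmt_p; if neither operand has a defining
     statement, it is placed at the start of the function body, after any
     returns_twice call.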
*/ 1559 if (TREE_CODE (op1) == SSA_NAME) 1560 op1def = SSA_NAME_DEF_STMT (op1); 1561 if (TREE_CODE (op2) == SSA_NAME) 1562 op2def = SSA_NAME_DEF_STMT (op2); 1563 if ((!op1def || gimple_nop_p (op1def)) 1564 && (!op2def || gimple_nop_p (op2def))) 1565 { 1566 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); 1567 if (!gsi_end_p (gsi) 1568 && is_gimple_call (gsi_stmt (gsi)) 1569 && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE)) 1570 { 1571 /* Don't add statements before a returns_twice call at the start 1572 of a function. */ 1573 split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); 1574 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); 1575 } 1576 if (gsi_end_p (gsi)) 1577 { 1578 gimple_stmt_iterator gsi2 1579 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); 1580 gimple_set_uid (sum, 1581 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2))); 1582 } 1583 else 1584 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi))); 1585 gsi_insert_before (&gsi, sum, GSI_NEW_STMT); 1586 } 1587 else 1588 { 1589 gimple *insert_point; 1590 if ((!op1def || gimple_nop_p (op1def)) 1591 || (op2def && !gimple_nop_p (op2def) 1592 && reassoc_stmt_dominates_stmt_p (op1def, op2def))) 1593 insert_point = op2def; 1594 else 1595 insert_point = op1def; 1596 insert_stmt_after (sum, insert_point); 1597 } 1598 update_stmt (sum); 1599 1600 return sum; 1601 } 1602 1603 /* Perform un-distribution of divisions and multiplications. 1604 A * X + B * X is transformed into (A + B) * X and A / X + B / X 1605 to (A + B) / X for real X. 1606 1607 The algorithm is organized as follows. 1608 1609 - First we walk the addition chain *OPS looking for summands that 1610 are defined by a multiplication or a real division. This results 1611 in the candidates bitmap with relevant indices into *OPS. 1612 1613 - Second we build the chains of multiplications or divisions for 1614 these candidates, counting the number of occurrences of (operand, code) 1615 pairs in all of the candidates chains. 1616 1617 - Third we sort the (operand, code) pairs by number of occurrence and 1618 process them starting with the pair with the most uses. 1619 1620 * For each such pair we walk the candidates again to build a 1621 second candidate bitmap noting all multiplication/division chains 1622 that have at least one occurrence of (operand, code). 1623 1624 * We build an alternate addition chain only covering these 1625 candidates with one (operand, code) operation removed from their 1626 multiplication/division chain. 1627 1628 * The first candidate gets replaced by the alternate addition chain 1629 multiplied/divided by the operand. 1630 1631 * All candidate chains get disabled for further processing and 1632 processing of (operand, code) pairs continues. 1633 1634 The alternate addition chains built are re-processed by the main 1635 reassociation algorithm which allows optimizing a * x * y + b * y * x 1636 to (a + b ) * x * y in one invocation of the reassociation pass. */ 1637 1638 static bool 1639 undistribute_ops_list (enum tree_code opcode, 1640 vec<operand_entry *> *ops, class loop *loop) 1641 { 1642 unsigned int length = ops->length (); 1643 operand_entry *oe1; 1644 unsigned i, j; 1645 unsigned nr_candidates, nr_candidates2; 1646 sbitmap_iterator sbi0; 1647 vec<operand_entry *> *subops; 1648 bool changed = false; 1649 unsigned int next_oecount_id = 0; 1650 1651 if (length <= 1 1652 || opcode != PLUS_EXPR) 1653 return false; 1654 1655 /* Build a list of candidates to process. 
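     A candidate is a summand that is an SSA_NAME defined by a
     reassociable MULT_EXPR or RDIV_EXPR inside LOOP.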
*/ 1656 auto_sbitmap candidates (length); 1657 bitmap_clear (candidates); 1658 nr_candidates = 0; 1659 FOR_EACH_VEC_ELT (*ops, i, oe1) 1660 { 1661 enum tree_code dcode; 1662 gimple *oe1def; 1663 1664 if (TREE_CODE (oe1->op) != SSA_NAME) 1665 continue; 1666 oe1def = SSA_NAME_DEF_STMT (oe1->op); 1667 if (!is_gimple_assign (oe1def)) 1668 continue; 1669 dcode = gimple_assign_rhs_code (oe1def); 1670 if ((dcode != MULT_EXPR 1671 && dcode != RDIV_EXPR) 1672 || !is_reassociable_op (oe1def, dcode, loop)) 1673 continue; 1674 1675 bitmap_set_bit (candidates, i); 1676 nr_candidates++; 1677 } 1678 1679 if (nr_candidates < 2) 1680 return false; 1681 1682 if (dump_file && (dump_flags & TDF_DETAILS)) 1683 { 1684 fprintf (dump_file, "searching for un-distribute opportunities "); 1685 print_generic_expr (dump_file, 1686 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE); 1687 fprintf (dump_file, " %d\n", nr_candidates); 1688 } 1689 1690 /* Build linearized sub-operand lists and the counting table. */ 1691 cvec.create (0); 1692 1693 hash_table<oecount_hasher> ctable (15); 1694 1695 /* ??? Macro arguments cannot have multi-argument template types in 1696 them. This typedef is needed to workaround that limitation. */ 1697 typedef vec<operand_entry *> vec_operand_entry_t_heap; 1698 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ()); 1699 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) 1700 { 1701 gimple *oedef; 1702 enum tree_code oecode; 1703 unsigned j; 1704 1705 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op); 1706 oecode = gimple_assign_rhs_code (oedef); 1707 linearize_expr_tree (&subops[i], oedef, 1708 associative_tree_code (oecode), false); 1709 1710 FOR_EACH_VEC_ELT (subops[i], j, oe1) 1711 { 1712 oecount c; 1713 int *slot; 1714 int idx; 1715 c.oecode = oecode; 1716 c.cnt = 1; 1717 c.id = next_oecount_id++; 1718 c.op = oe1->op; 1719 cvec.safe_push (c); 1720 idx = cvec.length () + 41; 1721 slot = ctable.find_slot (idx, INSERT); 1722 if (!*slot) 1723 { 1724 *slot = idx; 1725 } 1726 else 1727 { 1728 cvec.pop (); 1729 cvec[*slot - 42].cnt++; 1730 } 1731 } 1732 } 1733 1734 /* Sort the counting table. */ 1735 cvec.qsort (oecount_cmp); 1736 1737 if (dump_file && (dump_flags & TDF_DETAILS)) 1738 { 1739 oecount *c; 1740 fprintf (dump_file, "Candidates:\n"); 1741 FOR_EACH_VEC_ELT (cvec, j, c) 1742 { 1743 fprintf (dump_file, " %u %s: ", c->cnt, 1744 c->oecode == MULT_EXPR 1745 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?"); 1746 print_generic_expr (dump_file, c->op); 1747 fprintf (dump_file, "\n"); 1748 } 1749 } 1750 1751 /* Process the (operand, code) pairs in order of most occurrence. */ 1752 auto_sbitmap candidates2 (length); 1753 while (!cvec.is_empty ()) 1754 { 1755 oecount *c = &cvec.last (); 1756 if (c->cnt < 2) 1757 break; 1758 1759 /* Now collect the operands in the outer chain that contain 1760 the common operand in their inner chain. */ 1761 bitmap_clear (candidates2); 1762 nr_candidates2 = 0; 1763 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) 1764 { 1765 gimple *oedef; 1766 enum tree_code oecode; 1767 unsigned j; 1768 tree op = (*ops)[i]->op; 1769 1770 /* If we undistributed in this chain already this may be 1771 a constant. 
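	     (A previous round replaces every combined entry except the
	     first with a zero constant.)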
*/ 1772 if (TREE_CODE (op) != SSA_NAME) 1773 continue; 1774 1775 oedef = SSA_NAME_DEF_STMT (op); 1776 oecode = gimple_assign_rhs_code (oedef); 1777 if (oecode != c->oecode) 1778 continue; 1779 1780 FOR_EACH_VEC_ELT (subops[i], j, oe1) 1781 { 1782 if (oe1->op == c->op) 1783 { 1784 bitmap_set_bit (candidates2, i); 1785 ++nr_candidates2; 1786 break; 1787 } 1788 } 1789 } 1790 1791 if (nr_candidates2 >= 2) 1792 { 1793 operand_entry *oe1, *oe2; 1794 gimple *prod; 1795 int first = bitmap_first_set_bit (candidates2); 1796 1797 /* Build the new addition chain. */ 1798 oe1 = (*ops)[first]; 1799 if (dump_file && (dump_flags & TDF_DETAILS)) 1800 { 1801 fprintf (dump_file, "Building ("); 1802 print_generic_expr (dump_file, oe1->op); 1803 } 1804 zero_one_operation (&oe1->op, c->oecode, c->op); 1805 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0) 1806 { 1807 gimple *sum; 1808 oe2 = (*ops)[i]; 1809 if (dump_file && (dump_flags & TDF_DETAILS)) 1810 { 1811 fprintf (dump_file, " + "); 1812 print_generic_expr (dump_file, oe2->op); 1813 } 1814 zero_one_operation (&oe2->op, c->oecode, c->op); 1815 sum = build_and_add_sum (TREE_TYPE (oe1->op), 1816 oe1->op, oe2->op, opcode); 1817 oe2->op = build_zero_cst (TREE_TYPE (oe2->op)); 1818 oe2->rank = 0; 1819 oe1->op = gimple_get_lhs (sum); 1820 } 1821 1822 /* Apply the multiplication/division. */ 1823 prod = build_and_add_sum (TREE_TYPE (oe1->op), 1824 oe1->op, c->op, c->oecode); 1825 if (dump_file && (dump_flags & TDF_DETAILS)) 1826 { 1827 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/"); 1828 print_generic_expr (dump_file, c->op); 1829 fprintf (dump_file, "\n"); 1830 } 1831 1832 /* Record it in the addition chain and disable further 1833 undistribution with this op. */ 1834 oe1->op = gimple_assign_lhs (prod); 1835 oe1->rank = get_rank (oe1->op); 1836 subops[first].release (); 1837 1838 changed = true; 1839 } 1840 1841 cvec.pop (); 1842 } 1843 1844 for (i = 0; i < ops->length (); ++i) 1845 subops[i].release (); 1846 free (subops); 1847 cvec.release (); 1848 1849 return changed; 1850 } 1851 1852 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME: 1853 first: element index for each relevant BIT_FIELD_REF. 1854 second: the index of vec ops* for each relevant BIT_FIELD_REF. */ 1855 typedef std::pair<unsigned, unsigned> v_info_elem; 1856 struct v_info { 1857 tree vec_type; 1858 auto_vec<v_info_elem, 32> vec; 1859 }; 1860 typedef v_info *v_info_ptr; 1861 1862 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */ 1863 static int 1864 sort_by_mach_mode (const void *p_i, const void *p_j) 1865 { 1866 const tree tr1 = *((const tree *) p_i); 1867 const tree tr2 = *((const tree *) p_j); 1868 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1)); 1869 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2)); 1870 if (mode1 > mode2) 1871 return 1; 1872 else if (mode1 < mode2) 1873 return -1; 1874 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2)) 1875 return -1; 1876 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2)) 1877 return 1; 1878 return 0; 1879 } 1880 1881 /* Cleanup hash map for VECTOR information. */ 1882 static void 1883 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map) 1884 { 1885 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin (); 1886 it != info_map.end (); ++it) 1887 { 1888 v_info_ptr info = (*it).second; 1889 delete info; 1890 (*it).second = NULL; 1891 } 1892 } 1893 1894 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE. 1895 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... 
+ V2[k] + ... Vn[k] 1896 is transformed to 1897 Vs = (V1 + V2 + ... + Vn) 1898 Vs[0] + Vs[1] + ... + Vs[k] 1899 1900 The basic steps are listed below: 1901 1902 1) Check the addition chain *OPS by looking those summands coming from 1903 VECTOR bit_field_ref on VECTOR type. Put the information into 1904 v_info_map for each satisfied summand, using VECTOR SSA_NAME as key. 1905 1906 2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are 1907 continuous, they can cover the whole VECTOR perfectly without any holes. 1908 Obtain one VECTOR list which contain candidates to be transformed. 1909 1910 3) Sort the VECTOR list by machine mode of VECTOR type, for each group of 1911 candidates with same mode, build the addition statements for them and 1912 generate BIT_FIELD_REFs accordingly. 1913 1914 TODO: 1915 The current implementation requires the whole VECTORs should be fully 1916 covered, but it can be extended to support partial, checking adjacent 1917 but not fill the whole, it may need some cost model to define the 1918 boundary to do or not. 1919 */ 1920 static bool 1921 undistribute_bitref_for_vector (enum tree_code opcode, 1922 vec<operand_entry *> *ops, struct loop *loop) 1923 { 1924 if (ops->length () <= 1) 1925 return false; 1926 1927 if (opcode != PLUS_EXPR 1928 && opcode != MULT_EXPR 1929 && opcode != BIT_XOR_EXPR 1930 && opcode != BIT_IOR_EXPR 1931 && opcode != BIT_AND_EXPR) 1932 return false; 1933 1934 hash_map<tree, v_info_ptr> v_info_map; 1935 operand_entry *oe1; 1936 unsigned i; 1937 1938 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the 1939 information into map. */ 1940 FOR_EACH_VEC_ELT (*ops, i, oe1) 1941 { 1942 enum tree_code dcode; 1943 gimple *oe1def; 1944 1945 if (TREE_CODE (oe1->op) != SSA_NAME) 1946 continue; 1947 oe1def = SSA_NAME_DEF_STMT (oe1->op); 1948 if (!is_gimple_assign (oe1def)) 1949 continue; 1950 dcode = gimple_assign_rhs_code (oe1def); 1951 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop)) 1952 continue; 1953 1954 tree rhs = gimple_assign_rhs1 (oe1def); 1955 tree vec = TREE_OPERAND (rhs, 0); 1956 tree vec_type = TREE_TYPE (vec); 1957 1958 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type)) 1959 continue; 1960 1961 /* Ignore it if target machine can't support this VECTOR type. */ 1962 if (!VECTOR_MODE_P (TYPE_MODE (vec_type))) 1963 continue; 1964 1965 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */ 1966 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ()) 1967 continue; 1968 1969 if (VECTOR_TYPE_P (TREE_TYPE (rhs)) 1970 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs)))) 1971 continue; 1972 1973 /* The type of BIT_FIELD_REF might not be equal to the element type of 1974 the vector. We want to use a vector type with element type the 1975 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). 
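A purely illustrative case with hypothetical types: if VEC has type vector(2) long long while the BIT_FIELD_REFs read 32-bit ints out of it, the code below builds a vector(4) int type of the same 128-bit size, and the summation code later inserts VIEW_CONVERT_EXPRs so the additions are carried out in that type.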
*/ 1976 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type))) 1977 { 1978 machine_mode simd_mode; 1979 unsigned HOST_WIDE_INT size, nunits; 1980 unsigned HOST_WIDE_INT elem_size 1981 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs))); 1982 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size)) 1983 continue; 1984 if (size <= elem_size || (size % elem_size) != 0) 1985 continue; 1986 nunits = size / elem_size; 1987 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)), 1988 nunits).exists (&simd_mode)) 1989 continue; 1990 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode); 1991 1992 /* Ignore it if target machine can't support this VECTOR type. */ 1993 if (!VECTOR_MODE_P (TYPE_MODE (vec_type))) 1994 continue; 1995 1996 /* Check const vector type, constrain BIT_FIELD_REF offset and 1997 size. */ 1998 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ()) 1999 continue; 2000 2001 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)), 2002 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec))))) 2003 continue; 2004 } 2005 2006 tree elem_type = TREE_TYPE (vec_type); 2007 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type)); 2008 if (maybe_ne (bit_field_size (rhs), elem_size)) 2009 continue; 2010 2011 unsigned idx; 2012 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx)) 2013 continue; 2014 2015 /* Ignore it if target machine can't support this type of VECTOR 2016 operation. */ 2017 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector); 2018 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing) 2019 continue; 2020 2021 bool existed; 2022 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed); 2023 if (!existed) 2024 { 2025 info = new v_info; 2026 info->vec_type = vec_type; 2027 } 2028 else if (!types_compatible_p (vec_type, info->vec_type)) 2029 continue; 2030 info->vec.safe_push (std::make_pair (idx, i)); 2031 } 2032 2033 /* At least two VECTOR to combine. */ 2034 if (v_info_map.elements () <= 1) 2035 { 2036 cleanup_vinfo_map (v_info_map); 2037 return false; 2038 } 2039 2040 /* Verify all VECTOR candidates by checking two conditions: 2041 1) sorted offsets are adjacent, no holes. 2042 2) can fill the whole VECTOR perfectly. 2043 And add the valid candidates to a vector for further handling. */ 2044 auto_vec<tree> valid_vecs (v_info_map.elements ()); 2045 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin (); 2046 it != v_info_map.end (); ++it) 2047 { 2048 tree cand_vec = (*it).first; 2049 v_info_ptr cand_info = (*it).second; 2050 unsigned int num_elems 2051 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant (); 2052 if (cand_info->vec.length () != num_elems) 2053 continue; 2054 sbitmap holes = sbitmap_alloc (num_elems); 2055 bitmap_ones (holes); 2056 bool valid = true; 2057 v_info_elem *curr; 2058 FOR_EACH_VEC_ELT (cand_info->vec, i, curr) 2059 { 2060 if (!bitmap_bit_p (holes, curr->first)) 2061 { 2062 valid = false; 2063 break; 2064 } 2065 else 2066 bitmap_clear_bit (holes, curr->first); 2067 } 2068 if (valid && bitmap_empty_p (holes)) 2069 valid_vecs.quick_push (cand_vec); 2070 sbitmap_free (holes); 2071 } 2072 2073 /* At least two VECTOR to combine. */ 2074 if (valid_vecs.length () <= 1) 2075 { 2076 cleanup_vinfo_map (v_info_map); 2077 return false; 2078 } 2079 2080 valid_vecs.qsort (sort_by_mach_mode); 2081 /* Go through all candidates by machine mode order, query the mode_to_total 2082 to get the total number for each mode and skip the single one. 
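For instance (hypothetical names), if the sorted list is { v4si_a, v4si_b, v4si_c, v2df_d }, the loop below sums v4si_a + v4si_b + v4si_c into one V4SImode vector and rewrites the related scalar ops as BIT_FIELD_REFs of that sum, while v2df_d is skipped because the neighbouring-mode check shows it is the only candidate with V2DFmode.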
*/ 2083 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i) 2084 { 2085 tree tvec = valid_vecs[i]; 2086 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec)); 2087 2088 /* Skip modes with only a single candidate. */ 2089 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode) 2090 continue; 2091 2092 unsigned int idx, j; 2093 gimple *sum = NULL; 2094 tree sum_vec = tvec; 2095 v_info_ptr info_ptr = *(v_info_map.get (tvec)); 2096 v_info_elem *elem; 2097 tree vec_type = info_ptr->vec_type; 2098 2099 /* Build the sum for all candidates with same mode. */ 2100 do 2101 { 2102 sum = build_and_add_sum (vec_type, sum_vec, 2103 valid_vecs[i + 1], opcode); 2104 if (!useless_type_conversion_p (vec_type, 2105 TREE_TYPE (valid_vecs[i + 1]))) 2106 { 2107 /* Update the operands only after build_and_add_sum, 2108 so that we don't have to repeat the placement algorithm 2109 of build_and_add_sum. */ 2110 gimple_stmt_iterator gsi = gsi_for_stmt (sum); 2111 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, 2112 valid_vecs[i + 1]); 2113 tree lhs = make_ssa_name (vec_type); 2114 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce); 2115 gimple_set_uid (g, gimple_uid (sum)); 2116 gsi_insert_before (&gsi, g, GSI_NEW_STMT); 2117 gimple_assign_set_rhs2 (sum, lhs); 2118 if (sum_vec == tvec) 2119 { 2120 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec); 2121 lhs = make_ssa_name (vec_type); 2122 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce); 2123 gimple_set_uid (g, gimple_uid (sum)); 2124 gsi_insert_before (&gsi, g, GSI_NEW_STMT); 2125 gimple_assign_set_rhs1 (sum, lhs); 2126 } 2127 update_stmt (sum); 2128 } 2129 sum_vec = gimple_get_lhs (sum); 2130 info_ptr = *(v_info_map.get (valid_vecs[i + 1])); 2131 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type)); 2132 /* Update those related ops of current candidate VECTOR. */ 2133 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem) 2134 { 2135 idx = elem->second; 2136 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op); 2137 /* Set this then op definition will get DCEd later. */ 2138 gimple_set_visited (def, true); 2139 if (opcode == PLUS_EXPR 2140 || opcode == BIT_XOR_EXPR 2141 || opcode == BIT_IOR_EXPR) 2142 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op)); 2143 else if (opcode == MULT_EXPR) 2144 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op)); 2145 else 2146 { 2147 gcc_assert (opcode == BIT_AND_EXPR); 2148 (*ops)[idx]->op 2149 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op)); 2150 } 2151 (*ops)[idx]->rank = 0; 2152 } 2153 if (dump_file && (dump_flags & TDF_DETAILS)) 2154 { 2155 fprintf (dump_file, "Generating addition -> "); 2156 print_gimple_stmt (dump_file, sum, 0); 2157 } 2158 i++; 2159 } 2160 while ((i < valid_vecs.length () - 1) 2161 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode); 2162 2163 /* Referring to first valid VECTOR with this mode, generate the 2164 BIT_FIELD_REF statements accordingly. */ 2165 info_ptr = *(v_info_map.get (tvec)); 2166 gcc_assert (sum); 2167 tree elem_type = TREE_TYPE (vec_type); 2168 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem) 2169 { 2170 idx = elem->second; 2171 tree dst = make_ssa_name (elem_type); 2172 tree pos = bitsize_int (elem->first 2173 * tree_to_uhwi (TYPE_SIZE (elem_type))); 2174 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec, 2175 TYPE_SIZE (elem_type), pos); 2176 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr); 2177 insert_stmt_after (gs, sum); 2178 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op); 2179 /* Set this then op definition will get DCEd later. 
*/ 2180 gimple_set_visited (def, true); 2181 (*ops)[idx]->op = gimple_assign_lhs (gs); 2182 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op); 2183 if (dump_file && (dump_flags & TDF_DETAILS)) 2184 { 2185 fprintf (dump_file, "Generating bit_field_ref -> "); 2186 print_gimple_stmt (dump_file, gs, 0); 2187 } 2188 } 2189 } 2190 2191 if (dump_file && (dump_flags & TDF_DETAILS)) 2192 fprintf (dump_file, "undistributiong bit_field_ref for vector done.\n"); 2193 2194 cleanup_vinfo_map (v_info_map); 2195 2196 return true; 2197 } 2198 2199 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison 2200 expression, examine the other OPS to see if any of them are comparisons 2201 of the same values, which we may be able to combine or eliminate. 2202 For example, we can rewrite (a < b) | (a == b) as (a <= b). */ 2203 2204 static bool 2205 eliminate_redundant_comparison (enum tree_code opcode, 2206 vec<operand_entry *> *ops, 2207 unsigned int currindex, 2208 operand_entry *curr) 2209 { 2210 tree op1, op2; 2211 enum tree_code lcode, rcode; 2212 gimple *def1, *def2; 2213 int i; 2214 operand_entry *oe; 2215 2216 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) 2217 return false; 2218 2219 /* Check that CURR is a comparison. */ 2220 if (TREE_CODE (curr->op) != SSA_NAME) 2221 return false; 2222 def1 = SSA_NAME_DEF_STMT (curr->op); 2223 if (!is_gimple_assign (def1)) 2224 return false; 2225 lcode = gimple_assign_rhs_code (def1); 2226 if (TREE_CODE_CLASS (lcode) != tcc_comparison) 2227 return false; 2228 op1 = gimple_assign_rhs1 (def1); 2229 op2 = gimple_assign_rhs2 (def1); 2230 2231 /* Now look for a similar comparison in the remaining OPS. */ 2232 for (i = currindex + 1; ops->iterate (i, &oe); i++) 2233 { 2234 tree t; 2235 2236 if (TREE_CODE (oe->op) != SSA_NAME) 2237 continue; 2238 def2 = SSA_NAME_DEF_STMT (oe->op); 2239 if (!is_gimple_assign (def2)) 2240 continue; 2241 rcode = gimple_assign_rhs_code (def2); 2242 if (TREE_CODE_CLASS (rcode) != tcc_comparison) 2243 continue; 2244 2245 /* If we got here, we have a match. See if we can combine the 2246 two comparisons. */ 2247 tree type = TREE_TYPE (gimple_assign_lhs (def1)); 2248 if (opcode == BIT_IOR_EXPR) 2249 t = maybe_fold_or_comparisons (type, 2250 lcode, op1, op2, 2251 rcode, gimple_assign_rhs1 (def2), 2252 gimple_assign_rhs2 (def2)); 2253 else 2254 t = maybe_fold_and_comparisons (type, 2255 lcode, op1, op2, 2256 rcode, gimple_assign_rhs1 (def2), 2257 gimple_assign_rhs2 (def2)); 2258 if (!t) 2259 continue; 2260 2261 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons 2262 always give us a boolean_type_node value back. If the original 2263 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type, 2264 we need to convert. 
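For example, if the values being combined have type int, the tree returned for (a < b) | (a == b), namely a <= b, has boolean type and is fold_convert-ed back to int below before it can stand in for curr->op.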
*/ 2265 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t))) 2266 { 2267 if (!fold_convertible_p (TREE_TYPE (curr->op), t)) 2268 continue; 2269 t = fold_convert (TREE_TYPE (curr->op), t); 2270 } 2271 2272 if (TREE_CODE (t) != INTEGER_CST 2273 && !operand_equal_p (t, curr->op, 0)) 2274 { 2275 enum tree_code subcode; 2276 tree newop1, newop2; 2277 if (!COMPARISON_CLASS_P (t)) 2278 continue; 2279 extract_ops_from_tree (t, &subcode, &newop1, &newop2); 2280 STRIP_USELESS_TYPE_CONVERSION (newop1); 2281 STRIP_USELESS_TYPE_CONVERSION (newop2); 2282 if (!is_gimple_val (newop1) || !is_gimple_val (newop2)) 2283 continue; 2284 } 2285 2286 if (dump_file && (dump_flags & TDF_DETAILS)) 2287 { 2288 fprintf (dump_file, "Equivalence: "); 2289 print_generic_expr (dump_file, curr->op); 2290 fprintf (dump_file, " %s ", op_symbol_code (opcode)); 2291 print_generic_expr (dump_file, oe->op); 2292 fprintf (dump_file, " -> "); 2293 print_generic_expr (dump_file, t); 2294 fprintf (dump_file, "\n"); 2295 } 2296 2297 /* Now we can delete oe, as it has been subsumed by the new combined 2298 expression t. */ 2299 ops->ordered_remove (i); 2300 reassociate_stats.ops_eliminated ++; 2301 2302 /* If t is the same as curr->op, we're done. Otherwise we must 2303 replace curr->op with t. Special case is if we got a constant 2304 back, in which case we add it to the end instead of in place of 2305 the current entry. */ 2306 if (TREE_CODE (t) == INTEGER_CST) 2307 { 2308 ops->ordered_remove (currindex); 2309 add_to_ops_vec (ops, t); 2310 } 2311 else if (!operand_equal_p (t, curr->op, 0)) 2312 { 2313 gimple *sum; 2314 enum tree_code subcode; 2315 tree newop1; 2316 tree newop2; 2317 gcc_assert (COMPARISON_CLASS_P (t)); 2318 extract_ops_from_tree (t, &subcode, &newop1, &newop2); 2319 STRIP_USELESS_TYPE_CONVERSION (newop1); 2320 STRIP_USELESS_TYPE_CONVERSION (newop2); 2321 gcc_checking_assert (is_gimple_val (newop1) 2322 && is_gimple_val (newop2)); 2323 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode); 2324 curr->op = gimple_get_lhs (sum); 2325 } 2326 return true; 2327 } 2328 2329 return false; 2330 } 2331 2332 2333 /* Transform repeated addition of same values into multiply with 2334 constant. */ 2335 static bool 2336 transform_add_to_multiply (vec<operand_entry *> *ops) 2337 { 2338 operand_entry *oe; 2339 tree op = NULL_TREE; 2340 int j; 2341 int i, start = -1, end = 0, count = 0; 2342 auto_vec<std::pair <int, int> > indxs; 2343 bool changed = false; 2344 2345 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op)) 2346 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op)) 2347 || !flag_unsafe_math_optimizations)) 2348 return false; 2349 2350 /* Look for repeated operands. */ 2351 FOR_EACH_VEC_ELT (*ops, i, oe) 2352 { 2353 if (start == -1) 2354 { 2355 count = 1; 2356 op = oe->op; 2357 start = i; 2358 } 2359 else if (operand_equal_p (oe->op, op, 0)) 2360 { 2361 count++; 2362 end = i; 2363 } 2364 else 2365 { 2366 if (count > 1) 2367 indxs.safe_push (std::make_pair (start, end)); 2368 count = 1; 2369 op = oe->op; 2370 start = i; 2371 } 2372 } 2373 2374 if (count > 1) 2375 indxs.safe_push (std::make_pair (start, end)); 2376 2377 for (j = indxs.length () - 1; j >= 0; --j) 2378 { 2379 /* Convert repeated operand addition to multiplication. 
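E.g. a run x + x + x found by the scan above becomes tmp = x * 3; the three entries are removed from OPS and tmp is pushed back as a single operand.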
*/ 2380 start = indxs[j].first; 2381 end = indxs[j].second; 2382 op = (*ops)[start]->op; 2383 count = end - start + 1; 2384 for (i = end; i >= start; --i) 2385 ops->unordered_remove (i); 2386 tree tmp = make_ssa_name (TREE_TYPE (op)); 2387 tree cst = build_int_cst (integer_type_node, count); 2388 gassign *mul_stmt 2389 = gimple_build_assign (tmp, MULT_EXPR, 2390 op, fold_convert (TREE_TYPE (op), cst)); 2391 gimple_set_visited (mul_stmt, true); 2392 add_to_ops_vec (ops, tmp, mul_stmt); 2393 changed = true; 2394 } 2395 2396 return changed; 2397 } 2398 2399 2400 /* Perform various identities and other optimizations on the list of 2401 operand entries, stored in OPS. The tree code for the binary 2402 operation between all the operands is OPCODE. */ 2403 2404 static void 2405 optimize_ops_list (enum tree_code opcode, 2406 vec<operand_entry *> *ops) 2407 { 2408 unsigned int length = ops->length (); 2409 unsigned int i; 2410 operand_entry *oe; 2411 operand_entry *oelast = NULL; 2412 bool iterate = false; 2413 2414 if (length == 1) 2415 return; 2416 2417 oelast = ops->last (); 2418 2419 /* If the last two are constants, pop the constants off, merge them 2420 and try the next two. */ 2421 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) 2422 { 2423 operand_entry *oelm1 = (*ops)[length - 2]; 2424 2425 if (oelm1->rank == 0 2426 && is_gimple_min_invariant (oelm1->op) 2427 && useless_type_conversion_p (TREE_TYPE (oelm1->op), 2428 TREE_TYPE (oelast->op))) 2429 { 2430 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op), 2431 oelm1->op, oelast->op); 2432 2433 if (folded && is_gimple_min_invariant (folded)) 2434 { 2435 if (dump_file && (dump_flags & TDF_DETAILS)) 2436 fprintf (dump_file, "Merging constants\n"); 2437 2438 ops->pop (); 2439 ops->pop (); 2440 2441 add_to_ops_vec (ops, folded); 2442 reassociate_stats.constants_eliminated++; 2443 2444 optimize_ops_list (opcode, ops); 2445 return; 2446 } 2447 } 2448 } 2449 2450 eliminate_using_constants (opcode, ops); 2451 oelast = NULL; 2452 2453 for (i = 0; ops->iterate (i, &oe);) 2454 { 2455 bool done = false; 2456 2457 if (eliminate_not_pairs (opcode, ops, i, oe)) 2458 return; 2459 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast) 2460 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)) 2461 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe))) 2462 { 2463 if (done) 2464 return; 2465 iterate = true; 2466 oelast = NULL; 2467 continue; 2468 } 2469 oelast = oe; 2470 i++; 2471 } 2472 2473 if (iterate) 2474 optimize_ops_list (opcode, ops); 2475 } 2476 2477 /* The following functions are subroutines to optimize_range_tests and allow 2478 it to try to change a logical combination of comparisons into a range 2479 test. 2480 2481 For example, both 2482 X == 2 || X == 5 || X == 3 || X == 4 2483 and 2484 X >= 2 && X <= 5 2485 are converted to 2486 (unsigned) (X - 2) <= 3 2487 2488 For more information see comments above fold_test_range in fold-const.cc, 2489 this implementation is for GIMPLE. */ 2490 2491 2492 2493 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */ 2494 2495 void 2496 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp) 2497 { 2498 if (!skip_exp) 2499 print_generic_expr (file, r->exp); 2500 fprintf (file, " %c[", r->in_p ? '+' : '-'); 2501 print_generic_expr (file, r->low); 2502 fputs (", ", file); 2503 print_generic_expr (file, r->high); 2504 fputc (']', file); 2505 } 2506 2507 /* Dump the range entry R to STDERR. 
*/ 2508 2509 DEBUG_FUNCTION void 2510 debug_range_entry (struct range_entry *r) 2511 { 2512 dump_range_entry (stderr, r, false); 2513 fputc ('\n', stderr); 2514 } 2515 2516 /* This is similar to make_range in fold-const.cc, but on top of 2517 GIMPLE instead of trees. If EXP is non-NULL, it should be 2518 an SSA_NAME and STMT argument is ignored, otherwise STMT 2519 argument should be a GIMPLE_COND. */ 2520 2521 void 2522 init_range_entry (struct range_entry *r, tree exp, gimple *stmt) 2523 { 2524 int in_p; 2525 tree low, high; 2526 bool is_bool, strict_overflow_p; 2527 2528 r->exp = NULL_TREE; 2529 r->in_p = false; 2530 r->strict_overflow_p = false; 2531 r->low = NULL_TREE; 2532 r->high = NULL_TREE; 2533 if (exp != NULL_TREE 2534 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))) 2535 return; 2536 2537 /* Start with simply saying "EXP != 0" and then look at the code of EXP 2538 and see if we can refine the range. Some of the cases below may not 2539 happen, but it doesn't seem worth worrying about this. We "continue" 2540 the outer loop when we've changed something; otherwise we "break" 2541 the switch, which will "break" the while. */ 2542 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node; 2543 high = low; 2544 in_p = 0; 2545 strict_overflow_p = false; 2546 is_bool = false; 2547 if (exp == NULL_TREE) 2548 is_bool = true; 2549 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) 2550 { 2551 if (TYPE_UNSIGNED (TREE_TYPE (exp))) 2552 is_bool = true; 2553 else 2554 return; 2555 } 2556 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) 2557 is_bool = true; 2558 2559 while (1) 2560 { 2561 enum tree_code code; 2562 tree arg0, arg1, exp_type; 2563 tree nexp; 2564 location_t loc; 2565 2566 if (exp != NULL_TREE) 2567 { 2568 if (TREE_CODE (exp) != SSA_NAME 2569 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp)) 2570 break; 2571 2572 stmt = SSA_NAME_DEF_STMT (exp); 2573 if (!is_gimple_assign (stmt)) 2574 break; 2575 2576 code = gimple_assign_rhs_code (stmt); 2577 arg0 = gimple_assign_rhs1 (stmt); 2578 arg1 = gimple_assign_rhs2 (stmt); 2579 exp_type = TREE_TYPE (exp); 2580 } 2581 else 2582 { 2583 code = gimple_cond_code (stmt); 2584 arg0 = gimple_cond_lhs (stmt); 2585 arg1 = gimple_cond_rhs (stmt); 2586 exp_type = boolean_type_node; 2587 } 2588 2589 if (TREE_CODE (arg0) != SSA_NAME 2590 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0)) 2591 break; 2592 loc = gimple_location (stmt); 2593 switch (code) 2594 { 2595 case BIT_NOT_EXPR: 2596 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE 2597 /* Ensure the range is either +[-,0], +[0,0], 2598 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or 2599 -[1,1]. If it is e.g. +[-,-] or -[-,-] 2600 or similar expression of unconditional true or 2601 false, it should not be negated. 
*/ 2602 && ((high && integer_zerop (high)) 2603 || (low && integer_onep (low)))) 2604 { 2605 in_p = !in_p; 2606 exp = arg0; 2607 continue; 2608 } 2609 break; 2610 case SSA_NAME: 2611 exp = arg0; 2612 continue; 2613 CASE_CONVERT: 2614 if (is_bool) 2615 { 2616 if ((TYPE_PRECISION (exp_type) == 1 2617 || TREE_CODE (exp_type) == BOOLEAN_TYPE) 2618 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1) 2619 return; 2620 } 2621 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) 2622 { 2623 if (TYPE_UNSIGNED (TREE_TYPE (arg0))) 2624 is_bool = true; 2625 else 2626 return; 2627 } 2628 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) 2629 is_bool = true; 2630 goto do_default; 2631 case EQ_EXPR: 2632 case NE_EXPR: 2633 case LT_EXPR: 2634 case LE_EXPR: 2635 case GE_EXPR: 2636 case GT_EXPR: 2637 is_bool = true; 2638 /* FALLTHRU */ 2639 default: 2640 if (!is_bool) 2641 return; 2642 do_default: 2643 nexp = make_range_step (loc, code, arg0, arg1, exp_type, 2644 &low, &high, &in_p, 2645 &strict_overflow_p); 2646 if (nexp != NULL_TREE) 2647 { 2648 exp = nexp; 2649 gcc_assert (TREE_CODE (exp) == SSA_NAME); 2650 continue; 2651 } 2652 break; 2653 } 2654 break; 2655 } 2656 if (is_bool) 2657 { 2658 r->exp = exp; 2659 r->in_p = in_p; 2660 r->low = low; 2661 r->high = high; 2662 r->strict_overflow_p = strict_overflow_p; 2663 } 2664 } 2665 2666 /* Comparison function for qsort. Sort entries 2667 without SSA_NAME exp first, then with SSA_NAMEs sorted 2668 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs 2669 by increasing ->low and if ->low is the same, by increasing 2670 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE 2671 maximum. */ 2672 2673 static int 2674 range_entry_cmp (const void *a, const void *b) 2675 { 2676 const struct range_entry *p = (const struct range_entry *) a; 2677 const struct range_entry *q = (const struct range_entry *) b; 2678 2679 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) 2680 { 2681 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 2682 { 2683 /* Group range_entries for the same SSA_NAME together. */ 2684 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) 2685 return -1; 2686 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) 2687 return 1; 2688 /* If ->low is different, NULL low goes first, then by 2689 ascending low. */ 2690 if (p->low != NULL_TREE) 2691 { 2692 if (q->low != NULL_TREE) 2693 { 2694 tree tem = fold_binary (LT_EXPR, boolean_type_node, 2695 p->low, q->low); 2696 if (tem && integer_onep (tem)) 2697 return -1; 2698 tem = fold_binary (GT_EXPR, boolean_type_node, 2699 p->low, q->low); 2700 if (tem && integer_onep (tem)) 2701 return 1; 2702 } 2703 else 2704 return 1; 2705 } 2706 else if (q->low != NULL_TREE) 2707 return -1; 2708 /* If ->high is different, NULL high goes last, before that by 2709 ascending high. */ 2710 if (p->high != NULL_TREE) 2711 { 2712 if (q->high != NULL_TREE) 2713 { 2714 tree tem = fold_binary (LT_EXPR, boolean_type_node, 2715 p->high, q->high); 2716 if (tem && integer_onep (tem)) 2717 return -1; 2718 tem = fold_binary (GT_EXPR, boolean_type_node, 2719 p->high, q->high); 2720 if (tem && integer_onep (tem)) 2721 return 1; 2722 } 2723 else 2724 return -1; 2725 } 2726 else if (q->high != NULL_TREE) 2727 return 1; 2728 /* If both ranges are the same, sort below by ascending idx. 
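(idx records the entry's position in the OPS vector, so this final tie-break makes the comparison a total order and keeps the qsort result deterministic.)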
*/ 2729 } 2730 else 2731 return 1; 2732 } 2733 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) 2734 return -1; 2735 2736 if (p->idx < q->idx) 2737 return -1; 2738 else 2739 { 2740 gcc_checking_assert (p->idx > q->idx); 2741 return 1; 2742 } 2743 } 2744 2745 /* Helper function for update_range_test. Force EXPR into an SSA_NAME, 2746 insert needed statements BEFORE or after GSI. */ 2747 2748 static tree 2749 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before) 2750 { 2751 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING; 2752 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m); 2753 if (TREE_CODE (ret) != SSA_NAME) 2754 { 2755 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret); 2756 if (before) 2757 gsi_insert_before (gsi, g, GSI_SAME_STMT); 2758 else 2759 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING); 2760 ret = gimple_assign_lhs (g); 2761 } 2762 return ret; 2763 } 2764 2765 /* Helper routine of optimize_range_test. 2766 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for 2767 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, 2768 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE 2769 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to 2770 an array of COUNT pointers to other ranges. Return 2771 true if the range merge has been successful. 2772 If OPCODE is ERROR_MARK, this is called from within 2773 maybe_optimize_range_tests and is performing inter-bb range optimization. 2774 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in 2775 oe->rank. */ 2776 2777 static bool 2778 update_range_test (struct range_entry *range, struct range_entry *otherrange, 2779 struct range_entry **otherrangep, 2780 unsigned int count, enum tree_code opcode, 2781 vec<operand_entry *> *ops, tree exp, gimple_seq seq, 2782 bool in_p, tree low, tree high, bool strict_overflow_p) 2783 { 2784 unsigned int idx = range->idx; 2785 struct range_entry *swap_with = NULL; 2786 basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL; 2787 if (opcode == ERROR_MARK) 2788 { 2789 /* For inter-bb range test optimization, pick from the range tests 2790 the one which is tested in the earliest condition (one dominating 2791 the others), because otherwise there could be some UB (e.g. signed 2792 overflow) in following bbs that we'd expose which wasn't there in 2793 the original program. See PR104196. */ 2794 basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id); 2795 basic_block range_bb = orig_range_bb; 2796 for (unsigned int i = 0; i < count; i++) 2797 { 2798 struct range_entry *this_range; 2799 if (otherrange) 2800 this_range = otherrange + i; 2801 else 2802 this_range = otherrangep[i]; 2803 operand_entry *oe = (*ops)[this_range->idx]; 2804 basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id); 2805 if (range_bb != this_bb 2806 && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb)) 2807 { 2808 swap_with = this_range; 2809 range_bb = this_bb; 2810 idx = this_range->idx; 2811 } 2812 } 2813 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs 2814 only defined in later blocks. In this case we can't move the 2815 merged comparison earlier, so instead check if there are any stmts 2816 that might trigger signed integer overflow in between and rewrite 2817 them. But only after we check if the optimization is possible. 
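The blocks to scan are remembered in rewrite_bb_first and rewrite_bb_last just below; the rewriting itself is done later in this function, once the early-out checks have passed.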
*/ 2818 if (seq && swap_with) 2819 { 2820 rewrite_bb_first = range_bb; 2821 rewrite_bb_last = orig_range_bb; 2822 idx = range->idx; 2823 swap_with = NULL; 2824 } 2825 } 2826 operand_entry *oe = (*ops)[idx]; 2827 tree op = oe->op; 2828 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) 2829 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 2830 location_t loc = gimple_location (stmt); 2831 tree optype = op ? TREE_TYPE (op) : boolean_type_node; 2832 tree tem = build_range_check (loc, optype, unshare_expr (exp), 2833 in_p, low, high); 2834 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 2835 gimple_stmt_iterator gsi; 2836 unsigned int i, uid; 2837 2838 if (tem == NULL_TREE) 2839 return false; 2840 2841 /* If op is default def SSA_NAME, there is no place to insert the 2842 new comparison. Give up, unless we can use OP itself as the 2843 range test. */ 2844 if (op && SSA_NAME_IS_DEFAULT_DEF (op)) 2845 { 2846 if (op == range->exp 2847 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype)) 2848 || TREE_CODE (optype) == BOOLEAN_TYPE) 2849 && (op == tem 2850 || (TREE_CODE (tem) == EQ_EXPR 2851 && TREE_OPERAND (tem, 0) == op 2852 && integer_onep (TREE_OPERAND (tem, 1)))) 2853 && opcode != BIT_IOR_EXPR 2854 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR)) 2855 { 2856 stmt = NULL; 2857 tem = op; 2858 } 2859 else 2860 return false; 2861 } 2862 2863 if (swap_with) 2864 std::swap (range->idx, swap_with->idx); 2865 2866 if (strict_overflow_p && issue_strict_overflow_warning (wc)) 2867 warning_at (loc, OPT_Wstrict_overflow, 2868 "assuming signed overflow does not occur " 2869 "when simplifying range test"); 2870 2871 if (dump_file && (dump_flags & TDF_DETAILS)) 2872 { 2873 struct range_entry *r; 2874 fprintf (dump_file, "Optimizing range tests "); 2875 dump_range_entry (dump_file, range, false); 2876 for (i = 0; i < count; i++) 2877 { 2878 if (otherrange) 2879 r = otherrange + i; 2880 else 2881 r = otherrangep[i]; 2882 if (r->exp 2883 && r->exp != range->exp 2884 && TREE_CODE (r->exp) == SSA_NAME) 2885 { 2886 fprintf (dump_file, " and "); 2887 dump_range_entry (dump_file, r, false); 2888 } 2889 else 2890 { 2891 fprintf (dump_file, " and"); 2892 dump_range_entry (dump_file, r, true); 2893 } 2894 } 2895 fprintf (dump_file, "\n into "); 2896 print_generic_expr (dump_file, tem); 2897 fprintf (dump_file, "\n"); 2898 } 2899 2900 /* In inter-bb range optimization mode, if we have a seq, we can't 2901 move the merged comparison to the earliest bb from the comparisons 2902 being replaced, so instead rewrite stmts that could trigger signed 2903 integer overflow. 
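For example (hypothetical GIMPLE), an intervening _5 = _4 + 1 computed in a signed type is rewritten by rewrite_to_defined_overflow below to perform the addition in the corresponding unsigned type, so executing it on paths where it used to be guarded cannot introduce new undefined behavior.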
*/ 2904 for (basic_block bb = rewrite_bb_last; 2905 bb != rewrite_bb_first; bb = single_pred (bb)) 2906 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); 2907 !gsi_end_p (gsi); gsi_next (&gsi)) 2908 { 2909 gimple *stmt = gsi_stmt (gsi); 2910 if (is_gimple_assign (stmt)) 2911 if (tree lhs = gimple_assign_lhs (stmt)) 2912 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 2913 || POINTER_TYPE_P (TREE_TYPE (lhs))) 2914 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))) 2915 { 2916 enum tree_code code = gimple_assign_rhs_code (stmt); 2917 if (arith_code_with_undefined_signed_overflow (code)) 2918 { 2919 gimple_stmt_iterator gsip = gsi; 2920 gimple_stmt_iterator gsin = gsi; 2921 gsi_prev (&gsip); 2922 gsi_next (&gsin); 2923 rewrite_to_defined_overflow (stmt, true); 2924 unsigned uid = gimple_uid (stmt); 2925 if (gsi_end_p (gsip)) 2926 gsip = gsi_after_labels (bb); 2927 else 2928 gsi_next (&gsip); 2929 for (; gsi_stmt (gsip) != gsi_stmt (gsin); 2930 gsi_next (&gsip)) 2931 gimple_set_uid (gsi_stmt (gsip), uid); 2932 } 2933 } 2934 } 2935 2936 if (opcode == BIT_IOR_EXPR 2937 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 2938 tem = invert_truthvalue_loc (loc, tem); 2939 2940 tem = fold_convert_loc (loc, optype, tem); 2941 if (stmt) 2942 { 2943 gsi = gsi_for_stmt (stmt); 2944 uid = gimple_uid (stmt); 2945 } 2946 else 2947 { 2948 gsi = gsi_none (); 2949 uid = 0; 2950 } 2951 if (stmt == NULL) 2952 gcc_checking_assert (tem == op); 2953 /* In rare cases range->exp can be equal to lhs of stmt. 2954 In that case we have to insert after the stmt rather then before 2955 it. If stmt is a PHI, insert it at the start of the basic block. */ 2956 else if (op != range->exp) 2957 { 2958 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 2959 tem = force_into_ssa_name (&gsi, tem, true); 2960 gsi_prev (&gsi); 2961 } 2962 else if (gimple_code (stmt) != GIMPLE_PHI) 2963 { 2964 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); 2965 tem = force_into_ssa_name (&gsi, tem, false); 2966 } 2967 else 2968 { 2969 gsi = gsi_after_labels (gimple_bb (stmt)); 2970 if (!gsi_end_p (gsi)) 2971 uid = gimple_uid (gsi_stmt (gsi)); 2972 else 2973 { 2974 gsi = gsi_start_bb (gimple_bb (stmt)); 2975 uid = 1; 2976 while (!gsi_end_p (gsi)) 2977 { 2978 uid = gimple_uid (gsi_stmt (gsi)); 2979 gsi_next (&gsi); 2980 } 2981 } 2982 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 2983 tem = force_into_ssa_name (&gsi, tem, true); 2984 if (gsi_end_p (gsi)) 2985 gsi = gsi_last_bb (gimple_bb (stmt)); 2986 else 2987 gsi_prev (&gsi); 2988 } 2989 for (; !gsi_end_p (gsi); gsi_prev (&gsi)) 2990 if (gimple_uid (gsi_stmt (gsi))) 2991 break; 2992 else 2993 gimple_set_uid (gsi_stmt (gsi), uid); 2994 2995 oe->op = tem; 2996 range->exp = exp; 2997 range->low = low; 2998 range->high = high; 2999 range->in_p = in_p; 3000 range->strict_overflow_p = false; 3001 3002 for (i = 0; i < count; i++) 3003 { 3004 if (otherrange) 3005 range = otherrange + i; 3006 else 3007 range = otherrangep[i]; 3008 oe = (*ops)[range->idx]; 3009 /* Now change all the other range test immediate uses, so that 3010 those tests will be optimized away. */ 3011 if (opcode == ERROR_MARK) 3012 { 3013 if (oe->op) 3014 oe->op = build_int_cst (TREE_TYPE (oe->op), 3015 oe->rank == BIT_IOR_EXPR ? 0 : 1); 3016 else 3017 oe->op = (oe->rank == BIT_IOR_EXPR 3018 ? 
boolean_false_node : boolean_true_node); 3019 } 3020 else 3021 oe->op = error_mark_node; 3022 range->exp = NULL_TREE; 3023 range->low = NULL_TREE; 3024 range->high = NULL_TREE; 3025 } 3026 return true; 3027 } 3028 3029 /* Optimize X == CST1 || X == CST2 3030 if popcount (CST1 ^ CST2) == 1 into 3031 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). 3032 Similarly for ranges. E.g. 3033 X != 2 && X != 3 && X != 10 && X != 11 3034 will be transformed by the previous optimization into 3035 !((X - 2U) <= 1U || (X - 10U) <= 1U) 3036 and this loop can transform that into 3037 !(((X & ~8) - 2U) <= 1U). */ 3038 3039 static bool 3040 optimize_range_tests_xor (enum tree_code opcode, tree type, 3041 tree lowi, tree lowj, tree highi, tree highj, 3042 vec<operand_entry *> *ops, 3043 struct range_entry *rangei, 3044 struct range_entry *rangej) 3045 { 3046 tree lowxor, highxor, tem, exp; 3047 /* Check lowi ^ lowj == highi ^ highj and 3048 popcount (lowi ^ lowj) == 1. */ 3049 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); 3050 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) 3051 return false; 3052 if (!integer_pow2p (lowxor)) 3053 return false; 3054 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); 3055 if (!tree_int_cst_equal (lowxor, highxor)) 3056 return false; 3057 3058 exp = rangei->exp; 3059 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type)); 3060 int prec = GET_MODE_PRECISION (mode); 3061 if (TYPE_PRECISION (type) < prec 3062 || (wi::to_wide (TYPE_MIN_VALUE (type)) 3063 != wi::min_value (prec, TYPE_SIGN (type))) 3064 || (wi::to_wide (TYPE_MAX_VALUE (type)) 3065 != wi::max_value (prec, TYPE_SIGN (type)))) 3066 { 3067 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type)); 3068 exp = fold_convert (type, exp); 3069 lowxor = fold_convert (type, lowxor); 3070 lowi = fold_convert (type, lowi); 3071 highi = fold_convert (type, highi); 3072 } 3073 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); 3074 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem); 3075 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); 3076 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); 3077 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp, 3078 NULL, rangei->in_p, lowj, highj, 3079 rangei->strict_overflow_p 3080 || rangej->strict_overflow_p)) 3081 return true; 3082 return false; 3083 } 3084 3085 /* Optimize X == CST1 || X == CST2 3086 if popcount (CST2 - CST1) == 1 into 3087 ((X - CST1) & ~(CST2 - CST1)) == 0. 3088 Similarly for ranges. E.g. 3089 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46 3090 || X == 75 || X == 45 3091 will be transformed by the previous optimization into 3092 (X - 43U) <= 3U || (X - 75U) <= 3U 3093 and this loop can transform that into 3094 ((X - 43U) & ~(75U - 43U)) <= 3U. */ 3095 static bool 3096 optimize_range_tests_diff (enum tree_code opcode, tree type, 3097 tree lowi, tree lowj, tree highi, tree highj, 3098 vec<operand_entry *> *ops, 3099 struct range_entry *rangei, 3100 struct range_entry *rangej) 3101 { 3102 tree tem1, tem2, mask; 3103 /* Check highi - lowi == highj - lowj. */ 3104 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi); 3105 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 3106 return false; 3107 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj); 3108 if (!tree_int_cst_equal (tem1, tem2)) 3109 return false; 3110 /* Check popcount (lowj - lowi) == 1. 
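In the example in the function comment above, lowj - lowi is 75U - 43U == 32, a single set bit.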
*/ 3111 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi); 3112 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 3113 return false; 3114 if (!integer_pow2p (tem1)) 3115 return false; 3116 3117 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type)); 3118 int prec = GET_MODE_PRECISION (mode); 3119 if (TYPE_PRECISION (type) < prec 3120 || (wi::to_wide (TYPE_MIN_VALUE (type)) 3121 != wi::min_value (prec, TYPE_SIGN (type))) 3122 || (wi::to_wide (TYPE_MAX_VALUE (type)) 3123 != wi::max_value (prec, TYPE_SIGN (type)))) 3124 type = build_nonstandard_integer_type (prec, 1); 3125 else 3126 type = unsigned_type_for (type); 3127 tem1 = fold_convert (type, tem1); 3128 tem2 = fold_convert (type, tem2); 3129 lowi = fold_convert (type, lowi); 3130 mask = fold_build1 (BIT_NOT_EXPR, type, tem1); 3131 tem1 = fold_build2 (MINUS_EXPR, type, 3132 fold_convert (type, rangei->exp), lowi); 3133 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask); 3134 lowj = build_int_cst (type, 0); 3135 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1, 3136 NULL, rangei->in_p, lowj, tem2, 3137 rangei->strict_overflow_p 3138 || rangej->strict_overflow_p)) 3139 return true; 3140 return false; 3141 } 3142 3143 /* It does some common checks for function optimize_range_tests_xor and 3144 optimize_range_tests_diff. 3145 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor. 3146 Else it calls optimize_range_tests_diff. */ 3147 3148 static bool 3149 optimize_range_tests_1 (enum tree_code opcode, int first, int length, 3150 bool optimize_xor, vec<operand_entry *> *ops, 3151 struct range_entry *ranges) 3152 { 3153 int i, j; 3154 bool any_changes = false; 3155 for (i = first; i < length; i++) 3156 { 3157 tree lowi, highi, lowj, highj, type, tem; 3158 3159 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 3160 continue; 3161 type = TREE_TYPE (ranges[i].exp); 3162 if (!INTEGRAL_TYPE_P (type)) 3163 continue; 3164 lowi = ranges[i].low; 3165 if (lowi == NULL_TREE) 3166 lowi = TYPE_MIN_VALUE (type); 3167 highi = ranges[i].high; 3168 if (highi == NULL_TREE) 3169 continue; 3170 for (j = i + 1; j < length && j < i + 64; j++) 3171 { 3172 bool changes; 3173 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p) 3174 continue; 3175 lowj = ranges[j].low; 3176 if (lowj == NULL_TREE) 3177 continue; 3178 highj = ranges[j].high; 3179 if (highj == NULL_TREE) 3180 highj = TYPE_MAX_VALUE (type); 3181 /* Check lowj > highi. */ 3182 tem = fold_binary (GT_EXPR, boolean_type_node, 3183 lowj, highi); 3184 if (tem == NULL_TREE || !integer_onep (tem)) 3185 continue; 3186 if (optimize_xor) 3187 changes = optimize_range_tests_xor (opcode, type, lowi, lowj, 3188 highi, highj, ops, 3189 ranges + i, ranges + j); 3190 else 3191 changes = optimize_range_tests_diff (opcode, type, lowi, lowj, 3192 highi, highj, ops, 3193 ranges + i, ranges + j); 3194 if (changes) 3195 { 3196 any_changes = true; 3197 break; 3198 } 3199 } 3200 } 3201 return any_changes; 3202 } 3203 3204 /* Helper function of optimize_range_tests_to_bit_test. Handle a single 3205 range, EXP, LOW, HIGH, compute bit mask of bits to test and return 3206 EXP on success, NULL otherwise. 
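A purely illustrative run with 64-bit word_mode: for LOW 43, HIGH 46 and TOTALLOW 43 the computed mask has bits 0-3 set, while a later range with LOW 75, HIGH 78 against the same TOTALLOW has its mask shifted up to bits 32-35.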
*/ 3207 3208 static tree 3209 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high, 3210 wide_int *mask, tree *totallowp) 3211 { 3212 tree tem = int_const_binop (MINUS_EXPR, high, low); 3213 if (tem == NULL_TREE 3214 || TREE_CODE (tem) != INTEGER_CST 3215 || TREE_OVERFLOW (tem) 3216 || tree_int_cst_sgn (tem) == -1 3217 || compare_tree_int (tem, prec) != -1) 3218 return NULL_TREE; 3219 3220 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1; 3221 *mask = wi::shifted_mask (0, max, false, prec); 3222 if (TREE_CODE (exp) == BIT_AND_EXPR 3223 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) 3224 { 3225 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1)); 3226 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp))); 3227 if (wi::popcount (msk) == 1 3228 && wi::ltu_p (msk, prec - max)) 3229 { 3230 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec); 3231 max += msk.to_uhwi (); 3232 exp = TREE_OPERAND (exp, 0); 3233 if (integer_zerop (low) 3234 && TREE_CODE (exp) == PLUS_EXPR 3235 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST) 3236 { 3237 tree ret = TREE_OPERAND (exp, 0); 3238 STRIP_NOPS (ret); 3239 widest_int bias 3240 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)), 3241 TYPE_PRECISION (TREE_TYPE (low)))); 3242 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias); 3243 if (totallowp) 3244 { 3245 *totallowp = tbias; 3246 return ret; 3247 } 3248 else if (!tree_int_cst_lt (totallow, tbias)) 3249 return NULL_TREE; 3250 bias = wi::to_widest (tbias); 3251 bias -= wi::to_widest (totallow); 3252 if (bias >= 0 && bias < prec - max) 3253 { 3254 *mask = wi::lshift (*mask, bias); 3255 return ret; 3256 } 3257 } 3258 } 3259 } 3260 if (totallowp) 3261 return exp; 3262 if (!tree_int_cst_lt (totallow, low)) 3263 return exp; 3264 tem = int_const_binop (MINUS_EXPR, low, totallow); 3265 if (tem == NULL_TREE 3266 || TREE_CODE (tem) != INTEGER_CST 3267 || TREE_OVERFLOW (tem) 3268 || compare_tree_int (tem, prec - max) == 1) 3269 return NULL_TREE; 3270 3271 *mask = wi::lshift (*mask, wi::to_widest (tem)); 3272 return exp; 3273 } 3274 3275 /* Attempt to optimize small range tests using bit test. 3276 E.g. 
3277 X != 43 && X != 76 && X != 44 && X != 78 && X != 49 3278 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82 3279 has been by earlier optimizations optimized into: 3280 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82 3281 As all the 43 through 82 range is less than 64 numbers, 3282 for 64-bit word targets optimize that into: 3283 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */ 3284 3285 static bool 3286 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length, 3287 vec<operand_entry *> *ops, 3288 struct range_entry *ranges) 3289 { 3290 int i, j; 3291 bool any_changes = false; 3292 int prec = GET_MODE_BITSIZE (word_mode); 3293 auto_vec<struct range_entry *, 64> candidates; 3294 3295 for (i = first; i < length - 1; i++) 3296 { 3297 tree lowi, highi, lowj, highj, type; 3298 3299 if (ranges[i].exp == NULL_TREE || ranges[i].in_p) 3300 continue; 3301 type = TREE_TYPE (ranges[i].exp); 3302 if (!INTEGRAL_TYPE_P (type)) 3303 continue; 3304 lowi = ranges[i].low; 3305 if (lowi == NULL_TREE) 3306 lowi = TYPE_MIN_VALUE (type); 3307 highi = ranges[i].high; 3308 if (highi == NULL_TREE) 3309 continue; 3310 wide_int mask; 3311 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi, 3312 highi, &mask, &lowi); 3313 if (exp == NULL_TREE) 3314 continue; 3315 bool strict_overflow_p = ranges[i].strict_overflow_p; 3316 candidates.truncate (0); 3317 int end = MIN (i + 64, length); 3318 for (j = i + 1; j < end; j++) 3319 { 3320 tree exp2; 3321 if (ranges[j].exp == NULL_TREE || ranges[j].in_p) 3322 continue; 3323 if (ranges[j].exp == exp) 3324 ; 3325 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR) 3326 { 3327 exp2 = TREE_OPERAND (ranges[j].exp, 0); 3328 if (exp2 == exp) 3329 ; 3330 else if (TREE_CODE (exp2) == PLUS_EXPR) 3331 { 3332 exp2 = TREE_OPERAND (exp2, 0); 3333 STRIP_NOPS (exp2); 3334 if (exp2 != exp) 3335 continue; 3336 } 3337 else 3338 continue; 3339 } 3340 else 3341 continue; 3342 lowj = ranges[j].low; 3343 if (lowj == NULL_TREE) 3344 continue; 3345 highj = ranges[j].high; 3346 if (highj == NULL_TREE) 3347 highj = TYPE_MAX_VALUE (type); 3348 wide_int mask2; 3349 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj, 3350 highj, &mask2, NULL); 3351 if (exp2 != exp) 3352 continue; 3353 mask |= mask2; 3354 strict_overflow_p |= ranges[j].strict_overflow_p; 3355 candidates.safe_push (&ranges[j]); 3356 } 3357 3358 /* If every possible relative value of the expression is a valid shift 3359 amount, then we can merge the entry test in the bit test. In this 3360 case, if we would need otherwise 2 or more comparisons, then use 3361 the bit test; in the other cases, the threshold is 3 comparisons. */ 3362 bool entry_test_needed; 3363 value_range r; 3364 if (TREE_CODE (exp) == SSA_NAME 3365 && get_range_query (cfun)->range_of_expr (r, exp) 3366 && r.kind () == VR_RANGE 3367 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1)) 3368 { 3369 wide_int min = r.lower_bound (); 3370 wide_int ilowi = wi::to_wide (lowi); 3371 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi)))) 3372 { 3373 lowi = wide_int_to_tree (TREE_TYPE (lowi), min); 3374 mask = wi::lshift (mask, ilowi - min); 3375 } 3376 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi)))) 3377 { 3378 lowi = wide_int_to_tree (TREE_TYPE (lowi), min); 3379 mask = wi::lrshift (mask, min - ilowi); 3380 } 3381 entry_test_needed = false; 3382 } 3383 else 3384 entry_test_needed = true; 3385 if (candidates.length () >= (entry_test_needed ? 
2 : 1)) 3386 { 3387 tree high = wide_int_to_tree (TREE_TYPE (lowi), 3388 wi::to_widest (lowi) 3389 + prec - 1 - wi::clz (mask)); 3390 operand_entry *oe = (*ops)[ranges[i].idx]; 3391 tree op = oe->op; 3392 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) 3393 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 3394 location_t loc = gimple_location (stmt); 3395 tree optype = op ? TREE_TYPE (op) : boolean_type_node; 3396 3397 /* See if it isn't cheaper to pretend the minimum value of the 3398 range is 0, if maximum value is small enough. 3399 We can avoid then subtraction of the minimum value, but the 3400 mask constant could be perhaps more expensive. */ 3401 if (compare_tree_int (lowi, 0) > 0 3402 && compare_tree_int (high, prec) < 0) 3403 { 3404 int cost_diff; 3405 HOST_WIDE_INT m = tree_to_uhwi (lowi); 3406 rtx reg = gen_raw_REG (word_mode, 10000); 3407 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt)); 3408 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg, 3409 GEN_INT (-m)), 3410 word_mode, speed_p); 3411 rtx r = immed_wide_int_const (mask, word_mode); 3412 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r), 3413 word_mode, speed_p); 3414 r = immed_wide_int_const (wi::lshift (mask, m), word_mode); 3415 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r), 3416 word_mode, speed_p); 3417 if (cost_diff > 0) 3418 { 3419 mask = wi::lshift (mask, m); 3420 lowi = build_zero_cst (TREE_TYPE (lowi)); 3421 } 3422 } 3423 3424 tree tem; 3425 if (entry_test_needed) 3426 { 3427 tem = build_range_check (loc, optype, unshare_expr (exp), 3428 false, lowi, high); 3429 if (tem == NULL_TREE || is_gimple_val (tem)) 3430 continue; 3431 } 3432 else 3433 tem = NULL_TREE; 3434 tree etype = unsigned_type_for (TREE_TYPE (exp)); 3435 exp = fold_build2_loc (loc, MINUS_EXPR, etype, 3436 fold_convert_loc (loc, etype, exp), 3437 fold_convert_loc (loc, etype, lowi)); 3438 exp = fold_convert_loc (loc, integer_type_node, exp); 3439 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1); 3440 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type, 3441 build_int_cst (word_type, 1), exp); 3442 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp, 3443 wide_int_to_tree (word_type, mask)); 3444 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp, 3445 build_zero_cst (word_type)); 3446 if (is_gimple_val (exp)) 3447 continue; 3448 3449 /* The shift might have undefined behavior if TEM is true, 3450 but reassociate_bb isn't prepared to have basic blocks 3451 split when it is running. So, temporarily emit a code 3452 with BIT_IOR_EXPR instead of &&, and fix it up in 3453 branch_fixup. 
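That is, the entry test TEM and the bit test are combined with a plain BIT_IOR_EXPR for now; TEM is pushed onto reassoc_branch_fixups below, and branch_fixup later splits the block so that the shift-based test is only evaluated when TEM has not already decided the result.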
*/ 3454 gimple_seq seq = NULL; 3455 if (tem) 3456 { 3457 tem = force_gimple_operand (tem, &seq, true, NULL_TREE); 3458 gcc_assert (TREE_CODE (tem) == SSA_NAME); 3459 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true); 3460 } 3461 gimple_seq seq2; 3462 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE); 3463 gimple_seq_add_seq_without_update (&seq, seq2); 3464 gcc_assert (TREE_CODE (exp) == SSA_NAME); 3465 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true); 3466 if (tem) 3467 { 3468 gimple *g = gimple_build_assign (make_ssa_name (optype), 3469 BIT_IOR_EXPR, tem, exp); 3470 gimple_set_location (g, loc); 3471 gimple_seq_add_stmt_without_update (&seq, g); 3472 exp = gimple_assign_lhs (g); 3473 } 3474 tree val = build_zero_cst (optype); 3475 if (update_range_test (&ranges[i], NULL, candidates.address (), 3476 candidates.length (), opcode, ops, exp, 3477 seq, false, val, val, strict_overflow_p)) 3478 { 3479 any_changes = true; 3480 if (tem) 3481 reassoc_branch_fixups.safe_push (tem); 3482 } 3483 else 3484 gimple_seq_discard (seq); 3485 } 3486 } 3487 return any_changes; 3488 } 3489 3490 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0 3491 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1. 3492 Also, handle x < C && y < C && z < C where C is power of two as 3493 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0 3494 as (x | y | z) < 0. */ 3495 3496 static bool 3497 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length, 3498 vec<operand_entry *> *ops, 3499 struct range_entry *ranges) 3500 { 3501 int i; 3502 unsigned int b; 3503 bool any_changes = false; 3504 auto_vec<int, 128> buckets; 3505 auto_vec<int, 32> chains; 3506 auto_vec<struct range_entry *, 32> candidates; 3507 3508 for (i = first; i < length; i++) 3509 { 3510 int idx; 3511 3512 if (ranges[i].exp == NULL_TREE 3513 || TREE_CODE (ranges[i].exp) != SSA_NAME 3514 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1 3515 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE) 3516 continue; 3517 3518 if (ranges[i].low != NULL_TREE 3519 && ranges[i].high != NULL_TREE 3520 && ranges[i].in_p 3521 && tree_int_cst_equal (ranges[i].low, ranges[i].high)) 3522 { 3523 idx = !integer_zerop (ranges[i].low); 3524 if (idx && !integer_all_onesp (ranges[i].low)) 3525 continue; 3526 } 3527 else if (ranges[i].high != NULL_TREE 3528 && TREE_CODE (ranges[i].high) == INTEGER_CST 3529 && ranges[i].in_p) 3530 { 3531 wide_int w = wi::to_wide (ranges[i].high); 3532 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)); 3533 int l = wi::clz (w); 3534 idx = 2; 3535 if (l <= 0 3536 || l >= prec 3537 || w != wi::mask (prec - l, false, prec)) 3538 continue; 3539 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp)) 3540 && ranges[i].low == NULL_TREE) 3541 || (ranges[i].low 3542 && integer_zerop (ranges[i].low)))) 3543 continue; 3544 } 3545 else if (ranges[i].high == NULL_TREE 3546 && ranges[i].low != NULL_TREE 3547 /* Perform this optimization only in the last 3548 reassoc pass, as it interferes with the reassociation 3549 itself and could also interfere with VRP etc., which might not 3550 be able to virtually undo the optimization.
*/ 3551 && !reassoc_insert_powi_p 3552 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp)) 3553 && integer_zerop (ranges[i].low)) 3554 idx = 3; 3555 else 3556 continue; 3557 3558 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx; 3559 if (buckets.length () <= b) 3560 buckets.safe_grow_cleared (b + 1, true); 3561 if (chains.length () <= (unsigned) i) 3562 chains.safe_grow (i + 1, true); 3563 chains[i] = buckets[b]; 3564 buckets[b] = i + 1; 3565 } 3566 3567 FOR_EACH_VEC_ELT (buckets, b, i) 3568 if (i && chains[i - 1]) 3569 { 3570 int j, k = i; 3571 if ((b % 4) == 2) 3572 { 3573 /* When ranges[X - 1].high + 1 is a power of two, 3574 we need to process the same bucket up to 3575 precision - 1 times, each time split the entries 3576 with the same high bound into one chain and the 3577 rest into another one to be processed later. */ 3578 int this_prev = i; 3579 int other_prev = 0; 3580 for (j = chains[i - 1]; j; j = chains[j - 1]) 3581 { 3582 if (tree_int_cst_equal (ranges[i - 1].high, 3583 ranges[j - 1].high)) 3584 { 3585 chains[this_prev - 1] = j; 3586 this_prev = j; 3587 } 3588 else if (other_prev == 0) 3589 { 3590 buckets[b] = j; 3591 other_prev = j; 3592 } 3593 else 3594 { 3595 chains[other_prev - 1] = j; 3596 other_prev = j; 3597 } 3598 } 3599 chains[this_prev - 1] = 0; 3600 if (other_prev) 3601 chains[other_prev - 1] = 0; 3602 if (chains[i - 1] == 0) 3603 { 3604 if (other_prev) 3605 b--; 3606 continue; 3607 } 3608 } 3609 for (j = chains[i - 1]; j; j = chains[j - 1]) 3610 { 3611 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp); 3612 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp); 3613 if (reassoc_stmt_dominates_stmt_p (gk, gj)) 3614 k = j; 3615 } 3616 tree type1 = TREE_TYPE (ranges[k - 1].exp); 3617 tree type2 = NULL_TREE; 3618 bool strict_overflow_p = false; 3619 candidates.truncate (0); 3620 for (j = i; j; j = chains[j - 1]) 3621 { 3622 tree type = TREE_TYPE (ranges[j - 1].exp); 3623 strict_overflow_p |= ranges[j - 1].strict_overflow_p; 3624 if ((b % 4) == 3) 3625 { 3626 /* For the signed < 0 cases, the types should be 3627 really compatible (all signed with the same precision, 3628 instead put ranges that have different in_p from 3629 k first. 
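Concretely, ranges whose in_p differs from K's are pushed into the candidates vector first, so in the emission loop below they are folded into the accumulated value before the BIT_NOT_EXPR that is emitted once that first group has been processed.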
*/ 3630 if (!useless_type_conversion_p (type1, type)) 3631 continue; 3632 if (ranges[j - 1].in_p != ranges[k - 1].in_p) 3633 candidates.safe_push (&ranges[j - 1]); 3634 type2 = type1; 3635 continue; 3636 } 3637 if (j == k 3638 || useless_type_conversion_p (type1, type)) 3639 ; 3640 else if (type2 == NULL_TREE 3641 || useless_type_conversion_p (type2, type)) 3642 { 3643 if (type2 == NULL_TREE) 3644 type2 = type; 3645 candidates.safe_push (&ranges[j - 1]); 3646 } 3647 } 3648 unsigned l = candidates.length (); 3649 for (j = i; j; j = chains[j - 1]) 3650 { 3651 tree type = TREE_TYPE (ranges[j - 1].exp); 3652 if (j == k) 3653 continue; 3654 if ((b % 4) == 3) 3655 { 3656 if (!useless_type_conversion_p (type1, type)) 3657 continue; 3658 if (ranges[j - 1].in_p == ranges[k - 1].in_p) 3659 candidates.safe_push (&ranges[j - 1]); 3660 continue; 3661 } 3662 if (useless_type_conversion_p (type1, type)) 3663 ; 3664 else if (type2 == NULL_TREE 3665 || useless_type_conversion_p (type2, type)) 3666 continue; 3667 candidates.safe_push (&ranges[j - 1]); 3668 } 3669 gimple_seq seq = NULL; 3670 tree op = NULL_TREE; 3671 unsigned int id; 3672 struct range_entry *r; 3673 candidates.safe_push (&ranges[k - 1]); 3674 FOR_EACH_VEC_ELT (candidates, id, r) 3675 { 3676 gimple *g; 3677 enum tree_code code; 3678 if (id == 0) 3679 { 3680 op = r->exp; 3681 continue; 3682 } 3683 if (id == l) 3684 { 3685 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR; 3686 g = gimple_build_assign (make_ssa_name (type1), code, op); 3687 gimple_seq_add_stmt_without_update (&seq, g); 3688 op = gimple_assign_lhs (g); 3689 } 3690 tree type = TREE_TYPE (r->exp); 3691 tree exp = r->exp; 3692 if (id >= l && !useless_type_conversion_p (type1, type)) 3693 { 3694 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp); 3695 gimple_seq_add_stmt_without_update (&seq, g); 3696 exp = gimple_assign_lhs (g); 3697 } 3698 if ((b % 4) == 3) 3699 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR; 3700 else 3701 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR; 3702 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2), 3703 code, op, exp); 3704 gimple_seq_add_stmt_without_update (&seq, g); 3705 op = gimple_assign_lhs (g); 3706 } 3707 candidates.pop (); 3708 if (update_range_test (&ranges[k - 1], NULL, candidates.address (), 3709 candidates.length (), opcode, ops, op, 3710 seq, ranges[k - 1].in_p, ranges[k - 1].low, 3711 ranges[k - 1].high, strict_overflow_p)) 3712 any_changes = true; 3713 else 3714 gimple_seq_discard (seq); 3715 if ((b % 4) == 2 && buckets[b] != i) 3716 /* There is more work to do for this bucket. 
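(The (b % 4) == 2 handling above re-queued the entries with a different high bound into buckets[b], so decrementing B here makes the enclosing loop visit the same bucket again.)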
*/ 3717 b--; 3718 } 3719 3720 return any_changes; 3721 } 3722 3723 /* Attempt to optimize for signed a and b where b is known to be >= 0: 3724 a >= 0 && a < b into (unsigned) a < (unsigned) b 3725 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */ 3726 3727 static bool 3728 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length, 3729 vec<operand_entry *> *ops, 3730 struct range_entry *ranges, 3731 basic_block first_bb) 3732 { 3733 int i; 3734 bool any_changes = false; 3735 hash_map<tree, int> *map = NULL; 3736 3737 for (i = first; i < length; i++) 3738 { 3739 if (ranges[i].exp == NULL_TREE 3740 || TREE_CODE (ranges[i].exp) != SSA_NAME 3741 || !ranges[i].in_p) 3742 continue; 3743 3744 tree type = TREE_TYPE (ranges[i].exp); 3745 if (!INTEGRAL_TYPE_P (type) 3746 || TYPE_UNSIGNED (type) 3747 || ranges[i].low == NULL_TREE 3748 || !integer_zerop (ranges[i].low) 3749 || ranges[i].high != NULL_TREE) 3750 continue; 3751 /* EXP >= 0 here. */ 3752 if (map == NULL) 3753 map = new hash_map <tree, int>; 3754 map->put (ranges[i].exp, i); 3755 } 3756 3757 if (map == NULL) 3758 return false; 3759 3760 for (i = 0; i < length; i++) 3761 { 3762 bool in_p = ranges[i].in_p; 3763 if (ranges[i].low == NULL_TREE 3764 || ranges[i].high == NULL_TREE) 3765 continue; 3766 if (!integer_zerop (ranges[i].low) 3767 || !integer_zerop (ranges[i].high)) 3768 { 3769 if (ranges[i].exp 3770 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1 3771 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp)) 3772 && integer_onep (ranges[i].low) 3773 && integer_onep (ranges[i].high)) 3774 in_p = !in_p; 3775 else 3776 continue; 3777 } 3778 3779 gimple *stmt; 3780 tree_code ccode; 3781 tree rhs1, rhs2; 3782 if (ranges[i].exp) 3783 { 3784 if (TREE_CODE (ranges[i].exp) != SSA_NAME) 3785 continue; 3786 stmt = SSA_NAME_DEF_STMT (ranges[i].exp); 3787 if (!is_gimple_assign (stmt)) 3788 continue; 3789 ccode = gimple_assign_rhs_code (stmt); 3790 rhs1 = gimple_assign_rhs1 (stmt); 3791 rhs2 = gimple_assign_rhs2 (stmt); 3792 } 3793 else 3794 { 3795 operand_entry *oe = (*ops)[ranges[i].idx]; 3796 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); 3797 if (gimple_code (stmt) != GIMPLE_COND) 3798 continue; 3799 ccode = gimple_cond_code (stmt); 3800 rhs1 = gimple_cond_lhs (stmt); 3801 rhs2 = gimple_cond_rhs (stmt); 3802 } 3803 3804 if (TREE_CODE (rhs1) != SSA_NAME 3805 || rhs2 == NULL_TREE 3806 || TREE_CODE (rhs2) != SSA_NAME) 3807 continue; 3808 3809 switch (ccode) 3810 { 3811 case GT_EXPR: 3812 case GE_EXPR: 3813 case LT_EXPR: 3814 case LE_EXPR: 3815 break; 3816 default: 3817 continue; 3818 } 3819 if (in_p) 3820 ccode = invert_tree_comparison (ccode, false); 3821 switch (ccode) 3822 { 3823 case GT_EXPR: 3824 case GE_EXPR: 3825 std::swap (rhs1, rhs2); 3826 ccode = swap_tree_comparison (ccode); 3827 break; 3828 case LT_EXPR: 3829 case LE_EXPR: 3830 break; 3831 default: 3832 gcc_unreachable (); 3833 } 3834 3835 int *idx = map->get (rhs1); 3836 if (idx == NULL) 3837 continue; 3838 3839 /* maybe_optimize_range_tests allows statements without side-effects 3840 in the basic blocks as long as they are consumed in the same bb. 3841 Make sure rhs2's def stmt is not among them, otherwise we can't 3842 use safely get_nonzero_bits on it. E.g. in: 3843 # RANGE [-83, 1] NONZERO 173 3844 # k_32 = PHI <k_47(13), k_12(9)> 3845 ... 
3846 if (k_32 >= 0) 3847 goto <bb 5>; [26.46%] 3848 else 3849 goto <bb 9>; [73.54%] 3850 3851 <bb 5> [local count: 140323371]: 3852 # RANGE [0, 1] NONZERO 1 3853 _5 = (int) k_32; 3854 # RANGE [0, 4] NONZERO 4 3855 _21 = _5 << 2; 3856 # RANGE [0, 4] NONZERO 4 3857 iftmp.0_44 = (char) _21; 3858 if (k_32 < iftmp.0_44) 3859 goto <bb 6>; [84.48%] 3860 else 3861 goto <bb 9>; [15.52%] 3862 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that 3863 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44 3864 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute 3865 those stmts even for negative k_32 and the value ranges would be no 3866 longer guaranteed and so the optimization would be invalid. */ 3867 while (opcode == ERROR_MARK) 3868 { 3869 gimple *g = SSA_NAME_DEF_STMT (rhs2); 3870 basic_block bb2 = gimple_bb (g); 3871 if (bb2 3872 && bb2 != first_bb 3873 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb)) 3874 { 3875 /* As an exception, handle a few common cases. */ 3876 if (gimple_assign_cast_p (g) 3877 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g)))) 3878 { 3879 tree op0 = gimple_assign_rhs1 (g); 3880 if (TYPE_UNSIGNED (TREE_TYPE (op0)) 3881 && (TYPE_PRECISION (TREE_TYPE (rhs2)) 3882 > TYPE_PRECISION (TREE_TYPE (op0)))) 3883 /* Zero-extension is always ok. */ 3884 break; 3885 else if (TYPE_PRECISION (TREE_TYPE (rhs2)) 3886 == TYPE_PRECISION (TREE_TYPE (op0)) 3887 && TREE_CODE (op0) == SSA_NAME) 3888 { 3889 /* Cast from signed to unsigned or vice versa. Retry 3890 with the op0 as new rhs2. */ 3891 rhs2 = op0; 3892 continue; 3893 } 3894 } 3895 else if (is_gimple_assign (g) 3896 && gimple_assign_rhs_code (g) == BIT_AND_EXPR 3897 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST 3898 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g)))) 3899 /* Masking with INTEGER_CST with MSB clear is always ok 3900 too. */ 3901 break; 3902 rhs2 = NULL_TREE; 3903 } 3904 break; 3905 } 3906 if (rhs2 == NULL_TREE) 3907 continue; 3908 3909 wide_int nz = get_nonzero_bits (rhs2); 3910 if (wi::neg_p (nz)) 3911 continue; 3912 3913 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0 3914 and RHS2 is known to be RHS2 >= 0. 
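   In source terms the transformation is (illustrative sketch):
     if (a >= 0 && a < b)	with signed a, b and b known >= 0
   becomes
     if ((unsigned) a < (unsigned) b)
   which holds because a negative a maps to a large unsigned value
   with the sign bit set, and such a value can never be below an
   unsigned b whose sign bit is clear.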
*/ 3915 tree utype = unsigned_type_for (TREE_TYPE (rhs1)); 3916 3917 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; 3918 if ((ranges[*idx].strict_overflow_p 3919 || ranges[i].strict_overflow_p) 3920 && issue_strict_overflow_warning (wc)) 3921 warning_at (gimple_location (stmt), OPT_Wstrict_overflow, 3922 "assuming signed overflow does not occur " 3923 "when simplifying range test"); 3924 3925 if (dump_file && (dump_flags & TDF_DETAILS)) 3926 { 3927 struct range_entry *r = &ranges[*idx]; 3928 fprintf (dump_file, "Optimizing range test "); 3929 print_generic_expr (dump_file, r->exp); 3930 fprintf (dump_file, " +["); 3931 print_generic_expr (dump_file, r->low); 3932 fprintf (dump_file, ", "); 3933 print_generic_expr (dump_file, r->high); 3934 fprintf (dump_file, "] and comparison "); 3935 print_generic_expr (dump_file, rhs1); 3936 fprintf (dump_file, " %s ", op_symbol_code (ccode)); 3937 print_generic_expr (dump_file, rhs2); 3938 fprintf (dump_file, "\n into ("); 3939 print_generic_expr (dump_file, utype); 3940 fprintf (dump_file, ") "); 3941 print_generic_expr (dump_file, rhs1); 3942 fprintf (dump_file, " %s (", op_symbol_code (ccode)); 3943 print_generic_expr (dump_file, utype); 3944 fprintf (dump_file, ") "); 3945 print_generic_expr (dump_file, rhs2); 3946 fprintf (dump_file, "\n"); 3947 } 3948 3949 operand_entry *oe = (*ops)[ranges[i].idx]; 3950 ranges[i].in_p = 0; 3951 if (opcode == BIT_IOR_EXPR 3952 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 3953 { 3954 ranges[i].in_p = 1; 3955 ccode = invert_tree_comparison (ccode, false); 3956 } 3957 3958 unsigned int uid = gimple_uid (stmt); 3959 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 3960 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1); 3961 gimple_set_uid (g, uid); 3962 rhs1 = gimple_assign_lhs (g); 3963 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3964 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2))) 3965 { 3966 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2); 3967 gimple_set_uid (g, uid); 3968 rhs2 = gimple_assign_lhs (g); 3969 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3970 } 3971 if (tree_swap_operands_p (rhs1, rhs2)) 3972 { 3973 std::swap (rhs1, rhs2); 3974 ccode = swap_tree_comparison (ccode); 3975 } 3976 if (gimple_code (stmt) == GIMPLE_COND) 3977 { 3978 gcond *c = as_a <gcond *> (stmt); 3979 gimple_cond_set_code (c, ccode); 3980 gimple_cond_set_lhs (c, rhs1); 3981 gimple_cond_set_rhs (c, rhs2); 3982 update_stmt (stmt); 3983 } 3984 else 3985 { 3986 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node; 3987 if (!INTEGRAL_TYPE_P (ctype) 3988 || (TREE_CODE (ctype) != BOOLEAN_TYPE 3989 && TYPE_PRECISION (ctype) != 1)) 3990 ctype = boolean_type_node; 3991 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2); 3992 gimple_set_uid (g, uid); 3993 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3994 if (oe->op && ctype != TREE_TYPE (oe->op)) 3995 { 3996 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)), 3997 NOP_EXPR, gimple_assign_lhs (g)); 3998 gimple_set_uid (g, uid); 3999 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4000 } 4001 ranges[i].exp = gimple_assign_lhs (g); 4002 oe->op = ranges[i].exp; 4003 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp)); 4004 ranges[i].high = ranges[i].low; 4005 } 4006 ranges[i].strict_overflow_p = false; 4007 oe = (*ops)[ranges[*idx].idx]; 4008 /* Now change all the other range test immediate uses, so that 4009 those tests will be optimized away. 
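   E.g. (added sketch) for a combined test
     _1 = a >= 0;  _2 = a < b;  _3 = _1 & _2;
   once _2 has been rewritten to the unsigned comparison, the operand
   entry for the a >= 0 test is either dropped from the operand
   vector, or in the inter-bb case replaced by the neutral constant
   of the combining operation (1 for AND, 0 for OR), so the redundant
   test goes away.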
*/ 4010 if (opcode == ERROR_MARK) 4011 { 4012 if (oe->op) 4013 oe->op = build_int_cst (TREE_TYPE (oe->op), 4014 oe->rank == BIT_IOR_EXPR ? 0 : 1); 4015 else 4016 oe->op = (oe->rank == BIT_IOR_EXPR 4017 ? boolean_false_node : boolean_true_node); 4018 } 4019 else 4020 oe->op = error_mark_node; 4021 ranges[*idx].exp = NULL_TREE; 4022 ranges[*idx].low = NULL_TREE; 4023 ranges[*idx].high = NULL_TREE; 4024 any_changes = true; 4025 } 4026 4027 delete map; 4028 return any_changes; 4029 } 4030 4031 /* Optimize range tests, similarly how fold_range_test optimizes 4032 it on trees. The tree code for the binary 4033 operation between all the operands is OPCODE. 4034 If OPCODE is ERROR_MARK, optimize_range_tests is called from within 4035 maybe_optimize_range_tests for inter-bb range optimization. 4036 In that case if oe->op is NULL, oe->id is bb->index whose 4037 GIMPLE_COND is && or ||ed into the test, and oe->rank says 4038 the actual opcode. 4039 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */ 4040 4041 static bool 4042 optimize_range_tests (enum tree_code opcode, 4043 vec<operand_entry *> *ops, basic_block first_bb) 4044 { 4045 unsigned int length = ops->length (), i, j, first; 4046 operand_entry *oe; 4047 struct range_entry *ranges; 4048 bool any_changes = false; 4049 4050 if (length == 1) 4051 return false; 4052 4053 ranges = XNEWVEC (struct range_entry, length); 4054 for (i = 0; i < length; i++) 4055 { 4056 oe = (*ops)[i]; 4057 ranges[i].idx = i; 4058 init_range_entry (ranges + i, oe->op, 4059 oe->op 4060 ? NULL 4061 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id))); 4062 /* For | invert it now, we will invert it again before emitting 4063 the optimized expression. */ 4064 if (opcode == BIT_IOR_EXPR 4065 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) 4066 ranges[i].in_p = !ranges[i].in_p; 4067 } 4068 4069 qsort (ranges, length, sizeof (*ranges), range_entry_cmp); 4070 for (i = 0; i < length; i++) 4071 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) 4072 break; 4073 4074 /* Try to merge ranges. */ 4075 for (first = i; i < length; i++) 4076 { 4077 tree low = ranges[i].low; 4078 tree high = ranges[i].high; 4079 int in_p = ranges[i].in_p; 4080 bool strict_overflow_p = ranges[i].strict_overflow_p; 4081 int update_fail_count = 0; 4082 4083 for (j = i + 1; j < length; j++) 4084 { 4085 if (ranges[i].exp != ranges[j].exp) 4086 break; 4087 if (!merge_ranges (&in_p, &low, &high, in_p, low, high, 4088 ranges[j].in_p, ranges[j].low, ranges[j].high)) 4089 break; 4090 strict_overflow_p |= ranges[j].strict_overflow_p; 4091 } 4092 4093 if (j == i + 1) 4094 continue; 4095 4096 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1, 4097 opcode, ops, ranges[i].exp, NULL, in_p, 4098 low, high, strict_overflow_p)) 4099 { 4100 i = j - 1; 4101 any_changes = true; 4102 } 4103 /* Avoid quadratic complexity if all merge_ranges calls would succeed, 4104 while update_range_test would fail. 
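   Once 64 such failures have been seen for a group of mergeable
   ranges, simply skip past the whole group instead of retrying the
   remaining overlapping subranges one at a time.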
*/ 4105 else if (update_fail_count == 64) 4106 i = j - 1; 4107 else 4108 ++update_fail_count; 4109 } 4110 4111 any_changes |= optimize_range_tests_1 (opcode, first, length, true, 4112 ops, ranges); 4113 4114 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2) 4115 any_changes |= optimize_range_tests_1 (opcode, first, length, false, 4116 ops, ranges); 4117 if (lshift_cheap_p (optimize_function_for_speed_p (cfun))) 4118 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length, 4119 ops, ranges); 4120 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops, 4121 ranges, first_bb); 4122 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length, 4123 ops, ranges); 4124 4125 if (any_changes && opcode != ERROR_MARK) 4126 { 4127 j = 0; 4128 FOR_EACH_VEC_ELT (*ops, i, oe) 4129 { 4130 if (oe->op == error_mark_node) 4131 continue; 4132 else if (i != j) 4133 (*ops)[j] = oe; 4134 j++; 4135 } 4136 ops->truncate (j); 4137 } 4138 4139 XDELETEVEC (ranges); 4140 return any_changes; 4141 } 4142 4143 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize 4144 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure, 4145 otherwise the comparison code. TYPE is a return value that is set 4146 to type of comparison. */ 4147 4148 static tree_code 4149 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type, 4150 tree *lhs, tree *rhs, gassign **vcond) 4151 { 4152 if (TREE_CODE (var) != SSA_NAME) 4153 return ERROR_MARK; 4154 4155 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var)); 4156 if (stmt == NULL) 4157 return ERROR_MARK; 4158 if (vcond) 4159 *vcond = stmt; 4160 4161 /* ??? If we start creating more COND_EXPR, we could perform 4162 this same optimization with them. For now, simplify. */ 4163 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR) 4164 return ERROR_MARK; 4165 4166 tree cond = gimple_assign_rhs1 (stmt); 4167 tree_code cmp = TREE_CODE (cond); 4168 if (cmp != SSA_NAME) 4169 return ERROR_MARK; 4170 4171 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond)); 4172 if (assign == NULL 4173 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison) 4174 return ERROR_MARK; 4175 4176 cmp = gimple_assign_rhs_code (assign); 4177 if (lhs) 4178 *lhs = gimple_assign_rhs1 (assign); 4179 if (rhs) 4180 *rhs = gimple_assign_rhs2 (assign); 4181 4182 /* ??? For now, allow only canonical true and false result vectors. 4183 We could expand this to other constants should the need arise, 4184 but at the moment we don't create them. */ 4185 tree t = gimple_assign_rhs2 (stmt); 4186 tree f = gimple_assign_rhs3 (stmt); 4187 bool inv; 4188 if (integer_all_onesp (t)) 4189 inv = false; 4190 else if (integer_all_onesp (f)) 4191 { 4192 cmp = invert_tree_comparison (cmp, false); 4193 inv = true; 4194 } 4195 else 4196 return ERROR_MARK; 4197 if (!integer_zerop (f)) 4198 return ERROR_MARK; 4199 4200 /* Success! */ 4201 if (rets) 4202 *rets = assign; 4203 if (reti) 4204 *reti = inv; 4205 if (type) 4206 *type = TREE_TYPE (cond); 4207 return cmp; 4208 } 4209 4210 /* Optimize the condition of VEC_COND_EXPRs which have been combined 4211 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). 
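   For example (an illustrative sketch, not taken from real IL):
     _c1 = x < y;
     v1  = VEC_COND_EXPR <_c1, { -1, ... }, { 0, ... }>;
     _c2 = x <= y;
     v2  = VEC_COND_EXPR <_c2, { -1, ... }, { 0, ... }>;
     r   = v1 & v2;
   If maybe_fold_and_comparisons (or maybe_fold_or_comparisons for
   BIT_IOR_EXPR) can fold the two comparisons into one, the folded
   comparison becomes the condition of the first VEC_COND_EXPR and
   the second operand entry is dropped from the operand list.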
*/ 4212 4213 static bool 4214 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops) 4215 { 4216 unsigned int length = ops->length (), i, j; 4217 bool any_changes = false; 4218 4219 if (length == 1) 4220 return false; 4221 4222 for (i = 0; i < length; ++i) 4223 { 4224 tree elt0 = (*ops)[i]->op; 4225 4226 gassign *stmt0, *vcond0; 4227 bool invert; 4228 tree type, lhs0, rhs0; 4229 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0, 4230 &rhs0, &vcond0); 4231 if (cmp0 == ERROR_MARK) 4232 continue; 4233 4234 for (j = i + 1; j < length; ++j) 4235 { 4236 tree &elt1 = (*ops)[j]->op; 4237 4238 gassign *stmt1, *vcond1; 4239 tree lhs1, rhs1; 4240 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1, 4241 &rhs1, &vcond1); 4242 if (cmp1 == ERROR_MARK) 4243 continue; 4244 4245 tree comb; 4246 if (opcode == BIT_AND_EXPR) 4247 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0, 4248 cmp1, lhs1, rhs1); 4249 else if (opcode == BIT_IOR_EXPR) 4250 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0, 4251 cmp1, lhs1, rhs1); 4252 else 4253 gcc_unreachable (); 4254 if (comb == NULL) 4255 continue; 4256 4257 /* Success! */ 4258 if (dump_file && (dump_flags & TDF_DETAILS)) 4259 { 4260 fprintf (dump_file, "Transforming "); 4261 print_generic_expr (dump_file, gimple_assign_lhs (stmt0)); 4262 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|'); 4263 print_generic_expr (dump_file, gimple_assign_lhs (stmt1)); 4264 fprintf (dump_file, " into "); 4265 print_generic_expr (dump_file, comb); 4266 fputc ('\n', dump_file); 4267 } 4268 4269 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0); 4270 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE, 4271 true, GSI_SAME_STMT); 4272 if (invert) 4273 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0), 4274 gimple_assign_rhs3_ptr (vcond0)); 4275 gimple_assign_set_rhs1 (vcond0, exp); 4276 update_stmt (vcond0); 4277 4278 elt1 = error_mark_node; 4279 any_changes = true; 4280 } 4281 } 4282 4283 if (any_changes) 4284 { 4285 operand_entry *oe; 4286 j = 0; 4287 FOR_EACH_VEC_ELT (*ops, i, oe) 4288 { 4289 if (oe->op == error_mark_node) 4290 continue; 4291 else if (i != j) 4292 (*ops)[j] = oe; 4293 j++; 4294 } 4295 ops->truncate (j); 4296 } 4297 4298 return any_changes; 4299 } 4300 4301 /* Return true if STMT is a cast like: 4302 <bb N>: 4303 ... 4304 _123 = (int) _234; 4305 4306 <bb M>: 4307 # _345 = PHI <_123(N), 1(...), 1(...)> 4308 where _234 has bool type, _123 has single use and 4309 bb N has a single successor M. This is commonly used in 4310 the last block of a range test. 4311 4312 Also Return true if STMT is tcc_compare like: 4313 <bb N>: 4314 ... 4315 _234 = a_2(D) == 2; 4316 4317 <bb M>: 4318 # _345 = PHI <_234(N), 1(...), 1(...)> 4319 _346 = (int) _345; 4320 where _234 has booltype, single use and 4321 bb N has a single successor M. This is commonly used in 4322 the last block of a range test. 
*/ 4323 4324 static bool 4325 final_range_test_p (gimple *stmt) 4326 { 4327 basic_block bb, rhs_bb, lhs_bb; 4328 edge e; 4329 tree lhs, rhs; 4330 use_operand_p use_p; 4331 gimple *use_stmt; 4332 4333 if (!gimple_assign_cast_p (stmt) 4334 && (!is_gimple_assign (stmt) 4335 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 4336 != tcc_comparison))) 4337 return false; 4338 bb = gimple_bb (stmt); 4339 if (!single_succ_p (bb)) 4340 return false; 4341 e = single_succ_edge (bb); 4342 if (e->flags & EDGE_COMPLEX) 4343 return false; 4344 4345 lhs = gimple_assign_lhs (stmt); 4346 rhs = gimple_assign_rhs1 (stmt); 4347 if (gimple_assign_cast_p (stmt) 4348 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 4349 || TREE_CODE (rhs) != SSA_NAME 4350 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)) 4351 return false; 4352 4353 if (!gimple_assign_cast_p (stmt) 4354 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)) 4355 return false; 4356 4357 /* Test whether lhs is consumed only by a PHI in the only successor bb. */ 4358 if (!single_imm_use (lhs, &use_p, &use_stmt)) 4359 return false; 4360 4361 if (gimple_code (use_stmt) != GIMPLE_PHI 4362 || gimple_bb (use_stmt) != e->dest) 4363 return false; 4364 4365 /* And that the rhs is defined in the same loop. */ 4366 if (gimple_assign_cast_p (stmt)) 4367 { 4368 if (TREE_CODE (rhs) != SSA_NAME 4369 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs))) 4370 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb)) 4371 return false; 4372 } 4373 else 4374 { 4375 if (TREE_CODE (lhs) != SSA_NAME 4376 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs))) 4377 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb)) 4378 return false; 4379 } 4380 4381 return true; 4382 } 4383 4384 /* Return true if BB is suitable basic block for inter-bb range test 4385 optimization. If BACKWARD is true, BB should be the only predecessor 4386 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine, 4387 or compared with to find a common basic block to which all conditions 4388 branch to if true resp. false. If BACKWARD is false, TEST_BB should 4389 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if 4390 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB 4391 block points to an empty block that falls through into *OTHER_BB and 4392 the phi args match that path. */ 4393 4394 static bool 4395 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb, 4396 bool *test_swapped_p, bool backward) 4397 { 4398 edge_iterator ei, ei2; 4399 edge e, e2; 4400 gimple *stmt; 4401 gphi_iterator gsi; 4402 bool other_edge_seen = false; 4403 bool is_cond; 4404 4405 if (test_bb == bb) 4406 return false; 4407 /* Check last stmt first. */ 4408 stmt = last_stmt (bb); 4409 if (stmt == NULL 4410 || (gimple_code (stmt) != GIMPLE_COND 4411 && (backward || !final_range_test_p (stmt))) 4412 || gimple_visited_p (stmt) 4413 || stmt_could_throw_p (cfun, stmt) 4414 || *other_bb == bb) 4415 return false; 4416 is_cond = gimple_code (stmt) == GIMPLE_COND; 4417 if (is_cond) 4418 { 4419 /* If last stmt is GIMPLE_COND, verify that one of the succ edges 4420 goes to the next bb (if BACKWARD, it is TEST_BB), and the other 4421 to *OTHER_BB (if not set yet, try to find it out). 
*/ 4422 if (EDGE_COUNT (bb->succs) != 2) 4423 return false; 4424 FOR_EACH_EDGE (e, ei, bb->succs) 4425 { 4426 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 4427 return false; 4428 if (e->dest == test_bb) 4429 { 4430 if (backward) 4431 continue; 4432 else 4433 return false; 4434 } 4435 if (e->dest == bb) 4436 return false; 4437 if (*other_bb == NULL) 4438 { 4439 FOR_EACH_EDGE (e2, ei2, test_bb->succs) 4440 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) 4441 return false; 4442 else if (e->dest == e2->dest) 4443 *other_bb = e->dest; 4444 if (*other_bb == NULL) 4445 return false; 4446 } 4447 if (e->dest == *other_bb) 4448 other_edge_seen = true; 4449 else if (backward) 4450 return false; 4451 } 4452 if (*other_bb == NULL || !other_edge_seen) 4453 return false; 4454 } 4455 else if (single_succ (bb) != *other_bb) 4456 return false; 4457 4458 /* Now check all PHIs of *OTHER_BB. */ 4459 e = find_edge (bb, *other_bb); 4460 e2 = find_edge (test_bb, *other_bb); 4461 retry:; 4462 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) 4463 { 4464 gphi *phi = gsi.phi (); 4465 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments 4466 corresponding to BB and TEST_BB predecessor must be the same. */ 4467 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx), 4468 gimple_phi_arg_def (phi, e2->dest_idx), 0)) 4469 { 4470 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND, 4471 one of the PHIs should have the lhs of the last stmt in 4472 that block as PHI arg and that PHI should have 0 or 1 4473 corresponding to it in all other range test basic blocks 4474 considered. */ 4475 if (!is_cond) 4476 { 4477 if (gimple_phi_arg_def (phi, e->dest_idx) 4478 == gimple_assign_lhs (stmt) 4479 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx)) 4480 || integer_onep (gimple_phi_arg_def (phi, 4481 e2->dest_idx)))) 4482 continue; 4483 } 4484 else 4485 { 4486 gimple *test_last = last_stmt (test_bb); 4487 if (gimple_code (test_last) == GIMPLE_COND) 4488 { 4489 if (backward ? e2->src != test_bb : e->src != bb) 4490 return false; 4491 4492 /* For last_bb, handle also: 4493 if (x_3(D) == 3) 4494 goto <bb 6>; [34.00%] 4495 else 4496 goto <bb 7>; [66.00%] 4497 4498 <bb 6> [local count: 79512730]: 4499 4500 <bb 7> [local count: 1073741824]: 4501 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)> 4502 where bb 7 is *OTHER_BB, but the PHI values from the 4503 earlier bbs match the path through the empty bb 4504 in between. */ 4505 edge e3; 4506 if (backward) 4507 e3 = EDGE_SUCC (test_bb, 4508 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0); 4509 else 4510 e3 = EDGE_SUCC (bb, 4511 e == EDGE_SUCC (bb, 0) ? 
1 : 0); 4512 if (empty_block_p (e3->dest) 4513 && single_succ_p (e3->dest) 4514 && single_succ (e3->dest) == *other_bb 4515 && single_pred_p (e3->dest) 4516 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU) 4517 { 4518 if (backward) 4519 e2 = single_succ_edge (e3->dest); 4520 else 4521 e = single_succ_edge (e3->dest); 4522 if (test_swapped_p) 4523 *test_swapped_p = true; 4524 goto retry; 4525 } 4526 } 4527 else if (gimple_phi_arg_def (phi, e2->dest_idx) 4528 == gimple_assign_lhs (test_last) 4529 && (integer_zerop (gimple_phi_arg_def (phi, 4530 e->dest_idx)) 4531 || integer_onep (gimple_phi_arg_def (phi, 4532 e->dest_idx)))) 4533 continue; 4534 } 4535 4536 return false; 4537 } 4538 } 4539 return true; 4540 } 4541 4542 /* Return true if BB doesn't have side-effects that would disallow 4543 range test optimization, all SSA_NAMEs set in the bb are consumed 4544 in the bb and there are no PHIs. */ 4545 4546 bool 4547 no_side_effect_bb (basic_block bb) 4548 { 4549 gimple_stmt_iterator gsi; 4550 gimple *last; 4551 4552 if (!gimple_seq_empty_p (phi_nodes (bb))) 4553 return false; 4554 last = last_stmt (bb); 4555 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 4556 { 4557 gimple *stmt = gsi_stmt (gsi); 4558 tree lhs; 4559 imm_use_iterator imm_iter; 4560 use_operand_p use_p; 4561 4562 if (is_gimple_debug (stmt)) 4563 continue; 4564 if (gimple_has_side_effects (stmt)) 4565 return false; 4566 if (stmt == last) 4567 return true; 4568 if (!is_gimple_assign (stmt)) 4569 return false; 4570 lhs = gimple_assign_lhs (stmt); 4571 if (TREE_CODE (lhs) != SSA_NAME) 4572 return false; 4573 if (gimple_assign_rhs_could_trap_p (stmt)) 4574 return false; 4575 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) 4576 { 4577 gimple *use_stmt = USE_STMT (use_p); 4578 if (is_gimple_debug (use_stmt)) 4579 continue; 4580 if (gimple_bb (use_stmt) != bb) 4581 return false; 4582 } 4583 } 4584 return false; 4585 } 4586 4587 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, 4588 return true and fill in *OPS recursively. */ 4589 4590 static bool 4591 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops, 4592 class loop *loop) 4593 { 4594 gimple *stmt = SSA_NAME_DEF_STMT (var); 4595 tree rhs[2]; 4596 int i; 4597 4598 if (!is_reassociable_op (stmt, code, loop)) 4599 return false; 4600 4601 rhs[0] = gimple_assign_rhs1 (stmt); 4602 rhs[1] = gimple_assign_rhs2 (stmt); 4603 gimple_set_visited (stmt, true); 4604 for (i = 0; i < 2; i++) 4605 if (TREE_CODE (rhs[i]) == SSA_NAME 4606 && !get_ops (rhs[i], code, ops, loop) 4607 && has_single_use (rhs[i])) 4608 { 4609 operand_entry *oe = operand_entry_pool.allocate (); 4610 4611 oe->op = rhs[i]; 4612 oe->rank = code; 4613 oe->id = 0; 4614 oe->count = 1; 4615 oe->stmt_to_insert = NULL; 4616 ops->safe_push (oe); 4617 } 4618 return true; 4619 } 4620 4621 /* Find the ops that were added by get_ops starting from VAR, see if 4622 they were changed during update_range_test and if yes, create new 4623 stmts. 
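   E.g. (added sketch) if VAR is _7 defined as _7 = _3 | _5 and the
   range test computing _3 was replaced during update_range_test, the
   corresponding entry in OPS now holds the new SSA_NAME, so a fresh
   statement computing new_3 | _5 is built and its lhs returned
   instead of _7.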
*/ 4624 4625 static tree 4626 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops, 4627 unsigned int *pidx, class loop *loop) 4628 { 4629 gimple *stmt = SSA_NAME_DEF_STMT (var); 4630 tree rhs[4]; 4631 int i; 4632 4633 if (!is_reassociable_op (stmt, code, loop)) 4634 return NULL; 4635 4636 rhs[0] = gimple_assign_rhs1 (stmt); 4637 rhs[1] = gimple_assign_rhs2 (stmt); 4638 rhs[2] = rhs[0]; 4639 rhs[3] = rhs[1]; 4640 for (i = 0; i < 2; i++) 4641 if (TREE_CODE (rhs[i]) == SSA_NAME) 4642 { 4643 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop); 4644 if (rhs[2 + i] == NULL_TREE) 4645 { 4646 if (has_single_use (rhs[i])) 4647 rhs[2 + i] = ops[(*pidx)++]->op; 4648 else 4649 rhs[2 + i] = rhs[i]; 4650 } 4651 } 4652 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1]) 4653 && (rhs[2] != rhs[1] || rhs[3] != rhs[0])) 4654 { 4655 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 4656 var = make_ssa_name (TREE_TYPE (var)); 4657 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt), 4658 rhs[2], rhs[3]); 4659 gimple_set_uid (g, gimple_uid (stmt)); 4660 gimple_set_visited (g, true); 4661 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4662 gimple_stmt_iterator gsi2 = gsi_for_stmt (g); 4663 if (fold_stmt_inplace (&gsi2)) 4664 update_stmt (g); 4665 } 4666 return var; 4667 } 4668 4669 /* Structure to track the initial value passed to get_ops and 4670 the range in the ops vector for each basic block. */ 4671 4672 struct inter_bb_range_test_entry 4673 { 4674 tree op; 4675 unsigned int first_idx, last_idx; 4676 }; 4677 4678 /* Inter-bb range test optimization. 4679 4680 Returns TRUE if a gimple conditional is optimized to a true/false, 4681 otherwise return FALSE. 4682 4683 This indicates to the caller that it should run a CFG cleanup pass 4684 once reassociation is completed. */ 4685 4686 static bool 4687 maybe_optimize_range_tests (gimple *stmt) 4688 { 4689 basic_block first_bb = gimple_bb (stmt); 4690 basic_block last_bb = first_bb; 4691 basic_block other_bb = NULL; 4692 basic_block bb; 4693 edge_iterator ei; 4694 edge e; 4695 auto_vec<operand_entry *> ops; 4696 auto_vec<inter_bb_range_test_entry> bbinfo; 4697 bool any_changes = false; 4698 bool cfg_cleanup_needed = false; 4699 4700 /* Consider only basic blocks that end with GIMPLE_COND or 4701 a cast statement satisfying final_range_test_p. All 4702 but the last bb in the first_bb .. last_bb range 4703 should end with GIMPLE_COND. */ 4704 if (gimple_code (stmt) == GIMPLE_COND) 4705 { 4706 if (EDGE_COUNT (first_bb->succs) != 2) 4707 return cfg_cleanup_needed; 4708 } 4709 else if (final_range_test_p (stmt)) 4710 other_bb = single_succ (first_bb); 4711 else 4712 return cfg_cleanup_needed; 4713 4714 if (stmt_could_throw_p (cfun, stmt)) 4715 return cfg_cleanup_needed; 4716 4717 /* As relative ordering of post-dominator sons isn't fixed, 4718 maybe_optimize_range_tests can be called first on any 4719 bb in the range we want to optimize. So, start searching 4720 backwards, if first_bb can be set to a predecessor. */ 4721 while (single_pred_p (first_bb)) 4722 { 4723 basic_block pred_bb = single_pred (first_bb); 4724 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true)) 4725 break; 4726 if (!no_side_effect_bb (first_bb)) 4727 break; 4728 first_bb = pred_bb; 4729 } 4730 /* If first_bb is last_bb, other_bb hasn't been computed yet. 4731 Before starting forward search in last_bb successors, find 4732 out the other_bb. 
*/ 4733 if (first_bb == last_bb) 4734 { 4735 other_bb = NULL; 4736 /* As non-GIMPLE_COND last stmt always terminates the range, 4737 if forward search didn't discover anything, just give up. */ 4738 if (gimple_code (stmt) != GIMPLE_COND) 4739 return cfg_cleanup_needed; 4740 /* Look at both successors. Either it ends with a GIMPLE_COND 4741 and satisfies suitable_cond_bb, or ends with a cast and 4742 other_bb is that cast's successor. */ 4743 FOR_EACH_EDGE (e, ei, first_bb->succs) 4744 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)) 4745 || e->dest == first_bb) 4746 return cfg_cleanup_needed; 4747 else if (single_pred_p (e->dest)) 4748 { 4749 stmt = last_stmt (e->dest); 4750 if (stmt 4751 && gimple_code (stmt) == GIMPLE_COND 4752 && EDGE_COUNT (e->dest->succs) == 2) 4753 { 4754 if (suitable_cond_bb (first_bb, e->dest, &other_bb, 4755 NULL, true)) 4756 break; 4757 else 4758 other_bb = NULL; 4759 } 4760 else if (stmt 4761 && final_range_test_p (stmt) 4762 && find_edge (first_bb, single_succ (e->dest))) 4763 { 4764 other_bb = single_succ (e->dest); 4765 if (other_bb == first_bb) 4766 other_bb = NULL; 4767 } 4768 } 4769 if (other_bb == NULL) 4770 return cfg_cleanup_needed; 4771 } 4772 /* Now do the forward search, moving last_bb to successor bbs 4773 that aren't other_bb. */ 4774 while (EDGE_COUNT (last_bb->succs) == 2) 4775 { 4776 FOR_EACH_EDGE (e, ei, last_bb->succs) 4777 if (e->dest != other_bb) 4778 break; 4779 if (e == NULL) 4780 break; 4781 if (!single_pred_p (e->dest)) 4782 break; 4783 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false)) 4784 break; 4785 if (!no_side_effect_bb (e->dest)) 4786 break; 4787 last_bb = e->dest; 4788 } 4789 if (first_bb == last_bb) 4790 return cfg_cleanup_needed; 4791 /* Here basic blocks first_bb through last_bb's predecessor 4792 end with GIMPLE_COND, all of them have one of the edges to 4793 other_bb and another to another block in the range, 4794 all blocks except first_bb don't have side-effects and 4795 last_bb ends with either GIMPLE_COND, or cast satisfying 4796 final_range_test_p. */ 4797 for (bb = last_bb; ; bb = single_pred (bb)) 4798 { 4799 enum tree_code code; 4800 tree lhs, rhs; 4801 inter_bb_range_test_entry bb_ent; 4802 4803 bb_ent.op = NULL_TREE; 4804 bb_ent.first_idx = ops.length (); 4805 bb_ent.last_idx = bb_ent.first_idx; 4806 e = find_edge (bb, other_bb); 4807 stmt = last_stmt (bb); 4808 gimple_set_visited (stmt, true); 4809 if (gimple_code (stmt) != GIMPLE_COND) 4810 { 4811 use_operand_p use_p; 4812 gimple *phi; 4813 edge e2; 4814 unsigned int d; 4815 4816 lhs = gimple_assign_lhs (stmt); 4817 rhs = gimple_assign_rhs1 (stmt); 4818 gcc_assert (bb == last_bb); 4819 4820 /* stmt is 4821 _123 = (int) _234; 4822 OR 4823 _234 = a_2(D) == 2; 4824 4825 followed by: 4826 <bb M>: 4827 # _345 = PHI <_123(N), 1(...), 1(...)> 4828 4829 or 0 instead of 1. If it is 0, the _234 4830 range test is anded together with all the 4831 other range tests, if it is 1, it is ored with 4832 them. 
*/ 4833 single_imm_use (lhs, &use_p, &phi); 4834 gcc_assert (gimple_code (phi) == GIMPLE_PHI); 4835 e2 = find_edge (first_bb, other_bb); 4836 d = e2->dest_idx; 4837 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs); 4838 if (integer_zerop (gimple_phi_arg_def (phi, d))) 4839 code = BIT_AND_EXPR; 4840 else 4841 { 4842 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d))); 4843 code = BIT_IOR_EXPR; 4844 } 4845 4846 /* If _234 SSA_NAME_DEF_STMT is 4847 _234 = _567 | _789; 4848 (or &, corresponding to 1/0 in the phi arguments, 4849 push into ops the individual range test arguments 4850 of the bitwise or resp. and, recursively. */ 4851 if (TREE_CODE (rhs) == SSA_NAME 4852 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 4853 != tcc_comparison) 4854 && !get_ops (rhs, code, &ops, 4855 loop_containing_stmt (stmt)) 4856 && has_single_use (rhs)) 4857 { 4858 /* Otherwise, push the _234 range test itself. */ 4859 operand_entry *oe = operand_entry_pool.allocate (); 4860 4861 oe->op = rhs; 4862 oe->rank = code; 4863 oe->id = 0; 4864 oe->count = 1; 4865 oe->stmt_to_insert = NULL; 4866 ops.safe_push (oe); 4867 bb_ent.last_idx++; 4868 bb_ent.op = rhs; 4869 } 4870 else if (is_gimple_assign (stmt) 4871 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) 4872 == tcc_comparison) 4873 && !get_ops (lhs, code, &ops, 4874 loop_containing_stmt (stmt)) 4875 && has_single_use (lhs)) 4876 { 4877 operand_entry *oe = operand_entry_pool.allocate (); 4878 oe->op = lhs; 4879 oe->rank = code; 4880 oe->id = 0; 4881 oe->count = 1; 4882 ops.safe_push (oe); 4883 bb_ent.last_idx++; 4884 bb_ent.op = lhs; 4885 } 4886 else 4887 { 4888 bb_ent.last_idx = ops.length (); 4889 bb_ent.op = rhs; 4890 } 4891 bbinfo.safe_push (bb_ent); 4892 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i) 4893 ops[i]->id = bb->index; 4894 continue; 4895 } 4896 else if (bb == last_bb) 4897 { 4898 /* For last_bb, handle also: 4899 if (x_3(D) == 3) 4900 goto <bb 6>; [34.00%] 4901 else 4902 goto <bb 7>; [66.00%] 4903 4904 <bb 6> [local count: 79512730]: 4905 4906 <bb 7> [local count: 1073741824]: 4907 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)> 4908 where bb 7 is OTHER_BB, but the PHI values from the 4909 earlier bbs match the path through the empty bb 4910 in between. */ 4911 bool test_swapped_p = false; 4912 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb, 4913 &other_bb, &test_swapped_p, true); 4914 gcc_assert (ok); 4915 if (test_swapped_p) 4916 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0); 4917 } 4918 /* Otherwise stmt is GIMPLE_COND. */ 4919 code = gimple_cond_code (stmt); 4920 lhs = gimple_cond_lhs (stmt); 4921 rhs = gimple_cond_rhs (stmt); 4922 if (TREE_CODE (lhs) == SSA_NAME 4923 && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 4924 && ((code != EQ_EXPR && code != NE_EXPR) 4925 || rhs != boolean_false_node 4926 /* Either push into ops the individual bitwise 4927 or resp. and operands, depending on which 4928 edge is other_bb. */ 4929 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0) 4930 ^ (code == EQ_EXPR)) 4931 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops, 4932 loop_containing_stmt (stmt)))) 4933 { 4934 /* Or push the GIMPLE_COND stmt itself. */ 4935 operand_entry *oe = operand_entry_pool.allocate (); 4936 4937 oe->op = NULL; 4938 oe->rank = (e->flags & EDGE_TRUE_VALUE) 4939 ? BIT_IOR_EXPR : BIT_AND_EXPR; 4940 /* oe->op = NULL signs that there is no SSA_NAME 4941 for the range test, and oe->id instead is the 4942 basic block number, at which's end the GIMPLE_COND 4943 is. 
*/ 4944 oe->id = bb->index; 4945 oe->count = 1; 4946 oe->stmt_to_insert = NULL; 4947 ops.safe_push (oe); 4948 bb_ent.op = NULL; 4949 bb_ent.last_idx++; 4950 } 4951 else if (ops.length () > bb_ent.first_idx) 4952 { 4953 bb_ent.op = lhs; 4954 bb_ent.last_idx = ops.length (); 4955 } 4956 bbinfo.safe_push (bb_ent); 4957 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i) 4958 ops[i]->id = bb->index; 4959 if (bb == first_bb) 4960 break; 4961 } 4962 if (ops.length () > 1) 4963 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb); 4964 if (any_changes) 4965 { 4966 unsigned int idx, max_idx = 0; 4967 /* update_ops relies on has_single_use predicates returning the 4968 same values as it did during get_ops earlier. Additionally it 4969 never removes statements, only adds new ones and it should walk 4970 from the single imm use and check the predicate already before 4971 making those changes. 4972 On the other side, the handling of GIMPLE_COND directly can turn 4973 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so 4974 it needs to be done in a separate loop afterwards. */ 4975 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) 4976 { 4977 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx 4978 && bbinfo[idx].op != NULL_TREE) 4979 { 4980 tree new_op; 4981 4982 max_idx = idx; 4983 stmt = last_stmt (bb); 4984 new_op = update_ops (bbinfo[idx].op, 4985 (enum tree_code) 4986 ops[bbinfo[idx].first_idx]->rank, 4987 ops, &bbinfo[idx].first_idx, 4988 loop_containing_stmt (stmt)); 4989 if (new_op == NULL_TREE) 4990 { 4991 gcc_assert (bb == last_bb); 4992 new_op = ops[bbinfo[idx].first_idx++]->op; 4993 } 4994 if (bbinfo[idx].op != new_op) 4995 { 4996 imm_use_iterator iter; 4997 use_operand_p use_p; 4998 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL; 4999 5000 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op) 5001 if (is_gimple_debug (use_stmt)) 5002 continue; 5003 else if (gimple_code (use_stmt) == GIMPLE_COND 5004 || gimple_code (use_stmt) == GIMPLE_PHI) 5005 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 5006 SET_USE (use_p, new_op); 5007 else if ((is_gimple_assign (use_stmt) 5008 && (TREE_CODE_CLASS 5009 (gimple_assign_rhs_code (use_stmt)) 5010 == tcc_comparison))) 5011 cast_or_tcc_cmp_stmt = use_stmt; 5012 else if (gimple_assign_cast_p (use_stmt)) 5013 cast_or_tcc_cmp_stmt = use_stmt; 5014 else 5015 gcc_unreachable (); 5016 5017 if (cast_or_tcc_cmp_stmt) 5018 { 5019 gcc_assert (bb == last_bb); 5020 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt); 5021 tree new_lhs = make_ssa_name (TREE_TYPE (lhs)); 5022 enum tree_code rhs_code 5023 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt) 5024 ? 
gimple_assign_rhs_code (cast_or_tcc_cmp_stmt) 5025 : CONVERT_EXPR; 5026 gassign *g; 5027 if (is_gimple_min_invariant (new_op)) 5028 { 5029 new_op = fold_convert (TREE_TYPE (lhs), new_op); 5030 g = gimple_build_assign (new_lhs, new_op); 5031 } 5032 else 5033 g = gimple_build_assign (new_lhs, rhs_code, new_op); 5034 gimple_stmt_iterator gsi 5035 = gsi_for_stmt (cast_or_tcc_cmp_stmt); 5036 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt)); 5037 gimple_set_visited (g, true); 5038 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 5039 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) 5040 if (is_gimple_debug (use_stmt)) 5041 continue; 5042 else if (gimple_code (use_stmt) == GIMPLE_COND 5043 || gimple_code (use_stmt) == GIMPLE_PHI) 5044 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) 5045 SET_USE (use_p, new_lhs); 5046 else 5047 gcc_unreachable (); 5048 } 5049 } 5050 } 5051 if (bb == first_bb) 5052 break; 5053 } 5054 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++) 5055 { 5056 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx 5057 && bbinfo[idx].op == NULL_TREE 5058 && ops[bbinfo[idx].first_idx]->op != NULL_TREE) 5059 { 5060 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb)); 5061 5062 if (idx > max_idx) 5063 max_idx = idx; 5064 5065 /* If we collapse the conditional to a true/false 5066 condition, then bubble that knowledge up to our caller. */ 5067 if (integer_zerop (ops[bbinfo[idx].first_idx]->op)) 5068 { 5069 gimple_cond_make_false (cond_stmt); 5070 cfg_cleanup_needed = true; 5071 } 5072 else if (integer_onep (ops[bbinfo[idx].first_idx]->op)) 5073 { 5074 gimple_cond_make_true (cond_stmt); 5075 cfg_cleanup_needed = true; 5076 } 5077 else 5078 { 5079 gimple_cond_set_code (cond_stmt, NE_EXPR); 5080 gimple_cond_set_lhs (cond_stmt, 5081 ops[bbinfo[idx].first_idx]->op); 5082 gimple_cond_set_rhs (cond_stmt, boolean_false_node); 5083 } 5084 update_stmt (cond_stmt); 5085 } 5086 if (bb == first_bb) 5087 break; 5088 } 5089 5090 /* The above changes could result in basic blocks after the first 5091 modified one, up to and including last_bb, to be executed even if 5092 they would not be in the original program. If the value ranges of 5093 assignment lhs' in those bbs were dependent on the conditions 5094 guarding those basic blocks which now can change, the VRs might 5095 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs 5096 are only used within the same bb, it should be not a big deal if 5097 we just reset all the VRs in those bbs. See PR68671. */ 5098 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++) 5099 reset_flow_sensitive_info_in_bb (bb); 5100 } 5101 return cfg_cleanup_needed; 5102 } 5103 5104 /* Return true if OPERAND is defined by a PHI node which uses the LHS 5105 of STMT in it's operands. This is also known as a "destructive 5106 update" operation. */ 5107 5108 static bool 5109 is_phi_for_stmt (gimple *stmt, tree operand) 5110 { 5111 gimple *def_stmt; 5112 gphi *def_phi; 5113 tree lhs; 5114 use_operand_p arg_p; 5115 ssa_op_iter i; 5116 5117 if (TREE_CODE (operand) != SSA_NAME) 5118 return false; 5119 5120 lhs = gimple_assign_lhs (stmt); 5121 5122 def_stmt = SSA_NAME_DEF_STMT (operand); 5123 def_phi = dyn_cast <gphi *> (def_stmt); 5124 if (!def_phi) 5125 return false; 5126 5127 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE) 5128 if (lhs == USE_FROM_PTR (arg_p)) 5129 return true; 5130 return false; 5131 } 5132 5133 /* Remove def stmt of VAR if VAR has zero uses and recurse 5134 on rhs1 operand if so. 
*/ 5135 5136 static void 5137 remove_visited_stmt_chain (tree var) 5138 { 5139 gimple *stmt; 5140 gimple_stmt_iterator gsi; 5141 5142 while (1) 5143 { 5144 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) 5145 return; 5146 stmt = SSA_NAME_DEF_STMT (var); 5147 if (is_gimple_assign (stmt) && gimple_visited_p (stmt)) 5148 { 5149 var = gimple_assign_rhs1 (stmt); 5150 gsi = gsi_for_stmt (stmt); 5151 reassoc_remove_stmt (&gsi); 5152 release_defs (stmt); 5153 } 5154 else 5155 return; 5156 } 5157 } 5158 5159 /* This function checks three consequtive operands in 5160 passed operands vector OPS starting from OPINDEX and 5161 swaps two operands if it is profitable for binary operation 5162 consuming OPINDEX + 1 abnd OPINDEX + 2 operands. 5163 5164 We pair ops with the same rank if possible. 5165 5166 The alternative we try is to see if STMT is a destructive 5167 update style statement, which is like: 5168 b = phi (a, ...) 5169 a = c + b; 5170 In that case, we want to use the destructive update form to 5171 expose the possible vectorizer sum reduction opportunity. 5172 In that case, the third operand will be the phi node. This 5173 check is not performed if STMT is null. 5174 5175 We could, of course, try to be better as noted above, and do a 5176 lot of work to try to find these opportunities in >3 operand 5177 cases, but it is unlikely to be worth it. */ 5178 5179 static void 5180 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops, 5181 unsigned int opindex, gimple *stmt) 5182 { 5183 operand_entry *oe1, *oe2, *oe3; 5184 5185 oe1 = ops[opindex]; 5186 oe2 = ops[opindex + 1]; 5187 oe3 = ops[opindex + 2]; 5188 5189 if ((oe1->rank == oe2->rank 5190 && oe2->rank != oe3->rank) 5191 || (stmt && is_phi_for_stmt (stmt, oe3->op) 5192 && !is_phi_for_stmt (stmt, oe1->op) 5193 && !is_phi_for_stmt (stmt, oe2->op))) 5194 std::swap (*oe1, *oe3); 5195 else if ((oe1->rank == oe3->rank 5196 && oe2->rank != oe3->rank) 5197 || (stmt && is_phi_for_stmt (stmt, oe2->op) 5198 && !is_phi_for_stmt (stmt, oe1->op) 5199 && !is_phi_for_stmt (stmt, oe3->op))) 5200 std::swap (*oe1, *oe2); 5201 } 5202 5203 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those 5204 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate 5205 whether RHS1 op RHS2 can be inserted before or needs to be inserted 5206 after the returned stmt. */ 5207 5208 static inline gimple * 5209 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before) 5210 { 5211 insert_before = true; 5212 if (TREE_CODE (rhs1) == SSA_NAME 5213 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1))) 5214 { 5215 stmt = SSA_NAME_DEF_STMT (rhs1); 5216 insert_before = false; 5217 } 5218 if (TREE_CODE (rhs2) == SSA_NAME 5219 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2))) 5220 { 5221 stmt = SSA_NAME_DEF_STMT (rhs2); 5222 insert_before = false; 5223 } 5224 return stmt; 5225 } 5226 5227 /* If the stmt that defines operand has to be inserted, insert it 5228 before the use. 
*/ 5229 static void 5230 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert) 5231 { 5232 gcc_assert (is_gimple_assign (stmt_to_insert)); 5233 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert); 5234 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert); 5235 bool insert_before; 5236 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before); 5237 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point); 5238 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point)); 5239 5240 /* If the insert point is not stmt, then insert_point would be 5241 the point where operand rhs1 or rhs2 is defined. In this case, 5242 stmt_to_insert has to be inserted afterwards. This would 5243 only happen when the stmt insertion point is flexible. */ 5244 if (insert_before) 5245 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT); 5246 else 5247 insert_stmt_after (stmt_to_insert, insert_point); 5248 } 5249 5250 5251 /* Recursively rewrite our linearized statements so that the operators 5252 match those in OPS[OPINDEX], putting the computation in rank 5253 order. Return new lhs. 5254 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in 5255 the current stmt and during recursive invocations. 5256 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in 5257 recursive invocations. */ 5258 5259 static tree 5260 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex, 5261 const vec<operand_entry *> &ops, bool changed, 5262 bool next_changed) 5263 { 5264 tree rhs1 = gimple_assign_rhs1 (stmt); 5265 tree rhs2 = gimple_assign_rhs2 (stmt); 5266 tree lhs = gimple_assign_lhs (stmt); 5267 operand_entry *oe; 5268 5269 /* The final recursion case for this function is that you have 5270 exactly two operations left. 5271 If we had exactly one op in the entire list to start with, we 5272 would have never called this function, and the tail recursion 5273 rewrites them one at a time. */ 5274 if (opindex + 2 == ops.length ()) 5275 { 5276 operand_entry *oe1, *oe2; 5277 5278 oe1 = ops[opindex]; 5279 oe2 = ops[opindex + 1]; 5280 5281 if (rhs1 != oe1->op || rhs2 != oe2->op) 5282 { 5283 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 5284 unsigned int uid = gimple_uid (stmt); 5285 5286 if (dump_file && (dump_flags & TDF_DETAILS)) 5287 { 5288 fprintf (dump_file, "Transforming "); 5289 print_gimple_stmt (dump_file, stmt, 0); 5290 } 5291 5292 /* If the stmt that defines operand has to be inserted, insert it 5293 before the use. */ 5294 if (oe1->stmt_to_insert) 5295 insert_stmt_before_use (stmt, oe1->stmt_to_insert); 5296 if (oe2->stmt_to_insert) 5297 insert_stmt_before_use (stmt, oe2->stmt_to_insert); 5298 /* Even when changed is false, reassociation could have e.g. removed 5299 some redundant operations, so unless we are just swapping the 5300 arguments or unless there is no change at all (then we just 5301 return lhs), force creation of a new SSA_NAME. 
*/ 5302 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex)) 5303 { 5304 bool insert_before; 5305 gimple *insert_point 5306 = find_insert_point (stmt, oe1->op, oe2->op, insert_before); 5307 lhs = make_ssa_name (TREE_TYPE (lhs)); 5308 stmt 5309 = gimple_build_assign (lhs, rhs_code, 5310 oe1->op, oe2->op); 5311 gimple_set_uid (stmt, uid); 5312 gimple_set_visited (stmt, true); 5313 if (insert_before) 5314 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 5315 else 5316 insert_stmt_after (stmt, insert_point); 5317 } 5318 else 5319 { 5320 bool insert_before; 5321 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op, 5322 insert_before) 5323 == stmt); 5324 gimple_assign_set_rhs1 (stmt, oe1->op); 5325 gimple_assign_set_rhs2 (stmt, oe2->op); 5326 update_stmt (stmt); 5327 } 5328 5329 if (rhs1 != oe1->op && rhs1 != oe2->op) 5330 remove_visited_stmt_chain (rhs1); 5331 5332 if (dump_file && (dump_flags & TDF_DETAILS)) 5333 { 5334 fprintf (dump_file, " into "); 5335 print_gimple_stmt (dump_file, stmt, 0); 5336 } 5337 } 5338 return lhs; 5339 } 5340 5341 /* If we hit here, we should have 3 or more ops left. */ 5342 gcc_assert (opindex + 2 < ops.length ()); 5343 5344 /* Rewrite the next operator. */ 5345 oe = ops[opindex]; 5346 5347 /* If the stmt that defines operand has to be inserted, insert it 5348 before the use. */ 5349 if (oe->stmt_to_insert) 5350 insert_stmt_before_use (stmt, oe->stmt_to_insert); 5351 5352 /* Recurse on the LHS of the binary operator, which is guaranteed to 5353 be the non-leaf side. */ 5354 tree new_rhs1 5355 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops, 5356 changed || oe->op != rhs2 || next_changed, 5357 false); 5358 5359 if (oe->op != rhs2 || new_rhs1 != rhs1) 5360 { 5361 if (dump_file && (dump_flags & TDF_DETAILS)) 5362 { 5363 fprintf (dump_file, "Transforming "); 5364 print_gimple_stmt (dump_file, stmt, 0); 5365 } 5366 5367 /* If changed is false, this is either opindex == 0 5368 or all outer rhs2's were equal to corresponding oe->op, 5369 and powi_result is NULL. 5370 That means lhs is equivalent before and after reassociation. 5371 Otherwise ensure the old lhs SSA_NAME is not reused and 5372 create a new stmt as well, so that any debug stmts will be 5373 properly adjusted. */ 5374 if (changed) 5375 { 5376 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 5377 unsigned int uid = gimple_uid (stmt); 5378 bool insert_before; 5379 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op, 5380 insert_before); 5381 5382 lhs = make_ssa_name (TREE_TYPE (lhs)); 5383 stmt = gimple_build_assign (lhs, rhs_code, 5384 new_rhs1, oe->op); 5385 gimple_set_uid (stmt, uid); 5386 gimple_set_visited (stmt, true); 5387 if (insert_before) 5388 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 5389 else 5390 insert_stmt_after (stmt, insert_point); 5391 } 5392 else 5393 { 5394 bool insert_before; 5395 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op, 5396 insert_before) 5397 == stmt); 5398 gimple_assign_set_rhs1 (stmt, new_rhs1); 5399 gimple_assign_set_rhs2 (stmt, oe->op); 5400 update_stmt (stmt); 5401 } 5402 5403 if (dump_file && (dump_flags & TDF_DETAILS)) 5404 { 5405 fprintf (dump_file, " into "); 5406 print_gimple_stmt (dump_file, stmt, 0); 5407 } 5408 } 5409 return lhs; 5410 } 5411 5412 /* Find out how many cycles we need to compute statements chain. 5413 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a 5414 maximum number of independent statements we may execute per cycle. 
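   For example (added illustration): with OPS_NUM = 10 and CPU_WIDTH
   = 2 the first phase accounts for 10 / (2 * 2) = 2 cycles and
   leaves 10 - 2 * 2 = 6 operands; 6 is not a power of two, so
   floor_log2 (6) + 1 = 3 further cycles are needed, 5 in total.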
*/ 5415 5416 static int 5417 get_required_cycles (int ops_num, int cpu_width) 5418 { 5419 int res; 5420 int elog; 5421 unsigned int rest; 5422 5423 /* While we have more than 2 * cpu_width operands 5424 we may reduce number of operands by cpu_width 5425 per cycle. */ 5426 res = ops_num / (2 * cpu_width); 5427 5428 /* Remained operands count may be reduced twice per cycle 5429 until we have only one operand. */ 5430 rest = (unsigned)(ops_num - res * cpu_width); 5431 elog = exact_log2 (rest); 5432 if (elog >= 0) 5433 res += elog; 5434 else 5435 res += floor_log2 (rest) + 1; 5436 5437 return res; 5438 } 5439 5440 /* Returns an optimal number of registers to use for computation of 5441 given statements. */ 5442 5443 static int 5444 get_reassociation_width (int ops_num, enum tree_code opc, 5445 machine_mode mode) 5446 { 5447 int param_width = param_tree_reassoc_width; 5448 int width; 5449 int width_min; 5450 int cycles_best; 5451 5452 if (param_width > 0) 5453 width = param_width; 5454 else 5455 width = targetm.sched.reassociation_width (opc, mode); 5456 5457 if (width == 1) 5458 return width; 5459 5460 /* Get the minimal time required for sequence computation. */ 5461 cycles_best = get_required_cycles (ops_num, width); 5462 5463 /* Check if we may use less width and still compute sequence for 5464 the same time. It will allow us to reduce registers usage. 5465 get_required_cycles is monotonically increasing with lower width 5466 so we can perform a binary search for the minimal width that still 5467 results in the optimal cycle count. */ 5468 width_min = 1; 5469 while (width > width_min) 5470 { 5471 int width_mid = (width + width_min) / 2; 5472 5473 if (get_required_cycles (ops_num, width_mid) == cycles_best) 5474 width = width_mid; 5475 else if (width_min < width_mid) 5476 width_min = width_mid; 5477 else 5478 break; 5479 } 5480 5481 return width; 5482 } 5483 5484 /* Recursively rewrite our linearized statements so that the operators 5485 match those in OPS[OPINDEX], putting the computation in rank 5486 order and trying to allow operations to be executed in 5487 parallel. */ 5488 5489 static void 5490 rewrite_expr_tree_parallel (gassign *stmt, int width, 5491 const vec<operand_entry *> &ops) 5492 { 5493 enum tree_code opcode = gimple_assign_rhs_code (stmt); 5494 int op_num = ops.length (); 5495 gcc_assert (op_num > 0); 5496 int stmt_num = op_num - 1; 5497 gimple **stmts = XALLOCAVEC (gimple *, stmt_num); 5498 int op_index = op_num - 1; 5499 int stmt_index = 0; 5500 int ready_stmts_end = 0; 5501 int i = 0; 5502 gimple *stmt1 = NULL, *stmt2 = NULL; 5503 tree last_rhs1 = gimple_assign_rhs1 (stmt); 5504 5505 /* We start expression rewriting from the top statements. 5506 So, in this loop we create a full list of statements 5507 we will work with. */ 5508 stmts[stmt_num - 1] = stmt; 5509 for (i = stmt_num - 2; i >= 0; i--) 5510 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1])); 5511 5512 for (i = 0; i < stmt_num; i++) 5513 { 5514 tree op1, op2; 5515 5516 /* Determine whether we should use results of 5517 already handled statements or not. */ 5518 if (ready_stmts_end == 0 5519 && (i - stmt_index >= width || op_index < 1)) 5520 ready_stmts_end = i; 5521 5522 /* Now we choose operands for the next statement. Non zero 5523 value in ready_stmts_end means here that we should use 5524 the result of already generated statements as new operand. 
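   Schematically (added sketch), for a + b + c + d with width 2 the
   first two statements are built purely from leaf operands, giving
   two independent partial sums, and only then does a statement start
   consuming the results of already emitted statements to combine
   those partial sums.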
*/ 5525 if (ready_stmts_end > 0) 5526 { 5527 op1 = gimple_assign_lhs (stmts[stmt_index++]); 5528 if (ready_stmts_end > stmt_index) 5529 op2 = gimple_assign_lhs (stmts[stmt_index++]); 5530 else if (op_index >= 0) 5531 { 5532 operand_entry *oe = ops[op_index--]; 5533 stmt2 = oe->stmt_to_insert; 5534 op2 = oe->op; 5535 } 5536 else 5537 { 5538 gcc_assert (stmt_index < i); 5539 op2 = gimple_assign_lhs (stmts[stmt_index++]); 5540 } 5541 5542 if (stmt_index >= ready_stmts_end) 5543 ready_stmts_end = 0; 5544 } 5545 else 5546 { 5547 if (op_index > 1) 5548 swap_ops_for_binary_stmt (ops, op_index - 2, NULL); 5549 operand_entry *oe2 = ops[op_index--]; 5550 operand_entry *oe1 = ops[op_index--]; 5551 op2 = oe2->op; 5552 stmt2 = oe2->stmt_to_insert; 5553 op1 = oe1->op; 5554 stmt1 = oe1->stmt_to_insert; 5555 } 5556 5557 /* If we emit the last statement then we should put 5558 operands into the last statement. It will also 5559 break the loop. */ 5560 if (op_index < 0 && stmt_index == i) 5561 i = stmt_num - 1; 5562 5563 if (dump_file && (dump_flags & TDF_DETAILS)) 5564 { 5565 fprintf (dump_file, "Transforming "); 5566 print_gimple_stmt (dump_file, stmts[i], 0); 5567 } 5568 5569 /* If the stmt that defines operand has to be inserted, insert it 5570 before the use. */ 5571 if (stmt1) 5572 insert_stmt_before_use (stmts[i], stmt1); 5573 if (stmt2) 5574 insert_stmt_before_use (stmts[i], stmt2); 5575 stmt1 = stmt2 = NULL; 5576 5577 /* We keep original statement only for the last one. All 5578 others are recreated. */ 5579 if (i == stmt_num - 1) 5580 { 5581 gimple_assign_set_rhs1 (stmts[i], op1); 5582 gimple_assign_set_rhs2 (stmts[i], op2); 5583 update_stmt (stmts[i]); 5584 } 5585 else 5586 { 5587 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); 5588 gimple_set_visited (stmts[i], true); 5589 } 5590 if (dump_file && (dump_flags & TDF_DETAILS)) 5591 { 5592 fprintf (dump_file, " into "); 5593 print_gimple_stmt (dump_file, stmts[i], 0); 5594 } 5595 } 5596 5597 remove_visited_stmt_chain (last_rhs1); 5598 } 5599 5600 /* Transform STMT, which is really (A +B) + (C + D) into the left 5601 linear form, ((A+B)+C)+D. 5602 Recurse on D if necessary. 
*/ 5603 5604 static void 5605 linearize_expr (gimple *stmt) 5606 { 5607 gimple_stmt_iterator gsi; 5608 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); 5609 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 5610 gimple *oldbinrhs = binrhs; 5611 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 5612 gimple *newbinrhs = NULL; 5613 class loop *loop = loop_containing_stmt (stmt); 5614 tree lhs = gimple_assign_lhs (stmt); 5615 5616 gcc_assert (is_reassociable_op (binlhs, rhscode, loop) 5617 && is_reassociable_op (binrhs, rhscode, loop)); 5618 5619 gsi = gsi_for_stmt (stmt); 5620 5621 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs)); 5622 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)), 5623 gimple_assign_rhs_code (binrhs), 5624 gimple_assign_lhs (binlhs), 5625 gimple_assign_rhs2 (binrhs)); 5626 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs)); 5627 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT); 5628 gimple_set_uid (binrhs, gimple_uid (stmt)); 5629 5630 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) 5631 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 5632 5633 if (dump_file && (dump_flags & TDF_DETAILS)) 5634 { 5635 fprintf (dump_file, "Linearized: "); 5636 print_gimple_stmt (dump_file, stmt, 0); 5637 } 5638 5639 reassociate_stats.linearized++; 5640 update_stmt (stmt); 5641 5642 gsi = gsi_for_stmt (oldbinrhs); 5643 reassoc_remove_stmt (&gsi); 5644 release_defs (oldbinrhs); 5645 5646 gimple_set_visited (stmt, true); 5647 gimple_set_visited (binlhs, true); 5648 gimple_set_visited (binrhs, true); 5649 5650 /* Tail recurse on the new rhs if it still needs reassociation. */ 5651 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop)) 5652 /* ??? This should probably be linearize_expr (newbinrhs) but I don't 5653 want to change the algorithm while converting to tuples. */ 5654 linearize_expr (stmt); 5655 } 5656 5657 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return 5658 it. Otherwise, return NULL. */ 5659 5660 static gimple * 5661 get_single_immediate_use (tree lhs) 5662 { 5663 use_operand_p immuse; 5664 gimple *immusestmt; 5665 5666 if (TREE_CODE (lhs) == SSA_NAME 5667 && single_imm_use (lhs, &immuse, &immusestmt) 5668 && is_gimple_assign (immusestmt)) 5669 return immusestmt; 5670 5671 return NULL; 5672 } 5673 5674 /* Recursively negate the value of TONEGATE, and return the SSA_NAME 5675 representing the negated value. Insertions of any necessary 5676 instructions go before GSI. 5677 This function is recursive in that, if you hand it "a_5" as the 5678 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will 5679 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */ 5680 5681 static tree 5682 negate_value (tree tonegate, gimple_stmt_iterator *gsip) 5683 { 5684 gimple *negatedefstmt = NULL; 5685 tree resultofnegate; 5686 gimple_stmt_iterator gsi; 5687 unsigned int uid; 5688 5689 /* If we are trying to negate a name, defined by an add, negate the 5690 add operands instead. 
*/ 5691 if (TREE_CODE (tonegate) == SSA_NAME) 5692 negatedefstmt = SSA_NAME_DEF_STMT (tonegate); 5693 if (TREE_CODE (tonegate) == SSA_NAME 5694 && is_gimple_assign (negatedefstmt) 5695 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME 5696 && has_single_use (gimple_assign_lhs (negatedefstmt)) 5697 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR) 5698 { 5699 tree rhs1 = gimple_assign_rhs1 (negatedefstmt); 5700 tree rhs2 = gimple_assign_rhs2 (negatedefstmt); 5701 tree lhs = gimple_assign_lhs (negatedefstmt); 5702 gimple *g; 5703
5704 gsi = gsi_for_stmt (negatedefstmt); 5705 rhs1 = negate_value (rhs1, &gsi); 5706 5707 gsi = gsi_for_stmt (negatedefstmt); 5708 rhs2 = negate_value (rhs2, &gsi); 5709
5710 gsi = gsi_for_stmt (negatedefstmt); 5711 lhs = make_ssa_name (TREE_TYPE (lhs)); 5712 gimple_set_visited (negatedefstmt, true); 5713 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2); 5714 gimple_set_uid (g, gimple_uid (negatedefstmt)); 5715 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 5716 return lhs; 5717 } 5718
5719 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate); 5720 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true, 5721 NULL_TREE, true, GSI_SAME_STMT); 5722 gsi = *gsip; 5723 uid = gimple_uid (gsi_stmt (gsi)); 5724 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi)) 5725 { 5726 gimple *stmt = gsi_stmt (gsi); 5727 if (gimple_uid (stmt) != 0) 5728 break; 5729 gimple_set_uid (stmt, uid); 5730 } 5731 return resultofnegate; 5732 } 5733
5734 /* Return true if we should break up the subtract in STMT into an add 5735 with negate. This is true when the subtract operands are really 5736 adds, or the subtract itself is used in an add expression. In 5737 either case, breaking up the subtract into an add with negate 5738 exposes the adds to reassociation. */ 5739
5740 static bool 5741 should_break_up_subtract (gimple *stmt) 5742 { 5743 tree lhs = gimple_assign_lhs (stmt); 5744 tree binlhs = gimple_assign_rhs1 (stmt); 5745 tree binrhs = gimple_assign_rhs2 (stmt); 5746 gimple *immusestmt; 5747 class loop *loop = loop_containing_stmt (stmt); 5748
5749 if (TREE_CODE (binlhs) == SSA_NAME 5750 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) 5751 return true; 5752
5753 if (TREE_CODE (binrhs) == SSA_NAME 5754 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop)) 5755 return true; 5756
5757 if (TREE_CODE (lhs) == SSA_NAME 5758 && (immusestmt = get_single_immediate_use (lhs)) 5759 && is_gimple_assign (immusestmt) 5760 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR 5761 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR 5762 && gimple_assign_rhs1 (immusestmt) == lhs) 5763 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR)) 5764 return true; 5765 return false; 5766 } 5767
5768 /* Transform STMT from A - B into A + -B. */ 5769
5770 static void 5771 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip) 5772 { 5773 tree rhs1 = gimple_assign_rhs1 (stmt); 5774 tree rhs2 = gimple_assign_rhs2 (stmt); 5775
5776 if (dump_file && (dump_flags & TDF_DETAILS)) 5777 { 5778 fprintf (dump_file, "Breaking up subtract "); 5779 print_gimple_stmt (dump_file, stmt, 0); 5780 } 5781
5782 rhs2 = negate_value (rhs2, gsip); 5783 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2); 5784 update_stmt (stmt); 5785 } 5786
5787 /* Determine whether STMT is a builtin call that raises an SSA name 5788 to an integer power and has only one use.
If so, and this is early 5789 reassociation and unsafe math optimizations are permitted, place 5790 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE. 5791 If any of these conditions does not hold, return FALSE. */ 5792 5793 static bool 5794 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent) 5795 { 5796 tree arg1; 5797 REAL_VALUE_TYPE c, cint; 5798 5799 switch (gimple_call_combined_fn (stmt)) 5800 { 5801 CASE_CFN_POW: 5802 if (flag_errno_math) 5803 return false; 5804 5805 *base = gimple_call_arg (stmt, 0); 5806 arg1 = gimple_call_arg (stmt, 1); 5807 5808 if (TREE_CODE (arg1) != REAL_CST) 5809 return false; 5810 5811 c = TREE_REAL_CST (arg1); 5812 5813 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT) 5814 return false; 5815 5816 *exponent = real_to_integer (&c); 5817 real_from_integer (&cint, VOIDmode, *exponent, SIGNED); 5818 if (!real_identical (&c, &cint)) 5819 return false; 5820 5821 break; 5822 5823 CASE_CFN_POWI: 5824 *base = gimple_call_arg (stmt, 0); 5825 arg1 = gimple_call_arg (stmt, 1); 5826 5827 if (!tree_fits_shwi_p (arg1)) 5828 return false; 5829 5830 *exponent = tree_to_shwi (arg1); 5831 break; 5832 5833 default: 5834 return false; 5835 } 5836 5837 /* Expanding negative exponents is generally unproductive, so we don't 5838 complicate matters with those. Exponents of zero and one should 5839 have been handled by expression folding. */ 5840 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME) 5841 return false; 5842 5843 return true; 5844 } 5845 5846 /* Try to derive and add operand entry for OP to *OPS. Return false if 5847 unsuccessful. */ 5848 5849 static bool 5850 try_special_add_to_ops (vec<operand_entry *> *ops, 5851 enum tree_code code, 5852 tree op, gimple* def_stmt) 5853 { 5854 tree base = NULL_TREE; 5855 HOST_WIDE_INT exponent = 0; 5856 5857 if (TREE_CODE (op) != SSA_NAME 5858 || ! has_single_use (op)) 5859 return false; 5860 5861 if (code == MULT_EXPR 5862 && reassoc_insert_powi_p 5863 && flag_unsafe_math_optimizations 5864 && is_gimple_call (def_stmt) 5865 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent)) 5866 { 5867 add_repeat_to_ops_vec (ops, base, exponent); 5868 gimple_set_visited (def_stmt, true); 5869 return true; 5870 } 5871 else if (code == MULT_EXPR 5872 && is_gimple_assign (def_stmt) 5873 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR 5874 && !HONOR_SNANS (TREE_TYPE (op)) 5875 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op)) 5876 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))) 5877 && (!FLOAT_TYPE_P (TREE_TYPE (op)) 5878 || !DECIMAL_FLOAT_MODE_P (element_mode (op)))) 5879 { 5880 tree rhs1 = gimple_assign_rhs1 (def_stmt); 5881 tree cst = build_minus_one_cst (TREE_TYPE (op)); 5882 add_to_ops_vec (ops, rhs1); 5883 add_to_ops_vec (ops, cst); 5884 gimple_set_visited (def_stmt, true); 5885 return true; 5886 } 5887 5888 return false; 5889 } 5890 5891 /* Recursively linearize a binary expression that is the RHS of STMT. 5892 Place the operands of the expression tree in the vector named OPS. 
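For example, for w_1 = t_2 + c_3 where t_2 = a_4 + b_5 has its single use here (and neither a_4 nor b_5 is itself a reassociable add), OPS ends up holding a_4, b_5 and c_3; the caller then sorts the vector by rank.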
*/ 5893 5894 static void 5895 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt, 5896 bool is_associative, bool set_visited) 5897 { 5898 tree binlhs = gimple_assign_rhs1 (stmt); 5899 tree binrhs = gimple_assign_rhs2 (stmt); 5900 gimple *binlhsdef = NULL, *binrhsdef = NULL; 5901 bool binlhsisreassoc = false; 5902 bool binrhsisreassoc = false; 5903 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 5904 class loop *loop = loop_containing_stmt (stmt); 5905
5906 if (set_visited) 5907 gimple_set_visited (stmt, true); 5908
5909 if (TREE_CODE (binlhs) == SSA_NAME) 5910 { 5911 binlhsdef = SSA_NAME_DEF_STMT (binlhs); 5912 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop) 5913 && !stmt_could_throw_p (cfun, binlhsdef)); 5914 } 5915
5916 if (TREE_CODE (binrhs) == SSA_NAME) 5917 { 5918 binrhsdef = SSA_NAME_DEF_STMT (binrhs); 5919 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop) 5920 && !stmt_could_throw_p (cfun, binrhsdef)); 5921 } 5922
5923 /* If the LHS is not reassociable, but the RHS is, we need to swap 5924 them. If neither is reassociable, there is nothing we can do, so 5925 just put them in the ops vector. If the LHS is reassociable, 5926 linearize it. If both are reassociable, then linearize the RHS 5927 and the LHS. */ 5928
5929 if (!binlhsisreassoc) 5930 { 5931 /* If this is not an associative operation like division, give up. */ 5932 if (!is_associative) 5933 { 5934 add_to_ops_vec (ops, binrhs); 5935 return; 5936 } 5937
5938 if (!binrhsisreassoc) 5939 { 5940 bool swap = false; 5941 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef)) 5942 /* If we add ops for the rhs we expect to be able to recurse 5943 to it via the lhs during expression rewrite, so swap 5944 the operands. */ 5945 swap = true; 5946 else 5947 add_to_ops_vec (ops, binrhs); 5948
5949 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef)) 5950 add_to_ops_vec (ops, binlhs); 5951 5952 if (!swap) 5953 return; 5954 } 5955
5956 if (dump_file && (dump_flags & TDF_DETAILS)) 5957 { 5958 fprintf (dump_file, "swapping operands of "); 5959 print_gimple_stmt (dump_file, stmt, 0); 5960 } 5961
5962 swap_ssa_operands (stmt, 5963 gimple_assign_rhs1_ptr (stmt), 5964 gimple_assign_rhs2_ptr (stmt)); 5965 update_stmt (stmt); 5966
5967 if (dump_file && (dump_flags & TDF_DETAILS)) 5968 { 5969 fprintf (dump_file, " is now "); 5970 print_gimple_stmt (dump_file, stmt, 0); 5971 } 5972 if (!binrhsisreassoc) 5973 return; 5974
5975 /* We want to make it so the lhs is always the reassociative op, 5976 so swap. */ 5977 std::swap (binlhs, binrhs); 5978 } 5979 else if (binrhsisreassoc) 5980 { 5981 linearize_expr (stmt); 5982 binlhs = gimple_assign_rhs1 (stmt); 5983 binrhs = gimple_assign_rhs2 (stmt); 5984 } 5985
5986 gcc_assert (TREE_CODE (binrhs) != SSA_NAME 5987 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), 5988 rhscode, loop)); 5989 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs), 5990 is_associative, set_visited); 5991
5992 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef)) 5993 add_to_ops_vec (ops, binrhs); 5994 } 5995
5996 /* Repropagate the negates back into subtracts, since no other pass 5997 currently does it.
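For example, t_1 = -a_2; x_3 = b_4 + t_1 becomes x_3 = b_4 - a_2, and the MINUS_EXPR cases below either push the negate further down or absorb it entirely.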
*/ 5998 5999 static void 6000 repropagate_negates (void) 6001 { 6002 unsigned int i = 0; 6003 tree negate; 6004 6005 FOR_EACH_VEC_ELT (plus_negates, i, negate) 6006 { 6007 gimple *user = get_single_immediate_use (negate); 6008 if (!user || !is_gimple_assign (user)) 6009 continue; 6010 6011 tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate)); 6012 if (TREE_CODE (negateop) == SSA_NAME 6013 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop)) 6014 continue; 6015 6016 /* The negate operand can be either operand of a PLUS_EXPR 6017 (it can be the LHS if the RHS is a constant for example). 6018 6019 Force the negate operand to the RHS of the PLUS_EXPR, then 6020 transform the PLUS_EXPR into a MINUS_EXPR. */ 6021 if (gimple_assign_rhs_code (user) == PLUS_EXPR) 6022 { 6023 /* If the negated operand appears on the LHS of the 6024 PLUS_EXPR, exchange the operands of the PLUS_EXPR 6025 to force the negated operand to the RHS of the PLUS_EXPR. */ 6026 if (gimple_assign_rhs1 (user) == negate) 6027 { 6028 swap_ssa_operands (user, 6029 gimple_assign_rhs1_ptr (user), 6030 gimple_assign_rhs2_ptr (user)); 6031 } 6032 6033 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace 6034 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */ 6035 if (gimple_assign_rhs2 (user) == negate) 6036 { 6037 tree rhs1 = gimple_assign_rhs1 (user); 6038 gimple_stmt_iterator gsi = gsi_for_stmt (user); 6039 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, 6040 negateop); 6041 update_stmt (user); 6042 } 6043 } 6044 else if (gimple_assign_rhs_code (user) == MINUS_EXPR) 6045 { 6046 if (gimple_assign_rhs1 (user) == negate) 6047 { 6048 /* We have 6049 x = -negateop 6050 y = x - b 6051 which we transform into 6052 x = negateop + b 6053 y = -x . 6054 This pushes down the negate which we possibly can merge 6055 into some other operation, hence insert it into the 6056 plus_negates vector. */ 6057 gimple *feed = SSA_NAME_DEF_STMT (negate); 6058 tree b = gimple_assign_rhs2 (user); 6059 gimple_stmt_iterator gsi = gsi_for_stmt (feed); 6060 gimple_stmt_iterator gsi2 = gsi_for_stmt (user); 6061 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed))); 6062 gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b); 6063 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); 6064 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x); 6065 user = gsi_stmt (gsi2); 6066 update_stmt (user); 6067 reassoc_remove_stmt (&gsi); 6068 release_defs (feed); 6069 plus_negates.safe_push (gimple_assign_lhs (user)); 6070 } 6071 else 6072 { 6073 /* Transform "x = -negateop; y = b - x" into "y = b + negateop", 6074 getting rid of one operation. */ 6075 tree rhs1 = gimple_assign_rhs1 (user); 6076 gimple_stmt_iterator gsi = gsi_for_stmt (user); 6077 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop); 6078 update_stmt (gsi_stmt (gsi)); 6079 } 6080 } 6081 } 6082 } 6083 6084 /* Break up subtract operations in block BB. 6085 6086 We do this top down because we don't know whether the subtract is 6087 part of a possible chain of reassociation except at the top. 6088 6089 IE given 6090 d = f + g 6091 c = a + e 6092 b = c - d 6093 q = b - r 6094 k = t - q 6095 6096 we want to break up k = t - q, but we won't until we've transformed q 6097 = b - r, which won't be broken up until we transform b = c - d. 6098 6099 En passant, clear the GIMPLE visited flag on every statement 6100 and set UIDs within each basic block. 
*/ 6101 6102 static void 6103 break_up_subtract_bb (basic_block bb) 6104 { 6105 gimple_stmt_iterator gsi; 6106 basic_block son; 6107 unsigned int uid = 1; 6108 6109 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 6110 { 6111 gimple *stmt = gsi_stmt (gsi); 6112 gimple_set_visited (stmt, false); 6113 gimple_set_uid (stmt, uid++); 6114 6115 if (!is_gimple_assign (stmt) 6116 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt))) 6117 || !can_reassociate_op_p (gimple_assign_lhs (stmt))) 6118 continue; 6119 6120 /* Look for simple gimple subtract operations. */ 6121 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR) 6122 { 6123 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt)) 6124 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt))) 6125 continue; 6126 6127 /* Check for a subtract used only in an addition. If this 6128 is the case, transform it into add of a negate for better 6129 reassociation. IE transform C = A-B into C = A + -B if C 6130 is only used in an addition. */ 6131 if (should_break_up_subtract (stmt)) 6132 break_up_subtract (stmt, &gsi); 6133 } 6134 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR 6135 && can_reassociate_op_p (gimple_assign_rhs1 (stmt))) 6136 plus_negates.safe_push (gimple_assign_lhs (stmt)); 6137 } 6138 for (son = first_dom_son (CDI_DOMINATORS, bb); 6139 son; 6140 son = next_dom_son (CDI_DOMINATORS, son)) 6141 break_up_subtract_bb (son); 6142 } 6143 6144 /* Used for repeated factor analysis. */ 6145 struct repeat_factor 6146 { 6147 /* An SSA name that occurs in a multiply chain. */ 6148 tree factor; 6149 6150 /* Cached rank of the factor. */ 6151 unsigned rank; 6152 6153 /* Number of occurrences of the factor in the chain. */ 6154 HOST_WIDE_INT count; 6155 6156 /* An SSA name representing the product of this factor and 6157 all factors appearing later in the repeated factor vector. */ 6158 tree repr; 6159 }; 6160 6161 6162 static vec<repeat_factor> repeat_factor_vec; 6163 6164 /* Used for sorting the repeat factor vector. Sort primarily by 6165 ascending occurrence count, secondarily by descending rank. */ 6166 6167 static int 6168 compare_repeat_factors (const void *x1, const void *x2) 6169 { 6170 const repeat_factor *rf1 = (const repeat_factor *) x1; 6171 const repeat_factor *rf2 = (const repeat_factor *) x2; 6172 6173 if (rf1->count != rf2->count) 6174 return rf1->count - rf2->count; 6175 6176 return rf2->rank - rf1->rank; 6177 } 6178 6179 /* Look for repeated operands in OPS in the multiply tree rooted at 6180 STMT. Replace them with an optimal sequence of multiplies and powi 6181 builtin calls, and remove the used operands from OPS. Return an 6182 SSA name representing the value of the replacement sequence. */ 6183 6184 static tree 6185 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops) 6186 { 6187 unsigned i, j, vec_len; 6188 int ii; 6189 operand_entry *oe; 6190 repeat_factor *rf1, *rf2; 6191 repeat_factor rfnew; 6192 tree result = NULL_TREE; 6193 tree target_ssa, iter_result; 6194 tree type = TREE_TYPE (gimple_get_lhs (stmt)); 6195 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI); 6196 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 6197 gimple *mul_stmt, *pow_stmt; 6198 6199 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and 6200 target, unless type is integral. */ 6201 if (!powi_fndecl && !INTEGRAL_TYPE_P (type)) 6202 return NULL_TREE; 6203 6204 /* Allocate the repeated factor vector. 
*/ 6205 repeat_factor_vec.create (10); 6206 6207 /* Scan the OPS vector for all SSA names in the product and build 6208 up a vector of occurrence counts for each factor. */ 6209 FOR_EACH_VEC_ELT (*ops, i, oe) 6210 { 6211 if (TREE_CODE (oe->op) == SSA_NAME) 6212 { 6213 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 6214 { 6215 if (rf1->factor == oe->op) 6216 { 6217 rf1->count += oe->count; 6218 break; 6219 } 6220 } 6221 6222 if (j >= repeat_factor_vec.length ()) 6223 { 6224 rfnew.factor = oe->op; 6225 rfnew.rank = oe->rank; 6226 rfnew.count = oe->count; 6227 rfnew.repr = NULL_TREE; 6228 repeat_factor_vec.safe_push (rfnew); 6229 } 6230 } 6231 } 6232 6233 /* Sort the repeated factor vector by (a) increasing occurrence count, 6234 and (b) decreasing rank. */ 6235 repeat_factor_vec.qsort (compare_repeat_factors); 6236 6237 /* It is generally best to combine as many base factors as possible 6238 into a product before applying __builtin_powi to the result. 6239 However, the sort order chosen for the repeated factor vector 6240 allows us to cache partial results for the product of the base 6241 factors for subsequent use. When we already have a cached partial 6242 result from a previous iteration, it is best to make use of it 6243 before looking for another __builtin_pow opportunity. 6244 6245 As an example, consider x * x * y * y * y * z * z * z * z. 6246 We want to first compose the product x * y * z, raise it to the 6247 second power, then multiply this by y * z, and finally multiply 6248 by z. This can be done in 5 multiplies provided we cache y * z 6249 for use in both expressions: 6250 6251 t1 = y * z 6252 t2 = t1 * x 6253 t3 = t2 * t2 6254 t4 = t1 * t3 6255 result = t4 * z 6256 6257 If we instead ignored the cached y * z and first multiplied by 6258 the __builtin_pow opportunity z * z, we would get the inferior: 6259 6260 t1 = y * z 6261 t2 = t1 * x 6262 t3 = t2 * t2 6263 t4 = z * z 6264 t5 = t3 * t4 6265 result = t5 * y */ 6266 6267 vec_len = repeat_factor_vec.length (); 6268 6269 /* Repeatedly look for opportunities to create a builtin_powi call. */ 6270 while (true) 6271 { 6272 HOST_WIDE_INT power; 6273 6274 /* First look for the largest cached product of factors from 6275 preceding iterations. If found, create a builtin_powi for 6276 it if the minimum occurrence count for its factors is at 6277 least 2, or just use this cached product as our next 6278 multiplicand if the minimum occurrence count is 1. 
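In the example above, the first iteration finds no cached product and instead builds t1 and t2 and raises t2 to the second power; the next iteration finds the cached product t1 = y * z with a remaining count of 1 and simply multiplies it in; a final iteration multiplies in the remaining z.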
*/ 6279 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 6280 { 6281 if (rf1->repr && rf1->count > 0) 6282 break; 6283 } 6284
6285 if (j < vec_len) 6286 { 6287 power = rf1->count; 6288 6289 if (power == 1) 6290 { 6291 iter_result = rf1->repr; 6292
6293 if (dump_file && (dump_flags & TDF_DETAILS)) 6294 { 6295 unsigned elt; 6296 repeat_factor *rf; 6297 fputs ("Multiplying by cached product ", dump_file); 6298 for (elt = j; elt < vec_len; elt++) 6299 { 6300 rf = &repeat_factor_vec[elt]; 6301 print_generic_expr (dump_file, rf->factor); 6302 if (elt < vec_len - 1) 6303 fputs (" * ", dump_file); 6304 } 6305 fputs ("\n", dump_file); 6306 } 6307 } 6308 else 6309 { 6310 if (INTEGRAL_TYPE_P (type)) 6311 { 6312 gcc_assert (power > 1); 6313 gimple_stmt_iterator gsip = gsi; 6314 gsi_prev (&gsip); 6315 iter_result = powi_as_mults (&gsi, gimple_location (stmt), 6316 rf1->repr, power); 6317 gimple_stmt_iterator gsic = gsi; 6318 while (gsi_stmt (gsic) != gsi_stmt (gsip)) 6319 { 6320 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt)); 6321 gimple_set_visited (gsi_stmt (gsic), true); 6322 gsi_prev (&gsic); 6323 } 6324 } 6325 else 6326 { 6327 iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); 6328 pow_stmt 6329 = gimple_build_call (powi_fndecl, 2, rf1->repr, 6330 build_int_cst (integer_type_node, 6331 power)); 6332 gimple_call_set_lhs (pow_stmt, iter_result); 6333 gimple_set_location (pow_stmt, gimple_location (stmt)); 6334 gimple_set_uid (pow_stmt, gimple_uid (stmt)); 6335 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); 6336 } 6337
6338 if (dump_file && (dump_flags & TDF_DETAILS)) 6339 { 6340 unsigned elt; 6341 repeat_factor *rf; 6342 fputs ("Building __builtin_pow call for cached product (", 6343 dump_file); 6344 for (elt = j; elt < vec_len; elt++) 6345 { 6346 rf = &repeat_factor_vec[elt]; 6347 print_generic_expr (dump_file, rf->factor); 6348 if (elt < vec_len - 1) 6349 fputs (" * ", dump_file); 6350 } 6351 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", 6352 power); 6353 } 6354 } 6355 } 6356 else 6357 { 6358 /* Otherwise, find the first factor in the repeated factor 6359 vector whose occurrence count is at least 2. If no such 6360 factor exists, there are no builtin_powi opportunities 6361 remaining. */ 6362 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1) 6363 { 6364 if (rf1->count >= 2) 6365 break; 6366 } 6367
6368 if (j >= vec_len) 6369 break; 6370 6371 power = rf1->count; 6372
6373 if (dump_file && (dump_flags & TDF_DETAILS)) 6374 { 6375 unsigned elt; 6376 repeat_factor *rf; 6377 fputs ("Building __builtin_pow call for (", dump_file); 6378 for (elt = j; elt < vec_len; elt++) 6379 { 6380 rf = &repeat_factor_vec[elt]; 6381 print_generic_expr (dump_file, rf->factor); 6382 if (elt < vec_len - 1) 6383 fputs (" * ", dump_file); 6384 } 6385 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power); 6386 } 6387
6388 reassociate_stats.pows_created++; 6389
6390 /* Visit each element of the vector in reverse order (so that 6391 high-occurrence elements are visited first, and within the 6392 same occurrence count, lower-ranked elements are visited 6393 first). Form a linear product of all elements in this order 6394 whose occurrence count is at least that of element J. 6395 Record the SSA name representing the product of each element 6396 with all subsequent elements in the vector.
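For the x * x * y * y * y * z * z * z * z example above, the sorted vector is x (2), y (3), z (4), and this loop records z, y * z and x * y * z as the respective representatives.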
*/ 6397 if (j == vec_len - 1) 6398 rf1->repr = rf1->factor; 6399 else 6400 { 6401 for (ii = vec_len - 2; ii >= (int)j; ii--) 6402 { 6403 tree op1, op2; 6404 6405 rf1 = &repeat_factor_vec[ii]; 6406 rf2 = &repeat_factor_vec[ii + 1]; 6407 6408 /* Init the last factor's representative to be itself. */ 6409 if (!rf2->repr) 6410 rf2->repr = rf2->factor; 6411 6412 op1 = rf1->factor; 6413 op2 = rf2->repr; 6414 6415 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow"); 6416 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR, 6417 op1, op2); 6418 gimple_set_location (mul_stmt, gimple_location (stmt)); 6419 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 6420 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); 6421 rf1->repr = target_ssa; 6422 6423 /* Don't reprocess the multiply we just introduced. */ 6424 gimple_set_visited (mul_stmt, true); 6425 } 6426 } 6427 6428 /* Form a call to __builtin_powi for the maximum product 6429 just formed, raised to the power obtained earlier. */ 6430 rf1 = &repeat_factor_vec[j]; 6431 if (INTEGRAL_TYPE_P (type)) 6432 { 6433 gcc_assert (power > 1); 6434 gimple_stmt_iterator gsip = gsi; 6435 gsi_prev (&gsip); 6436 iter_result = powi_as_mults (&gsi, gimple_location (stmt), 6437 rf1->repr, power); 6438 gimple_stmt_iterator gsic = gsi; 6439 while (gsi_stmt (gsic) != gsi_stmt (gsip)) 6440 { 6441 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt)); 6442 gimple_set_visited (gsi_stmt (gsic), true); 6443 gsi_prev (&gsic); 6444 } 6445 } 6446 else 6447 { 6448 iter_result = make_temp_ssa_name (type, NULL, "reassocpow"); 6449 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 6450 build_int_cst (integer_type_node, 6451 power)); 6452 gimple_call_set_lhs (pow_stmt, iter_result); 6453 gimple_set_location (pow_stmt, gimple_location (stmt)); 6454 gimple_set_uid (pow_stmt, gimple_uid (stmt)); 6455 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT); 6456 } 6457 } 6458 6459 /* If we previously formed at least one other builtin_powi call, 6460 form the product of this one and those others. */ 6461 if (result) 6462 { 6463 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow"); 6464 mul_stmt = gimple_build_assign (new_result, MULT_EXPR, 6465 result, iter_result); 6466 gimple_set_location (mul_stmt, gimple_location (stmt)); 6467 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 6468 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT); 6469 gimple_set_visited (mul_stmt, true); 6470 result = new_result; 6471 } 6472 else 6473 result = iter_result; 6474 6475 /* Decrement the occurrence count of each element in the product 6476 by the count found above, and remove this many copies of each 6477 factor from OPS. */ 6478 for (i = j; i < vec_len; i++) 6479 { 6480 unsigned k = power; 6481 unsigned n; 6482 6483 rf1 = &repeat_factor_vec[i]; 6484 rf1->count -= power; 6485 6486 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe) 6487 { 6488 if (oe->op == rf1->factor) 6489 { 6490 if (oe->count <= k) 6491 { 6492 ops->ordered_remove (n); 6493 k -= oe->count; 6494 6495 if (k == 0) 6496 break; 6497 } 6498 else 6499 { 6500 oe->count -= k; 6501 break; 6502 } 6503 } 6504 } 6505 } 6506 } 6507 6508 /* At this point all elements in the repeated factor vector have a 6509 remaining occurrence count of 0 or 1, and those with a count of 1 6510 don't have cached representatives. Re-sort the ops vector and 6511 clean up. */ 6512 ops->qsort (sort_by_operand_rank); 6513 repeat_factor_vec.release (); 6514 6515 /* Return the final product computed herein. 
Note that there may 6516 still be some elements with single occurrence count left in OPS; 6517 those will be handled by the normal reassociation logic. */ 6518 return result; 6519 } 6520
6521 /* Attempt to optimize 6522 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or 6523 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */ 6524
6525 static void 6526 attempt_builtin_copysign (vec<operand_entry *> *ops) 6527 { 6528 operand_entry *oe; 6529 unsigned int i; 6530 unsigned int length = ops->length (); 6531 tree cst = ops->last ()->op; 6532
6533 if (length == 1 || TREE_CODE (cst) != REAL_CST) 6534 return; 6535
6536 FOR_EACH_VEC_ELT (*ops, i, oe) 6537 { 6538 if (TREE_CODE (oe->op) == SSA_NAME 6539 && has_single_use (oe->op)) 6540 { 6541 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op); 6542 if (gcall *old_call = dyn_cast <gcall *> (def_stmt)) 6543 { 6544 tree arg0, arg1; 6545 switch (gimple_call_combined_fn (old_call)) 6546 { 6547 CASE_CFN_COPYSIGN: 6548 CASE_CFN_COPYSIGN_FN: 6549 arg0 = gimple_call_arg (old_call, 0); 6550 arg1 = gimple_call_arg (old_call, 1); 6551 /* The first argument of copysign must be a constant, 6552 otherwise there's nothing to do. */ 6553 if (TREE_CODE (arg0) == REAL_CST) 6554 { 6555 tree type = TREE_TYPE (arg0); 6556 tree mul = const_binop (MULT_EXPR, type, cst, arg0); 6557 /* If we couldn't fold to a single constant, skip it. 6558 That happens e.g. for inexact multiplication when 6559 -frounding-math. */ 6560 if (mul == NULL_TREE) 6561 break; 6562 /* Instead of adjusting OLD_CALL, let's build a new 6563 call to not leak the LHS and prevent keeping bogus 6564 debug statements. DCE will clean up the old call. */ 6565 gcall *new_call; 6566 if (gimple_call_internal_p (old_call)) 6567 new_call = gimple_build_call_internal 6568 (IFN_COPYSIGN, 2, mul, arg1); 6569 else 6570 new_call = gimple_build_call 6571 (gimple_call_fndecl (old_call), 2, mul, arg1); 6572 tree lhs = make_ssa_name (type); 6573 gimple_call_set_lhs (new_call, lhs); 6574 gimple_set_location (new_call, 6575 gimple_location (old_call)); 6576 insert_stmt_after (new_call, old_call); 6577 /* We've used the constant, get rid of it. */ 6578 ops->pop (); 6579 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst)); 6580 /* Handle the CST1 < 0 case by negating the result. */ 6581 if (cst1_neg) 6582 { 6583 tree negrhs = make_ssa_name (TREE_TYPE (lhs)); 6584 gimple *negate_stmt 6585 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs); 6586 insert_stmt_after (negate_stmt, new_call); 6587 oe->op = negrhs; 6588 } 6589 else 6590 oe->op = lhs; 6591 if (dump_file && (dump_flags & TDF_DETAILS)) 6592 { 6593 fprintf (dump_file, "Optimizing copysign: "); 6594 print_generic_expr (dump_file, cst); 6595 fprintf (dump_file, " * COPYSIGN ("); 6596 print_generic_expr (dump_file, arg0); 6597 fprintf (dump_file, ", "); 6598 print_generic_expr (dump_file, arg1); 6599 fprintf (dump_file, ") into %sCOPYSIGN (", 6600 cst1_neg ? "-" : ""); 6601 print_generic_expr (dump_file, mul); 6602 fprintf (dump_file, ", "); 6603 print_generic_expr (dump_file, arg1); 6604 fprintf (dump_file, ")\n"); 6605 } 6606 return; 6607 } 6608 break; 6609 default: 6610 break; 6611 } 6612 } 6613 } 6614 } 6615 } 6616
6617 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.
*/ 6618 6619 static void 6620 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs) 6621 { 6622 tree rhs1; 6623 6624 if (dump_file && (dump_flags & TDF_DETAILS)) 6625 { 6626 fprintf (dump_file, "Transforming "); 6627 print_gimple_stmt (dump_file, stmt, 0); 6628 } 6629 6630 rhs1 = gimple_assign_rhs1 (stmt); 6631 gimple_assign_set_rhs_from_tree (gsi, new_rhs); 6632 update_stmt (stmt); 6633 remove_visited_stmt_chain (rhs1); 6634 6635 if (dump_file && (dump_flags & TDF_DETAILS)) 6636 { 6637 fprintf (dump_file, " into "); 6638 print_gimple_stmt (dump_file, stmt, 0); 6639 } 6640 } 6641 6642 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */ 6643 6644 static void 6645 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt, 6646 tree rhs1, tree rhs2) 6647 { 6648 if (dump_file && (dump_flags & TDF_DETAILS)) 6649 { 6650 fprintf (dump_file, "Transforming "); 6651 print_gimple_stmt (dump_file, stmt, 0); 6652 } 6653 6654 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2); 6655 update_stmt (gsi_stmt (*gsi)); 6656 remove_visited_stmt_chain (rhs1); 6657 6658 if (dump_file && (dump_flags & TDF_DETAILS)) 6659 { 6660 fprintf (dump_file, " into "); 6661 print_gimple_stmt (dump_file, stmt, 0); 6662 } 6663 } 6664 6665 /* Reassociate expressions in basic block BB and its post-dominator as 6666 children. 6667 6668 Bubble up return status from maybe_optimize_range_tests. */ 6669 6670 static bool 6671 reassociate_bb (basic_block bb) 6672 { 6673 gimple_stmt_iterator gsi; 6674 basic_block son; 6675 gimple *stmt = last_stmt (bb); 6676 bool cfg_cleanup_needed = false; 6677 6678 if (stmt && !gimple_visited_p (stmt)) 6679 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt); 6680 6681 bool do_prev = false; 6682 for (gsi = gsi_last_bb (bb); 6683 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0) 6684 { 6685 do_prev = true; 6686 stmt = gsi_stmt (gsi); 6687 6688 if (is_gimple_assign (stmt) 6689 && !stmt_could_throw_p (cfun, stmt)) 6690 { 6691 tree lhs, rhs1, rhs2; 6692 enum tree_code rhs_code = gimple_assign_rhs_code (stmt); 6693 6694 /* If this was part of an already processed statement, 6695 we don't need to touch it again. */ 6696 if (gimple_visited_p (stmt)) 6697 { 6698 /* This statement might have become dead because of previous 6699 reassociations. */ 6700 if (has_zero_uses (gimple_get_lhs (stmt))) 6701 { 6702 reassoc_remove_stmt (&gsi); 6703 release_defs (stmt); 6704 /* We might end up removing the last stmt above which 6705 places the iterator to the end of the sequence. 6706 Reset it to the last stmt in this case and make sure 6707 we don't do gsi_prev in that case. */ 6708 if (gsi_end_p (gsi)) 6709 { 6710 gsi = gsi_last_bb (bb); 6711 do_prev = false; 6712 } 6713 } 6714 continue; 6715 } 6716 6717 /* If this is not a gimple binary expression, there is 6718 nothing for us to do with it. */ 6719 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) 6720 continue; 6721 6722 lhs = gimple_assign_lhs (stmt); 6723 rhs1 = gimple_assign_rhs1 (stmt); 6724 rhs2 = gimple_assign_rhs2 (stmt); 6725 6726 /* For non-bit or min/max operations we can't associate 6727 all types. Verify that here. 
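(For instance, signed integer arithmetic whose overflow is undefined cannot be freely reassociated; the bitwise and MIN/MAX codes are exempt from the type check below.)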
*/ 6728 if ((rhs_code != BIT_IOR_EXPR 6729 && rhs_code != BIT_AND_EXPR 6730 && rhs_code != BIT_XOR_EXPR 6731 && rhs_code != MIN_EXPR 6732 && rhs_code != MAX_EXPR 6733 && !can_reassociate_type_p (TREE_TYPE (lhs))) 6734 || !can_reassociate_op_p (rhs1) 6735 || !can_reassociate_op_p (rhs2)) 6736 continue; 6737 6738 if (associative_tree_code (rhs_code)) 6739 { 6740 auto_vec<operand_entry *> ops; 6741 tree powi_result = NULL_TREE; 6742 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs)); 6743 6744 /* There may be no immediate uses left by the time we 6745 get here because we may have eliminated them all. */ 6746 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs)) 6747 continue; 6748 6749 gimple_set_visited (stmt, true); 6750 linearize_expr_tree (&ops, stmt, true, true); 6751 ops.qsort (sort_by_operand_rank); 6752 int orig_len = ops.length (); 6753 optimize_ops_list (rhs_code, &ops); 6754 if (undistribute_ops_list (rhs_code, &ops, 6755 loop_containing_stmt (stmt))) 6756 { 6757 ops.qsort (sort_by_operand_rank); 6758 optimize_ops_list (rhs_code, &ops); 6759 } 6760 if (undistribute_bitref_for_vector (rhs_code, &ops, 6761 loop_containing_stmt (stmt))) 6762 { 6763 ops.qsort (sort_by_operand_rank); 6764 optimize_ops_list (rhs_code, &ops); 6765 } 6766 if (rhs_code == PLUS_EXPR 6767 && transform_add_to_multiply (&ops)) 6768 ops.qsort (sort_by_operand_rank); 6769 6770 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR) 6771 { 6772 if (is_vector) 6773 optimize_vec_cond_expr (rhs_code, &ops); 6774 else 6775 optimize_range_tests (rhs_code, &ops, NULL); 6776 } 6777 6778 if (rhs_code == MULT_EXPR && !is_vector) 6779 { 6780 attempt_builtin_copysign (&ops); 6781 6782 if (reassoc_insert_powi_p 6783 && (flag_unsafe_math_optimizations 6784 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs))))) 6785 powi_result = attempt_builtin_powi (stmt, &ops); 6786 } 6787 6788 operand_entry *last; 6789 bool negate_result = false; 6790 if (ops.length () > 1 6791 && rhs_code == MULT_EXPR) 6792 { 6793 last = ops.last (); 6794 if ((integer_minus_onep (last->op) 6795 || real_minus_onep (last->op)) 6796 && !HONOR_SNANS (TREE_TYPE (lhs)) 6797 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs)) 6798 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs)))) 6799 { 6800 ops.pop (); 6801 negate_result = true; 6802 } 6803 } 6804 6805 tree new_lhs = lhs; 6806 /* If the operand vector is now empty, all operands were 6807 consumed by the __builtin_powi optimization. */ 6808 if (ops.length () == 0) 6809 transform_stmt_to_copy (&gsi, stmt, powi_result); 6810 else if (ops.length () == 1) 6811 { 6812 tree last_op = ops.last ()->op; 6813 6814 /* If the stmt that defines operand has to be inserted, insert it 6815 before the use. */ 6816 if (ops.last ()->stmt_to_insert) 6817 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert); 6818 if (powi_result) 6819 transform_stmt_to_multiply (&gsi, stmt, last_op, 6820 powi_result); 6821 else 6822 transform_stmt_to_copy (&gsi, stmt, last_op); 6823 } 6824 else 6825 { 6826 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); 6827 int ops_num = ops.length (); 6828 int width; 6829 6830 /* For binary bit operations, if there are at least 3 6831 operands and the last operand in OPS is a constant, 6832 move it to the front. This helps ensure that we generate 6833 (X & Y) & C rather than (X & C) & Y. The former will 6834 often match a canonical bit test when we get to RTL. 
*/ 6835 if (ops.length () > 2 6836 && (rhs_code == BIT_AND_EXPR 6837 || rhs_code == BIT_IOR_EXPR 6838 || rhs_code == BIT_XOR_EXPR) 6839 && TREE_CODE (ops.last ()->op) == INTEGER_CST) 6840 std::swap (*ops[0], *ops[ops_num - 1]); 6841 6842 /* Only rewrite the expression tree to parallel in the 6843 last reassoc pass to avoid useless work back-and-forth 6844 with initial linearization. */ 6845 if (!reassoc_insert_powi_p 6846 && ops.length () > 3 6847 && (width = get_reassociation_width (ops_num, rhs_code, 6848 mode)) > 1) 6849 { 6850 if (dump_file && (dump_flags & TDF_DETAILS)) 6851 fprintf (dump_file, 6852 "Width = %d was chosen for reassociation\n", 6853 width); 6854 rewrite_expr_tree_parallel (as_a <gassign *> (stmt), 6855 width, ops); 6856 } 6857 else 6858 { 6859 /* When there are three operands left, we want 6860 to make sure the ones that get the double 6861 binary op are chosen wisely. */ 6862 int len = ops.length (); 6863 if (len >= 3) 6864 swap_ops_for_binary_stmt (ops, len - 3, stmt); 6865 6866 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops, 6867 powi_result != NULL 6868 || negate_result, 6869 len != orig_len); 6870 } 6871 6872 /* If we combined some repeated factors into a 6873 __builtin_powi call, multiply that result by the 6874 reassociated operands. */ 6875 if (powi_result) 6876 { 6877 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs); 6878 tree type = TREE_TYPE (lhs); 6879 tree target_ssa = make_temp_ssa_name (type, NULL, 6880 "reassocpow"); 6881 gimple_set_lhs (lhs_stmt, target_ssa); 6882 update_stmt (lhs_stmt); 6883 if (lhs != new_lhs) 6884 { 6885 target_ssa = new_lhs; 6886 new_lhs = lhs; 6887 } 6888 mul_stmt = gimple_build_assign (lhs, MULT_EXPR, 6889 powi_result, target_ssa); 6890 gimple_set_location (mul_stmt, gimple_location (stmt)); 6891 gimple_set_uid (mul_stmt, gimple_uid (stmt)); 6892 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT); 6893 } 6894 } 6895 6896 if (negate_result) 6897 { 6898 stmt = SSA_NAME_DEF_STMT (lhs); 6899 tree tmp = make_ssa_name (TREE_TYPE (lhs)); 6900 gimple_set_lhs (stmt, tmp); 6901 if (lhs != new_lhs) 6902 tmp = new_lhs; 6903 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR, 6904 tmp); 6905 gimple_set_uid (neg_stmt, gimple_uid (stmt)); 6906 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT); 6907 update_stmt (stmt); 6908 } 6909 } 6910 } 6911 } 6912 for (son = first_dom_son (CDI_POST_DOMINATORS, bb); 6913 son; 6914 son = next_dom_son (CDI_POST_DOMINATORS, son)) 6915 cfg_cleanup_needed |= reassociate_bb (son); 6916 6917 return cfg_cleanup_needed; 6918 } 6919 6920 /* Add jumps around shifts for range tests turned into bit tests. 
6921 For each SSA_NAME VAR we have code like: 6922 VAR = ...; // final stmt of range comparison 6923 // bit test here...; 6924 OTHERVAR = ...; // final stmt of the bit test sequence 6925 RES = VAR | OTHERVAR; 6926 Turn the above into: 6927 VAR = ...; 6928 if (VAR != 0) 6929 goto <l3>; 6930 else 6931 goto <l2>; 6932 <l2>: 6933 // bit test here...; 6934 OTHERVAR = ...; 6935 <l3>: 6936 # RES = PHI<1(l1), OTHERVAR(l2)>; */ 6937 6938 static void 6939 branch_fixup (void) 6940 { 6941 tree var; 6942 unsigned int i; 6943 6944 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var) 6945 { 6946 gimple *def_stmt = SSA_NAME_DEF_STMT (var); 6947 gimple *use_stmt; 6948 use_operand_p use; 6949 bool ok = single_imm_use (var, &use, &use_stmt); 6950 gcc_assert (ok 6951 && is_gimple_assign (use_stmt) 6952 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR 6953 && gimple_bb (def_stmt) == gimple_bb (use_stmt)); 6954 6955 basic_block cond_bb = gimple_bb (def_stmt); 6956 basic_block then_bb = split_block (cond_bb, def_stmt)->dest; 6957 basic_block merge_bb = split_block (then_bb, use_stmt)->dest; 6958 6959 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt); 6960 gimple *g = gimple_build_cond (NE_EXPR, var, 6961 build_zero_cst (TREE_TYPE (var)), 6962 NULL_TREE, NULL_TREE); 6963 location_t loc = gimple_location (use_stmt); 6964 gimple_set_location (g, loc); 6965 gsi_insert_after (&gsi, g, GSI_NEW_STMT); 6966 6967 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE); 6968 etrue->probability = profile_probability::even (); 6969 edge efalse = find_edge (cond_bb, then_bb); 6970 efalse->flags = EDGE_FALSE_VALUE; 6971 efalse->probability -= etrue->probability; 6972 then_bb->count -= etrue->count (); 6973 6974 tree othervar = NULL_TREE; 6975 if (gimple_assign_rhs1 (use_stmt) == var) 6976 othervar = gimple_assign_rhs2 (use_stmt); 6977 else if (gimple_assign_rhs2 (use_stmt) == var) 6978 othervar = gimple_assign_rhs1 (use_stmt); 6979 else 6980 gcc_unreachable (); 6981 tree lhs = gimple_assign_lhs (use_stmt); 6982 gphi *phi = create_phi_node (lhs, merge_bb); 6983 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc); 6984 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc); 6985 gsi = gsi_for_stmt (use_stmt); 6986 gsi_remove (&gsi, true); 6987 6988 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb); 6989 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb); 6990 } 6991 reassoc_branch_fixups.release (); 6992 } 6993 6994 void dump_ops_vector (FILE *file, vec<operand_entry *> ops); 6995 void debug_ops_vector (vec<operand_entry *> ops); 6996 6997 /* Dump the operand entry vector OPS to FILE. */ 6998 6999 void 7000 dump_ops_vector (FILE *file, vec<operand_entry *> ops) 7001 { 7002 operand_entry *oe; 7003 unsigned int i; 7004 7005 FOR_EACH_VEC_ELT (ops, i, oe) 7006 { 7007 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank); 7008 print_generic_expr (file, oe->op); 7009 fprintf (file, "\n"); 7010 } 7011 } 7012 7013 /* Dump the operand entry vector OPS to STDERR. */ 7014 7015 DEBUG_FUNCTION void 7016 debug_ops_vector (vec<operand_entry *> ops) 7017 { 7018 dump_ops_vector (stderr, ops); 7019 } 7020 7021 /* Bubble up return status from reassociate_bb. */ 7022 7023 static bool 7024 do_reassoc (void) 7025 { 7026 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); 7027 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)); 7028 } 7029 7030 /* Initialize the reassociation pass. 
*/ 7031 7032 static void 7033 init_reassoc (void) 7034 { 7035 int i; 7036 int64_t rank = 2; 7037 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); 7038 7039 /* Find the loops, so that we can prevent moving calculations in 7040 them. */ 7041 loop_optimizer_init (AVOID_CFG_MODIFICATIONS); 7042 7043 memset (&reassociate_stats, 0, sizeof (reassociate_stats)); 7044 7045 next_operand_entry_id = 0; 7046 7047 /* Reverse RPO (Reverse Post Order) will give us something where 7048 deeper loops come later. */ 7049 pre_and_rev_post_order_compute (NULL, bbs, false); 7050 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun)); 7051 operand_rank = new hash_map<tree, int64_t>; 7052 7053 /* Give each default definition a distinct rank. This includes 7054 parameters and the static chain. Walk backwards over all 7055 SSA names so that we get proper rank ordering according 7056 to tree_swap_operands_p. */ 7057 for (i = num_ssa_names - 1; i > 0; --i) 7058 { 7059 tree name = ssa_name (i); 7060 if (name && SSA_NAME_IS_DEFAULT_DEF (name)) 7061 insert_operand_rank (name, ++rank); 7062 } 7063 7064 /* Set up rank for each BB */ 7065 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++) 7066 bb_rank[bbs[i]] = ++rank << 16; 7067 7068 free (bbs); 7069 calculate_dominance_info (CDI_POST_DOMINATORS); 7070 plus_negates = vNULL; 7071 } 7072 7073 /* Cleanup after the reassociation pass, and print stats if 7074 requested. */ 7075 7076 static void 7077 fini_reassoc (void) 7078 { 7079 statistics_counter_event (cfun, "Linearized", 7080 reassociate_stats.linearized); 7081 statistics_counter_event (cfun, "Constants eliminated", 7082 reassociate_stats.constants_eliminated); 7083 statistics_counter_event (cfun, "Ops eliminated", 7084 reassociate_stats.ops_eliminated); 7085 statistics_counter_event (cfun, "Statements rewritten", 7086 reassociate_stats.rewritten); 7087 statistics_counter_event (cfun, "Built-in pow[i] calls encountered", 7088 reassociate_stats.pows_encountered); 7089 statistics_counter_event (cfun, "Built-in powi calls created", 7090 reassociate_stats.pows_created); 7091 7092 delete operand_rank; 7093 bitmap_clear (biased_names); 7094 operand_entry_pool.release (); 7095 free (bb_rank); 7096 plus_negates.release (); 7097 free_dominance_info (CDI_POST_DOMINATORS); 7098 loop_optimizer_finalize (); 7099 } 7100 7101 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable 7102 insertion of __builtin_powi calls. 7103 7104 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to 7105 optimization of a gimple conditional. Otherwise returns zero. */ 7106 7107 static unsigned int 7108 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p) 7109 { 7110 reassoc_insert_powi_p = insert_powi_p; 7111 reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p; 7112 7113 init_reassoc (); 7114 7115 bool cfg_cleanup_needed; 7116 cfg_cleanup_needed = do_reassoc (); 7117 repropagate_negates (); 7118 branch_fixup (); 7119 7120 fini_reassoc (); 7121 return cfg_cleanup_needed ? 
TODO_cleanup_cfg : 0; 7122 } 7123 7124 namespace { 7125 7126 const pass_data pass_data_reassoc = 7127 { 7128 GIMPLE_PASS, /* type */ 7129 "reassoc", /* name */ 7130 OPTGROUP_NONE, /* optinfo_flags */ 7131 TV_TREE_REASSOC, /* tv_id */ 7132 ( PROP_cfg | PROP_ssa ), /* properties_required */ 7133 0, /* properties_provided */ 7134 0, /* properties_destroyed */ 7135 0, /* todo_flags_start */ 7136 TODO_update_ssa_only_virtuals, /* todo_flags_finish */ 7137 }; 7138 7139 class pass_reassoc : public gimple_opt_pass 7140 { 7141 public: 7142 pass_reassoc (gcc::context *ctxt) 7143 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false) 7144 {} 7145 7146 /* opt_pass methods: */ 7147 opt_pass * clone () { return new pass_reassoc (m_ctxt); } 7148 void set_pass_param (unsigned int n, bool param) 7149 { 7150 gcc_assert (n == 0); 7151 insert_powi_p = param; 7152 bias_loop_carried_phi_ranks_p = !param; 7153 } 7154 virtual bool gate (function *) { return flag_tree_reassoc != 0; } 7155 virtual unsigned int execute (function *) 7156 { 7157 return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p); 7158 } 7159 7160 private: 7161 /* Enable insertion of __builtin_powi calls during execute_reassoc. See 7162 point 3a in the pass header comment. */ 7163 bool insert_powi_p; 7164 bool bias_loop_carried_phi_ranks_p; 7165 }; // class pass_reassoc 7166 7167 } // anon namespace 7168 7169 gimple_opt_pass * 7170 make_pass_reassoc (gcc::context *ctxt) 7171 { 7172 return new pass_reassoc (ctxt); 7173 } 7174