/* Optimization of PHI nodes by converting them into straightline code.
   Copyright (C) 2004-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "insn-codes.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-dfa.h"
#include "domwalk.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-inline.h"
#include "params.h"
#include "case-cfn-macros.h"

static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
                                   tree, tree);
static bool conditional_replacement (basic_block, basic_block,
                                     edge, edge, gphi *, tree, tree);
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree,
                                                gimple *);
static int value_replacement (basic_block, basic_block,
                              edge, edge, gimple *, tree, tree);
static bool minmax_replacement (basic_block, basic_block,
                                edge, edge, gimple *, tree, tree);
static bool abs_replacement (basic_block, basic_block,
                             edge, edge, gimple *, tree, tree);
static bool cond_removal_in_popcount_pattern (basic_block, basic_block,
                                              edge, edge, gimple *, tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
                                    hash_set<tree> *);
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
static hash_set<tree> * get_non_trapping ();
static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree);
static void hoist_adjacent_loads (basic_block, basic_block,
                                  basic_block, basic_block);
static bool gate_hoist_loads (void);

/* This pass tries to transform conditional stores into unconditional
   ones, enabling further simplifications with the simpler then and else
   blocks.  In particular it replaces this:

     bb0:
       if (cond) goto bb2; else goto bb1;
     bb1:
       *p = RHS;
     bb2:

   with

     bb0:
       if (cond) goto bb1; else goto bb2;
     bb1:
       condtmp' = *p;
     bb2:
       condtmp = PHI <RHS, condtmp'>
       *p = condtmp;

   This transformation can only be done under several constraints,
   documented below.
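   At the source level the first form typically comes from code like
   the following (illustrative):

     if (!cond)
       *p = RHS;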
   It also replaces:

     bb0:
       if (cond) goto bb2; else goto bb1;
     bb1:
       *p = RHS1;
       goto bb3;
     bb2:
       *p = RHS2;
     bb3:

   with

     bb0:
       if (cond) goto bb3; else goto bb1;
     bb1:
     bb3:
       condtmp = PHI <RHS1, RHS2>
       *p = condtmp;  */

static unsigned int
tree_ssa_cs_elim (void)
{
  unsigned todo;
  /* ??? We are not interested in loop related info, but the following
     will create it, ICEing otherwise as we didn't init loops with
     pre-headers.  An interfacing issue of find_data_references_in_bb.  */
  loop_optimizer_init (LOOPS_NORMAL);
  scev_initialize ();
  todo = tree_ssa_phiopt_worker (true, false, false);
  scev_finalize ();
  loop_optimizer_finalize ();
  return todo;
}

/* Return the singleton PHI in the SEQ of PHIs for edges E0 and E1,
   i.e. the single PHI whose arguments for E0 and E1 differ; PHIs with
   equal arguments on both edges are skipped.  Return NULL if more than
   one PHI has differing arguments.  */

static gphi *
single_non_singleton_phi_for_edges (gimple_seq seq, edge e0, edge e1)
{
  gimple_stmt_iterator i;
  gphi *phi = NULL;
  if (gimple_seq_singleton_p (seq))
    return as_a <gphi *> (gsi_stmt (gsi_start (seq)));
  for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
    {
      gphi *p = as_a <gphi *> (gsi_stmt (i));
      /* If the PHI arguments are equal then we can skip this PHI.  */
      if (operand_equal_for_phi_arg_p (gimple_phi_arg_def (p, e0->dest_idx),
                                       gimple_phi_arg_def (p, e1->dest_idx)))
        continue;

      /* If we already found a PHI whose two edge arguments differ,
         then there is more than one such PHI and this is not a
         singleton.  */
      if (phi)
        return NULL;

      phi = p;
    }
  return phi;
}

/* The core routine of conditional store replacement and normal
   phi optimizations.  Both share much of the infrastructure in how
   to match applicable basic block patterns.  DO_STORE_ELIM is true
   when we want to do conditional store replacement, false otherwise.
   DO_HOIST_LOADS is true when we want to hoist adjacent loads out
   of diamond control flow patterns, false otherwise.  EARLY_P is true
   when we are called from the early phiopt pass, in which case only a
   subset of the replacements below is attempted.  */
static unsigned int
tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
{
  basic_block bb;
  basic_block *bb_order;
  unsigned n, i;
  bool cfgchanged = false;
  hash_set<tree> *nontrap = 0;

  if (do_store_elim)
    /* Calculate the set of non-trapping memory accesses.  */
    nontrap = get_non_trapping ();

  /* Search every basic block for a COND_EXPR we may be able to optimize.

     We walk the blocks in an order that guarantees that a block with
     a single predecessor is processed before the predecessor.
     This ensures that we collapse inner ifs before visiting the
     outer ones, and also that we do not try to visit a removed
     block.  */
  bb_order = single_pred_before_succ_order ();
  n = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  for (i = 0; i < n; i++)
    {
      gimple *cond_stmt;
      gphi *phi;
      basic_block bb1, bb2;
      edge e1, e2;
      tree arg0, arg1;

      bb = bb_order[i];

      cond_stmt = last_stmt (bb);
      /* Check to see if the last statement is a GIMPLE_COND.  */
      if (!cond_stmt
          || gimple_code (cond_stmt) != GIMPLE_COND)
        continue;

      e1 = EDGE_SUCC (bb, 0);
      bb1 = e1->dest;
      e2 = EDGE_SUCC (bb, 1);
      bb2 = e2->dest;

      /* We cannot do the optimization on abnormal edges.  */
      if ((e1->flags & EDGE_ABNORMAL) != 0
          || (e2->flags & EDGE_ABNORMAL) != 0)
        continue;

      /* Skip if BB1 has no successors, BB2 does not exist, or BB2 has
         no successors.
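         The remaining tests below then match either a triangle, where
         one of BB1/BB2 falls through to the other, or a diamond, where
         BB1 and BB2 have BB3 as their common successor.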
         */
      if (EDGE_COUNT (bb1->succs) == 0
          || bb2 == NULL
          || EDGE_COUNT (bb2->succs) == 0)
        continue;

      /* Find the bb which is the fall through to the other.  */
      if (EDGE_SUCC (bb1, 0)->dest == bb2)
        ;
      else if (EDGE_SUCC (bb2, 0)->dest == bb1)
        {
          std::swap (bb1, bb2);
          std::swap (e1, e2);
        }
      else if (do_store_elim
               && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
        {
          basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

          if (!single_succ_p (bb1)
              || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0
              || !single_succ_p (bb2)
              || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0
              || EDGE_COUNT (bb3->preds) != 2)
            continue;
          if (cond_if_else_store_replacement (bb1, bb2, bb3))
            cfgchanged = true;
          continue;
        }
      else if (do_hoist_loads
               && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
        {
          basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

          if (!FLOAT_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond_stmt)))
              && single_succ_p (bb1)
              && single_succ_p (bb2)
              && single_pred_p (bb1)
              && single_pred_p (bb2)
              && EDGE_COUNT (bb->succs) == 2
              && EDGE_COUNT (bb3->preds) == 2
              /* If one edge or the other is dominant, a conditional move
                 is likely to perform worse than the well-predicted branch.  */
              && !predictable_edge_p (EDGE_SUCC (bb, 0))
              && !predictable_edge_p (EDGE_SUCC (bb, 1)))
            hoist_adjacent_loads (bb, bb1, bb2, bb3);
          continue;
        }
      else
        continue;

      e1 = EDGE_SUCC (bb1, 0);

      /* Make sure that bb1 is just a fall through.  */
      if (!single_succ_p (bb1)
          || (e1->flags & EDGE_FALLTHRU) == 0)
        continue;

      /* Also make sure that bb1 has only one predecessor and that it
         is bb.  */
      if (!single_pred_p (bb1)
          || single_pred (bb1) != bb)
        continue;

      if (do_store_elim)
        {
          /* bb1 is the middle block, bb2 the join block, bb the split block,
             e1 the fallthrough edge from bb1 to bb2.  We can't do the
             optimization if the join block has more than two predecessors.  */
          if (EDGE_COUNT (bb2->preds) > 2)
            continue;
          if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
            cfgchanged = true;
        }
      else
        {
          gimple_seq phis = phi_nodes (bb2);
          gimple_stmt_iterator gsi;
          bool candorest = true;

          /* Value replacement can work with more than one PHI
             so try that first.  */
          if (!early_p)
            for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
              {
                phi = as_a <gphi *> (gsi_stmt (gsi));
                arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
                arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
                if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2)
                  {
                    candorest = false;
                    cfgchanged = true;
                    break;
                  }
              }

          if (!candorest)
            continue;

          phi = single_non_singleton_phi_for_edges (phis, e1, e2);
          if (!phi)
            continue;

          arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
          arg1 = gimple_phi_arg_def (phi, e2->dest_idx);

          /* Something is wrong if we cannot find the arguments in the PHI
             node.  */
          gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE);

          gphi *newphi = factor_out_conditional_conversion (e1, e2, phi,
                                                            arg0, arg1,
                                                            cond_stmt);
          if (newphi != NULL)
            {
              phi = newphi;
              /* factor_out_conditional_conversion may create a new PHI in
                 BB2 and eliminate an existing PHI in BB2.  Recompute values
                 that may be affected by that change.  */
              arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
              arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
              gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE);
            }

          /* Do the replacement of conditional if it can be done.  */
          if (two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
            cfgchanged = true;
          else if (!early_p
                   && conditional_replacement (bb, bb1, e1, e2, phi,
                                               arg0, arg1))
            cfgchanged = true;
          else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
            cfgchanged = true;
          else if (!early_p
                   && cond_removal_in_popcount_pattern (bb, bb1, e1, e2,
                                                        phi, arg0, arg1))
            cfgchanged = true;
          else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
            cfgchanged = true;
        }
    }

  free (bb_order);

  if (do_store_elim)
    delete nontrap;
  /* If the CFG has changed, we should cleanup the CFG.  */
  if (cfgchanged && do_store_elim)
    {
      /* In cond-store replacement we have added some loads on edges
         and new VOPS (as we moved the store, and created a load).  */
      gsi_commit_edge_inserts ();
      return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
    }
  else if (cfgchanged)
    return TODO_cleanup_cfg;
  return 0;
}

/* Replace PHI node element whose edge is E in block BB with variable
   NEW_TREE.  Remove the edge from COND_BLOCK which does not lead to BB
   (COND_BLOCK is known to have two edges, one of which must reach BB).  */

static void
replace_phi_edge_with_variable (basic_block cond_block,
                                edge e, gimple *phi, tree new_tree)
{
  basic_block bb = gimple_bb (phi);
  basic_block block_to_remove;
  gimple_stmt_iterator gsi;

  /* Change the PHI argument to new.  */
  SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);

  /* Remove the empty basic block.  */
  if (EDGE_SUCC (cond_block, 0)->dest == bb)
    {
      EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
      EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      EDGE_SUCC (cond_block, 0)->probability = profile_probability::always ();

      block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
    }
  else
    {
      EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
      EDGE_SUCC (cond_block, 1)->flags
        &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      EDGE_SUCC (cond_block, 1)->probability = profile_probability::always ();

      block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
    }
  delete_basic_block (block_to_remove);

  /* Eliminate the COND_EXPR at the end of COND_BLOCK.  */
  gsi = gsi_last_bb (cond_block);
  gsi_remove (&gsi, true);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
             "COND_EXPR in block %d and PHI in block %d converted to straightline code.\n",
             cond_block->index,
             bb->index);
}

/* PR66726: Factor conversion out of COND_EXPR.  If the arguments of the PHI
   stmt are conversions, factor the conversion out and perform it on the
   result of the PHI stmt instead.  COND_STMT is the controlling predicate.
   Return the newly-created PHI, if any.
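   For example (illustrative; the SSA names are invented), the sequence

     t_3 = (long) a_2;
     # r_4 = PHI <t_3(e0), 0(e1)>

   is rewritten into

     # s_5 = PHI <a_2(e0), 0(e1)>
     r_4 = (long) s_5;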
   */

static gphi *
factor_out_conditional_conversion (edge e0, edge e1, gphi *phi,
                                   tree arg0, tree arg1, gimple *cond_stmt)
{
  gimple *arg0_def_stmt = NULL, *arg1_def_stmt = NULL, *new_stmt;
  tree new_arg0 = NULL_TREE, new_arg1 = NULL_TREE;
  tree temp, result;
  gphi *newphi;
  gimple_stmt_iterator gsi, gsi_for_def;
  location_t locus = gimple_location (phi);
  enum tree_code convert_code;

  /* Handle only PHI statements with two arguments.  TODO: If all
     other arguments to PHI are INTEGER_CST or if their defining
     statements have the same unary operation, we can handle more
     than two arguments too.  */
  if (gimple_phi_num_args (phi) != 2)
    return NULL;

  /* First canonicalize to simplify tests.  */
  if (TREE_CODE (arg0) != SSA_NAME)
    {
      std::swap (arg0, arg1);
      std::swap (e0, e1);
    }

  if (TREE_CODE (arg0) != SSA_NAME
      || (TREE_CODE (arg1) != SSA_NAME
          && TREE_CODE (arg1) != INTEGER_CST))
    return NULL;

  /* Check if arg0 is an SSA_NAME and the stmt which defines arg0 is
     a conversion.  */
  arg0_def_stmt = SSA_NAME_DEF_STMT (arg0);
  if (!gimple_assign_cast_p (arg0_def_stmt))
    return NULL;

  /* Use the RHS as new_arg0.  */
  convert_code = gimple_assign_rhs_code (arg0_def_stmt);
  new_arg0 = gimple_assign_rhs1 (arg0_def_stmt);
  if (convert_code == VIEW_CONVERT_EXPR)
    {
      new_arg0 = TREE_OPERAND (new_arg0, 0);
      if (!is_gimple_reg_type (TREE_TYPE (new_arg0)))
        return NULL;
    }

  if (TREE_CODE (arg1) == SSA_NAME)
    {
      /* Check if arg1 is an SSA_NAME and the stmt which defines arg1
         is a conversion.  */
      arg1_def_stmt = SSA_NAME_DEF_STMT (arg1);
      if (!is_gimple_assign (arg1_def_stmt)
          || gimple_assign_rhs_code (arg1_def_stmt) != convert_code)
        return NULL;

      /* Use the RHS as new_arg1.  */
      new_arg1 = gimple_assign_rhs1 (arg1_def_stmt);
      if (convert_code == VIEW_CONVERT_EXPR)
        new_arg1 = TREE_OPERAND (new_arg1, 0);
    }
  else
    {
      /* If arg1 is an INTEGER_CST, fold it to new type.  */
      if (INTEGRAL_TYPE_P (TREE_TYPE (new_arg0))
          && int_fits_type_p (arg1, TREE_TYPE (new_arg0)))
        {
          if (gimple_assign_cast_p (arg0_def_stmt))
            {
              /* For the INTEGER_CST case, we are just moving the
                 conversion from one place to another, which can often
                 hurt as the conversion moves further away from the
                 statement that computes the value.  So, perform this
                 only if new_arg0 is an operand of COND_STMT, or
                 if arg0_def_stmt is the only non-debug stmt in
                 its basic block, because then it is possible this
                 could enable further optimizations (minmax replacement
                 etc.).  See PR71016.  */
              if (new_arg0 != gimple_cond_lhs (cond_stmt)
                  && new_arg0 != gimple_cond_rhs (cond_stmt)
                  && gimple_bb (arg0_def_stmt) == e0->src)
                {
                  gsi = gsi_for_stmt (arg0_def_stmt);
                  gsi_prev_nondebug (&gsi);
                  if (!gsi_end_p (gsi))
                    return NULL;
                  gsi = gsi_for_stmt (arg0_def_stmt);
                  gsi_next_nondebug (&gsi);
                  if (!gsi_end_p (gsi))
                    return NULL;
                }
              new_arg1 = fold_convert (TREE_TYPE (new_arg0), arg1);
            }
          else
            return NULL;
        }
      else
        return NULL;
    }

  /* If arg0/arg1 have > 1 use, then this transformation actually increases
     the number of expressions evaluated at runtime.  */
  if (!has_single_use (arg0)
      || (arg1_def_stmt && !has_single_use (arg1)))
    return NULL;

  /* If the types of new_arg0 and new_arg1 are different, bail out.  */
  if (!types_compatible_p (TREE_TYPE (new_arg0), TREE_TYPE (new_arg1)))
    return NULL;

  /* Create a new PHI stmt.  */
  result = PHI_RESULT (phi);
  temp = make_ssa_name (TREE_TYPE (new_arg0), NULL);
  newphi = create_phi_node (temp, gimple_bb (phi));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "PHI ");
      print_generic_expr (dump_file, gimple_phi_result (phi));
      fprintf (dump_file,
               " changed to factor conversion out from COND_EXPR.\n");
      fprintf (dump_file, "New stmt with CAST that defines ");
      print_generic_expr (dump_file, result);
      fprintf (dump_file, ".\n");
    }

  /* Remove the old cast(s) that have a single use.  */
  gsi_for_def = gsi_for_stmt (arg0_def_stmt);
  gsi_remove (&gsi_for_def, true);
  release_defs (arg0_def_stmt);

  if (arg1_def_stmt)
    {
      gsi_for_def = gsi_for_stmt (arg1_def_stmt);
      gsi_remove (&gsi_for_def, true);
      release_defs (arg1_def_stmt);
    }

  add_phi_arg (newphi, new_arg0, e0, locus);
  add_phi_arg (newphi, new_arg1, e1, locus);

  /* Create the conversion stmt and insert it.  */
  if (convert_code == VIEW_CONVERT_EXPR)
    {
      temp = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (result), temp);
      new_stmt = gimple_build_assign (result, temp);
    }
  else
    new_stmt = gimple_build_assign (result, convert_code, temp);
  gsi = gsi_after_labels (gimple_bb (phi));
  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);

  /* Remove the original PHI stmt.  */
  gsi = gsi_for_stmt (phi);
  gsi_remove (&gsi, true);
  return newphi;
}

/* Optimize
   # x_5 in range [cst1, cst2] where cst2 = cst1 + 1
   if (x_5 op cstN) # where op is == or != and N is 1 or 2
     goto bb3;
   else
     goto bb4;
   bb3:
   bb4:
   # r_6 = PHI<cst3(2), cst4(3)> # where cst3 == cst4 + 1 or cst4 == cst3 + 1

   to r_6 = x_5 + (min (cst3, cst4) - cst1) or
   r_6 = (min (cst3, cst4) + cst1) - x_5 depending on op, N and which
   of cst3 and cst4 is smaller.  */

static bool
two_value_replacement (basic_block cond_bb, basic_block middle_bb,
                       edge e1, gphi *phi, tree arg0, tree arg1)
{
  /* Only look for adjacent integer constants.  */
  if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
      || !INTEGRAL_TYPE_P (TREE_TYPE (arg1))
      || TREE_CODE (arg0) != INTEGER_CST
      || TREE_CODE (arg1) != INTEGER_CST
      || (tree_int_cst_lt (arg0, arg1)
          ? wi::to_widest (arg0) + 1 != wi::to_widest (arg1)
          : wi::to_widest (arg1) + 1 != wi::to_widest (arg0)))
    return false;

  if (!empty_block_p (middle_bb))
    return false;

  gimple *stmt = last_stmt (cond_bb);
  tree lhs = gimple_cond_lhs (stmt);
  tree rhs = gimple_cond_rhs (stmt);

  if (TREE_CODE (lhs) != SSA_NAME
      || !INTEGRAL_TYPE_P (TREE_TYPE (lhs))
      || TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
      || TREE_CODE (rhs) != INTEGER_CST)
    return false;

  switch (gimple_cond_code (stmt))
    {
    case EQ_EXPR:
    case NE_EXPR:
      break;
    default:
      return false;
    }

  wide_int min, max;
  if (get_range_info (lhs, &min, &max) != VR_RANGE
      || min + 1 != max
      || (wi::to_wide (rhs) != min
          && wi::to_wide (rhs) != max))
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we know when to invert the condition below.  */
  edge true_edge, false_edge;
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
  if ((gimple_cond_code (stmt) == EQ_EXPR)
      ^ (wi::to_wide (rhs) == max)
      ^ (e1 == false_edge))
    std::swap (arg0, arg1);

  tree type;
  if (TYPE_PRECISION (TREE_TYPE (lhs)) == TYPE_PRECISION (TREE_TYPE (arg0)))
    {
      /* Avoid performing the arithmetic in the boolean type, which has
         different semantics; otherwise prefer the unsigned one of the
         two types with the same precision.  */
      if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE
          || !TYPE_UNSIGNED (TREE_TYPE (arg0)))
        type = TREE_TYPE (lhs);
      else
        type = TREE_TYPE (arg0);
    }
  else if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (TREE_TYPE (arg0)))
    type = TREE_TYPE (lhs);
  else
    type = TREE_TYPE (arg0);

  min = wide_int::from (min, TYPE_PRECISION (type),
                        TYPE_SIGN (TREE_TYPE (lhs)));
  wide_int a = wide_int::from (wi::to_wide (arg0), TYPE_PRECISION (type),
                               TYPE_SIGN (TREE_TYPE (arg0)));
  enum tree_code code;
  wi::overflow_type ovf;
  if (tree_int_cst_lt (arg0, arg1))
    {
      code = PLUS_EXPR;
      a -= min;
      if (!TYPE_UNSIGNED (type))
        {
          /* lhs is known to be in range [min, min+1] and we want to add a
             to it.  Check if that operation can overflow for those 2 values
             and if yes, force unsigned type.  */
          wi::add (min + (wi::neg_p (a) ? 0 : 1), a, SIGNED, &ovf);
          if (ovf)
            type = unsigned_type_for (type);
        }
    }
  else
    {
      code = MINUS_EXPR;
      a += min;
      if (!TYPE_UNSIGNED (type))
        {
          /* lhs is known to be in range [min, min+1] and we want to subtract
             it from a.  Check if that operation can overflow for those 2
             values and if yes, force unsigned type.  */
          wi::sub (a, min + (wi::neg_p (min) ? 0 : 1), SIGNED, &ovf);
          if (ovf)
            type = unsigned_type_for (type);
        }
    }

  tree arg = wide_int_to_tree (type, a);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
    lhs = gimplify_build1 (&gsi, NOP_EXPR, type, lhs);
  tree new_rhs;
  if (code == PLUS_EXPR)
    new_rhs = gimplify_build2 (&gsi, PLUS_EXPR, type, lhs, arg);
  else
    new_rhs = gimplify_build2 (&gsi, MINUS_EXPR, type, arg, lhs);
  if (!useless_type_conversion_p (TREE_TYPE (arg0), type))
    new_rhs = gimplify_build1 (&gsi, NOP_EXPR, TREE_TYPE (arg0), new_rhs);

  replace_phi_edge_with_variable (cond_bb, e1, phi, new_rhs);

  /* Note that we optimized this PHI.  */
  return true;
}

/* The function conditional_replacement does the main work of doing the
   conditional replacement.  Return true if the replacement is done.
   Otherwise return false.
   BB is the basic block where the replacement is going to be done.  ARG0
   is argument 0 from PHI.  Likewise for ARG1.  */

static bool
conditional_replacement (basic_block cond_bb, basic_block middle_bb,
                         edge e0, edge e1, gphi *phi,
                         tree arg0, tree arg1)
{
  tree result;
  gimple *stmt;
  gassign *new_stmt;
  tree cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  tree new_var, new_var2;
  bool neg;

  /* FIXME: Gimplification of complex type is too hard for now.  */
  /* We aren't prepared to handle vectors either (and it is a question
     if it would be worthwhile anyway).  */
  if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0))
        || POINTER_TYPE_P (TREE_TYPE (arg0)))
      || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1))
           || POINTER_TYPE_P (TREE_TYPE (arg1))))
    return false;

  /* If the PHI arguments are the constants 0 and 1, or 0 and -1, then
     convert the PHI into uses of the condition.  */
  if ((integer_zerop (arg0) && integer_onep (arg1))
      || (integer_zerop (arg1) && integer_onep (arg0)))
    neg = false;
  else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
           || (integer_zerop (arg1) && integer_all_onesp (arg0)))
    neg = true;
  else
    return false;

  if (!empty_block_p (middle_bb))
    return false;

  /* At this point we know we have a GIMPLE_COND with two successors.
     One successor is BB, the other successor is an empty block which
     falls through into BB.

     There is a single PHI node at the join point (BB) and its arguments
     are constants (0, 1) or (0, -1).

     So, given the condition COND, and the two PHI arguments, we can
     rewrite this PHI into non-branching code:

       dest = (COND) or dest = COND'

     We use the condition as-is if the argument associated with the
     true edge has the value one or the argument associated with the
     false edge has the value zero.  Note that those conditions are not
     the same since only one of the outgoing edges from the GIMPLE_COND
     will directly reach BB and thus be associated with an argument.  */

  stmt = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* To handle special cases like floating point comparison, it is easier and
     less error-prone to build a tree and gimplify it on the fly though it is
     less efficient.  */
  cond = fold_build2_loc (gimple_location (stmt),
                          gimple_cond_code (stmt), boolean_type_node,
                          gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));

  /* We need to know which is the true edge and which is the false
     edge so that we know when to invert the condition below.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
  if ((e0 == true_edge && integer_zerop (arg0))
      || (e0 == false_edge && !integer_zerop (arg0))
      || (e1 == true_edge && integer_zerop (arg1))
      || (e1 == false_edge && !integer_zerop (arg1)))
    cond = fold_build1_loc (gimple_location (stmt),
                            TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);

  if (neg)
    {
      cond = fold_convert_loc (gimple_location (stmt),
                               TREE_TYPE (result), cond);
      cond = fold_build1_loc (gimple_location (stmt),
                              NEGATE_EXPR, TREE_TYPE (cond), cond);
    }

  /* Insert our new statements at the end of conditional block before the
     COND_STMT.  */
  gsi = gsi_for_stmt (stmt);
  new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true,
                                      GSI_SAME_STMT);

  if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var)))
    {
      location_t locus_0, locus_1;

      new_var2 = make_ssa_name (TREE_TYPE (result));
      new_stmt = gimple_build_assign (new_var2, CONVERT_EXPR, new_var);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
      new_var = new_var2;

      /* Set the locus to the first argument, unless it doesn't have one.  */
      locus_0 = gimple_phi_arg_location (phi, 0);
      locus_1 = gimple_phi_arg_location (phi, 1);
      if (locus_0 == UNKNOWN_LOCATION)
        locus_0 = locus_1;
      gimple_set_location (new_stmt, locus_0);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, new_var);

  /* Note that we optimized this PHI.  */
  return true;
}

/* Update *ARG which is defined in STMT so that it contains the
   computed value if that seems profitable.  Return true if the
   statement is made dead by that rewriting.  */

static bool
jump_function_from_stmt (tree *arg, gimple *stmt)
{
  enum tree_code code = gimple_assign_rhs_code (stmt);
  if (code == ADDR_EXPR)
    {
      /* For arg = &p->i transform it to p, if possible.  */
      tree rhs1 = gimple_assign_rhs1 (stmt);
      poly_int64 offset;
      tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (rhs1, 0),
                                                &offset);
      if (tem
          && TREE_CODE (tem) == MEM_REF
          && known_eq (mem_ref_offset (tem) + offset, 0))
        {
          *arg = TREE_OPERAND (tem, 0);
          return true;
        }
    }
  /* TODO: Much like IPA-CP jump-functions we want to handle constant
     additions symbolically here, and we'd need to update the comparison
     code that compares the arg + cst tuples in our caller.  For now the
     code above exactly handles the VEC_BASE pattern from vec.h.  */
  return false;
}

/* RHS is a source argument in a BIT_AND_EXPR which feeds a conditional
   of the form SSA_NAME NE 0.

   If RHS is fed by a simple EQ_EXPR comparison of two values, see if
   the two input values of the EQ_EXPR match arg0 and arg1.

   If so update *code and return TRUE.  Otherwise return FALSE.  */

static bool
rhs_is_fed_for_value_replacement (const_tree arg0, const_tree arg1,
                                  enum tree_code *code, const_tree rhs)
{
  /* Obviously if RHS is not an SSA_NAME, we can't look at the defining
     statement.  */
  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple *def1 = SSA_NAME_DEF_STMT (rhs);

      /* Verify the defining statement has an EQ_EXPR on the RHS.  */
      if (is_gimple_assign (def1) && gimple_assign_rhs_code (def1) == EQ_EXPR)
        {
          /* Finally verify the source operands of the EQ_EXPR are equal
             to arg0 and arg1.  */
          tree op0 = gimple_assign_rhs1 (def1);
          tree op1 = gimple_assign_rhs2 (def1);
          if ((operand_equal_for_phi_arg_p (arg0, op0)
               && operand_equal_for_phi_arg_p (arg1, op1))
              || (operand_equal_for_phi_arg_p (arg0, op1)
                  && operand_equal_for_phi_arg_p (arg1, op0)))
            {
              /* We will perform the optimization.  */
              *code = gimple_assign_rhs_code (def1);
              return true;
            }
        }
    }
  return false;
}

/* Return TRUE if arg0/arg1 are equal to the rhs/lhs or lhs/rhs of COND.

   Also return TRUE if arg0/arg1 are equal to the source arguments of
   an EQ comparison feeding a BIT_AND_EXPR which feeds COND.

   Return FALSE otherwise.  */

static bool
operand_equal_for_value_replacement (const_tree arg0, const_tree arg1,
                                     enum tree_code *code, gimple *cond)
{
  gimple *def;
  tree lhs = gimple_cond_lhs (cond);
  tree rhs = gimple_cond_rhs (cond);

  if ((operand_equal_for_phi_arg_p (arg0, lhs)
       && operand_equal_for_phi_arg_p (arg1, rhs))
      || (operand_equal_for_phi_arg_p (arg1, lhs)
          && operand_equal_for_phi_arg_p (arg0, rhs)))
    return true;

  /* Now handle more complex case where we have an EQ comparison
     which feeds a BIT_AND_EXPR which feeds COND.

     First verify that COND is of the form SSA_NAME NE 0.  */
  if (*code != NE_EXPR || !integer_zerop (rhs)
      || TREE_CODE (lhs) != SSA_NAME)
    return false;

  /* Now ensure that SSA_NAME is set by a BIT_AND_EXPR.
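     E.g. (illustrative):

       t_1 = a_2 == b_3;
       u_4 = t_1 & c_5;
       if (u_4 != 0)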
     */
  def = SSA_NAME_DEF_STMT (lhs);
  if (!is_gimple_assign (def) || gimple_assign_rhs_code (def) != BIT_AND_EXPR)
    return false;

  /* Now verify arg0/arg1 correspond to the source arguments of an
     EQ comparison feeding the BIT_AND_EXPR.  */

  tree tmp = gimple_assign_rhs1 (def);
  if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
    return true;

  tmp = gimple_assign_rhs2 (def);
  if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
    return true;

  return false;
}

/* Returns true if ARG is a neutral element for operation CODE; RIGHT
   is true if ARG is the right-hand operand.  */

static bool
neutral_element_p (tree_code code, tree arg, bool right)
{
  switch (code)
    {
    case PLUS_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      return integer_zerop (arg);

    case LROTATE_EXPR:
    case RROTATE_EXPR:
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      return right && integer_zerop (arg);

    case MULT_EXPR:
      return integer_onep (arg);

    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
      return right && integer_onep (arg);

    case BIT_AND_EXPR:
      return integer_all_onesp (arg);

    default:
      return false;
    }
}

/* Returns true if ARG is an absorbing element for operation CODE; RIGHT
   is true if ARG is the right-hand operand.  RVAL is the other operand,
   needed for the division and modulo cases, where 0 / RVAL and 0 % RVAL
   are only 0 when RVAL is known to be nonzero.  */

static bool
absorbing_element_p (tree_code code, tree arg, bool right, tree rval)
{
  switch (code)
    {
    case BIT_IOR_EXPR:
      return integer_all_onesp (arg);

    case MULT_EXPR:
    case BIT_AND_EXPR:
      return integer_zerop (arg);

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      return !right && integer_zerop (arg);

    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case TRUNC_MOD_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
      return (!right
              && integer_zerop (arg)
              && tree_single_nonzero_warnv_p (rval, NULL));

    default:
      return false;
    }
}

/* The function value_replacement does the main work of doing the value
   replacement.  Return non-zero if the replacement is done.  Otherwise
   return 0.  If we remove the middle basic block, return 2.
   BB is the basic block where the replacement is going to be done.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.  */

static int
value_replacement (basic_block cond_bb, basic_block middle_bb,
                   edge e0, edge e1, gimple *phi,
                   tree arg0, tree arg1)
{
  gimple_stmt_iterator gsi;
  gimple *cond;
  edge true_edge, false_edge;
  enum tree_code code;
  bool empty_or_with_defined_p = true;

  /* If the type says honor signed zeros we cannot do this
     optimization.  */
  if (HONOR_SIGNED_ZEROS (arg1))
    return 0;

  /* If there is a statement in MIDDLE_BB that defines one of the PHI
     arguments, then adjust arg0 or arg1.
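     E.g. arg0 = &p_1->x with a zero overall offset is rewritten to p_1
     by jump_function_from_stmt, so it can be matched against the
     operands of the condition (illustrative).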
     */
  gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
  while (!gsi_end_p (gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      tree lhs;
      gsi_next_nondebug (&gsi);
      if (!is_gimple_assign (stmt))
        {
          if (gimple_code (stmt) != GIMPLE_PREDICT
              && gimple_code (stmt) != GIMPLE_NOP)
            empty_or_with_defined_p = false;
          continue;
        }
      /* Now try to adjust arg0 or arg1 according to the computation
         in the statement.  */
      lhs = gimple_assign_lhs (stmt);
      if (!(lhs == arg0
            && jump_function_from_stmt (&arg0, stmt))
          || (lhs == arg1
              && jump_function_from_stmt (&arg1, stmt)))
        empty_or_with_defined_p = false;
    }

  cond = last_stmt (cond_bb);
  code = gimple_cond_code (cond);

  /* This transformation is only valid for equality comparisons.  */
  if (code != NE_EXPR && code != EQ_EXPR)
    return 0;

  /* We need to know which is the true edge and which is the false
     edge so that we know which PHI argument goes with which outcome.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* At this point we know we have a COND_EXPR with two successors.
     One successor is BB, the other successor is an empty block which
     falls through into BB.

     The condition for the COND_EXPR is known to be NE_EXPR or EQ_EXPR.

     There is a single PHI node at the join point (BB) with two arguments.

     We now need to verify that the two arguments in the PHI node match
     the two arguments to the equality comparison.  */

  if (operand_equal_for_value_replacement (arg0, arg1, &code, cond))
    {
      edge e;
      tree arg;

      /* For NE_EXPR, we want to build an assignment result = arg where
         arg is the PHI argument associated with the true edge.  For
         EQ_EXPR we want the PHI argument associated with the false edge.  */
      e = (code == NE_EXPR ? true_edge : false_edge);

      /* Unfortunately, E may not reach BB (it may instead have gone to
         OTHER_BLOCK).  If that is the case, then we want the single outgoing
         edge from OTHER_BLOCK which reaches BB and represents the desired
         path from COND_BLOCK.  */
      if (e->dest == middle_bb)
        e = single_succ_edge (e->dest);

      /* Now we know the incoming edge to BB that has the argument for the
         RHS of our new assignment statement.  */
      if (e0 == e)
        arg = arg0;
      else
        arg = arg1;

      /* If the middle basic block was empty or only defines the PHI
         arguments, and this is the single PHI whose arguments differ
         for the edges e0 and e1, then we can remove the middle basic
         block.  */
      if (empty_or_with_defined_p
          && single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)),
                                                 e0, e1) == phi)
        {
          replace_phi_edge_with_variable (cond_bb, e1, phi, arg);
          /* Note that we optimized this PHI.  */
          return 2;
        }
      else
        {
          /* Replace the PHI arguments with arg.  */
          SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
          SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "PHI ");
              print_generic_expr (dump_file, gimple_phi_result (phi));
              fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
                       cond_bb->index);
              print_generic_expr (dump_file, arg);
              fprintf (dump_file, ".\n");
            }
          return 1;
        }

    }

  /* Now optimize (x != 0) ? x + y : y to just x + y.
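     Likewise e.g. (x_5 != 0) ? x_5 * y_4 : 0 becomes x_5 * y_4, 0 being
     the absorbing element of the multiplication (illustrative).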
     */
  gsi = gsi_last_nondebug_bb (middle_bb);
  if (gsi_end_p (gsi))
    return 0;

  gimple *assign = gsi_stmt (gsi);
  if (!is_gimple_assign (assign)
      || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
      || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
          && !POINTER_TYPE_P (TREE_TYPE (arg0))))
    return 0;

  /* Punt if there are (degenerate) PHIs in middle_bb; there should not
     be any.  */
  if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
    return 0;

  /* Allow up to 2 cheap preparation statements that prepare argument
     for assign, e.g.:
      if (y_4 != 0)
        goto <bb 3>;
      else
        goto <bb 4>;
     <bb 3>:
      _1 = (int) y_4;
      iftmp.0_6 = x_5(D) r<< _1;
     <bb 4>:
      # iftmp.0_2 = PHI <iftmp.0_6(3), x_5(D)(2)>
     or:
      if (y_3(D) == 0)
        goto <bb 4>;
      else
        goto <bb 3>;
     <bb 3>:
      y_4 = y_3(D) & 31;
      _1 = (int) y_4;
      _6 = x_5(D) r<< _1;
     <bb 4>:
      # _2 = PHI <x_5(D)(2), _6(3)>  */
  gimple *prep_stmt[2] = { NULL, NULL };
  int prep_cnt;
  for (prep_cnt = 0; ; prep_cnt++)
    {
      gsi_prev_nondebug (&gsi);
      if (gsi_end_p (gsi))
        break;

      gimple *g = gsi_stmt (gsi);
      if (gimple_code (g) == GIMPLE_LABEL)
        break;

      if (prep_cnt == 2 || !is_gimple_assign (g))
        return 0;

      tree lhs = gimple_assign_lhs (g);
      tree rhs1 = gimple_assign_rhs1 (g);
      use_operand_p use_p;
      gimple *use_stmt;
      if (TREE_CODE (lhs) != SSA_NAME
          || TREE_CODE (rhs1) != SSA_NAME
          || !INTEGRAL_TYPE_P (TREE_TYPE (lhs))
          || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
          || !single_imm_use (lhs, &use_p, &use_stmt)
          || use_stmt != (prep_cnt ? prep_stmt[prep_cnt - 1] : assign))
        return 0;
      switch (gimple_assign_rhs_code (g))
        {
        CASE_CONVERT:
          break;
        case PLUS_EXPR:
        case BIT_AND_EXPR:
        case BIT_IOR_EXPR:
        case BIT_XOR_EXPR:
          if (TREE_CODE (gimple_assign_rhs2 (g)) != INTEGER_CST)
            return 0;
          break;
        default:
          return 0;
        }
      prep_stmt[prep_cnt] = g;
    }

  /* Only transform if it removes the condition.  */
  if (!single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)), e0, e1))
    return 0;

  /* Size-wise, this is always profitable.  */
  if (optimize_bb_for_speed_p (cond_bb)
      /* The special case is useless if it has a low probability.  */
      && profile_status_for_fn (cfun) != PROFILE_ABSENT
      && EDGE_PRED (middle_bb, 0)->probability < profile_probability::even ()
      /* If assign is cheap, there is no point avoiding it.  */
      && estimate_num_insns (bb_seq (middle_bb), &eni_time_weights)
         >= 3 * estimate_num_insns (cond, &eni_time_weights))
    return 0;

  tree lhs = gimple_assign_lhs (assign);
  tree rhs1 = gimple_assign_rhs1 (assign);
  tree rhs2 = gimple_assign_rhs2 (assign);
  enum tree_code code_def = gimple_assign_rhs_code (assign);
  tree cond_lhs = gimple_cond_lhs (cond);
  tree cond_rhs = gimple_cond_rhs (cond);

  /* Propagate the cond_rhs constant through preparation stmts,
     make sure UB isn't invoked while doing that.
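     E.g. in the second example above, with cond y_3(D) == 0 we compute
     0 & 31 == 0 and (int) 0 == 0, showing that on the edge that skips
     the middle block the rotate count is 0 and x_5 r<< 0 == x_5
     (illustrative).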
     */
  for (int i = prep_cnt - 1; i >= 0; --i)
    {
      gimple *g = prep_stmt[i];
      tree grhs1 = gimple_assign_rhs1 (g);
      if (!operand_equal_for_phi_arg_p (cond_lhs, grhs1))
        return 0;
      cond_lhs = gimple_assign_lhs (g);
      cond_rhs = fold_convert (TREE_TYPE (grhs1), cond_rhs);
      if (TREE_CODE (cond_rhs) != INTEGER_CST
          || TREE_OVERFLOW (cond_rhs))
        return 0;
      if (gimple_assign_rhs_class (g) == GIMPLE_BINARY_RHS)
        {
          cond_rhs = int_const_binop (gimple_assign_rhs_code (g), cond_rhs,
                                      gimple_assign_rhs2 (g));
          if (TREE_OVERFLOW (cond_rhs))
            return 0;
        }
      cond_rhs = fold_convert (TREE_TYPE (cond_lhs), cond_rhs);
      if (TREE_CODE (cond_rhs) != INTEGER_CST
          || TREE_OVERFLOW (cond_rhs))
        return 0;
    }

  if (((code == NE_EXPR && e1 == false_edge)
       || (code == EQ_EXPR && e1 == true_edge))
      && arg0 == lhs
      && ((arg1 == rhs1
           && operand_equal_for_phi_arg_p (rhs2, cond_lhs)
           && neutral_element_p (code_def, cond_rhs, true))
          || (arg1 == rhs2
              && operand_equal_for_phi_arg_p (rhs1, cond_lhs)
              && neutral_element_p (code_def, cond_rhs, false))
          || (operand_equal_for_phi_arg_p (arg1, cond_rhs)
              && ((operand_equal_for_phi_arg_p (rhs2, cond_lhs)
                   && absorbing_element_p (code_def, cond_rhs, true, rhs2))
                  || (operand_equal_for_phi_arg_p (rhs1, cond_lhs)
                      && absorbing_element_p (code_def,
                                              cond_rhs, false, rhs2))))))
    {
      gsi = gsi_for_stmt (cond);
      /* Moving ASSIGN might change VR of lhs, e.g. when moving u_6
         def-stmt in:
         if (n_5 != 0)
           goto <bb 3>;
         else
           goto <bb 4>;

         <bb 3>:
         # RANGE [0, 4294967294]
         u_6 = n_5 + 4294967295;

         <bb 4>:
         # u_3 = PHI <u_6(3), 4294967295(2)>  */
      reset_flow_sensitive_info (lhs);
      if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
        {
          /* If available, we can use VR of phi result at least.  */
          tree phires = gimple_phi_result (phi);
          struct range_info_def *phires_range_info
            = SSA_NAME_RANGE_INFO (phires);
          if (phires_range_info)
            duplicate_ssa_name_range_info (lhs, SSA_NAME_RANGE_TYPE (phires),
                                           phires_range_info);
        }
      gimple_stmt_iterator gsi_from;
      for (int i = prep_cnt - 1; i >= 0; --i)
        {
          tree plhs = gimple_assign_lhs (prep_stmt[i]);
          reset_flow_sensitive_info (plhs);
          gsi_from = gsi_for_stmt (prep_stmt[i]);
          gsi_move_before (&gsi_from, &gsi);
        }
      gsi_from = gsi_for_stmt (assign);
      gsi_move_before (&gsi_from, &gsi);
      replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
      return 2;
    }

  return 0;
}

/* The function minmax_replacement does the main work of doing the minmax
   replacement.  Return true if the replacement is done.  Otherwise return
   false.
   BB is the basic block where the replacement is going to be done.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.  */

static bool
minmax_replacement (basic_block cond_bb, basic_block middle_bb,
                    edge e0, edge e1, gimple *phi,
                    tree arg0, tree arg1)
{
  tree result, type, rhs;
  gcond *cond;
  gassign *new_stmt;
  edge true_edge, false_edge;
  enum tree_code cmp, minmax, ass_code;
  tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false;
  gimple_stmt_iterator gsi, gsi_from;

  type = TREE_TYPE (PHI_RESULT (phi));

  /* The optimization may be unsafe due to NaNs.
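     For example, if B is a NaN then A < B ? A : B yields B, while
     MIN_EXPR makes no guarantee which operand is returned; likewise
     -0.0 and 0.0 compare equal, so the sign of the result could change.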
     */
  if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
    return false;

  cond = as_a <gcond *> (last_stmt (cond_bb));
  cmp = gimple_cond_code (cond);
  rhs = gimple_cond_rhs (cond);

  /* Turn EQ/NE of extreme values to order comparisons.  */
  if ((cmp == NE_EXPR || cmp == EQ_EXPR)
      && TREE_CODE (rhs) == INTEGER_CST
      && INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
    {
      if (wi::eq_p (wi::to_wide (rhs), wi::min_value (TREE_TYPE (rhs))))
        {
          cmp = (cmp == EQ_EXPR) ? LT_EXPR : GE_EXPR;
          rhs = wide_int_to_tree (TREE_TYPE (rhs),
                                  wi::min_value (TREE_TYPE (rhs)) + 1);
        }
      else if (wi::eq_p (wi::to_wide (rhs), wi::max_value (TREE_TYPE (rhs))))
        {
          cmp = (cmp == EQ_EXPR) ? GT_EXPR : LE_EXPR;
          rhs = wide_int_to_tree (TREE_TYPE (rhs),
                                  wi::max_value (TREE_TYPE (rhs)) - 1);
        }
    }

  /* This transformation is only valid for order comparisons.  Record which
     operand is smaller/larger if the result of the comparison is true.  */
  alt_smaller = NULL_TREE;
  alt_larger = NULL_TREE;
  if (cmp == LT_EXPR || cmp == LE_EXPR)
    {
      smaller = gimple_cond_lhs (cond);
      larger = rhs;
      /* If we have smaller < CST it is equivalent to smaller <= CST-1.
         Likewise smaller <= CST is equivalent to smaller < CST+1.  */
      if (TREE_CODE (larger) == INTEGER_CST
          && INTEGRAL_TYPE_P (TREE_TYPE (larger)))
        {
          if (cmp == LT_EXPR)
            {
              wi::overflow_type overflow;
              wide_int alt = wi::sub (wi::to_wide (larger), 1,
                                      TYPE_SIGN (TREE_TYPE (larger)),
                                      &overflow);
              if (! overflow)
                alt_larger = wide_int_to_tree (TREE_TYPE (larger), alt);
            }
          else
            {
              wi::overflow_type overflow;
              wide_int alt = wi::add (wi::to_wide (larger), 1,
                                      TYPE_SIGN (TREE_TYPE (larger)),
                                      &overflow);
              if (! overflow)
                alt_larger = wide_int_to_tree (TREE_TYPE (larger), alt);
            }
        }
    }
  else if (cmp == GT_EXPR || cmp == GE_EXPR)
    {
      smaller = rhs;
      larger = gimple_cond_lhs (cond);
      /* If we have larger > CST it is equivalent to larger >= CST+1.
         Likewise larger >= CST is equivalent to larger > CST-1.  */
      if (TREE_CODE (smaller) == INTEGER_CST
          && INTEGRAL_TYPE_P (TREE_TYPE (smaller)))
        {
          wi::overflow_type overflow;
          if (cmp == GT_EXPR)
            {
              wide_int alt = wi::add (wi::to_wide (smaller), 1,
                                      TYPE_SIGN (TREE_TYPE (smaller)),
                                      &overflow);
              if (! overflow)
                alt_smaller = wide_int_to_tree (TREE_TYPE (smaller), alt);
            }
          else
            {
              wide_int alt = wi::sub (wi::to_wide (smaller), 1,
                                      TYPE_SIGN (TREE_TYPE (smaller)),
                                      &overflow);
              if (! overflow)
                alt_smaller = wide_int_to_tree (TREE_TYPE (smaller), alt);
            }
        }
    }
  else
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we know whether to emit a MIN or a MAX.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* Forward the edges over the middle basic block.
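     That way TRUE_EDGE and FALSE_EDGE both point at the join block even
     when one of the two paths passes through MIDDLE_BB.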
     */
  if (true_edge->dest == middle_bb)
    true_edge = EDGE_SUCC (true_edge->dest, 0);
  if (false_edge->dest == middle_bb)
    false_edge = EDGE_SUCC (false_edge->dest, 0);

  if (true_edge == e0)
    {
      gcc_assert (false_edge == e1);
      arg_true = arg0;
      arg_false = arg1;
    }
  else
    {
      gcc_assert (false_edge == e0);
      gcc_assert (true_edge == e1);
      arg_true = arg1;
      arg_false = arg0;
    }

  if (empty_block_p (middle_bb))
    {
      if ((operand_equal_for_phi_arg_p (arg_true, smaller)
           || (alt_smaller
               && operand_equal_for_phi_arg_p (arg_true, alt_smaller)))
          && (operand_equal_for_phi_arg_p (arg_false, larger)
              || (alt_larger
                  && operand_equal_for_phi_arg_p (arg_false, alt_larger))))
        {
          /* Case

             if (smaller < larger)
               rslt = smaller;
             else
               rslt = larger;  */
          minmax = MIN_EXPR;
        }
      else if ((operand_equal_for_phi_arg_p (arg_false, smaller)
                || (alt_smaller
                    && operand_equal_for_phi_arg_p (arg_false, alt_smaller)))
               && (operand_equal_for_phi_arg_p (arg_true, larger)
                   || (alt_larger
                       && operand_equal_for_phi_arg_p (arg_true, alt_larger))))
        minmax = MAX_EXPR;
      else
        return false;
    }
  else
    {
      /* Recognize the following case, assuming d <= u:

         if (a <= u)
           b = MAX (a, d);
         x = PHI <b, u>

         This is equivalent to

         b = MAX (a, d);
         x = MIN (b, u);  */

      gimple *assign = last_and_only_stmt (middle_bb);
      tree lhs, op0, op1, bound;

      if (!assign
          || gimple_code (assign) != GIMPLE_ASSIGN)
        return false;

      lhs = gimple_assign_lhs (assign);
      ass_code = gimple_assign_rhs_code (assign);
      if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
        return false;
      op0 = gimple_assign_rhs1 (assign);
      op1 = gimple_assign_rhs2 (assign);

      if (true_edge->src == middle_bb)
        {
          /* We got here if the condition is true, i.e., SMALLER < LARGER.  */
          if (!operand_equal_for_phi_arg_p (lhs, arg_true))
            return false;

          if (operand_equal_for_phi_arg_p (arg_false, larger)
              || (alt_larger
                  && operand_equal_for_phi_arg_p (arg_false, alt_larger)))
            {
              /* Case

                 if (smaller < larger)
                   {
                     r' = MAX_EXPR (smaller, bound)
                   }
                 r = PHI <r', larger>  --> to be turned to MIN_EXPR.  */
              if (ass_code != MAX_EXPR)
                return false;

              minmax = MIN_EXPR;
              if (operand_equal_for_phi_arg_p (op0, smaller)
                  || (alt_smaller
                      && operand_equal_for_phi_arg_p (op0, alt_smaller)))
                bound = op1;
              else if (operand_equal_for_phi_arg_p (op1, smaller)
                       || (alt_smaller
                           && operand_equal_for_phi_arg_p (op1, alt_smaller)))
                bound = op0;
              else
                return false;

              /* We need BOUND <= LARGER.  */
              if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
                                                  bound, larger)))
                return false;
            }
          else if (operand_equal_for_phi_arg_p (arg_false, smaller)
                   || (alt_smaller
                       && operand_equal_for_phi_arg_p (arg_false, alt_smaller)))
            {
              /* Case

                 if (smaller < larger)
                   {
                     r' = MIN_EXPR (larger, bound)
                   }
                 r = PHI <r', smaller>  --> to be turned to MAX_EXPR.  */
              if (ass_code != MIN_EXPR)
                return false;

              minmax = MAX_EXPR;
              if (operand_equal_for_phi_arg_p (op0, larger)
                  || (alt_larger
                      && operand_equal_for_phi_arg_p (op0, alt_larger)))
                bound = op1;
              else if (operand_equal_for_phi_arg_p (op1, larger)
                       || (alt_larger
                           && operand_equal_for_phi_arg_p (op1, alt_larger)))
                bound = op0;
              else
                return false;

              /* We need BOUND >= SMALLER.  */
              if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
                                                  bound, smaller)))
                return false;
            }
          else
            return false;
        }
      else
        {
          /* We got here if the condition is false, i.e., SMALLER > LARGER.  */
          if (!operand_equal_for_phi_arg_p (lhs, arg_false))
            return false;

          if (operand_equal_for_phi_arg_p (arg_true, larger)
              || (alt_larger
                  && operand_equal_for_phi_arg_p (arg_true, alt_larger)))
            {
              /* Case

                 if (smaller > larger)
                   {
                     r' = MIN_EXPR (smaller, bound)
                   }
                 r = PHI <r', larger>  --> to be turned to MAX_EXPR.  */
              if (ass_code != MIN_EXPR)
                return false;

              minmax = MAX_EXPR;
              if (operand_equal_for_phi_arg_p (op0, smaller)
                  || (alt_smaller
                      && operand_equal_for_phi_arg_p (op0, alt_smaller)))
                bound = op1;
              else if (operand_equal_for_phi_arg_p (op1, smaller)
                       || (alt_smaller
                           && operand_equal_for_phi_arg_p (op1, alt_smaller)))
                bound = op0;
              else
                return false;

              /* We need BOUND >= LARGER.  */
              if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
                                                  bound, larger)))
                return false;
            }
          else if (operand_equal_for_phi_arg_p (arg_true, smaller)
                   || (alt_smaller
                       && operand_equal_for_phi_arg_p (arg_true, alt_smaller)))
            {
              /* Case

                 if (smaller > larger)
                   {
                     r' = MAX_EXPR (larger, bound)
                   }
                 r = PHI <r', smaller>  --> to be turned to MIN_EXPR.  */
              if (ass_code != MAX_EXPR)
                return false;

              minmax = MIN_EXPR;
              if (operand_equal_for_phi_arg_p (op0, larger))
                bound = op1;
              else if (operand_equal_for_phi_arg_p (op1, larger))
                bound = op0;
              else
                return false;

              /* We need BOUND <= SMALLER.  */
              if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
                                                  bound, smaller)))
                return false;
            }
          else
            return false;
        }

      /* Move the statement from the middle block.  */
      gsi = gsi_last_bb (cond_bb);
      gsi_from = gsi_last_nondebug_bb (middle_bb);
      reset_flow_sensitive_info (SINGLE_SSA_TREE_OPERAND (gsi_stmt (gsi_from),
                                                          SSA_OP_DEF));
      gsi_move_before (&gsi_from, &gsi);
    }

  /* Create an SSA var to hold the min/max result.  If we're the only
     things setting the target PHI, then we can clone the PHI
     variable.  Otherwise we must create a new one.  */
  result = PHI_RESULT (phi);
  if (EDGE_COUNT (gimple_bb (phi)->preds) == 2)
    result = duplicate_ssa_name (result, NULL);
  else
    result = make_ssa_name (TREE_TYPE (result));

  /* Emit the statement to compute min/max.  */
  new_stmt = gimple_build_assign (result, minmax, arg0, arg1);
  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  replace_phi_edge_with_variable (cond_bb, e1, phi, result);

  return true;
}

/* Convert

   <bb 2>
   if (b_4(D) != 0)
     goto <bb 3>
   else
     goto <bb 4>

   <bb 3>
   _2 = (unsigned long) b_4(D);
   _9 = __builtin_popcountl (_2);
   OR
   _9 = __builtin_popcountl (b_4(D));

   <bb 4>
   c_12 = PHI <0(2), _9(3)>

   Into
   <bb 2>
   _2 = (unsigned long) b_4(D);
   _9 = __builtin_popcountl (_2);
   OR
   _9 = __builtin_popcountl (b_4(D));

   <bb 4>
   c_12 = PHI <_9(2)>
*/

static bool
cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
                                  edge e1, edge e2,
                                  gimple *phi, tree arg0, tree arg1)
{
  gimple *cond;
  gimple_stmt_iterator gsi, gsi_from;
  gimple *popcount;
  gimple *cast = NULL;
  tree lhs, arg;

  /* Check that
     _2 = (unsigned long) b_4(D);
     _9 = __builtin_popcountl (_2);
     OR
     _9 = __builtin_popcountl (b_4(D));
     are the only stmts in the middle_bb.  */

  gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
  if (gsi_end_p (gsi))
    return false;
  cast = gsi_stmt (gsi);
  gsi_next_nondebug (&gsi);
  if (!gsi_end_p (gsi))
    {
      popcount = gsi_stmt (gsi);
      gsi_next_nondebug (&gsi);
      if (!gsi_end_p (gsi))
        return false;
    }
  else
    {
      popcount = cast;
      cast = NULL;
    }

  /* Check that we have a popcount builtin.  */
  if (!is_gimple_call (popcount))
    return false;
  combined_fn cfn = gimple_call_combined_fn (popcount);
  switch (cfn)
    {
    CASE_CFN_POPCOUNT:
      break;
    default:
      return false;
    }

  arg = gimple_call_arg (popcount, 0);
  lhs = gimple_get_lhs (popcount);

  if (cast)
    {
      /* We have a cast stmt feeding popcount builtin.  */
      /* Check that we have a cast prior to that.  */
      if (gimple_code (cast) != GIMPLE_ASSIGN
          || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (cast)))
        return false;
      /* Result of the cast stmt is the argument to the builtin.  */
      if (arg != gimple_assign_lhs (cast))
        return false;
      arg = gimple_assign_rhs1 (cast);
    }

  cond = last_stmt (cond_bb);

  /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount
     builtin.  */
  if (gimple_code (cond) != GIMPLE_COND
      || (gimple_cond_code (cond) != NE_EXPR
          && gimple_cond_code (cond) != EQ_EXPR)
      || !integer_zerop (gimple_cond_rhs (cond))
      || arg != gimple_cond_lhs (cond))
    return false;

  /* Canonicalize.  */
  if ((e2->flags & EDGE_TRUE_VALUE
       && gimple_cond_code (cond) == NE_EXPR)
      || (e1->flags & EDGE_TRUE_VALUE
          && gimple_cond_code (cond) == EQ_EXPR))
    {
      std::swap (arg0, arg1);
      std::swap (e1, e2);
    }

  /* Check PHI arguments.  */
  if (lhs != arg0 || !integer_zerop (arg1))
    return false;

  /* And insert the popcount builtin and cast stmt before the condition
     in cond_bb.
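     This is safe because __builtin_popcount (0) == 0, so the result is
     unchanged on the path that used to skip the middle block; the
     flow-sensitive info of the moved defs is reset below since they now
     execute unconditionally.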
     */
  gsi = gsi_last_bb (cond_bb);
  if (cast)
    {
      gsi_from = gsi_for_stmt (cast);
      gsi_move_before (&gsi_from, &gsi);
      reset_flow_sensitive_info (gimple_get_lhs (cast));
    }
  gsi_from = gsi_for_stmt (popcount);
  gsi_move_before (&gsi_from, &gsi);
  reset_flow_sensitive_info (gimple_get_lhs (popcount));

  /* Now update the PHI and remove unneeded bbs.  */
  replace_phi_edge_with_variable (cond_bb, e2, phi, lhs);
  return true;
}

/* The function abs_replacement does the main work of doing the absolute
   replacement.  Return true if the replacement is done.  Otherwise return
   false.
   bb is the basic block where the replacement is going to be done.  arg0
   is argument 0 from the phi.  Likewise for arg1.  */

static bool
abs_replacement (basic_block cond_bb, basic_block middle_bb,
                 edge e0 ATTRIBUTE_UNUSED, edge e1,
                 gimple *phi, tree arg0, tree arg1)
{
  tree result;
  gassign *new_stmt;
  gimple *cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  gimple *assign;
  edge e;
  tree rhs, lhs;
  bool negate;
  enum tree_code cond_code;

  /* If the type says honor signed zeros we cannot do this
     optimization.  */
  if (HONOR_SIGNED_ZEROS (arg1))
    return false;

  /* OTHER_BLOCK must have only one executable statement which must have the
     form arg0 = -arg1 or arg1 = -arg0.  */

  assign = last_and_only_stmt (middle_bb);
  /* If we did not find the proper negation assignment, then we cannot
     optimize.  */
  if (assign == NULL)
    return false;

  /* If we got here, then we have found the only executable statement
     in OTHER_BLOCK.  If it is anything other than arg0 = -arg1 or
     arg1 = -arg0, then we cannot optimize.  */
  if (gimple_code (assign) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (assign);

  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
    return false;

  rhs = gimple_assign_rhs1 (assign);

  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
  if (!(lhs == arg0 && rhs == arg1)
      && !(lhs == arg1 && rhs == arg0))
    return false;

  cond = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* Only relationals comparing arg[01] against zero are interesting.  */
  cond_code = gimple_cond_code (cond);
  if (cond_code != GT_EXPR && cond_code != GE_EXPR
      && cond_code != LT_EXPR && cond_code != LE_EXPR)
    return false;

  /* Make sure the conditional is arg[01] OP y.  */
  if (gimple_cond_lhs (cond) != rhs)
    return false;

  if (FLOAT_TYPE_P (TREE_TYPE (gimple_cond_rhs (cond)))
      ? real_zerop (gimple_cond_rhs (cond))
      : integer_zerop (gimple_cond_rhs (cond)))
    ;
  else
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we know if we have abs or negative abs.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* For GT_EXPR/GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
     will need to negate the result.  Similarly for LT_EXPR/LE_EXPR if
     the false edge goes to OTHER_BLOCK.
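     E.g. x_3 < 0 ? -x_3 : x_3 becomes ABS_EXPR <x_3>, while
     x_3 > 0 ? -x_3 : x_3 becomes the negation of ABS_EXPR <x_3>
     (illustrative).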

  /* For GT_EXPR/GE_EXPR, if the true edge goes to MIDDLE_BB, then we
     will need to negate the result.  Similarly for LT_EXPR/LE_EXPR if
     the false edge goes to MIDDLE_BB.  */
  if (cond_code == GT_EXPR || cond_code == GE_EXPR)
    e = true_edge;
  else
    e = false_edge;

  if (e->dest == middle_bb)
    negate = true;
  else
    negate = false;

  /* If the source negates only when the operand is positive, then an
     ABS_EXPR would introduce a negation (of e.g. INT_MIN) that the
     original code never performed; make sure not to introduce such
     undefined behavior.
     ??? We could use range info if present to check for arg1 == INT_MIN.  */
  if (negate
      && (ANY_INTEGRAL_TYPE_P (TREE_TYPE (arg1))
	  && ! TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg1))))
    return false;

  result = duplicate_ssa_name (result, NULL);

  if (negate)
    lhs = make_ssa_name (TREE_TYPE (result));
  else
    lhs = result;

  /* Build the modify expression with abs expression.  */
  new_stmt = gimple_build_assign (lhs, ABS_EXPR, rhs);

  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  if (negate)
    {
      /* GSI points at the recently added ABS_EXPR statement (which we
	 know is the first statement in the block); insert the negation
	 right after it.  */
      new_stmt = gimple_build_assign (result, NEGATE_EXPR, lhs);

      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, result);

  /* Note that we optimized this PHI.  */
  return true;
}

/* Auxiliary functions to determine the set of memory accesses which
   can't trap because they are preceded by accesses to the same memory
   portion.  We do that for MEM_REFs, so we only need to track
   the SSA_NAME of the pointer indirectly referenced.  The algorithm
   is simply a walk over all instructions in dominator order.  When
   we see a MEM_REF we determine if we've already seen the same
   ref anywhere up to the root of the dominator tree.  If we have, the
   current access can't trap.  If we haven't seen any dominating access,
   the current access might trap, but might also make later accesses
   non-trapping, so we remember it.  We need to be careful with loads
   or stores, for instance a load might not trap, while a store would,
   so if we see a dominating read access this doesn't mean that a later
   write access would not trap.  Hence we also need to differentiate the
   type of access(es) seen.

   ??? We currently are very conservative and assume that a load might
   trap even if a store doesn't (write-only memory).  This probably is
   overly conservative.  */

/* A hash-table of SSA_NAMEs, and in which basic block a MEM_REF
   through it was seen, which would constitute a no-trap region for
   same accesses.  */
struct name_to_bb
{
  unsigned int ssa_name_ver;
  unsigned int phase;
  bool store;
  HOST_WIDE_INT offset, size;
  basic_block bb;
};

/* Hashtable helpers.  */

struct ssa_names_hasher : free_ptr_hash <name_to_bb>
{
  static inline hashval_t hash (const name_to_bb *);
  static inline bool equal (const name_to_bb *, const name_to_bb *);
};

/* Used for quick clearing of the hash-table when we see calls.
   Hash entries with phase < nt_call_phase are invalid.  */
static unsigned int nt_call_phase;

/* The hash function.  */

inline hashval_t
ssa_names_hasher::hash (const name_to_bb *n)
{
  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
	 ^ (n->offset << 6) ^ (n->size << 3);
}
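
/* Editorial illustration: because the store flag participates in the
   hash above and in the equality function below, a load and a store
   through the same pointer with the same offset and size, e.g.
   tmp_2 = *p_1 versus *p_1 = tmp_2, occupy two distinct entries in the
   table, matching the conservative load/store separation described
   above.  */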

/* The equality function of *P1 and *P2.  */

inline bool
ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)
{
  return n1->ssa_name_ver == n2->ssa_name_ver
	 && n1->store == n2->store
	 && n1->offset == n2->offset
	 && n1->size == n2->size;
}

class nontrapping_dom_walker : public dom_walker
{
public:
  nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}

  virtual edge before_dom_children (basic_block);
  virtual void after_dom_children (basic_block);

private:

  /* We see the expression EXP in basic block BB.  If it's an interesting
     expression (a MEM_REF through an SSA_NAME) possibly insert the
     expression into the set NONTRAP or the hash table of seen expressions.
     STORE is true if this expression is on the LHS, otherwise it's on
     the RHS.  */
  void add_or_mark_expr (basic_block, tree, bool);

  hash_set<tree> *m_nontrapping;

  /* The hash table for remembering what we've seen.  */
  hash_table<ssa_names_hasher> m_seen_ssa_names;
};

/* Called by walk_dominator_tree, when entering the block BB.  */
edge
nontrapping_dom_walker::before_dom_children (basic_block bb)
{
  edge e;
  edge_iterator ei;
  gimple_stmt_iterator gsi;

  /* If we haven't seen all our predecessors, clear the hash-table.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if ((((size_t)e->src->aux) & 2) == 0)
      {
	nt_call_phase++;
	break;
      }

  /* Mark this BB as being on the path to the dominator root and as
     visited.  */
  bb->aux = (void*)(1 | 2);

  /* And walk the statements in order.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if ((gimple_code (stmt) == GIMPLE_ASM && gimple_vdef (stmt))
	  || (is_gimple_call (stmt)
	      && (!nonfreeing_call_p (stmt) || !nonbarrier_call_p (stmt))))
	nt_call_phase++;
      else if (gimple_assign_single_p (stmt) && !gimple_has_volatile_ops (stmt))
	{
	  add_or_mark_expr (bb, gimple_assign_lhs (stmt), true);
	  add_or_mark_expr (bb, gimple_assign_rhs1 (stmt), false);
	}
    }
  return NULL;
}

/* Called by walk_dominator_tree, when basic block BB is exited.  */
void
nontrapping_dom_walker::after_dom_children (basic_block bb)
{
  /* This BB isn't on the path to the dominator root anymore.  */
  bb->aux = (void*)2;
}

/* We see the expression EXP in basic block BB.  If it's an interesting
   expression (a MEM_REF through an SSA_NAME) possibly insert the
   expression into the set NONTRAP or the hash table of seen expressions.
   STORE is true if this expression is on the LHS, otherwise it's on
   the RHS.  */
void
nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)
{
  HOST_WIDE_INT size;

  if (TREE_CODE (exp) == MEM_REF
      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
    {
      tree name = TREE_OPERAND (exp, 0);
      struct name_to_bb map;
      name_to_bb **slot;
      struct name_to_bb *n2bb;
      basic_block found_bb = 0;
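
      /* Editorial note, for illustration: NT_CALL_PHASE implements a
	 cheap wholesale invalidation.  A possibly-freeing call between
	 two identical accesses bumps the phase, so an entry recorded
	 before the call (phase < nt_call_phase) no longer proves a
	 later access non-trapping.  */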

      /* Try to find the last seen MEM_REF through the same
	 SSA_NAME, which can trap.  */
      map.ssa_name_ver = SSA_NAME_VERSION (name);
      map.phase = 0;
      map.bb = 0;
      map.store = store;
      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
      map.size = size;

      slot = m_seen_ssa_names.find_slot (&map, INSERT);
      n2bb = *slot;
      if (n2bb && n2bb->phase >= nt_call_phase)
	found_bb = n2bb->bb;

      /* If we've found a trapping MEM_REF, _and_ it dominates EXP
	 (it's in a basic block on the path from us to the dominator root)
	 then we can't trap.  */
      if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
	{
	  m_nontrapping->add (exp);
	}
      else
	{
	  /* EXP might trap, so insert it into the hash table.  */
	  if (n2bb)
	    {
	      n2bb->phase = nt_call_phase;
	      n2bb->bb = bb;
	    }
	  else
	    {
	      n2bb = XNEW (struct name_to_bb);
	      n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
	      n2bb->phase = nt_call_phase;
	      n2bb->bb = bb;
	      n2bb->store = store;
	      n2bb->offset = map.offset;
	      n2bb->size = size;
	      *slot = n2bb;
	    }
	}
    }
}

/* This is the entry point for gathering non-trapping memory accesses.
   It will do a dominator walk over the whole function, and it will
   make use of the bb->aux pointers.  It returns a set of trees
   (the MEM_REFs themselves) which can't trap.  */
static hash_set<tree> *
get_non_trapping (void)
{
  nt_call_phase = 0;
  hash_set<tree> *nontrap = new hash_set<tree>;
  /* We're going to do a dominator walk, so ensure that we have
     dominance information.  */
  calculate_dominance_info (CDI_DOMINATORS);

  nontrapping_dom_walker (CDI_DOMINATORS, nontrap)
    .walk (cfun->cfg->x_entry_block_ptr);

  clear_aux_for_blocks ();
  return nontrap;
}

/* Do the main work of conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1)
   MIDDLE_BB:
     something
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that MIDDLE_BB contains only one store, that this store
   doesn't trap (not via TREE_THIS_NOTRAP, but by checking whether an
   access to the same memory location dominates us), and that the store
   has a "simple" RHS.  */

static bool
cond_store_replacement (basic_block middle_bb, basic_block join_bb,
			edge e0, edge e1, hash_set<tree> *nontrap)
{
  gimple *assign = last_and_only_stmt (middle_bb);
  tree lhs, rhs, name, name2;
  gphi *newphi;
  gassign *new_stmt;
  gimple_stmt_iterator gsi;
  location_t locus;

  /* Check that middle_bb contains only one store.  */
  if (!assign
      || !gimple_assign_single_p (assign)
      || gimple_has_volatile_ops (assign))
    return false;

  /* And no PHI nodes, so all uses in the single stmt are also
     available where we insert to.  */
  if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
    return false;

  locus = gimple_location (assign);
  lhs = gimple_assign_lhs (assign);
  rhs = gimple_assign_rhs1 (assign);
  if (TREE_CODE (lhs) != MEM_REF
      || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME
      || !is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;
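
  /* Illustrative example (editorial, not from the original sources):

       void f (int *p, int c)
       {
	 *p = 0;
	 if (c)
	   *p = 1;
       }

     The unconditional store proves *p writable, so the guarded store
     lands in NONTRAP and can be turned into an unconditional
     load/PHI/store sequence below.  */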

  /* Prove that we can move the store down.  We could also check
     TREE_THIS_NOTRAP here, but in that case we also could move stores
     whose value is not readily available, which we want to avoid.  */
  if (!nontrap->contains (lhs))
    return false;

  /* Now we've checked the constraints, so do the transformation:
     1) Remove the single store.  */
  gsi = gsi_for_stmt (assign);
  unlink_stmt_vdef (assign);
  gsi_remove (&gsi, true);
  release_defs (assign);

  /* Make both store and load use alias-set zero as we have to
     deal with the case of the store being a conditional change
     of the dynamic type.  */
  lhs = unshare_expr (lhs);
  tree *basep = &lhs;
  while (handled_component_p (*basep))
    basep = &TREE_OPERAND (*basep, 0);
  if (TREE_CODE (*basep) == MEM_REF
      || TREE_CODE (*basep) == TARGET_MEM_REF)
    TREE_OPERAND (*basep, 1)
      = fold_convert (ptr_type_node, TREE_OPERAND (*basep, 1));
  else
    *basep = build2 (MEM_REF, TREE_TYPE (*basep),
		     build_fold_addr_expr (*basep),
		     build_zero_cst (ptr_type_node));

  /* 2) Insert a load from the memory of the store to the temporary
     on the edge which did not contain the store.  */
  name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  new_stmt = gimple_build_assign (name, lhs);
  gimple_set_location (new_stmt, locus);
  gsi_insert_on_edge (e1, new_stmt);

  /* 3) Create a PHI node at the join block, with one argument
     holding the old RHS, and the other holding the temporary
     where we stored the old memory contents.  */
  name2 = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  newphi = create_phi_node (name2, join_bb);
  add_phi_arg (newphi, rhs, e0, locus);
  add_phi_arg (newphi, name, e1, locus);

  lhs = unshare_expr (lhs);
  new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));

  /* 4) Insert the new store fed by that PHI node.  */
  gsi = gsi_after_labels (join_bb);
  if (gsi_end_p (gsi))
    {
      gsi = gsi_last_bb (join_bb);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  return true;
}

/* Do the main work of conditional store replacement for the diamond
   (if-then-else) form.  */

static bool
cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
				  basic_block join_bb, gimple *then_assign,
				  gimple *else_assign)
{
  tree lhs_base, lhs, then_rhs, else_rhs, name;
  location_t then_locus, else_locus;
  gimple_stmt_iterator gsi;
  gphi *newphi;
  gassign *new_stmt;

  if (then_assign == NULL
      || !gimple_assign_single_p (then_assign)
      || gimple_clobber_p (then_assign)
      || gimple_has_volatile_ops (then_assign)
      || else_assign == NULL
      || !gimple_assign_single_p (else_assign)
      || gimple_clobber_p (else_assign)
      || gimple_has_volatile_ops (else_assign))
    return false;

  lhs = gimple_assign_lhs (then_assign);
  if (!is_gimple_reg_type (TREE_TYPE (lhs))
      || !operand_equal_p (lhs, gimple_assign_lhs (else_assign), 0))
    return false;

  lhs_base = get_base_address (lhs);
  if (lhs_base == NULL_TREE
      || (!DECL_P (lhs_base) && TREE_CODE (lhs_base) != MEM_REF))
    return false;

  then_rhs = gimple_assign_rhs1 (then_assign);
  else_rhs = gimple_assign_rhs1 (else_assign);
  then_locus = gimple_location (then_assign);
  else_locus = gimple_location (else_assign);
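
  /* Illustrative example (editorial, not from the original sources):

       if (c)
	 *p = a;
       else
	 *p = b;

     becomes a single unconditional store of PHI <a, b> in JOIN_BB; no
     non-trapping proof is needed here because a store happens on both
     arms.  */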

  /* Now we've checked the constraints, so do the transformation:
     1) Remove the stores.  */
  gsi = gsi_for_stmt (then_assign);
  unlink_stmt_vdef (then_assign);
  gsi_remove (&gsi, true);
  release_defs (then_assign);

  gsi = gsi_for_stmt (else_assign);
  unlink_stmt_vdef (else_assign);
  gsi_remove (&gsi, true);
  release_defs (else_assign);

  /* 2) Create a PHI node at the join block, with one argument
     holding the THEN_BB RHS and the other holding the ELSE_BB RHS.  */
  name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  newphi = create_phi_node (name, join_bb);
  add_phi_arg (newphi, then_rhs, EDGE_SUCC (then_bb, 0), then_locus);
  add_phi_arg (newphi, else_rhs, EDGE_SUCC (else_bb, 0), else_locus);

  new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));

  /* 3) Insert the new store fed by that PHI node.  */
  gsi = gsi_after_labels (join_bb);
  if (gsi_end_p (gsi))
    {
      gsi = gsi_last_bb (join_bb);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  return true;
}

/* Return the single store in BB with virtual definition VDEF, or NULL
   if there are other stores in the BB or loads following the store.  */

static gimple *
single_trailing_store_in_bb (basic_block bb, tree vdef)
{
  if (SSA_NAME_IS_DEFAULT_DEF (vdef))
    return NULL;
  gimple *store = SSA_NAME_DEF_STMT (vdef);
  if (gimple_bb (store) != bb
      || gimple_code (store) == GIMPLE_PHI)
    return NULL;

  /* Verify there is no other store in this BB.  */
  if (!SSA_NAME_IS_DEFAULT_DEF (gimple_vuse (store))
      && gimple_bb (SSA_NAME_DEF_STMT (gimple_vuse (store))) == bb
      && gimple_code (SSA_NAME_DEF_STMT (gimple_vuse (store))) != GIMPLE_PHI)
    return NULL;

  /* Verify there is no load or store after the store.  */
  use_operand_p use_p;
  imm_use_iterator imm_iter;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_vdef (store))
    if (USE_STMT (use_p) != store
	&& gimple_bb (USE_STMT (use_p)) == bb)
      return NULL;

  return store;
}

/* Conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
   THEN_BB:
     ...
     X = Y;
     ...
     goto JOIN_BB;
   ELSE_BB:
     ...
     X = Z;
     ...
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that it is safe to sink the store to JOIN_BB by verifying that
   there are no read-after-write or write-after-write dependencies in
   THEN_BB and ELSE_BB.  */

static bool
cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
				basic_block join_bb)
{
  vec<data_reference_p> then_datarefs, else_datarefs;
  vec<ddr_p> then_ddrs, else_ddrs;
  gimple *then_store, *else_store;
  bool found, ok = false, res;
  struct data_dependence_relation *ddr;
  data_reference_p then_dr, else_dr;
  int i, j;
  tree then_lhs, else_lhs;
  basic_block blocks[3];
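
  /* Illustrative example (editorial, not from the original sources):
     with multiple paired stores, e.g.

       if (c) { p->a = 1; p->b = 2; }
       else   { p->a = 3; p->b = 4; }

     each LHS-matched pair can be sunk into JOIN_BB, subject to the
     dependence checks and the MAX_STORES_TO_SINK limit below.  */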

  /* Handle the case with a single store in THEN_BB and ELSE_BB.  That
     is cheap enough to always handle as it allows us to elide the
     dependence checking.  */
  gphi *vphi = NULL;
  for (gphi_iterator si = gsi_start_phis (join_bb); !gsi_end_p (si);
       gsi_next (&si))
    if (virtual_operand_p (gimple_phi_result (si.phi ())))
      {
	vphi = si.phi ();
	break;
      }
  if (!vphi)
    return false;
  tree then_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (then_bb));
  tree else_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (else_bb));
  gimple *then_assign = single_trailing_store_in_bb (then_bb, then_vdef);
  if (then_assign)
    {
      gimple *else_assign = single_trailing_store_in_bb (else_bb, else_vdef);
      if (else_assign)
	return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
						 then_assign, else_assign);
    }

  if (MAX_STORES_TO_SINK == 0)
    return false;

  /* Find data references.  */
  then_datarefs.create (1);
  else_datarefs.create (1);
  if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs)
       == chrec_dont_know)
      || !then_datarefs.length ()
      || (find_data_references_in_bb (NULL, else_bb, &else_datarefs)
	  == chrec_dont_know)
      || !else_datarefs.length ())
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }

  /* Find pairs of stores with equal LHS.  */
  auto_vec<gimple *, 1> then_stores, else_stores;
  FOR_EACH_VEC_ELT (then_datarefs, i, then_dr)
    {
      if (DR_IS_READ (then_dr))
	continue;

      then_store = DR_STMT (then_dr);
      then_lhs = gimple_get_lhs (then_store);
      if (then_lhs == NULL_TREE)
	continue;
      found = false;

      FOR_EACH_VEC_ELT (else_datarefs, j, else_dr)
	{
	  if (DR_IS_READ (else_dr))
	    continue;

	  else_store = DR_STMT (else_dr);
	  else_lhs = gimple_get_lhs (else_store);
	  if (else_lhs == NULL_TREE)
	    continue;

	  if (operand_equal_p (then_lhs, else_lhs, 0))
	    {
	      found = true;
	      break;
	    }
	}

      if (!found)
	continue;

      then_stores.safe_push (then_store);
      else_stores.safe_push (else_store);
    }

  /* No pairs of stores found, or too many to sink profitably.  */
  if (!then_stores.length ()
      || then_stores.length () > (unsigned) MAX_STORES_TO_SINK)
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }

  /* Compute and check data dependencies in both basic blocks.  */
  then_ddrs.create (1);
  else_ddrs.create (1);
  if (!compute_all_dependences (then_datarefs, &then_ddrs,
				vNULL, false)
      || !compute_all_dependences (else_datarefs, &else_ddrs,
				   vNULL, false))
    {
      free_dependence_relations (then_ddrs);
      free_dependence_relations (else_ddrs);
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }
  blocks[0] = then_bb;
  blocks[1] = else_bb;
  blocks[2] = join_bb;
  renumber_gimple_stmt_uids_in_blocks (blocks, 3);
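
  /* Editorial illustration of a read-after-write dependence that must
     block the sinking: in

       if (c) { *p = 1; t = *q; }

     where p and q may alias, sinking *p = 1 past the load t = *q would
     change the value loaded, so such blocks are rejected below.  */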

  /* Check that there are no read-after-write or write-after-write
     dependencies in THEN_BB.  */
  FOR_EACH_VEC_ELT (then_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
	  && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
	       && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
	      || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
		  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
	      || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
	{
	  free_dependence_relations (then_ddrs);
	  free_dependence_relations (else_ddrs);
	  free_data_refs (then_datarefs);
	  free_data_refs (else_datarefs);
	  return false;
	}
    }

  /* Check that there are no read-after-write or write-after-write
     dependencies in ELSE_BB.  */
  FOR_EACH_VEC_ELT (else_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
	  && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
	       && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
	      || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
		  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
	      || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
	{
	  free_dependence_relations (then_ddrs);
	  free_dependence_relations (else_ddrs);
	  free_data_refs (then_datarefs);
	  free_data_refs (else_datarefs);
	  return false;
	}
    }

  /* Sink stores with the same LHS.  */
  FOR_EACH_VEC_ELT (then_stores, i, then_store)
    {
      else_store = else_stores[i];
      res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
					      then_store, else_store);
      ok = ok || res;
    }

  free_dependence_relations (then_ddrs);
  free_dependence_relations (else_ddrs);
  free_data_refs (then_datarefs);
  free_data_refs (else_datarefs);

  return ok;
}

/* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB.  */

static bool
local_mem_dependence (gimple *stmt, basic_block bb)
{
  tree vuse = gimple_vuse (stmt);
  gimple *def;

  if (!vuse)
    return false;

  def = SSA_NAME_DEF_STMT (vuse);
  return (def && gimple_bb (def) == bb);
}

/* Given a "diamond" control-flow pattern where BB0 tests a condition,
   BB1 and BB2 are "then" and "else" blocks dependent on this test,
   and BB3 rejoins control flow following BB1 and BB2, look for
   opportunities to hoist loads as follows.  If BB3 contains a PHI of
   two loads, one each occurring in BB1 and BB2, and the loads are
   provably of adjacent fields in the same structure, then move both
   loads into BB0.  Of course this can only be done if there are no
   dependencies preventing such motion.

   One of the hoisted loads will always be speculative, so the
   transformation is currently conservative:

    - The fields must be strictly adjacent.
    - The two fields must occupy a single memory block that is
      guaranteed not to cross a page boundary.

   The latter is difficult to prove, as such memory blocks should be
   aligned on the minimum of the stack alignment boundary and the
   alignment guaranteed by heap allocation interfaces.  Thus we rely
   on a parameter for the alignment value.

   Provided a good value is used for that parameter, the adjacency
   restriction could possibly be relaxed.  */
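
/* Editorial illustration: for

     struct node { struct node *left, *right; };

   the fields left and right are strictly adjacent, so loads of both
   can qualify; with any intervening field between them the adjacency
   walk below fails and nothing is hoisted.  */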

static void
hoist_adjacent_loads (basic_block bb0, basic_block bb1,
		      basic_block bb2, basic_block bb3)
{
  int param_align = PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE);
  unsigned param_align_bits = (unsigned) (param_align * BITS_PER_UNIT);
  gphi_iterator gsi;

  /* Walk the phis in bb3 looking for an opportunity.  We are looking
     for phis of two SSA names, one each of which is defined in bb1 and
     bb2.  */
  for (gsi = gsi_start_phis (bb3); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi_stmt = gsi.phi ();
      gimple *def1, *def2;
      tree arg1, arg2, ref1, ref2, field1, field2;
      tree tree_offset1, tree_offset2, tree_size2, next;
      int offset1, offset2, size2;
      unsigned align1;
      gimple_stmt_iterator gsi2;
      basic_block bb_for_def1, bb_for_def2;

      if (gimple_phi_num_args (phi_stmt) != 2
	  || virtual_operand_p (gimple_phi_result (phi_stmt)))
	continue;

      arg1 = gimple_phi_arg_def (phi_stmt, 0);
      arg2 = gimple_phi_arg_def (phi_stmt, 1);

      if (TREE_CODE (arg1) != SSA_NAME
	  || TREE_CODE (arg2) != SSA_NAME
	  || SSA_NAME_IS_DEFAULT_DEF (arg1)
	  || SSA_NAME_IS_DEFAULT_DEF (arg2))
	continue;

      def1 = SSA_NAME_DEF_STMT (arg1);
      def2 = SSA_NAME_DEF_STMT (arg2);

      if ((gimple_bb (def1) != bb1 || gimple_bb (def2) != bb2)
	  && (gimple_bb (def2) != bb1 || gimple_bb (def1) != bb2))
	continue;

      /* Check the mode of the arguments to be sure a conditional move
	 can be generated for it.  */
      if (optab_handler (movcc_optab, TYPE_MODE (TREE_TYPE (arg1)))
	  == CODE_FOR_nothing)
	continue;

      /* Both statements must be assignments whose RHS is a COMPONENT_REF.  */
      if (!gimple_assign_single_p (def1)
	  || !gimple_assign_single_p (def2)
	  || gimple_has_volatile_ops (def1)
	  || gimple_has_volatile_ops (def2))
	continue;

      ref1 = gimple_assign_rhs1 (def1);
      ref2 = gimple_assign_rhs1 (def2);

      if (TREE_CODE (ref1) != COMPONENT_REF
	  || TREE_CODE (ref2) != COMPONENT_REF)
	continue;

      /* The zeroth operand of the two component references must be
	 identical.  It is not sufficient to compare get_base_address of
	 the two references, because this could allow for different
	 elements of the same array in the two trees.  It is not safe to
	 assume that the existence of one array element implies the
	 existence of a different one.  */
      if (!operand_equal_p (TREE_OPERAND (ref1, 0), TREE_OPERAND (ref2, 0), 0))
	continue;

      field1 = TREE_OPERAND (ref1, 1);
      field2 = TREE_OPERAND (ref2, 1);

      /* Check for field adjacency, and ensure field1 comes first.  */
      for (next = DECL_CHAIN (field1);
	   next && TREE_CODE (next) != FIELD_DECL;
	   next = DECL_CHAIN (next))
	;

      if (next != field2)
	{
	  for (next = DECL_CHAIN (field2);
	       next && TREE_CODE (next) != FIELD_DECL;
	       next = DECL_CHAIN (next))
	    ;

	  if (next != field1)
	    continue;

	  std::swap (field1, field2);
	  std::swap (def1, def2);
	}

      bb_for_def1 = gimple_bb (def1);
      bb_for_def2 = gimple_bb (def2);
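
      /* Editorial worked example: with a 64-byte L1 line (512 bits),
	 two adjacent 32-bit fields at bit offsets 0 and 32 whose first
	 field is 32-bit aligned give
	   align1 + offset2 - offset1 + size2 = 32 + 32 - 0 + 32 = 96,
	 which is <= 512, so the pair passes the profitability check
	 computed below.  */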

      /* Check for proper alignment of the first field.  */
      tree_offset1 = bit_position (field1);
      tree_offset2 = bit_position (field2);
      tree_size2 = DECL_SIZE (field2);

      if (!tree_fits_uhwi_p (tree_offset1)
	  || !tree_fits_uhwi_p (tree_offset2)
	  || !tree_fits_uhwi_p (tree_size2))
	continue;

      offset1 = tree_to_uhwi (tree_offset1);
      offset2 = tree_to_uhwi (tree_offset2);
      size2 = tree_to_uhwi (tree_size2);
      align1 = DECL_ALIGN (field1) % param_align_bits;

      if (offset1 % BITS_PER_UNIT != 0)
	continue;

      /* For profitability, the two field references should fit within
	 a single cache line.  */
      if (align1 + offset2 - offset1 + size2 > param_align_bits)
	continue;

      /* The two expressions cannot be dependent upon vdefs defined
	 in bb1/bb2.  */
      if (local_mem_dependence (def1, bb_for_def1)
	  || local_mem_dependence (def2, bb_for_def2))
	continue;

      /* The conditions are satisfied; hoist the loads from bb1 and bb2 into
	 bb0.  We hoist the first one first so that a cache miss is handled
	 efficiently regardless of hardware cache-fill policy.  */
      gsi2 = gsi_for_stmt (def1);
      gsi_move_to_bb_end (&gsi2, bb0);
      gsi2 = gsi_for_stmt (def2);
      gsi_move_to_bb_end (&gsi2, bb0);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file,
		   "\nHoisting adjacent loads from %d and %d into %d: \n",
		   bb_for_def1->index, bb_for_def2->index, bb0->index);
	  print_gimple_stmt (dump_file, def1, 0, TDF_VOPS|TDF_MEMSYMS);
	  print_gimple_stmt (dump_file, def2, 0, TDF_VOPS|TDF_MEMSYMS);
	}
    }
}

/* Determine whether we should attempt to hoist adjacent loads out of
   diamond patterns in pass_phiopt.  Always hoist loads if
   -fhoist-adjacent-loads is specified and the target machine has
   both a conditional move instruction and a defined cache line size.  */

static bool
gate_hoist_loads (void)
{
  return (flag_hoist_adjacent_loads == 1
	  && PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
	  && HAVE_conditional_move);
}

/* This pass tries to replace an if-then-else block with an
   assignment.  We have four kinds of transformations.  Some of these
   transformations are also performed by the ifcvt RTL optimizer.

   Conditional Replacement
   -----------------------

   This transformation, implemented in conditional_replacement,
   replaces

     bb0:
       if (cond) goto bb2; else goto bb1;
     bb1:
     bb2:
       x = PHI <0 (bb1), 1 (bb0), ...>;

   with

     bb0:
       x' = cond;
       goto bb2;
     bb2:
       x = PHI <x' (bb0), ...>;

   We remove bb1 as it becomes unreachable.  This occurs often due to
   gimplification of conditionals.

   Value Replacement
   -----------------

   This transformation, implemented in value_replacement, replaces

     bb0:
       if (a != b) goto bb2; else goto bb1;
     bb1:
     bb2:
       x = PHI <a (bb1), b (bb0), ...>;

   with

     bb0:
     bb2:
       x = PHI <b (bb0), ...>;

   This opportunity can sometimes occur as a result of other
   optimizations.
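
   For instance (editorial example), source like

     x = (a != b) ? b : a;

   always yields b, so the PHI collapses to the single argument b.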

   Another case caught by value replacement looks like this:

     bb0:
       t1 = a == CONST;
       t2 = b > c;
       t3 = t1 & t2;
       if (t3 != 0) goto bb1; else goto bb2;
     bb1:
     bb2:
       x = PHI <CONST, a>

   Gets replaced with:

     bb0:
     bb2:
       t1 = a == CONST;
       t2 = b > c;
       t3 = t1 & t2;
       x = a;

   ABS Replacement
   ---------------

   This transformation, implemented in abs_replacement, replaces

     bb0:
       if (a >= 0) goto bb2; else goto bb1;
     bb1:
       x = -a;
     bb2:
       x = PHI <x (bb1), a (bb0), ...>;

   with

     bb0:
       x' = ABS_EXPR< a >;
     bb2:
       x = PHI <x' (bb0), ...>;

   MIN/MAX Replacement
   -------------------

   This transformation, implemented in minmax_replacement, replaces

     bb0:
       if (a <= b) goto bb2; else goto bb1;
     bb1:
     bb2:
       x = PHI <b (bb1), a (bb0), ...>;

   with

     bb0:
       x' = MIN_EXPR (a, b)
     bb2:
       x = PHI <x' (bb0), ...>;

   A similar transformation is done for MAX_EXPR.


   This pass also performs a fifth transformation of a slightly different
   flavor.

   Factor conversion in COND_EXPR
   ------------------------------

   This transformation factors the conversion out of a COND_EXPR with
   factor_out_conditional_conversion.

   For example:

     if (a <= CST) goto <bb 3>; else goto <bb 4>;
     <bb 3>:
       tmp = (int) a;
     <bb 4>:
       tmp = PHI <tmp, CST>

   Into:

     if (a <= CST) goto <bb 3>; else goto <bb 4>;
     <bb 3>:
     <bb 4>:
       a' = PHI <a, CST>
       tmp = (int) a';

   Adjacent Load Hoisting
   ----------------------

   This transformation replaces

     bb0:
       if (...) goto bb2; else goto bb1;
     bb1:
       x1 = (<expr>).field1;
       goto bb3;
     bb2:
       x2 = (<expr>).field2;
     bb3:
       # x = PHI <x1, x2>;

   with

     bb0:
       x1 = (<expr>).field1;
       x2 = (<expr>).field2;
       if (...) goto bb2; else goto bb1;
     bb1:
       goto bb3;
     bb2:
     bb3:
       # x = PHI <x1, x2>;

   The purpose of this transformation is to enable generation of conditional
   move instructions such as Intel CMOV or PowerPC ISEL.  Because one of
   the loads is speculative, the transformation is restricted to very
   specific cases to avoid introducing a page fault.  We are looking for
   the common idiom:

     if (...)
       x = y->left;
     else
       x = y->right;

   where left and right are typically adjacent pointers in a tree
   structure.  */
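
/* Editorial illustration: the simplest conditional-replacement case at
   the source level is

     int f (int a, int b)
     {
       return a < b ? 1 : 0;
     }

   which becomes a plain comparison result, return a < b;, with no
   branch.  */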

namespace {

const pass_data pass_data_phiopt =
{
  GIMPLE_PASS, /* type */
  "phiopt", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_PHIOPT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_phiopt : public gimple_opt_pass
{
public:
  pass_phiopt (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_phiopt, ctxt), early_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_phiopt (m_ctxt); }
  void set_pass_param (unsigned n, bool param)
    {
      gcc_assert (n == 0);
      early_p = param;
    }
  virtual bool gate (function *) { return flag_ssa_phiopt; }
  virtual unsigned int execute (function *)
    {
      return tree_ssa_phiopt_worker (false,
				     !early_p ? gate_hoist_loads () : false,
				     early_p);
    }

private:
  bool early_p;
}; // class pass_phiopt

} // anon namespace

gimple_opt_pass *
make_pass_phiopt (gcc::context *ctxt)
{
  return new pass_phiopt (ctxt);
}

namespace {

const pass_data pass_data_cselim =
{
  GIMPLE_PASS, /* type */
  "cselim", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_PHIOPT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_cselim : public gimple_opt_pass
{
public:
  pass_cselim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cselim, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_cselim; }
  virtual unsigned int execute (function *) { return tree_ssa_cs_elim (); }

}; // class pass_cselim

} // anon namespace

gimple_opt_pass *
make_pass_cselim (gcc::context *ctxt)
{
  return new pass_cselim (ctxt);
}