/* Optimization of PHI nodes by converting them into straightline code.
   Copyright (C) 2004-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "insn-codes.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-dfa.h"
#include "domwalk.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-inline.h"
#include "params.h"

static unsigned int tree_ssa_phiopt_worker (bool, bool);
static bool conditional_replacement (basic_block, basic_block,
				     edge, edge, gphi *, tree, tree);
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree);
static int value_replacement (basic_block, basic_block,
			      edge, edge, gimple *, tree, tree);
static bool minmax_replacement (basic_block, basic_block,
				edge, edge, gimple *, tree, tree);
static bool abs_replacement (basic_block, basic_block,
			     edge, edge, gimple *, tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
				    hash_set<tree> *);
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
static hash_set<tree> * get_non_trapping ();
static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree);
static void hoist_adjacent_loads (basic_block, basic_block,
				  basic_block, basic_block);
static bool gate_hoist_loads (void);

/* This pass tries to transform conditional stores into unconditional
   ones, enabling further simplifications with the simpler then and else
   blocks.  In particular it replaces this:

     bb0:
       if (cond) goto bb2; else goto bb1;
     bb1:
       *p = RHS;
     bb2:

   with

     bb0:
       if (cond) goto bb1; else goto bb2;
     bb1:
       condtmp' = *p;
     bb2:
       condtmp = PHI <RHS, condtmp'>
       *p = condtmp;

   This transformation can only be done under several constraints,
   documented below.  It also replaces:

     bb0:
       if (cond) goto bb2; else goto bb1;
     bb1:
       *p = RHS1;
       goto bb3;
     bb2:
       *p = RHS2;
     bb3:

   with

     bb0:
       if (cond) goto bb3; else goto bb1;
     bb1:
     bb3:
       condtmp = PHI <RHS1, RHS2>
       *p = condtmp;  */
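/* As a source-level illustration (a hypothetical example, not part of the
   pass itself): a function like

     void
     f (int *p, int c, int v)
     {
       if (c)
         *p = v;
     }

   is rewritten by the first transformation above into the equivalent of

     void
     f (int *p, int c, int v)
     {
       int tmp = c ? v : *p;
       *p = tmp;
     }

   which is only valid when the load from *p is known not to trap (see
   get_non_trapping below).  */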
static unsigned int
tree_ssa_cs_elim (void)
{
  unsigned todo;
  /* ???  We are not interested in loop-related info, but the following
     will create it, ICEing as we didn't init loops with pre-headers.
     An interfacing issue of find_data_references_in_bb.  */
  loop_optimizer_init (LOOPS_NORMAL);
  scev_initialize ();
  todo = tree_ssa_phiopt_worker (true, false);
  scev_finalize ();
  loop_optimizer_finalize ();
  return todo;
}

/* Return the singleton PHI in the SEQ of PHIs for edges E0 and E1, i.e.
   the unique PHI whose arguments for E0 and E1 differ.  */

static gphi *
single_non_singleton_phi_for_edges (gimple_seq seq, edge e0, edge e1)
{
  gimple_stmt_iterator i;
  gphi *phi = NULL;
  if (gimple_seq_singleton_p (seq))
    return as_a <gphi *> (gsi_stmt (gsi_start (seq)));
  for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
    {
      gphi *p = as_a <gphi *> (gsi_stmt (i));
      /* If the PHI arguments are equal then we can skip this PHI.  */
      if (operand_equal_for_phi_arg_p (gimple_phi_arg_def (p, e0->dest_idx),
				       gimple_phi_arg_def (p, e1->dest_idx)))
	continue;

      /* If we already have a PHI whose two edge arguments differ, then
	 there is no singleton for these PHIs; return NULL.  */
      if (phi)
	return NULL;

      phi = p;
    }
  return phi;
}
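/* For example, given a hypothetical PHI sequence at a join block reached
   through edges E0 and E1:

     x_3 = PHI <a_1(E0), a_1(E1)>    <- arguments equal, skipped
     y_4 = PHI <b_2(E0), c_5(E1)>    <- arguments differ

   single_non_singleton_phi_for_edges returns y_4; if a second PHI with
   differing arguments existed, it would return NULL.  */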
/* The core routine of conditional store replacement and normal
   phi optimizations.  Both share much of the infrastructure in how
   to match applicable basic block patterns.  DO_STORE_ELIM is true
   when we want to do conditional store replacement, false otherwise.
   DO_HOIST_LOADS is true when we want to hoist adjacent loads out
   of diamond control flow patterns, false otherwise.  */
static unsigned int
tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
{
  basic_block bb;
  basic_block *bb_order;
  unsigned n, i;
  bool cfgchanged = false;
  hash_set<tree> *nontrap = 0;

  if (do_store_elim)
    /* Calculate the set of non-trapping memory accesses.  */
    nontrap = get_non_trapping ();

  /* Search every basic block for COND_EXPRs we may be able to optimize.

     We walk the blocks in an order that guarantees that a block with
     a single predecessor is processed before the predecessor.
     This ensures that we collapse inner ifs before visiting the
     outer ones, and also that we do not try to visit a removed
     block.  */
  bb_order = single_pred_before_succ_order ();
  n = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  for (i = 0; i < n; i++)
    {
      gimple *cond_stmt;
      gphi *phi;
      basic_block bb1, bb2;
      edge e1, e2;
      tree arg0, arg1;

      bb = bb_order[i];

      cond_stmt = last_stmt (bb);
      /* Check to see if the last statement is a GIMPLE_COND.  */
      if (!cond_stmt
	  || gimple_code (cond_stmt) != GIMPLE_COND)
	continue;

      e1 = EDGE_SUCC (bb, 0);
      bb1 = e1->dest;
      e2 = EDGE_SUCC (bb, 1);
      bb2 = e2->dest;

      /* We cannot do the optimization on abnormal edges.  */
      if ((e1->flags & EDGE_ABNORMAL) != 0
	  || (e2->flags & EDGE_ABNORMAL) != 0)
	continue;

      /* Punt if bb1 or bb2 has no successors, or if bb2 does not exist.  */
      if (EDGE_COUNT (bb1->succs) == 0
	  || bb2 == NULL
	  || EDGE_COUNT (bb2->succs) == 0)
	continue;

      /* Find the bb which is the fall through to the other.  */
      if (EDGE_SUCC (bb1, 0)->dest == bb2)
	;
      else if (EDGE_SUCC (bb2, 0)->dest == bb1)
	{
	  std::swap (bb1, bb2);
	  std::swap (e1, e2);
	}
      else if (do_store_elim
	       && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
	{
	  basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

	  if (!single_succ_p (bb1)
	      || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0
	      || !single_succ_p (bb2)
	      || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0
	      || EDGE_COUNT (bb3->preds) != 2)
	    continue;
	  if (cond_if_else_store_replacement (bb1, bb2, bb3))
	    cfgchanged = true;
	  continue;
	}
      else if (do_hoist_loads
	       && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
	{
	  basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

	  if (!FLOAT_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond_stmt)))
	      && single_succ_p (bb1)
	      && single_succ_p (bb2)
	      && single_pred_p (bb1)
	      && single_pred_p (bb2)
	      && EDGE_COUNT (bb->succs) == 2
	      && EDGE_COUNT (bb3->preds) == 2
	      /* If one edge or the other is dominant, a conditional move
		 is likely to perform worse than the well-predicted branch.  */
	      && !predictable_edge_p (EDGE_SUCC (bb, 0))
	      && !predictable_edge_p (EDGE_SUCC (bb, 1)))
	    hoist_adjacent_loads (bb, bb1, bb2, bb3);
	  continue;
	}
      else
	continue;

      e1 = EDGE_SUCC (bb1, 0);

      /* Make sure that bb1 is just a fall through.  */
      if (!single_succ_p (bb1)
	  || (e1->flags & EDGE_FALLTHRU) == 0)
	continue;

      /* Also make sure that bb1 only has one predecessor and that it
	 is bb.  */
      if (!single_pred_p (bb1)
	  || single_pred (bb1) != bb)
	continue;

      if (do_store_elim)
	{
	  /* bb1 is the middle block, bb2 the join block, bb the split block,
	     e1 the fallthrough edge from bb1 to bb2.  We can't do the
	     optimization if the join block has more than two predecessors.  */
	  if (EDGE_COUNT (bb2->preds) > 2)
	    continue;
	  if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
	    cfgchanged = true;
	}
      else
	{
	  gimple_seq phis = phi_nodes (bb2);
	  gimple_stmt_iterator gsi;
	  bool candorest = true;

	  /* Value replacement can work with more than one PHI
	     so try that first.  */
	  for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      phi = as_a <gphi *> (gsi_stmt (gsi));
	      arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
	      arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
	      if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2)
		{
		  candorest = false;
		  cfgchanged = true;
		  break;
		}
	    }

	  if (!candorest)
	    continue;

	  phi = single_non_singleton_phi_for_edges (phis, e1, e2);
	  if (!phi)
	    continue;

	  arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
	  arg1 = gimple_phi_arg_def (phi, e2->dest_idx);

	  /* Something is wrong if we cannot find the arguments in the PHI
	     node.  */
	  gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE);

	  gphi *newphi = factor_out_conditional_conversion (e1, e2, phi,
							    arg0, arg1);
	  if (newphi != NULL)
	    {
	      phi = newphi;
	      /* factor_out_conditional_conversion may create a new PHI in
		 BB2 and eliminate an existing PHI in BB2.  Recompute values
		 that may be affected by that change.  */
	      arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
	      arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
	      gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE);
	    }

	  /* Do the replacement of conditional if it can be done.  */
	  if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	  else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	}
    }

  free (bb_order);

  if (do_store_elim)
    delete nontrap;
  /* If the CFG has changed, we should cleanup the CFG.  */
  if (cfgchanged && do_store_elim)
    {
      /* In cond-store replacement we have added some loads on edges
	 and new VOPS (as we moved the store, and created a load).  */
      gsi_commit_edge_inserts ();
      return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
    }
  else if (cfgchanged)
    return TODO_cleanup_cfg;
  return 0;
}
/* Replace the PHI node argument coming in on edge E in block BB with the
   variable NEW_TREE.  Remove the edge from COND_BLOCK which does not lead
   to BB (COND_BLOCK is known to have two edges, one of which must reach
   BB).  */

static void
replace_phi_edge_with_variable (basic_block cond_block,
				edge e, gimple *phi, tree new_tree)
{
  basic_block bb = gimple_bb (phi);
  basic_block block_to_remove;
  gimple_stmt_iterator gsi;

  /* Change the PHI argument to new.  */
  SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);

  /* Remove the empty basic block.  */
  if (EDGE_SUCC (cond_block, 0)->dest == bb)
    {
      EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
      EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      EDGE_SUCC (cond_block, 0)->probability = REG_BR_PROB_BASE;
      EDGE_SUCC (cond_block, 0)->count += EDGE_SUCC (cond_block, 1)->count;

      block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
    }
  else
    {
      EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
      EDGE_SUCC (cond_block, 1)->flags
	&= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      EDGE_SUCC (cond_block, 1)->probability = REG_BR_PROB_BASE;
      EDGE_SUCC (cond_block, 1)->count += EDGE_SUCC (cond_block, 0)->count;

      block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
    }
  delete_basic_block (block_to_remove);

  /* Eliminate the COND_EXPR at the end of COND_BLOCK.  */
  gsi = gsi_last_bb (cond_block);
  gsi_remove (&gsi, true);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "COND_EXPR in block %d and PHI in block %d converted to straightline code.\n",
	     cond_block->index,
	     bb->index);
}

/* PR66726: Factor conversion out of COND_EXPR.  If the arguments of the PHI
   stmt are conversions, factor the conversion out of the PHI and apply it
   to the PHI result instead.  Return the newly-created PHI, if any.  */
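/* A sketch of the transformation on hypothetical GIMPLE:

     <bb 3>:
       a_6 = (int) s_4;

     <bb 4>:
       # r_2 = PHI <a_6(3), 5(2)>

   becomes

     <bb 4>:
       # t_7 = PHI <s_4(3), 5(2)>
       r_2 = (int) t_7;

   so the conversion is applied once to the PHI result instead of being
   performed on an incoming edge.  */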
static gphi *
factor_out_conditional_conversion (edge e0, edge e1, gphi *phi,
				   tree arg0, tree arg1)
{
  gimple *arg0_def_stmt = NULL, *arg1_def_stmt = NULL, *new_stmt;
  tree new_arg0 = NULL_TREE, new_arg1 = NULL_TREE;
  tree temp, result;
  gphi *newphi;
  gimple_stmt_iterator gsi, gsi_for_def;
  source_location locus = gimple_location (phi);
  enum tree_code convert_code;

  /* Handle only PHI statements with two arguments.  TODO: If all
     other arguments to PHI are INTEGER_CST or if their defining
     statements have the same unary operation, we can handle more
     than two arguments too.  */
  if (gimple_phi_num_args (phi) != 2)
    return NULL;

  /* First canonicalize to simplify tests.  */
  if (TREE_CODE (arg0) != SSA_NAME)
    {
      std::swap (arg0, arg1);
      std::swap (e0, e1);
    }

  if (TREE_CODE (arg0) != SSA_NAME
      || (TREE_CODE (arg1) != SSA_NAME
	  && TREE_CODE (arg1) != INTEGER_CST))
    return NULL;

  /* Check if arg0 is an SSA_NAME and the stmt which defines arg0 is
     a conversion.  */
  arg0_def_stmt = SSA_NAME_DEF_STMT (arg0);
  if (!gimple_assign_cast_p (arg0_def_stmt))
    return NULL;

  /* Use the RHS as new_arg0.  */
  convert_code = gimple_assign_rhs_code (arg0_def_stmt);
  new_arg0 = gimple_assign_rhs1 (arg0_def_stmt);
  if (convert_code == VIEW_CONVERT_EXPR)
    {
      new_arg0 = TREE_OPERAND (new_arg0, 0);
      if (!is_gimple_reg_type (TREE_TYPE (new_arg0)))
	return NULL;
    }

  if (TREE_CODE (arg1) == SSA_NAME)
    {
      /* Check if arg1 is an SSA_NAME and the stmt which defines arg1
	 is a conversion.  */
      arg1_def_stmt = SSA_NAME_DEF_STMT (arg1);
      if (!is_gimple_assign (arg1_def_stmt)
	  || gimple_assign_rhs_code (arg1_def_stmt) != convert_code)
	return NULL;

      /* Use the RHS as new_arg1.  */
      new_arg1 = gimple_assign_rhs1 (arg1_def_stmt);
      if (convert_code == VIEW_CONVERT_EXPR)
	new_arg1 = TREE_OPERAND (new_arg1, 0);
    }
  else
    {
      /* If arg1 is an INTEGER_CST, fold it to new type.  */
      if (INTEGRAL_TYPE_P (TREE_TYPE (new_arg0))
	  && int_fits_type_p (arg1, TREE_TYPE (new_arg0)))
	{
	  if (gimple_assign_cast_p (arg0_def_stmt))
	    new_arg1 = fold_convert (TREE_TYPE (new_arg0), arg1);
	  else
	    return NULL;
	}
      else
	return NULL;
    }

  /* If arg0/arg1 have > 1 use, then this transformation actually increases
     the number of expressions evaluated at runtime.  */
  if (!has_single_use (arg0)
      || (arg1_def_stmt && !has_single_use (arg1)))
    return NULL;

  /* If the types of new_arg0 and new_arg1 are different, bail out.  */
  if (!types_compatible_p (TREE_TYPE (new_arg0), TREE_TYPE (new_arg1)))
    return NULL;

  /* Create a new PHI stmt.  */
  result = PHI_RESULT (phi);
  temp = make_ssa_name (TREE_TYPE (new_arg0), NULL);
  newphi = create_phi_node (temp, gimple_bb (phi));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "PHI ");
      print_generic_expr (dump_file, gimple_phi_result (phi), 0);
      fprintf (dump_file,
	       " changed to factor conversion out from COND_EXPR.\n");
      fprintf (dump_file, "New stmt with CAST that defines ");
      print_generic_expr (dump_file, result, 0);
      fprintf (dump_file, ".\n");
    }

  /* Remove the old cast(s) that have a single use.  */
  gsi_for_def = gsi_for_stmt (arg0_def_stmt);
  gsi_remove (&gsi_for_def, true);
  release_defs (arg0_def_stmt);

  if (arg1_def_stmt)
    {
      gsi_for_def = gsi_for_stmt (arg1_def_stmt);
      gsi_remove (&gsi_for_def, true);
      release_defs (arg1_def_stmt);
    }

  add_phi_arg (newphi, new_arg0, e0, locus);
  add_phi_arg (newphi, new_arg1, e1, locus);

  /* Create the conversion stmt and insert it.  */
  if (convert_code == VIEW_CONVERT_EXPR)
    {
      temp = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (result), temp);
      new_stmt = gimple_build_assign (result, temp);
    }
  else
    new_stmt = gimple_build_assign (result, convert_code, temp);
  gsi = gsi_after_labels (gimple_bb (phi));
  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);

  /* Remove the original PHI stmt.  */
  gsi = gsi_for_stmt (phi);
  gsi_remove (&gsi, true);
  return newphi;
}

/* The function conditional_replacement does the main work of doing the
   conditional replacement.  Return true if the replacement is done.
   Otherwise return false.
   BB is the basic block where the replacement is going to be done.  ARG0
   is argument 0 from PHI.  Likewise for ARG1.  */
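/* For instance (a hypothetical example), the PHI for

     if (a == b)
       t = 1;
     else
       t = 0;

   is rewritten to t = (a == b); with PHI arguments 0 and -1 the result
   is additionally negated, t = -(TYPE) (a == b).  */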
static bool
conditional_replacement (basic_block cond_bb, basic_block middle_bb,
			 edge e0, edge e1, gphi *phi,
			 tree arg0, tree arg1)
{
  tree result;
  gimple *stmt;
  gassign *new_stmt;
  tree cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  tree new_var, new_var2;
  bool neg;

  /* FIXME: Gimplification of complex type is too hard for now.  */
  /* We aren't prepared to handle vectors either (and it is a question
     if it would be worthwhile anyway).  */
  if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0))
	|| POINTER_TYPE_P (TREE_TYPE (arg0)))
      || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1))
	   || POINTER_TYPE_P (TREE_TYPE (arg1))))
    return false;

  /* If the PHI arguments are the constants 0 and 1, or 0 and -1, then
     we can convert the PHI into a use of the condition.  */
  if ((integer_zerop (arg0) && integer_onep (arg1))
      || (integer_zerop (arg1) && integer_onep (arg0)))
    neg = false;
  else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
	   || (integer_zerop (arg1) && integer_all_onesp (arg0)))
    neg = true;
  else
    return false;

  if (!empty_block_p (middle_bb))
    return false;

  /* At this point we know we have a GIMPLE_COND with two successors.
     One successor is BB, the other successor is an empty block which
     falls through into BB.

     There is a single PHI node at the join point (BB) and its arguments
     are constants (0, 1) or (0, -1).

     So, given the condition COND, and the two PHI arguments, we can
     rewrite this PHI into non-branching code:

       dest = (COND) or dest = COND'

     We use the condition as-is if the argument associated with the
     true edge has the value one or the argument associated with the
     false edge has the value zero.  Note that those conditions are not
     the same since only one of the outgoing edges from the GIMPLE_COND
     will directly reach BB and thus be associated with an argument.  */

  stmt = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* To handle special cases like floating point comparison, it is easier and
     less error-prone to build a tree and gimplify it on the fly though it is
     less efficient.  */
  cond = fold_build2_loc (gimple_location (stmt),
			  gimple_cond_code (stmt), boolean_type_node,
			  gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));

  /* We need to know which is the true edge and which is the false
     edge so that we know when to invert the condition below.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
  if ((e0 == true_edge && integer_zerop (arg0))
      || (e0 == false_edge && !integer_zerop (arg0))
      || (e1 == true_edge && integer_zerop (arg1))
      || (e1 == false_edge && !integer_zerop (arg1)))
    cond = fold_build1_loc (gimple_location (stmt),
			    TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);

  if (neg)
    {
      cond = fold_convert_loc (gimple_location (stmt),
			       TREE_TYPE (result), cond);
      cond = fold_build1_loc (gimple_location (stmt),
			      NEGATE_EXPR, TREE_TYPE (cond), cond);
    }

  /* Insert our new statements at the end of conditional block before the
     COND_STMT.  */
  gsi = gsi_for_stmt (stmt);
  new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true,
				      GSI_SAME_STMT);

  if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var)))
    {
      source_location locus_0, locus_1;

      new_var2 = make_ssa_name (TREE_TYPE (result));
      new_stmt = gimple_build_assign (new_var2, CONVERT_EXPR, new_var);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
      new_var = new_var2;

      /* Set the locus to the first argument, unless it doesn't have one.  */
      locus_0 = gimple_phi_arg_location (phi, 0);
      locus_1 = gimple_phi_arg_location (phi, 1);
      if (locus_0 == UNKNOWN_LOCATION)
	locus_0 = locus_1;
      gimple_set_location (new_stmt, locus_0);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, new_var);
  reset_flow_sensitive_info_in_bb (cond_bb);

  /* Note that we optimized this PHI.  */
  return true;
}
/* Update *ARG which is defined in STMT so that it contains the
   computed value if that seems profitable.  Return true if the
   statement is made dead by that rewriting.  */

static bool
jump_function_from_stmt (tree *arg, gimple *stmt)
{
  enum tree_code code = gimple_assign_rhs_code (stmt);
  if (code == ADDR_EXPR)
    {
      /* For arg = &p->i transform it to p, if possible.  */
      tree rhs1 = gimple_assign_rhs1 (stmt);
      HOST_WIDE_INT offset;
      tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (rhs1, 0),
						&offset);
      if (tem
	  && TREE_CODE (tem) == MEM_REF
	  && (mem_ref_offset (tem) + offset) == 0)
	{
	  *arg = TREE_OPERAND (tem, 0);
	  return true;
	}
    }
  /* TODO: Much like IPA-CP jump-functions we want to handle constant
     additions symbolically here, and we'd need to update the comparison
     code that compares the arg + cst tuples in our caller.  For now the
     code above exactly handles the VEC_BASE pattern from vec.h.  */
  return false;
}

/* RHS is a source argument in a BIT_AND_EXPR which feeds a conditional
   of the form SSA_NAME NE 0.

   If RHS is fed by a simple EQ_EXPR comparison of two values, see if
   the two input values of the EQ_EXPR match arg0 and arg1.

   If so update *code and return TRUE.  Otherwise return FALSE.  */
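/* A hypothetical GIMPLE sketch of the pattern handled here and by
   operand_equal_for_value_replacement below:

     _1 = a_5 == b_6;
     _2 = _1 & c_7;
     if (_2 != 0)
       ...

   where the EQ_EXPR feeding the BIT_AND_EXPR compares the two PHI
   arguments a_5 and b_6.  */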
static bool
rhs_is_fed_for_value_replacement (const_tree arg0, const_tree arg1,
				  enum tree_code *code, const_tree rhs)
{
  /* Obviously if RHS is not an SSA_NAME, we can't look at the defining
     statement.  */
  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple *def1 = SSA_NAME_DEF_STMT (rhs);

      /* Verify the defining statement has an EQ_EXPR on the RHS.  */
      if (is_gimple_assign (def1) && gimple_assign_rhs_code (def1) == EQ_EXPR)
	{
	  /* Finally verify the source operands of the EQ_EXPR are equal
	     to arg0 and arg1.  */
	  tree op0 = gimple_assign_rhs1 (def1);
	  tree op1 = gimple_assign_rhs2 (def1);
	  if ((operand_equal_for_phi_arg_p (arg0, op0)
	       && operand_equal_for_phi_arg_p (arg1, op1))
	      || (operand_equal_for_phi_arg_p (arg0, op1)
		  && operand_equal_for_phi_arg_p (arg1, op0)))
	    {
	      /* We will perform the optimization.  */
	      *code = gimple_assign_rhs_code (def1);
	      return true;
	    }
	}
    }
  return false;
}

/* Return TRUE if arg0/arg1 are equal to the rhs/lhs or lhs/rhs of COND.

   Also return TRUE if arg0/arg1 are equal to the source arguments of an
   EQ comparison feeding a BIT_AND_EXPR which feeds COND.

   Return FALSE otherwise.  */

static bool
operand_equal_for_value_replacement (const_tree arg0, const_tree arg1,
				     enum tree_code *code, gimple *cond)
{
  gimple *def;
  tree lhs = gimple_cond_lhs (cond);
  tree rhs = gimple_cond_rhs (cond);

  if ((operand_equal_for_phi_arg_p (arg0, lhs)
       && operand_equal_for_phi_arg_p (arg1, rhs))
      || (operand_equal_for_phi_arg_p (arg1, lhs)
	  && operand_equal_for_phi_arg_p (arg0, rhs)))
    return true;

  /* Now handle more complex case where we have an EQ comparison
     which feeds a BIT_AND_EXPR which feeds COND.

     First verify that COND is of the form SSA_NAME NE 0.  */
  if (*code != NE_EXPR || !integer_zerop (rhs)
      || TREE_CODE (lhs) != SSA_NAME)
    return false;

  /* Now ensure that SSA_NAME is set by a BIT_AND_EXPR.  */
  def = SSA_NAME_DEF_STMT (lhs);
  if (!is_gimple_assign (def) || gimple_assign_rhs_code (def) != BIT_AND_EXPR)
    return false;

  /* Now verify arg0/arg1 correspond to the source arguments of an
     EQ comparison feeding the BIT_AND_EXPR.  */

  tree tmp = gimple_assign_rhs1 (def);
  if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
    return true;

  tmp = gimple_assign_rhs2 (def);
  if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
    return true;

  return false;
}

/* Returns true if ARG is a neutral element for operation CODE; RIGHT is
   true if ARG appears as the right operand.  */

static bool
neutral_element_p (tree_code code, tree arg, bool right)
{
  switch (code)
    {
    case PLUS_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      return integer_zerop (arg);

    case LROTATE_EXPR:
    case RROTATE_EXPR:
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      return right && integer_zerop (arg);

    case MULT_EXPR:
      return integer_onep (arg);

    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
      return right && integer_onep (arg);

    case BIT_AND_EXPR:
      return integer_all_onesp (arg);

    default:
      return false;
    }
}

/* Returns true if ARG is an absorbing element for operation CODE.  */

static bool
absorbing_element_p (tree_code code, tree arg)
{
  switch (code)
    {
    case BIT_IOR_EXPR:
      return integer_all_onesp (arg);

    case MULT_EXPR:
    case BIT_AND_EXPR:
      return integer_zerop (arg);

    default:
      return false;
    }
}

/* The function value_replacement does the main work of doing the value
   replacement.  Return non-zero if the replacement is done.  Otherwise
   return 0.  If we remove the middle basic block, return 2.
   BB is the basic block where the replacement is going to be done.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.  */
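/* For example (a hypothetical source fragment):

     if (x != 0)
       t = x;
     else
       t = 0;

   Here the PHI arguments match the operands of the equality test, so the
   PHI can be replaced by t = x and the branch removed.  The second,
   neutral/absorbing-element case below similarly turns
   (x != 0) ? x + y : y into the unconditional x + y.  */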
static int
value_replacement (basic_block cond_bb, basic_block middle_bb,
		   edge e0, edge e1, gimple *phi,
		   tree arg0, tree arg1)
{
  gimple_stmt_iterator gsi;
  gimple *cond;
  edge true_edge, false_edge;
  enum tree_code code;
  bool empty_or_with_defined_p = true;

  /* If the type says honor signed zeros we cannot do this
     optimization.  */
  if (HONOR_SIGNED_ZEROS (arg1))
    return 0;

  /* If there is a statement in MIDDLE_BB that defines one of the PHI
     arguments, then adjust arg0 or arg1.  */
  gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
  while (!gsi_end_p (gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      tree lhs;
      gsi_next_nondebug (&gsi);
      if (!is_gimple_assign (stmt))
	{
	  empty_or_with_defined_p = false;
	  continue;
	}
      /* Now try to adjust arg0 or arg1 according to the computation
	 in the statement.  */
      lhs = gimple_assign_lhs (stmt);
      if (!(lhs == arg0
	    && jump_function_from_stmt (&arg0, stmt))
	  || (lhs == arg1
	      && jump_function_from_stmt (&arg1, stmt)))
	empty_or_with_defined_p = false;
    }

  cond = last_stmt (cond_bb);
  code = gimple_cond_code (cond);

  /* This transformation is only valid for equality comparisons.  */
  if (code != NE_EXPR && code != EQ_EXPR)
    return 0;

  /* We need to know which is the true edge and which is the false
     edge so that we know which PHI argument goes with which edge.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* At this point we know we have a COND_EXPR with two successors.
     One successor is BB, the other successor is an empty block which
     falls through into BB.

     The condition for the COND_EXPR is known to be NE_EXPR or EQ_EXPR.

     There is a single PHI node at the join point (BB) with two arguments.

     We now need to verify that the two arguments in the PHI node match
     the two arguments to the equality comparison.  */

  if (operand_equal_for_value_replacement (arg0, arg1, &code, cond))
    {
      edge e;
      tree arg;

      /* For NE_EXPR, we want to build an assignment result = arg where
	 arg is the PHI argument associated with the true edge.  For
	 EQ_EXPR we want the PHI argument associated with the false edge.  */
      e = (code == NE_EXPR ? true_edge : false_edge);

      /* Unfortunately, E may not reach BB (it may instead have gone to
	 OTHER_BLOCK).  If that is the case, then we want the single outgoing
	 edge from OTHER_BLOCK which reaches BB and represents the desired
	 path from COND_BLOCK.  */
      if (e->dest == middle_bb)
	e = single_succ_edge (e->dest);

      /* Now we know the incoming edge to BB that has the argument for the
	 RHS of our new assignment statement.  */
      if (e0 == e)
	arg = arg0;
      else
	arg = arg1;

      /* If the middle basic block was empty or only defines the PHI
	 arguments, and this is the single PHI whose arguments differ for
	 the edges e0 and e1, then we can remove the middle basic block.  */
      if (empty_or_with_defined_p
	  && single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)),
						 e0, e1) == phi)
	{
	  replace_phi_edge_with_variable (cond_bb, e1, phi, arg);
	  /* Note that we optimized this PHI.  */
	  return 2;
	}
      else
	{
	  /* Replace the PHI arguments with arg.  */
	  SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
	  SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "PHI ");
	      print_generic_expr (dump_file, gimple_phi_result (phi), 0);
	      fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
		       cond_bb->index);
	      print_generic_expr (dump_file, arg, 0);
	      fprintf (dump_file, ".\n");
	    }
	  return 1;
	}

    }
  /* Now optimize (x != 0) ? x + y : y to just x + y.
     The following condition is too restrictive, there can easily be another
     stmt in middle_bb, for instance a CONVERT_EXPR for the second argument.  */
  gimple *assign = last_and_only_stmt (middle_bb);
  if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
      || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
      || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
	  && !POINTER_TYPE_P (TREE_TYPE (arg0))))
    return 0;

  /* Punt if there are (degenerate) PHIs in middle_bb, there should not be.  */
  if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
    return 0;

  /* Only transform if it removes the condition.  */
  if (!single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)), e0, e1))
    return 0;

  /* Size-wise, this is always profitable.  */
  if (optimize_bb_for_speed_p (cond_bb)
      /* The special case is useless if it has a low probability.  */
      && profile_status_for_fn (cfun) != PROFILE_ABSENT
      && EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN
      /* If assign is cheap, there is no point avoiding it.  */
      && estimate_num_insns (assign, &eni_time_weights)
	 >= 3 * estimate_num_insns (cond, &eni_time_weights))
    return 0;

  tree lhs = gimple_assign_lhs (assign);
  tree rhs1 = gimple_assign_rhs1 (assign);
  tree rhs2 = gimple_assign_rhs2 (assign);
  enum tree_code code_def = gimple_assign_rhs_code (assign);
  tree cond_lhs = gimple_cond_lhs (cond);
  tree cond_rhs = gimple_cond_rhs (cond);

  if (((code == NE_EXPR && e1 == false_edge)
       || (code == EQ_EXPR && e1 == true_edge))
      && arg0 == lhs
      && ((arg1 == rhs1
	   && operand_equal_for_phi_arg_p (rhs2, cond_lhs)
	   && neutral_element_p (code_def, cond_rhs, true))
	  || (arg1 == rhs2
	      && operand_equal_for_phi_arg_p (rhs1, cond_lhs)
	      && neutral_element_p (code_def, cond_rhs, false))
	  || (operand_equal_for_phi_arg_p (arg1, cond_rhs)
	      && (operand_equal_for_phi_arg_p (rhs2, cond_lhs)
		  || operand_equal_for_phi_arg_p (rhs1, cond_lhs))
	      && absorbing_element_p (code_def, cond_rhs))))
    {
      gsi = gsi_for_stmt (cond);
      if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
	{
	  /* Moving ASSIGN might change VR of lhs, e.g. when moving u_6
	     def-stmt in:
	     if (n_5 != 0)
	       goto <bb 3>;
	     else
	       goto <bb 4>;

	     <bb 3>:
	     # RANGE [0, 4294967294]
	     u_6 = n_5 + 4294967295;

	     <bb 4>:
	     # u_3 = PHI <u_6(3), 4294967295(2)>  */
	  SSA_NAME_RANGE_INFO (lhs) = NULL;
	  /* If available, we can use VR of phi result at least.  */
	  tree phires = gimple_phi_result (phi);
	  struct range_info_def *phires_range_info
	    = SSA_NAME_RANGE_INFO (phires);
	  if (phires_range_info)
	    duplicate_ssa_name_range_info (lhs, SSA_NAME_RANGE_TYPE (phires),
					   phires_range_info);
	}
      gimple_stmt_iterator gsi_from = gsi_for_stmt (assign);
      gsi_move_before (&gsi_from, &gsi);
      replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
      return 2;
    }

  return 0;
}
/* The function minmax_replacement does the main work of doing the minmax
   replacement.  Return true if the replacement is done.  Otherwise return
   false.
   BB is the basic block where the replacement is going to be done.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.  */
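/* For instance (a hypothetical example):

     if (a < b)
       x = a;
     else
       x = b;

   becomes x = MIN_EXPR <a, b>; with the arms swapped it becomes
   MAX_EXPR.  */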
static bool
minmax_replacement (basic_block cond_bb, basic_block middle_bb,
		    edge e0, edge e1, gimple *phi,
		    tree arg0, tree arg1)
{
  tree result, type;
  gcond *cond;
  gassign *new_stmt;
  edge true_edge, false_edge;
  enum tree_code cmp, minmax, ass_code;
  tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false;
  gimple_stmt_iterator gsi, gsi_from;

  type = TREE_TYPE (PHI_RESULT (phi));

  /* The optimization may be unsafe due to NaNs.  */
  if (HONOR_NANS (type))
    return false;

  cond = as_a <gcond *> (last_stmt (cond_bb));
  cmp = gimple_cond_code (cond);

  /* This transformation is only valid for order comparisons.  Record which
     operand is smaller/larger if the result of the comparison is true.  */
  alt_smaller = NULL_TREE;
  alt_larger = NULL_TREE;
  if (cmp == LT_EXPR || cmp == LE_EXPR)
    {
      smaller = gimple_cond_lhs (cond);
      larger = gimple_cond_rhs (cond);
      /* If we have smaller < CST it is equivalent to smaller <= CST-1.
	 Likewise smaller <= CST is equivalent to smaller < CST+1.  */
      if (TREE_CODE (larger) == INTEGER_CST)
	{
	  if (cmp == LT_EXPR)
	    {
	      bool overflow;
	      wide_int alt = wi::sub (larger, 1, TYPE_SIGN (TREE_TYPE (larger)),
				      &overflow);
	      if (! overflow)
		alt_larger = wide_int_to_tree (TREE_TYPE (larger), alt);
	    }
	  else
	    {
	      bool overflow;
	      wide_int alt = wi::add (larger, 1, TYPE_SIGN (TREE_TYPE (larger)),
				      &overflow);
	      if (! overflow)
		alt_larger = wide_int_to_tree (TREE_TYPE (larger), alt);
	    }
	}
    }
  else if (cmp == GT_EXPR || cmp == GE_EXPR)
    {
      smaller = gimple_cond_rhs (cond);
      larger = gimple_cond_lhs (cond);
      /* If we have larger > CST it is equivalent to larger >= CST+1.
	 Likewise larger >= CST is equivalent to larger > CST-1.  */
      if (TREE_CODE (smaller) == INTEGER_CST)
	{
	  if (cmp == GT_EXPR)
	    {
	      bool overflow;
	      wide_int alt = wi::add (smaller, 1, TYPE_SIGN (TREE_TYPE (smaller)),
				      &overflow);
	      if (! overflow)
		alt_smaller = wide_int_to_tree (TREE_TYPE (smaller), alt);
	    }
	  else
	    {
	      bool overflow;
	      wide_int alt = wi::sub (smaller, 1, TYPE_SIGN (TREE_TYPE (smaller)),
				      &overflow);
	      if (! overflow)
		alt_smaller = wide_int_to_tree (TREE_TYPE (smaller), alt);
	    }
	}
    }
  else
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we can match the PHI arguments with SMALLER and
     LARGER.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* Forward the edges over the middle basic block.  */
  if (true_edge->dest == middle_bb)
    true_edge = EDGE_SUCC (true_edge->dest, 0);
  if (false_edge->dest == middle_bb)
    false_edge = EDGE_SUCC (false_edge->dest, 0);

  if (true_edge == e0)
    {
      gcc_assert (false_edge == e1);
      arg_true = arg0;
      arg_false = arg1;
    }
  else
    {
      gcc_assert (false_edge == e0);
      gcc_assert (true_edge == e1);
      arg_true = arg1;
      arg_false = arg0;
    }

  if (empty_block_p (middle_bb))
    {
      if ((operand_equal_for_phi_arg_p (arg_true, smaller)
	   || (alt_smaller
	       && operand_equal_for_phi_arg_p (arg_true, alt_smaller)))
	  && (operand_equal_for_phi_arg_p (arg_false, larger)
	      || (alt_larger
		  && operand_equal_for_phi_arg_p (arg_false, alt_larger))))
	{
	  /* Case

	     if (smaller < larger)
	       rslt = smaller;
	     else
	       rslt = larger;  */
	  minmax = MIN_EXPR;
	}
      else if ((operand_equal_for_phi_arg_p (arg_false, smaller)
		|| (alt_smaller
		    && operand_equal_for_phi_arg_p (arg_false, alt_smaller)))
	       && (operand_equal_for_phi_arg_p (arg_true, larger)
		   || (alt_larger
		       && operand_equal_for_phi_arg_p (arg_true, alt_larger))))
	minmax = MAX_EXPR;
      else
	return false;
    }
  else
    {
      /* Recognize the following case, assuming d <= u:

	 if (a <= u)
	   b = MAX (a, d);
	 x = PHI <b, u>

	 This is equivalent to

	 b = MAX (a, d);
	 x = MIN (b, u);  */

      gimple *assign = last_and_only_stmt (middle_bb);
      tree lhs, op0, op1, bound;

      if (!assign
	  || gimple_code (assign) != GIMPLE_ASSIGN)
	return false;

      lhs = gimple_assign_lhs (assign);
      ass_code = gimple_assign_rhs_code (assign);
      if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
	return false;
      op0 = gimple_assign_rhs1 (assign);
      op1 = gimple_assign_rhs2 (assign);

      if (true_edge->src == middle_bb)
	{
	  /* We got here if the condition is true, i.e., SMALLER < LARGER.  */
	  if (!operand_equal_for_phi_arg_p (lhs, arg_true))
	    return false;

	  if (operand_equal_for_phi_arg_p (arg_false, larger)
	      || (alt_larger
		  && operand_equal_for_phi_arg_p (arg_false, alt_larger)))
	    {
	      /* Case

		 if (smaller < larger)
		   {
		     r' = MAX_EXPR (smaller, bound)
		   }
		 r = PHI <r', larger>  --> to be turned to MIN_EXPR.  */
	      if (ass_code != MAX_EXPR)
		return false;

	      minmax = MIN_EXPR;
	      if (operand_equal_for_phi_arg_p (op0, smaller)
		  || (alt_smaller
		      && operand_equal_for_phi_arg_p (op0, alt_smaller)))
		bound = op1;
	      else if (operand_equal_for_phi_arg_p (op1, smaller)
		       || (alt_smaller
			   && operand_equal_for_phi_arg_p (op1, alt_smaller)))
		bound = op0;
	      else
		return false;

	      /* We need BOUND <= LARGER.  */
	      if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
						  bound, larger)))
		return false;
	    }
	  else if (operand_equal_for_phi_arg_p (arg_false, smaller)
		   || (alt_smaller
		       && operand_equal_for_phi_arg_p (arg_false, alt_smaller)))
	    {
	      /* Case

		 if (smaller < larger)
		   {
		     r' = MIN_EXPR (larger, bound)
		   }
		 r = PHI <r', smaller>  --> to be turned to MAX_EXPR.  */
	      if (ass_code != MIN_EXPR)
		return false;

	      minmax = MAX_EXPR;
	      if (operand_equal_for_phi_arg_p (op0, larger)
		  || (alt_larger
		      && operand_equal_for_phi_arg_p (op0, alt_larger)))
		bound = op1;
	      else if (operand_equal_for_phi_arg_p (op1, larger)
		       || (alt_larger
			   && operand_equal_for_phi_arg_p (op1, alt_larger)))
		bound = op0;
	      else
		return false;

	      /* We need BOUND >= SMALLER.  */
	      if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
						  bound, smaller)))
		return false;
	    }
	  else
	    return false;
	}
      else
	{
	  /* We got here if the condition is false, i.e., SMALLER > LARGER.  */
	  if (!operand_equal_for_phi_arg_p (lhs, arg_false))
	    return false;

	  if (operand_equal_for_phi_arg_p (arg_true, larger)
	      || (alt_larger
		  && operand_equal_for_phi_arg_p (arg_true, alt_larger)))
	    {
	      /* Case

		 if (smaller > larger)
		   {
		     r' = MIN_EXPR (smaller, bound)
		   }
		 r = PHI <r', larger>  --> to be turned to MAX_EXPR.  */
	      if (ass_code != MIN_EXPR)
		return false;

	      minmax = MAX_EXPR;
	      if (operand_equal_for_phi_arg_p (op0, smaller)
		  || (alt_smaller
		      && operand_equal_for_phi_arg_p (op0, alt_smaller)))
		bound = op1;
	      else if (operand_equal_for_phi_arg_p (op1, smaller)
		       || (alt_smaller
			   && operand_equal_for_phi_arg_p (op1, alt_smaller)))
		bound = op0;
	      else
		return false;

	      /* We need BOUND >= LARGER.  */
	      if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
						  bound, larger)))
		return false;
	    }
	  else if (operand_equal_for_phi_arg_p (arg_true, smaller)
		   || (alt_smaller
		       && operand_equal_for_phi_arg_p (arg_true, alt_smaller)))
	    {
	      /* Case

		 if (smaller > larger)
		   {
		     r' = MAX_EXPR (larger, bound)
		   }
		 r = PHI <r', smaller>  --> to be turned to MIN_EXPR.  */
	      if (ass_code != MAX_EXPR)
		return false;

	      minmax = MIN_EXPR;
	      if (operand_equal_for_phi_arg_p (op0, larger)
		  || (alt_larger
		      && operand_equal_for_phi_arg_p (op0, alt_larger)))
		bound = op1;
	      else if (operand_equal_for_phi_arg_p (op1, larger)
		       || (alt_larger
			   && operand_equal_for_phi_arg_p (op1, alt_larger)))
		bound = op0;
	      else
		return false;

	      /* We need BOUND <= SMALLER.  */
	      if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
						  bound, smaller)))
		return false;
	    }
	  else
	    return false;
	}

      /* Move the statement from the middle block.  */
      gsi = gsi_last_bb (cond_bb);
      gsi_from = gsi_last_nondebug_bb (middle_bb);
      gsi_move_before (&gsi_from, &gsi);
    }

  /* Create an SSA var to hold the min/max result.  If we're the only
     things setting the target PHI, then we can clone the PHI
     variable.  Otherwise we must create a new one.  */
  result = PHI_RESULT (phi);
  if (EDGE_COUNT (gimple_bb (phi)->preds) == 2)
    result = duplicate_ssa_name (result, NULL);
  else
    result = make_ssa_name (TREE_TYPE (result));

  /* Emit the statement to compute min/max.  */
  new_stmt = gimple_build_assign (result, minmax, arg0, arg1);
  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  replace_phi_edge_with_variable (cond_bb, e1, phi, result);
  reset_flow_sensitive_info_in_bb (cond_bb);

  return true;
}

/* The function abs_replacement does the main work of doing the absolute
   replacement.  Return true if the replacement is done.  Otherwise return
   false.
   BB is the basic block where the replacement is going to be done.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.  */
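/* For instance (a hypothetical example):

     if (a >= 0)
       x = a;
     else
       x = -a;

   becomes x = ABS_EXPR <a>; when the negated value is selected on the
   positive side instead, the result is negated again,
   x = -ABS_EXPR <a>.  */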
static bool
abs_replacement (basic_block cond_bb, basic_block middle_bb,
		 edge e0 ATTRIBUTE_UNUSED, edge e1,
		 gimple *phi, tree arg0, tree arg1)
{
  tree result;
  gassign *new_stmt;
  gimple *cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  gimple *assign;
  edge e;
  tree rhs, lhs;
  bool negate;
  enum tree_code cond_code;

  /* If the type says honor signed zeros we cannot do this
     optimization.  */
  if (HONOR_SIGNED_ZEROS (arg1))
    return false;

  /* OTHER_BLOCK must have only one executable statement which must have the
     form arg0 = -arg1 or arg1 = -arg0.  */

  assign = last_and_only_stmt (middle_bb);
  /* If we did not find the proper negation assignment, then we cannot
     optimize.  */
  if (assign == NULL)
    return false;

  /* If we got here, then we have found the only executable statement
     in OTHER_BLOCK.  If it is anything other than arg0 = -arg1 or
     arg1 = -arg0, then we cannot optimize.  */
  if (gimple_code (assign) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (assign);

  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
    return false;

  rhs = gimple_assign_rhs1 (assign);

  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
  if (!(lhs == arg0 && rhs == arg1)
      && !(lhs == arg1 && rhs == arg0))
    return false;

  cond = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* Only relationals comparing arg[01] against zero are interesting.  */
  cond_code = gimple_cond_code (cond);
  if (cond_code != GT_EXPR && cond_code != GE_EXPR
      && cond_code != LT_EXPR && cond_code != LE_EXPR)
    return false;

  /* Make sure the conditional is arg[01] OP y.  */
  if (gimple_cond_lhs (cond) != rhs)
    return false;

  if (FLOAT_TYPE_P (TREE_TYPE (gimple_cond_rhs (cond)))
      ? real_zerop (gimple_cond_rhs (cond))
      : integer_zerop (gimple_cond_rhs (cond)))
    ;
  else
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we know whether we have abs or negative abs.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* For GT_EXPR/GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
     will need to negate the result.  Similarly for LT_EXPR/LE_EXPR if
     the false edge goes to OTHER_BLOCK.  */
  if (cond_code == GT_EXPR || cond_code == GE_EXPR)
    e = true_edge;
  else
    e = false_edge;

  if (e->dest == middle_bb)
    negate = true;
  else
    negate = false;

  /* If the code negates only iff positive then make sure to not
     introduce undefined behavior when negating or computing the absolute.
     ???  We could use range info if present to check for arg1 == INT_MIN.  */
  if (negate
      && (ANY_INTEGRAL_TYPE_P (TREE_TYPE (arg1))
	  && ! TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg1))))
    return false;

  result = duplicate_ssa_name (result, NULL);

  if (negate)
    lhs = make_ssa_name (TREE_TYPE (result));
  else
    lhs = result;

  /* Build the new assignment with the ABS_EXPR.  */
  new_stmt = gimple_build_assign (lhs, ABS_EXPR, rhs);

  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  if (negate)
    {
      /* Get the right GSI.  We want to insert after the recently
	 added ABS_EXPR statement (which we know is the first statement
	 in the block).  */
      new_stmt = gimple_build_assign (result, NEGATE_EXPR, lhs);

      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, result);
  reset_flow_sensitive_info_in_bb (cond_bb);

  /* Note that we optimized this PHI.  */
  return true;
}

/* Auxiliary functions to determine the set of memory accesses which
   can't trap because they are preceded by accesses to the same memory
   portion.  We do that for MEM_REFs, so we only need to track
   the SSA_NAME of the pointer indirectly referenced.  The algorithm
   simply is a walk over all instructions in dominator order.  When
   we see a MEM_REF we determine if we've already seen a same
   ref anywhere up to the root of the dominator tree.  If we do, the
   current access can't trap.  If we don't see any dominating access,
   the current access might trap, but might also make later accesses
   non-trapping, so we remember it.  We need to be careful with loads
   or stores, for instance a load might not trap, while a store would,
   so if we see a dominating read access this doesn't mean that a later
   write access would not trap.  Hence we also need to differentiate the
   type of access(es) seen.

   ??? We currently are very conservative and assume that a load might
   trap even if a store doesn't (write-only memory).  This probably is
   overly conservative.  */
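/* A hypothetical sketch of the idea:

     *p_1 = x;      <- store through p_1 recorded here
     if (c)
       y = *p_1;    <- dominated by the store, so it cannot trap

   The load from *p_1 is added to the non-trapping set because a store to
   the same memory dominates it; per the note above, a dominating load
   would not make a later store non-trapping.  */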
/* A hash-table of SSA_NAMEs, and in which basic block a MEM_REF
   through it was seen, which would constitute a no-trap region for
   the same accesses.  */
struct name_to_bb
{
  unsigned int ssa_name_ver;
  unsigned int phase;
  bool store;
  HOST_WIDE_INT offset, size;
  basic_block bb;
};

/* Hashtable helpers.  */

struct ssa_names_hasher : free_ptr_hash <name_to_bb>
{
  static inline hashval_t hash (const name_to_bb *);
  static inline bool equal (const name_to_bb *, const name_to_bb *);
};

/* Used for quick clearing of the hash-table when we see calls.
   Hash entries with phase < nt_call_phase are invalid.  */
static unsigned int nt_call_phase;

/* The hash function.  */

inline hashval_t
ssa_names_hasher::hash (const name_to_bb *n)
{
  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
	 ^ (n->offset << 6) ^ (n->size << 3);
}

/* The equality function of *P1 and *P2.  */

inline bool
ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)
{
  return n1->ssa_name_ver == n2->ssa_name_ver
	 && n1->store == n2->store
	 && n1->offset == n2->offset
	 && n1->size == n2->size;
}

class nontrapping_dom_walker : public dom_walker
{
public:
  nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}

  virtual edge before_dom_children (basic_block);
  virtual void after_dom_children (basic_block);

private:

  /* We see the expression EXP in basic block BB.  If it's an interesting
     expression (a MEM_REF through an SSA_NAME) possibly insert the
     expression into the set NONTRAP or the hash table of seen expressions.
     STORE is true if this expression is on the LHS, otherwise it's on
     the RHS.  */
  void add_or_mark_expr (basic_block, tree, bool);

  hash_set<tree> *m_nontrapping;

  /* The hash table for remembering what we've seen.  */
  hash_table<ssa_names_hasher> m_seen_ssa_names;
};

/* Called by walk_dominator_tree, when entering the block BB.  */
edge
nontrapping_dom_walker::before_dom_children (basic_block bb)
{
  edge e;
  edge_iterator ei;
  gimple_stmt_iterator gsi;

  /* If we haven't seen all our predecessors, clear the hash-table.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if ((((size_t)e->src->aux) & 2) == 0)
      {
	nt_call_phase++;
	break;
      }

  /* Mark this BB as being on the path to dominator root and as visited.  */
  bb->aux = (void*)(1 | 2);

  /* And walk the statements in order.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if ((gimple_code (stmt) == GIMPLE_ASM && gimple_vdef (stmt))
	  || (is_gimple_call (stmt)
	      && (!nonfreeing_call_p (stmt) || !nonbarrier_call_p (stmt))))
	nt_call_phase++;
      else if (gimple_assign_single_p (stmt) && !gimple_has_volatile_ops (stmt))
	{
	  add_or_mark_expr (bb, gimple_assign_lhs (stmt), true);
	  add_or_mark_expr (bb, gimple_assign_rhs1 (stmt), false);
	}
    }
  return NULL;
}

/* Called by walk_dominator_tree, when basic block BB is exited.  */
void
nontrapping_dom_walker::after_dom_children (basic_block bb)
{
  /* This BB isn't on the path to dominator root anymore.  */
  bb->aux = (void*)2;
}
/* We see the expression EXP in basic block BB.  If it's an interesting
   expression (a MEM_REF through an SSA_NAME) possibly insert the
   expression into the set NONTRAP or the hash table of seen expressions.
   STORE is true if this expression is on the LHS, otherwise it's on
   the RHS.  */
void
nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)
{
  HOST_WIDE_INT size;

  if (TREE_CODE (exp) == MEM_REF
      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
    {
      tree name = TREE_OPERAND (exp, 0);
      struct name_to_bb map;
      name_to_bb **slot;
      struct name_to_bb *n2bb;
      basic_block found_bb = 0;

      /* Try to find the last seen MEM_REF through the same
	 SSA_NAME, which can trap.  */
      map.ssa_name_ver = SSA_NAME_VERSION (name);
      map.phase = 0;
      map.bb = 0;
      map.store = store;
      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
      map.size = size;

      slot = m_seen_ssa_names.find_slot (&map, INSERT);
      n2bb = *slot;
      if (n2bb && n2bb->phase >= nt_call_phase)
	found_bb = n2bb->bb;

      /* If we've found a trapping MEM_REF, _and_ it dominates EXP
	 (it's in a basic block on the path from us to the dominator root)
	 then we can't trap.  */
      if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
	{
	  m_nontrapping->add (exp);
	}
      else
	{
	  /* EXP might trap, so insert it into the hash table.  */
	  if (n2bb)
	    {
	      n2bb->phase = nt_call_phase;
	      n2bb->bb = bb;
	    }
	  else
	    {
	      n2bb = XNEW (struct name_to_bb);
	      n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
	      n2bb->phase = nt_call_phase;
	      n2bb->bb = bb;
	      n2bb->store = store;
	      n2bb->offset = map.offset;
	      n2bb->size = size;
	      *slot = n2bb;
	    }
	}
    }
}

/* This is the entry point of gathering non-trapping memory accesses.
   It will do a dominator walk over the whole function, and it will
   make use of the bb->aux pointers.  It returns a set of trees
   (the MEM_REFs themselves) which can't trap.  */
static hash_set<tree> *
get_non_trapping (void)
{
  nt_call_phase = 0;
  hash_set<tree> *nontrap = new hash_set<tree>;
  /* We're going to do a dominator walk, so ensure that we have
     dominance information.  */
  calculate_dominance_info (CDI_DOMINATORS);

  nontrapping_dom_walker (CDI_DOMINATORS, nontrap)
    .walk (cfun->cfg->x_entry_block_ptr);

  clear_aux_for_blocks ();
  return nontrap;
}
/* Do the main work of conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1)
   MIDDLE_BB:
     something
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that MIDDLE_BB contains only one store, that that store
   doesn't trap (not via TREE_THIS_NOTRAP, but by checking whether an
   access to the same memory location dominates us) and that the store
   has a "simple" RHS.  */

static bool
cond_store_replacement (basic_block middle_bb, basic_block join_bb,
			edge e0, edge e1, hash_set<tree> *nontrap)
{
  gimple *assign = last_and_only_stmt (middle_bb);
  tree lhs, rhs, name, name2;
  gphi *newphi;
  gassign *new_stmt;
  gimple_stmt_iterator gsi;
  source_location locus;

  /* Check that middle_bb contains only one store.  */
  if (!assign
      || !gimple_assign_single_p (assign)
      || gimple_has_volatile_ops (assign))
    return false;

  locus = gimple_location (assign);
  lhs = gimple_assign_lhs (assign);
  rhs = gimple_assign_rhs1 (assign);
  if (TREE_CODE (lhs) != MEM_REF
      || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME
      || !is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Prove that we can move the store down.  We could also check
     TREE_THIS_NOTRAP here, but in that case we also could move stores,
     whose value is not available readily, which we want to avoid.  */
  if (!nontrap->contains (lhs))
    return false;

  /* Now we've checked the constraints, so do the transformation:
     1) Remove the single store.  */
  gsi = gsi_for_stmt (assign);
  unlink_stmt_vdef (assign);
  gsi_remove (&gsi, true);
  release_defs (assign);

  /* Make both store and load use alias-set zero as we have to
     deal with the case of the store being a conditional change
     of the dynamic type.  */
  lhs = unshare_expr (lhs);
  tree *basep = &lhs;
  while (handled_component_p (*basep))
    basep = &TREE_OPERAND (*basep, 0);
  if (TREE_CODE (*basep) == MEM_REF
      || TREE_CODE (*basep) == TARGET_MEM_REF)
    TREE_OPERAND (*basep, 1)
      = fold_convert (ptr_type_node, TREE_OPERAND (*basep, 1));
  else
    *basep = build2 (MEM_REF, TREE_TYPE (*basep),
		     build_fold_addr_expr (*basep),
		     build_zero_cst (ptr_type_node));

  /* 2) Insert a load from the memory of the store to the temporary
	on the edge which did not contain the store.  */
  name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  new_stmt = gimple_build_assign (name, lhs);
  gimple_set_location (new_stmt, locus);
  gsi_insert_on_edge (e1, new_stmt);

  /* 3) Create a PHI node at the join block, with one argument
	holding the old RHS, and the other holding the temporary
	where we stored the old memory contents.  */
  name2 = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  newphi = create_phi_node (name2, join_bb);
  add_phi_arg (newphi, rhs, e0, locus);
  add_phi_arg (newphi, name, e1, locus);

  lhs = unshare_expr (lhs);
  new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));

  /* 4) Insert the new store that uses the PHI result.  */
  gsi = gsi_after_labels (join_bb);
  if (gsi_end_p (gsi))
    {
      gsi = gsi_last_bb (join_bb);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  return true;
}
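/* For example (an illustrative sketch; the function is made up):

     void
     set_flag (int *p, int cond)
     {
       if (cond)
	 *p = 1;
     }

   becomes, once the store to *p is proven non-trapping as above,

     void
     set_flag (int *p, int cond)
     {
       int tmp = cond ? 1 : *p;
       *p = tmp;
     }

   which later passes can turn into a conditional move followed by an
   unconditional store.  */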
/* Do the main work of conditional store replacement for one pair of
   conditional stores.  */

static bool
cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
				  basic_block join_bb, gimple *then_assign,
				  gimple *else_assign)
{
  tree lhs_base, lhs, then_rhs, else_rhs, name;
  source_location then_locus, else_locus;
  gimple_stmt_iterator gsi;
  gphi *newphi;
  gassign *new_stmt;

  if (then_assign == NULL
      || !gimple_assign_single_p (then_assign)
      || gimple_clobber_p (then_assign)
      || gimple_has_volatile_ops (then_assign)
      || else_assign == NULL
      || !gimple_assign_single_p (else_assign)
      || gimple_clobber_p (else_assign)
      || gimple_has_volatile_ops (else_assign))
    return false;

  lhs = gimple_assign_lhs (then_assign);
  if (!is_gimple_reg_type (TREE_TYPE (lhs))
      || !operand_equal_p (lhs, gimple_assign_lhs (else_assign), 0))
    return false;

  lhs_base = get_base_address (lhs);
  if (lhs_base == NULL_TREE
      || (!DECL_P (lhs_base) && TREE_CODE (lhs_base) != MEM_REF))
    return false;

  then_rhs = gimple_assign_rhs1 (then_assign);
  else_rhs = gimple_assign_rhs1 (else_assign);
  then_locus = gimple_location (then_assign);
  else_locus = gimple_location (else_assign);

  /* Now we've checked the constraints, so do the transformation:
     1) Remove the stores.  */
  gsi = gsi_for_stmt (then_assign);
  unlink_stmt_vdef (then_assign);
  gsi_remove (&gsi, true);
  release_defs (then_assign);

  gsi = gsi_for_stmt (else_assign);
  unlink_stmt_vdef (else_assign);
  gsi_remove (&gsi, true);
  release_defs (else_assign);

  /* 2) Create a PHI node at the join block, with one argument
	holding the RHS of the then-store and the other holding
	the RHS of the else-store.  */
  name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  newphi = create_phi_node (name, join_bb);
  add_phi_arg (newphi, then_rhs, EDGE_SUCC (then_bb, 0), then_locus);
  add_phi_arg (newphi, else_rhs, EDGE_SUCC (else_bb, 0), else_locus);

  new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));

  /* 3) Insert the new store that uses the PHI result.  */
  gsi = gsi_after_labels (join_bb);
  if (gsi_end_p (gsi))
    {
      gsi = gsi_last_bb (join_bb);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  return true;
}
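/* For example (an illustrative sketch; names are made up):

     if (cond)
       *p = a;
     else
       *p = b;

   becomes

     *p = cond ? a : b;

   No non-trapping proof is needed here: *p is written on every path
   through the diamond, so the sunk store cannot introduce a trap.  */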
/* Conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
   THEN_BB:
     ...
     X = Y;
     ...
     goto JOIN_BB;
   ELSE_BB:
     ...
     X = Z;
     ...
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that it is safe to sink the store to JOIN_BB by verifying that
   there are no read-after-write or write-after-write dependencies in
   THEN_BB and ELSE_BB.  */

static bool
cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
				basic_block join_bb)
{
  gimple *then_assign = last_and_only_stmt (then_bb);
  gimple *else_assign = last_and_only_stmt (else_bb);
  vec<data_reference_p> then_datarefs, else_datarefs;
  vec<ddr_p> then_ddrs, else_ddrs;
  gimple *then_store, *else_store;
  bool found, ok = false, res;
  struct data_dependence_relation *ddr;
  data_reference_p then_dr, else_dr;
  int i, j;
  tree then_lhs, else_lhs;
  basic_block blocks[3];

  if (MAX_STORES_TO_SINK == 0)
    return false;

  /* Handle the case with a single statement in THEN_BB and ELSE_BB.  */
  if (then_assign && else_assign)
    return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
					     then_assign, else_assign);

  /* Find data references.  */
  then_datarefs.create (1);
  else_datarefs.create (1);
  if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs)
       == chrec_dont_know)
      || !then_datarefs.length ()
      || (find_data_references_in_bb (NULL, else_bb, &else_datarefs)
	  == chrec_dont_know)
      || !else_datarefs.length ())
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }

  /* Find pairs of stores with equal LHS.  */
  auto_vec<gimple *, 1> then_stores, else_stores;
  FOR_EACH_VEC_ELT (then_datarefs, i, then_dr)
    {
      if (DR_IS_READ (then_dr))
	continue;

      then_store = DR_STMT (then_dr);
      then_lhs = gimple_get_lhs (then_store);
      if (then_lhs == NULL_TREE)
	continue;
      found = false;

      FOR_EACH_VEC_ELT (else_datarefs, j, else_dr)
	{
	  if (DR_IS_READ (else_dr))
	    continue;

	  else_store = DR_STMT (else_dr);
	  else_lhs = gimple_get_lhs (else_store);
	  if (else_lhs == NULL_TREE)
	    continue;

	  if (operand_equal_p (then_lhs, else_lhs, 0))
	    {
	      found = true;
	      break;
	    }
	}

      if (!found)
	continue;

      then_stores.safe_push (then_store);
      else_stores.safe_push (else_store);
    }

  /* No pairs of stores found, or more pairs than we are allowed
     to sink.  */
  if (!then_stores.length ()
      || then_stores.length () > (unsigned) MAX_STORES_TO_SINK)
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }

  /* Compute and check data dependencies in both basic blocks.  */
  then_ddrs.create (1);
  else_ddrs.create (1);
  if (!compute_all_dependences (then_datarefs, &then_ddrs,
				vNULL, false)
      || !compute_all_dependences (else_datarefs, &else_ddrs,
				   vNULL, false))
    {
      free_dependence_relations (then_ddrs);
      free_dependence_relations (else_ddrs);
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }
  blocks[0] = then_bb;
  blocks[1] = else_bb;
  blocks[2] = join_bb;
  renumber_gimple_stmt_uids_in_blocks (blocks, 3);

  /* Check that there are no read-after-write or write-after-write
     dependencies in THEN_BB.  */
  FOR_EACH_VEC_ELT (then_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
	  && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
	       && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
	      || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
		  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
	      || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
	{
	  free_dependence_relations (then_ddrs);
	  free_dependence_relations (else_ddrs);
	  free_data_refs (then_datarefs);
	  free_data_refs (else_datarefs);
	  return false;
	}
    }

  /* Check that there are no read-after-write or write-after-write
     dependencies in ELSE_BB.  */
  FOR_EACH_VEC_ELT (else_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
	  && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
	       && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
	      || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
		  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
	      || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
	{
	  free_dependence_relations (then_ddrs);
	  free_dependence_relations (else_ddrs);
	  free_data_refs (then_datarefs);
	  free_data_refs (else_datarefs);
	  return false;
	}
    }

  /* Sink stores with the same LHS.  */
  FOR_EACH_VEC_ELT (then_stores, i, then_store)
    {
      else_store = else_stores[i];
      res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
					      then_store, else_store);
      ok = ok || res;
    }

  free_dependence_relations (then_ddrs);
  free_dependence_relations (else_ddrs);
  free_data_refs (then_datarefs);
  free_data_refs (else_datarefs);

  return ok;
}
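/* For example (an illustrative sketch; names are made up):

     if (cond)
       {
	 s->x = a;
	 s->y = 1;
       }
     else
       {
	 s->y = 2;
	 s->x = b;
       }

   Both s->x and s->y are written on both arms and the two fields
   cannot overlap, so each pair of stores is sunk into the join block:

     s->x = cond ? a : b;
     s->y = cond ? 1 : 2;  */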
/* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB.  */

static bool
local_mem_dependence (gimple *stmt, basic_block bb)
{
  tree vuse = gimple_vuse (stmt);
  gimple *def;

  if (!vuse)
    return false;

  def = SSA_NAME_DEF_STMT (vuse);
  return (def && gimple_bb (def) == bb);
}

/* Given a "diamond" control-flow pattern where BB0 tests a condition,
   BB1 and BB2 are "then" and "else" blocks dependent on this test,
   and BB3 rejoins control flow following BB1 and BB2, look for
   opportunities to hoist loads as follows.  If BB3 contains a PHI of
   two loads, one each occurring in BB1 and BB2, and the loads are
   provably of adjacent fields in the same structure, then move both
   loads into BB0.  Of course this can only be done if there are no
   dependencies preventing such motion.

   One of the hoisted loads will always be speculative, so the
   transformation is currently conservative:

    - The fields must be strictly adjacent.
    - The two fields must occupy a single memory block that is
      guaranteed not to cross a page boundary.

   The last is difficult to prove, as such memory blocks should be
   aligned on the minimum of the stack alignment boundary and the
   alignment guaranteed by heap allocation interfaces.  Thus we rely
   on a parameter for the alignment value.

   Provided a good value is used for the last case, the first
   restriction could possibly be relaxed.  */

static void
hoist_adjacent_loads (basic_block bb0, basic_block bb1,
		      basic_block bb2, basic_block bb3)
{
  int param_align = PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE);
  unsigned param_align_bits = (unsigned) (param_align * BITS_PER_UNIT);
  gphi_iterator gsi;

  /* Walk the phis in bb3 looking for an opportunity.  We are looking
     for phis of two SSA names, one each of which is defined in bb1 and
     bb2.  */
  for (gsi = gsi_start_phis (bb3); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi_stmt = gsi.phi ();
      gimple *def1, *def2;
      tree arg1, arg2, ref1, ref2, field1, field2;
      tree tree_offset1, tree_offset2, tree_size2, next;
      int offset1, offset2, size2;
      unsigned align1;
      gimple_stmt_iterator gsi2;
      basic_block bb_for_def1, bb_for_def2;

      if (gimple_phi_num_args (phi_stmt) != 2
	  || virtual_operand_p (gimple_phi_result (phi_stmt)))
	continue;

      arg1 = gimple_phi_arg_def (phi_stmt, 0);
      arg2 = gimple_phi_arg_def (phi_stmt, 1);

      if (TREE_CODE (arg1) != SSA_NAME
	  || TREE_CODE (arg2) != SSA_NAME
	  || SSA_NAME_IS_DEFAULT_DEF (arg1)
	  || SSA_NAME_IS_DEFAULT_DEF (arg2))
	continue;

      def1 = SSA_NAME_DEF_STMT (arg1);
      def2 = SSA_NAME_DEF_STMT (arg2);

      if ((gimple_bb (def1) != bb1 || gimple_bb (def2) != bb2)
	  && (gimple_bb (def2) != bb1 || gimple_bb (def1) != bb2))
	continue;

      /* Check the mode of the arguments to be sure a conditional move
	 can be generated for it.  */
      if (optab_handler (movcc_optab, TYPE_MODE (TREE_TYPE (arg1)))
	  == CODE_FOR_nothing)
	continue;

      /* Both statements must be assignments whose RHS is a COMPONENT_REF.  */
      if (!gimple_assign_single_p (def1)
	  || !gimple_assign_single_p (def2)
	  || gimple_has_volatile_ops (def1)
	  || gimple_has_volatile_ops (def2))
	continue;

      ref1 = gimple_assign_rhs1 (def1);
      ref2 = gimple_assign_rhs1 (def2);

      if (TREE_CODE (ref1) != COMPONENT_REF
	  || TREE_CODE (ref2) != COMPONENT_REF)
	continue;

      /* The zeroth operand of the two component references must be
	 identical.  It is not sufficient to compare get_base_address of
	 the two references, because this could allow for different
	 elements of the same array in the two trees.  It is not safe to
	 assume that the existence of one array element implies the
	 existence of a different one.  */
      if (!operand_equal_p (TREE_OPERAND (ref1, 0), TREE_OPERAND (ref2, 0), 0))
	continue;

      field1 = TREE_OPERAND (ref1, 1);
      field2 = TREE_OPERAND (ref2, 1);

      /* Check for field adjacency, and ensure field1 comes first.  */
      for (next = DECL_CHAIN (field1);
	   next && TREE_CODE (next) != FIELD_DECL;
	   next = DECL_CHAIN (next))
	;

      if (next != field2)
	{
	  for (next = DECL_CHAIN (field2);
	       next && TREE_CODE (next) != FIELD_DECL;
	       next = DECL_CHAIN (next))
	    ;

	  if (next != field1)
	    continue;

	  std::swap (field1, field2);
	  std::swap (def1, def2);
	}

      bb_for_def1 = gimple_bb (def1);
      bb_for_def2 = gimple_bb (def2);

      /* Check for proper alignment of the first field.  */
      tree_offset1 = bit_position (field1);
      tree_offset2 = bit_position (field2);
      tree_size2 = DECL_SIZE (field2);

      if (!tree_fits_uhwi_p (tree_offset1)
	  || !tree_fits_uhwi_p (tree_offset2)
	  || !tree_fits_uhwi_p (tree_size2))
	continue;

      offset1 = tree_to_uhwi (tree_offset1);
      offset2 = tree_to_uhwi (tree_offset2);
      size2 = tree_to_uhwi (tree_size2);
      align1 = DECL_ALIGN (field1) % param_align_bits;

      if (offset1 % BITS_PER_UNIT != 0)
	continue;

      /* For profitability, the two field references should fit within
	 a single cache line.  */
      if (align1 + offset2 - offset1 + size2 > param_align_bits)
	continue;

      /* The two expressions cannot be dependent upon vdefs defined
	 in bb1/bb2.  */
      if (local_mem_dependence (def1, bb_for_def1)
	  || local_mem_dependence (def2, bb_for_def2))
	continue;

      /* The conditions are satisfied; hoist the loads from bb1 and bb2 into
	 bb0.  We hoist the first one first so that a cache miss is handled
	 efficiently regardless of hardware cache-fill policy.  */
      gsi2 = gsi_for_stmt (def1);
      gsi_move_to_bb_end (&gsi2, bb0);
      gsi2 = gsi_for_stmt (def2);
      gsi_move_to_bb_end (&gsi2, bb0);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file,
		   "\nHoisting adjacent loads from %d and %d into %d: \n",
		   bb_for_def1->index, bb_for_def2->index, bb0->index);
	  print_gimple_stmt (dump_file, def1, 0, TDF_VOPS|TDF_MEMSYMS);
	  print_gimple_stmt (dump_file, def2, 0, TDF_VOPS|TDF_MEMSYMS);
	}
    }
}

/* Determine whether we should attempt to hoist adjacent loads out of
   diamond patterns in pass_phiopt.  Always hoist loads if
   -fhoist-adjacent-loads is specified and the target machine has
   both a conditional move instruction and a defined cache line size.  */

static bool
gate_hoist_loads (void)
{
  return (flag_hoist_adjacent_loads == 1
	  && PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
	  && HAVE_conditional_move);
}
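/* A worked instance of the profitability check above (an illustrative
   sketch): with a 64-byte (512-bit) L1 line and two adjacent 64-bit
   pointer fields at bit offsets 0 and 64, where DECL_ALIGN (field1)
   is 64, we get align1 = 64 % 512 = 64 and
   64 + 64 - 0 + 64 = 192 <= 512, so the two loads are taken to fit
   within a single cache line and the hoist is considered
   profitable.  */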
/* This pass tries to replace an if-then-else block with an
   assignment.  We have four kinds of transformations.  Some of these
   transformations are also performed by the ifcvt RTL optimizer.

   Conditional Replacement
   -----------------------

   This transformation, implemented in conditional_replacement,
   replaces

     bb0:
      if (cond) goto bb2; else goto bb1;
     bb1:
     bb2:
      x = PHI <0 (bb1), 1 (bb0), ...>;

   with

     bb0:
      x' = cond;
      goto bb2;
     bb2:
      x = PHI <x' (bb0), ...>;

   We remove bb1 as it becomes unreachable.  This occurs often due to
   gimplification of conditionals.

   Value Replacement
   -----------------

   This transformation, implemented in value_replacement, replaces

     bb0:
      if (a != b) goto bb2; else goto bb1;
     bb1:
     bb2:
      x = PHI <a (bb1), b (bb0), ...>;

   with

     bb0:
     bb2:
      x = PHI <b (bb0), ...>;

   This opportunity can sometimes occur as a result of other
   optimizations.


   Another case caught by value replacement looks like this:

     bb0:
       t1 = a == CONST;
       t2 = b > c;
       t3 = t1 & t2;
       if (t3 != 0) goto bb1; else goto bb2;
     bb1:
     bb2:
       x = PHI (CONST, a)

   Gets replaced with:
     bb0:
     bb2:
       t1 = a == CONST;
       t2 = b > c;
       t3 = t1 & t2;
       x = a;

   ABS Replacement
   ---------------

   This transformation, implemented in abs_replacement, replaces

     bb0:
      if (a >= 0) goto bb2; else goto bb1;
     bb1:
      x = -a;
     bb2:
      x = PHI <x (bb1), a (bb0), ...>;

   with

     bb0:
      x' = ABS_EXPR< a >;
     bb2:
      x = PHI <x' (bb0), ...>;

   MIN/MAX Replacement
   -------------------

   This transformation, implemented in minmax_replacement, replaces

     bb0:
      if (a <= b) goto bb2; else goto bb1;
     bb1:
     bb2:
      x = PHI <b (bb1), a (bb0), ...>;

   with

     bb0:
      x' = MIN_EXPR (a, b)
     bb2:
      x = PHI <x' (bb0), ...>;

   A similar transformation is done for MAX_EXPR.


   This pass also performs a fifth transformation of a slightly different
   flavor.

   Factor conversion in COND_EXPR
   ------------------------------

   This transformation factors the conversion out of COND_EXPR with
   factor_out_conditional_conversion.

   For example:
   if (a <= CST) goto <bb 3>; else goto <bb 4>;
   <bb 3>:
   tmp = (int) a;
   <bb 4>:
   tmp = PHI <tmp, CST>

   Into:
   if (a <= CST) goto <bb 3>; else goto <bb 4>;
   <bb 3>:
   <bb 4>:
   a = PHI <a, CST>
   tmp = (int) a;

   Adjacent Load Hoisting
   ----------------------

   This transformation replaces

     bb0:
      if (...) goto bb2; else goto bb1;
     bb1:
      x1 = (<expr>).field1;
      goto bb3;
     bb2:
      x2 = (<expr>).field2;
     bb3:
      # x = PHI <x1, x2>;

   with

     bb0:
      x1 = (<expr>).field1;
      x2 = (<expr>).field2;
      if (...) goto bb2; else goto bb1;
     bb1:
      goto bb3;
     bb2:
     bb3:
      # x = PHI <x1, x2>;

   The purpose of this transformation is to enable generation of conditional
   move instructions such as Intel CMOVE or PowerPC ISEL.  Because one of
   the loads is speculative, the transformation is restricted to very
   specific cases to avoid introducing a page fault.  We are looking for
   the common idiom:

     if (...)
       x = y->left;
     else
       x = y->right;

   where left and right are typically adjacent pointers in a tree
   structure.  */
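/* At the source level the scalar transformations above typically fire
   on conditional idioms such as (an illustrative sketch, not from the
   testsuite):

     x = cond ? 1 : 0;        // conditional replacement: x = cond
     x = (a != b) ? b : a;    // value replacement: x = b
     x = (a >= 0) ? a : -a;   // ABS replacement: x = ABS_EXPR <a>
     x = (a <= b) ? a : b;    // MIN/MAX replacement: x = MIN_EXPR <a, b>  */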
namespace {

const pass_data pass_data_phiopt =
{
  GIMPLE_PASS, /* type */
  "phiopt", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_PHIOPT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_phiopt : public gimple_opt_pass
{
public:
  pass_phiopt (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_phiopt, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_phiopt (m_ctxt); }
  virtual bool gate (function *) { return flag_ssa_phiopt; }
  virtual unsigned int execute (function *)
    {
      return tree_ssa_phiopt_worker (false, gate_hoist_loads ());
    }

}; // class pass_phiopt

} // anon namespace

gimple_opt_pass *
make_pass_phiopt (gcc::context *ctxt)
{
  return new pass_phiopt (ctxt);
}

namespace {

const pass_data pass_data_cselim =
{
  GIMPLE_PASS, /* type */
  "cselim", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_PHIOPT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_cselim : public gimple_opt_pass
{
public:
  pass_cselim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cselim, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_cselim; }
  virtual unsigned int execute (function *) { return tree_ssa_cs_elim (); }

}; // class pass_cselim

} // anon namespace

gimple_opt_pass *
make_pass_cselim (gcc::context *ctxt)
{
  return new pass_cselim (ctxt);
}