/* Optimization of PHI nodes by converting them into straightline code.
   Copyright (C) 2004-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "insn-codes.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-dfa.h"
#include "domwalk.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-inline.h"
#include "case-cfn-macros.h"
#include "tree-eh.h"

static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
				   tree, tree);
static bool conditional_replacement (basic_block, basic_block,
				     edge, edge, gphi *, tree, tree);
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree,
						gimple *);
static int value_replacement (basic_block, basic_block,
			      edge, edge, gimple *, tree, tree);
static bool minmax_replacement (basic_block, basic_block,
				edge, edge, gimple *, tree, tree);
static bool abs_replacement (basic_block, basic_block,
			     edge, edge, gimple *, tree, tree);
static bool cond_removal_in_popcount_pattern (basic_block, basic_block,
					      edge, edge, gimple *, tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
				    hash_set<tree> *);
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
static hash_set<tree> * get_non_trapping ();
static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree);
static void hoist_adjacent_loads (basic_block, basic_block,
				  basic_block, basic_block);
static bool gate_hoist_loads (void);

/* This pass tries to transform conditional stores into unconditional
   ones, enabling further simplifications with the simpler then and else
   blocks.  In particular it replaces this:

     bb0:
       if (cond) goto bb2; else goto bb1;
     bb1:
       *p = RHS;
     bb2:

   with

     bb0:
       if (cond) goto bb1; else goto bb2;
     bb1:
       condtmp' = *p;
     bb2:
       condtmp = PHI <RHS, condtmp'>
       *p = condtmp;

   This transformation can only be done under several constraints,
   documented below.
   It also replaces:

     bb0:
       if (cond) goto bb2; else goto bb1;
     bb1:
       *p = RHS1;
       goto bb3;
     bb2:
       *p = RHS2;
     bb3:

   with

     bb0:
       if (cond) goto bb3; else goto bb1;
     bb1:
     bb3:
       condtmp = PHI <RHS1, RHS2>
       *p = condtmp;  */

static unsigned int
tree_ssa_cs_elim (void)
{
  unsigned todo;
  /* ???  We are not interested in loop related info, but the following
     will create it, ICEing as we didn't init loops with pre-headers.
     An interfacing issue of find_data_references_in_bb.  */
  loop_optimizer_init (LOOPS_NORMAL);
  scev_initialize ();
  todo = tree_ssa_phiopt_worker (true, false, false);
  scev_finalize ();
  loop_optimizer_finalize ();
  return todo;
}

/* Return the singleton PHI in the SEQ of PHIs for edges E0 and E1.  */

static gphi *
single_non_singleton_phi_for_edges (gimple_seq seq, edge e0, edge e1)
{
  gimple_stmt_iterator i;
  gphi *phi = NULL;
  if (gimple_seq_singleton_p (seq))
    return as_a <gphi *> (gsi_stmt (gsi_start (seq)));
  for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
    {
      gphi *p = as_a <gphi *> (gsi_stmt (i));
      /* If the PHI arguments are equal then we can skip this PHI.  */
      if (operand_equal_for_phi_arg_p (gimple_phi_arg_def (p, e0->dest_idx),
				       gimple_phi_arg_def (p, e1->dest_idx)))
	continue;

      /* If we already have a PHI whose arguments for the two edges
	 differ, then this is not a singleton; return NULL.  */
      if (phi)
	return NULL;

      phi = p;
    }
  return phi;
}

/* The core routine of conditional store replacement and normal
   phi optimizations.  Both share much of the infrastructure in how
   to match applicable basic block patterns.  DO_STORE_ELIM is true
   when we want to do conditional store replacement, false otherwise.
   DO_HOIST_LOADS is true when we want to hoist adjacent loads out
   of diamond control flow patterns, false otherwise.  EARLY_P is
   true when this is invoked from the early pass, in which case some
   of the replacements below are skipped.  */
static unsigned int
tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
{
  basic_block bb;
  basic_block *bb_order;
  unsigned n, i;
  bool cfgchanged = false;
  hash_set<tree> *nontrap = 0;

  if (do_store_elim)
    /* Calculate the set of non-trapping memory accesses.  */
    nontrap = get_non_trapping ();

  /* Search every basic block for a COND_EXPR we may be able to optimize.

     We walk the blocks in an order that guarantees that a block with
     a single predecessor is processed before the predecessor.
     This ensures that we collapse inner ifs before visiting the
     outer ones, and also that we do not try to visit a removed
     block.  */
  bb_order = single_pred_before_succ_order ();
  n = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  for (i = 0; i < n; i++)
    {
      gimple *cond_stmt;
      gphi *phi;
      basic_block bb1, bb2;
      edge e1, e2;
      tree arg0, arg1;

      bb = bb_order[i];

      cond_stmt = last_stmt (bb);
      /* Check to see if the last statement is a GIMPLE_COND.  */
      if (!cond_stmt
	  || gimple_code (cond_stmt) != GIMPLE_COND)
	continue;

      e1 = EDGE_SUCC (bb, 0);
      bb1 = e1->dest;
      e2 = EDGE_SUCC (bb, 1);
      bb2 = e2->dest;

      /* We cannot do the optimization on abnormal edges.  */
      if ((e1->flags & EDGE_ABNORMAL) != 0
	  || (e2->flags & EDGE_ABNORMAL) != 0)
	continue;

      /* If bb1 has no successors, bb2 does not exist, or bb2 has no
	 successors, there is nothing to do.  */
      if (EDGE_COUNT (bb1->succs) == 0
	  || bb2 == NULL
	  || EDGE_COUNT (bb2->succs) == 0)
	continue;

      /* Find the bb which is the fall through to the other.  */
      if (EDGE_SUCC (bb1, 0)->dest == bb2)
	;
      else if (EDGE_SUCC (bb2, 0)->dest == bb1)
	{
	  std::swap (bb1, bb2);
	  std::swap (e1, e2);
	}
      else if (do_store_elim
	       && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
	{
	  basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

	  if (!single_succ_p (bb1)
	      || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0
	      || !single_succ_p (bb2)
	      || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0
	      || EDGE_COUNT (bb3->preds) != 2)
	    continue;
	  if (cond_if_else_store_replacement (bb1, bb2, bb3))
	    cfgchanged = true;
	  continue;
	}
      else if (do_hoist_loads
	       && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
	{
	  basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

	  if (!FLOAT_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond_stmt)))
	      && single_succ_p (bb1)
	      && single_succ_p (bb2)
	      && single_pred_p (bb1)
	      && single_pred_p (bb2)
	      && EDGE_COUNT (bb->succs) == 2
	      && EDGE_COUNT (bb3->preds) == 2
	      /* If one edge or the other is dominant, a conditional move
		 is likely to perform worse than the well-predicted branch.  */
	      && !predictable_edge_p (EDGE_SUCC (bb, 0))
	      && !predictable_edge_p (EDGE_SUCC (bb, 1)))
	    hoist_adjacent_loads (bb, bb1, bb2, bb3);
	  continue;
	}
      else
	continue;

      e1 = EDGE_SUCC (bb1, 0);

      /* Make sure that bb1 is just a fall through.  */
      if (!single_succ_p (bb1)
	  || (e1->flags & EDGE_FALLTHRU) == 0)
	continue;

      /* Also make sure that bb1 only has one predecessor and that it
	 is bb.  */
      if (!single_pred_p (bb1)
	  || single_pred (bb1) != bb)
	continue;

      if (do_store_elim)
	{
	  /* bb1 is the middle block, bb2 the join block, bb the split block,
	     e1 the fallthrough edge from bb1 to bb2.  We can't do the
	     optimization if the join block has more than two predecessors.  */
	  if (EDGE_COUNT (bb2->preds) > 2)
	    continue;
	  if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
	    cfgchanged = true;
	}
      else
	{
	  gimple_seq phis = phi_nodes (bb2);
	  gimple_stmt_iterator gsi;
	  bool candorest = true;

	  /* Value replacement can work with more than one PHI
	     so try that first.  */
	  if (!early_p)
	    for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
	      {
		phi = as_a <gphi *> (gsi_stmt (gsi));
		arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
		arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
		if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2)
		  {
		    candorest = false;
		    cfgchanged = true;
		    break;
		  }
	      }

	  if (!candorest)
	    continue;

	  phi = single_non_singleton_phi_for_edges (phis, e1, e2);
	  if (!phi)
	    continue;

	  arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
	  arg1 = gimple_phi_arg_def (phi, e2->dest_idx);

	  /* Something is wrong if we cannot find the arguments in the PHI
	     node.  */
	  gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE);

	  gphi *newphi = factor_out_conditional_conversion (e1, e2, phi,
							    arg0, arg1,
							    cond_stmt);
	  if (newphi != NULL)
	    {
	      phi = newphi;
	      /* factor_out_conditional_conversion may create a new PHI in
		 BB2 and eliminate an existing PHI in BB2.  Recompute values
		 that may be affected by that change.  */
	      arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
	      arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
	      gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE);
	    }

	  /* Do the replacement of conditional if it can be done.  */
	  if (two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	  else if (!early_p
		   && conditional_replacement (bb, bb1, e1, e2, phi,
					       arg0, arg1))
	    cfgchanged = true;
	  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	  else if (!early_p
		   && cond_removal_in_popcount_pattern (bb, bb1, e1, e2,
							phi, arg0, arg1))
	    cfgchanged = true;
	  else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	}
    }

  free (bb_order);

  if (do_store_elim)
    delete nontrap;
  /* If the CFG has changed, we should cleanup the CFG.  */
  if (cfgchanged && do_store_elim)
    {
      /* In cond-store replacement we have added some loads on edges
	 and new VOPS (as we moved the store, and created a load).  */
      gsi_commit_edge_inserts ();
      return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
    }
  else if (cfgchanged)
    return TODO_cleanup_cfg;
  return 0;
}

/* Replace PHI node element whose edge is E in block BB with variable NEW.
   Remove the edge from COND_BLOCK which does not lead to BB (COND_BLOCK
   is known to have two edges, one of which must reach BB).  */

static void
replace_phi_edge_with_variable (basic_block cond_block,
				edge e, gimple *phi, tree new_tree)
{
  basic_block bb = gimple_bb (phi);
  basic_block block_to_remove;
  gimple_stmt_iterator gsi;

  /* Change the PHI argument to new.  */
  SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);

  /* Remove the empty basic block.  */
  if (EDGE_SUCC (cond_block, 0)->dest == bb)
    {
      EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
      EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      EDGE_SUCC (cond_block, 0)->probability = profile_probability::always ();

      block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
    }
  else
    {
      EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
      EDGE_SUCC (cond_block, 1)->flags
	&= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      EDGE_SUCC (cond_block, 1)->probability = profile_probability::always ();

      block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
    }
  delete_basic_block (block_to_remove);

  /* Eliminate the COND_EXPR at the end of COND_BLOCK.  */
  gsi = gsi_last_bb (cond_block);
  gsi_remove (&gsi, true);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "COND_EXPR in block %d and PHI in block %d converted to straightline code.\n",
	     cond_block->index,
	     bb->index);
}

/* PR66726: Factor conversion out of COND_EXPR.  If the arguments of the PHI
   stmt are conversions, factor the conversion out and perform it on the
   result of the PHI stmt instead.  COND_STMT is the controlling predicate.
   Return the newly-created PHI, if any.  */
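
/* As an illustrative sketch only (the SSA names below are invented,
   not taken from a real dump):

     <bb 3>:
       _2 = (int) a_1;
       goto <bb 5>;
     <bb 4>:
       _4 = (int) b_3;
     <bb 5>:
       # t_5 = PHI <_2(3), _4(4)>

   becomes

     <bb 5>:
       # _6 = PHI <a_1(3), b_3(4)>
       t_5 = (int) _6;

   so that later transformations see a PHI of the unconverted values,
   assuming a_1 and b_3 have compatible types.  */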

static gphi *
factor_out_conditional_conversion (edge e0, edge e1, gphi *phi,
				   tree arg0, tree arg1, gimple *cond_stmt)
{
  gimple *arg0_def_stmt = NULL, *arg1_def_stmt = NULL, *new_stmt;
  tree new_arg0 = NULL_TREE, new_arg1 = NULL_TREE;
  tree temp, result;
  gphi *newphi;
  gimple_stmt_iterator gsi, gsi_for_def;
  location_t locus = gimple_location (phi);
  enum tree_code convert_code;

  /* Handle only PHI statements with two arguments.  TODO: If all
     other arguments to PHI are INTEGER_CST or if their defining
     statements have the same unary operation, we can handle more
     than two arguments too.  */
  if (gimple_phi_num_args (phi) != 2)
    return NULL;

  /* First canonicalize to simplify tests.  */
  if (TREE_CODE (arg0) != SSA_NAME)
    {
      std::swap (arg0, arg1);
      std::swap (e0, e1);
    }

  if (TREE_CODE (arg0) != SSA_NAME
      || (TREE_CODE (arg1) != SSA_NAME
	  && TREE_CODE (arg1) != INTEGER_CST))
    return NULL;

  /* Check if arg0 is an SSA_NAME and the stmt which defines arg0 is
     a conversion.  */
  arg0_def_stmt = SSA_NAME_DEF_STMT (arg0);
  if (!gimple_assign_cast_p (arg0_def_stmt))
    return NULL;

  /* Use the RHS as new_arg0.  */
  convert_code = gimple_assign_rhs_code (arg0_def_stmt);
  new_arg0 = gimple_assign_rhs1 (arg0_def_stmt);
  if (convert_code == VIEW_CONVERT_EXPR)
    {
      new_arg0 = TREE_OPERAND (new_arg0, 0);
      if (!is_gimple_reg_type (TREE_TYPE (new_arg0)))
	return NULL;
    }
  if (TREE_CODE (new_arg0) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_arg0))
    return NULL;

  if (TREE_CODE (arg1) == SSA_NAME)
    {
      /* Check if arg1 is an SSA_NAME and the stmt which defines arg1
	 is a conversion.  */
      arg1_def_stmt = SSA_NAME_DEF_STMT (arg1);
      if (!is_gimple_assign (arg1_def_stmt)
	  || gimple_assign_rhs_code (arg1_def_stmt) != convert_code)
	return NULL;

      /* Use the RHS as new_arg1.  */
      new_arg1 = gimple_assign_rhs1 (arg1_def_stmt);
      if (convert_code == VIEW_CONVERT_EXPR)
	new_arg1 = TREE_OPERAND (new_arg1, 0);
      if (TREE_CODE (new_arg1) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_arg1))
	return NULL;
    }
  else
    {
      /* If arg1 is an INTEGER_CST, fold it to new type.  */
      if (INTEGRAL_TYPE_P (TREE_TYPE (new_arg0))
	  && int_fits_type_p (arg1, TREE_TYPE (new_arg0)))
	{
	  if (gimple_assign_cast_p (arg0_def_stmt))
	    {
	      /* For the INTEGER_CST case, we are just moving the
		 conversion from one place to another, which can often
		 hurt as the conversion moves further away from the
		 statement that computes the value.  So, perform this
		 only if new_arg0 is an operand of COND_STMT, or
		 if arg0_def_stmt is the only non-debug stmt in
		 its basic block, because then it is possible this
		 could enable further optimizations (minmax replacement
		 etc.).  See PR71016.  */
	      if (new_arg0 != gimple_cond_lhs (cond_stmt)
		  && new_arg0 != gimple_cond_rhs (cond_stmt)
		  && gimple_bb (arg0_def_stmt) == e0->src)
		{
		  gsi = gsi_for_stmt (arg0_def_stmt);
		  gsi_prev_nondebug (&gsi);
		  if (!gsi_end_p (gsi))
		    {
		      if (gassign *assign
			    = dyn_cast <gassign *> (gsi_stmt (gsi)))
			{
			  tree lhs = gimple_assign_lhs (assign);
			  enum tree_code ass_code
			    = gimple_assign_rhs_code (assign);
			  if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
			    return NULL;
			  if (lhs != gimple_assign_rhs1 (arg0_def_stmt))
			    return NULL;
			  gsi_prev_nondebug (&gsi);
			  if (!gsi_end_p (gsi))
			    return NULL;
			}
		      else
			return NULL;
		    }
		  gsi = gsi_for_stmt (arg0_def_stmt);
		  gsi_next_nondebug (&gsi);
		  if (!gsi_end_p (gsi))
		    return NULL;
		}
	      new_arg1 = fold_convert (TREE_TYPE (new_arg0), arg1);
	    }
	  else
	    return NULL;
	}
      else
	return NULL;
    }

  /* If arg0/arg1 have > 1 use, then this transformation actually increases
     the number of expressions evaluated at runtime.  */
  if (!has_single_use (arg0)
      || (arg1_def_stmt && !has_single_use (arg1)))
    return NULL;

  /* If types of new_arg0 and new_arg1 are different bailout.  */
  if (!types_compatible_p (TREE_TYPE (new_arg0), TREE_TYPE (new_arg1)))
    return NULL;

  /* Create a new PHI stmt.  */
  result = PHI_RESULT (phi);
  temp = make_ssa_name (TREE_TYPE (new_arg0), NULL);
  newphi = create_phi_node (temp, gimple_bb (phi));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "PHI ");
      print_generic_expr (dump_file, gimple_phi_result (phi));
      fprintf (dump_file,
	       " changed to factor conversion out from COND_EXPR.\n");
      fprintf (dump_file, "New stmt with CAST that defines ");
      print_generic_expr (dump_file, result);
      fprintf (dump_file, ".\n");
    }

  /* Remove the old cast(s) that have a single use.  */
  gsi_for_def = gsi_for_stmt (arg0_def_stmt);
  gsi_remove (&gsi_for_def, true);
  release_defs (arg0_def_stmt);

  if (arg1_def_stmt)
    {
      gsi_for_def = gsi_for_stmt (arg1_def_stmt);
      gsi_remove (&gsi_for_def, true);
      release_defs (arg1_def_stmt);
    }

  add_phi_arg (newphi, new_arg0, e0, locus);
  add_phi_arg (newphi, new_arg1, e1, locus);

  /* Create the conversion stmt and insert it.  */
  if (convert_code == VIEW_CONVERT_EXPR)
    {
      temp = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (result), temp);
      new_stmt = gimple_build_assign (result, temp);
    }
  else
    new_stmt = gimple_build_assign (result, convert_code, temp);
  gsi = gsi_after_labels (gimple_bb (phi));
  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);

  /* Remove the original PHI stmt.  */
  gsi = gsi_for_stmt (phi);
  gsi_remove (&gsi, true);
  return newphi;
}

/* Optimize
   # x_5 in range [cst1, cst2] where cst2 = cst1 + 1
   if (x_5 op cstN) # where op is == or != and N is 1 or 2
     goto bb3;
   else
     goto bb4;
   bb3:
   bb4:
   # r_6 = PHI <cst3(2), cst4(3)> # where cst3 == cst4 + 1 or cst4 == cst3 + 1

   to r_6 = x_5 + (min (cst3, cst4) - cst1) or
   r_6 = (min (cst3, cst4) + cst1) - x_5 depending on op, N and which
   of cst3 and cst4 is smaller.  */

static bool
two_value_replacement (basic_block cond_bb, basic_block middle_bb,
		       edge e1, gphi *phi, tree arg0, tree arg1)
{
  /* Only look for adjacent integer constants.  */
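  /* For example (a hedged illustration; the names are invented):
     with x_5 known to be in [0, 1], r_6 = PHI <42(3), 43(4)> guarded
     by if (x_5 == 0) selecting 42 collapses to r_6 = x_5 + 42, which
     yields 42 for x_5 == 0 and 43 for x_5 == 1.  */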
  if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
      || !INTEGRAL_TYPE_P (TREE_TYPE (arg1))
      || TREE_CODE (arg0) != INTEGER_CST
      || TREE_CODE (arg1) != INTEGER_CST
      || (tree_int_cst_lt (arg0, arg1)
	  ? wi::to_widest (arg0) + 1 != wi::to_widest (arg1)
	  : wi::to_widest (arg1) + 1 != wi::to_widest (arg0)))
    return false;

  if (!empty_block_p (middle_bb))
    return false;

  gimple *stmt = last_stmt (cond_bb);
  tree lhs = gimple_cond_lhs (stmt);
  tree rhs = gimple_cond_rhs (stmt);

  if (TREE_CODE (lhs) != SSA_NAME
      || !INTEGRAL_TYPE_P (TREE_TYPE (lhs))
      || TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
      || TREE_CODE (rhs) != INTEGER_CST)
    return false;

  switch (gimple_cond_code (stmt))
    {
    case EQ_EXPR:
    case NE_EXPR:
      break;
    default:
      return false;
    }

  wide_int min, max;
  if (get_range_info (lhs, &min, &max) != VR_RANGE
      || min + 1 != max
      || (wi::to_wide (rhs) != min
	  && wi::to_wide (rhs) != max))
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we know when to invert the condition below.  */
  edge true_edge, false_edge;
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
  if ((gimple_cond_code (stmt) == EQ_EXPR)
      ^ (wi::to_wide (rhs) == max)
      ^ (e1 == false_edge))
    std::swap (arg0, arg1);

  tree type;
  if (TYPE_PRECISION (TREE_TYPE (lhs)) == TYPE_PRECISION (TREE_TYPE (arg0)))
    {
      /* Avoid performing the arithmetic in bool type which has different
	 semantics, otherwise prefer unsigned types from the two with
	 the same precision.  */
      if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (arg0)))
	type = TREE_TYPE (lhs);
      else
	type = TREE_TYPE (arg0);
    }
  else if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (TREE_TYPE (arg0)))
    type = TREE_TYPE (lhs);
  else
    type = TREE_TYPE (arg0);

  min = wide_int::from (min, TYPE_PRECISION (type),
			TYPE_SIGN (TREE_TYPE (lhs)));
  wide_int a = wide_int::from (wi::to_wide (arg0), TYPE_PRECISION (type),
			       TYPE_SIGN (TREE_TYPE (arg0)));
  enum tree_code code;
  wi::overflow_type ovf;
  if (tree_int_cst_lt (arg0, arg1))
    {
      code = PLUS_EXPR;
      a -= min;
      if (!TYPE_UNSIGNED (type))
	{
	  /* lhs is known to be in range [min, min+1] and we want to add a
	     to it.  Check if that operation can overflow for those 2 values
	     and if yes, force unsigned type.  */
	  wi::add (min + (wi::neg_p (a) ? 0 : 1), a, SIGNED, &ovf);
	  if (ovf)
	    type = unsigned_type_for (type);
	}
    }
  else
    {
      code = MINUS_EXPR;
      a += min;
      if (!TYPE_UNSIGNED (type))
	{
	  /* lhs is known to be in range [min, min+1] and we want to subtract
	     it from a.  Check if that operation can overflow for those 2
	     values and if yes, force unsigned type.  */
	  wi::sub (a, min + (wi::neg_p (min) ? 0 : 1), SIGNED, &ovf);
	  if (ovf)
	    type = unsigned_type_for (type);
	}
    }

  tree arg = wide_int_to_tree (type, a);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
    lhs = gimplify_build1 (&gsi, NOP_EXPR, type, lhs);
  tree new_rhs;
  if (code == PLUS_EXPR)
    new_rhs = gimplify_build2 (&gsi, PLUS_EXPR, type, lhs, arg);
  else
    new_rhs = gimplify_build2 (&gsi, MINUS_EXPR, type, arg, lhs);
  if (!useless_type_conversion_p (TREE_TYPE (arg0), type))
    new_rhs = gimplify_build1 (&gsi, NOP_EXPR, TREE_TYPE (arg0), new_rhs);

  replace_phi_edge_with_variable (cond_bb, e1, phi, new_rhs);

  /* Note that we optimized this PHI.  */
  return true;
}

/* The function conditional_replacement does the main work of doing the
   conditional replacement.  Return true if the replacement is done.
   Otherwise return false.
   BB is the basic block where the replacement is going to be done on.  ARG0
   is argument 0 from PHI.  Likewise for ARG1.  */

static bool
conditional_replacement (basic_block cond_bb, basic_block middle_bb,
			 edge e0, edge e1, gphi *phi,
			 tree arg0, tree arg1)
{
  tree result;
  gimple *stmt;
  gassign *new_stmt;
  tree cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  tree new_var, new_var2;
  bool neg;

  /* FIXME: Gimplification of complex type is too hard for now.  */
  /* We aren't prepared to handle vectors either (and it is a question
     if it would be worthwhile anyway).  */
  if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0))
	|| POINTER_TYPE_P (TREE_TYPE (arg0)))
      || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1))
	   || POINTER_TYPE_P (TREE_TYPE (arg1))))
    return false;

  /* If the PHI arguments are the constants 0 and 1, or 0 and -1, then
     we can convert the PHI into a use of the condition.  */
  if ((integer_zerop (arg0) && integer_onep (arg1))
      || (integer_zerop (arg1) && integer_onep (arg0)))
    neg = false;
  /* For signed one-bit types, the negation is not needed and
     should be avoided; the case is then the same as the 0/1 case for
     unsigned one-bit types.  */
  else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
	   || (integer_zerop (arg1) && integer_all_onesp (arg0)))
    neg = TYPE_PRECISION (TREE_TYPE (arg0)) != 1;
  else
    return false;

  if (!empty_block_p (middle_bb))
    return false;

  /* At this point we know we have a GIMPLE_COND with two successors.
     One successor is BB, the other successor is an empty block which
     falls through into BB.

     There is a single PHI node at the join point (BB) and its arguments
     are constants (0, 1) or (0, -1).

     So, given the condition COND, and the two PHI arguments, we can
     rewrite this PHI into non-branching code:

       dest = (COND) or dest = COND'

     We use the condition as-is if the argument associated with the
     true edge has the value one or the argument associated with the
     false edge has the value zero.  Note that those conditions are not
     the same since only one of the outgoing edges from the GIMPLE_COND
     will directly reach BB and thus be associated with an argument.  */

  stmt = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* To handle special cases like floating point comparison, it is easier and
     less error-prone to build a tree and gimplify it on the fly though it is
     less efficient.  */
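  /* A sketch of what this produces (with invented names): for
     if (a_2 < b_3) with PHI <0, 1> we build the GENERIC tree
     a_2 < b_3, possibly wrapped in TRUTH_NOT_EXPR or NEGATE_EXPR
     below, and let force_gimple_operand_gsi expand it back into
     GIMPLE statements before the condition.  */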
  cond = fold_build2_loc (gimple_location (stmt),
			  gimple_cond_code (stmt), boolean_type_node,
			  gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));

  /* We need to know which is the true edge and which is the false
     edge so that we know when to invert the condition below.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
  if ((e0 == true_edge && integer_zerop (arg0))
      || (e0 == false_edge && !integer_zerop (arg0))
      || (e1 == true_edge && integer_zerop (arg1))
      || (e1 == false_edge && !integer_zerop (arg1)))
    cond = fold_build1_loc (gimple_location (stmt),
			    TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);

  if (neg)
    {
      cond = fold_convert_loc (gimple_location (stmt),
			       TREE_TYPE (result), cond);
      cond = fold_build1_loc (gimple_location (stmt),
			      NEGATE_EXPR, TREE_TYPE (cond), cond);
    }

  /* Insert our new statements at the end of conditional block before the
     COND_STMT.  */
  gsi = gsi_for_stmt (stmt);
  new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true,
				      GSI_SAME_STMT);

  if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var)))
    {
      location_t locus_0, locus_1;

      new_var2 = make_ssa_name (TREE_TYPE (result));
      new_stmt = gimple_build_assign (new_var2, CONVERT_EXPR, new_var);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
      new_var = new_var2;

      /* Set the locus to the first argument, unless it doesn't have one.  */
      locus_0 = gimple_phi_arg_location (phi, 0);
      locus_1 = gimple_phi_arg_location (phi, 1);
      if (locus_0 == UNKNOWN_LOCATION)
	locus_0 = locus_1;
      gimple_set_location (new_stmt, locus_0);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, new_var);

  /* Note that we optimized this PHI.  */
  return true;
}

/* Update *ARG which is defined in STMT so that it contains the
   computed value if that seems profitable.  Return true if the
   statement is made dead by that rewriting.  */

static bool
jump_function_from_stmt (tree *arg, gimple *stmt)
{
  enum tree_code code = gimple_assign_rhs_code (stmt);
  if (code == ADDR_EXPR)
    {
      /* For arg = &p->i transform it to p, if possible.  */
      tree rhs1 = gimple_assign_rhs1 (stmt);
      poly_int64 offset;
      tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (rhs1, 0),
						&offset);
      if (tem
	  && TREE_CODE (tem) == MEM_REF
	  && known_eq (mem_ref_offset (tem) + offset, 0))
	{
	  *arg = TREE_OPERAND (tem, 0);
	  return true;
	}
    }
  /* TODO: Much like IPA-CP jump-functions we want to handle constant
     additions symbolically here, and we'd need to update the comparison
     code that compares the arg + cst tuples in our caller.  For now the
     code above exactly handles the VEC_BASE pattern from vec.h.  */
  return false;
}

/* RHS is a source argument in a BIT_AND_EXPR which feeds a conditional
   of the form SSA_NAME NE 0.

   If RHS is fed by a simple EQ_EXPR comparison of two values, see if
   the two input values of the EQ_EXPR match arg0 and arg1.

   If so update *code and return TRUE.  Otherwise return FALSE.  */

static bool
rhs_is_fed_for_value_replacement (const_tree arg0, const_tree arg1,
				  enum tree_code *code, const_tree rhs)
{
  /* Obviously if RHS is not an SSA_NAME, we can't look at the defining
     statement.
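     The shape being matched, sketched with invented SSA names, is

       _1 = a_5 == b_6;
       _2 = _1 & _3;
       if (_2 != 0)

     where RHS is one operand of the BIT_AND_EXPR (_1 or _3) and we
     test whether its definition is the EQ_EXPR comparing ARG0 and
     ARG1.  */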
  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple *def1 = SSA_NAME_DEF_STMT (rhs);

      /* Verify the defining statement has an EQ_EXPR on the RHS.  */
      if (is_gimple_assign (def1) && gimple_assign_rhs_code (def1) == EQ_EXPR)
	{
	  /* Finally verify the source operands of the EQ_EXPR are equal
	     to arg0 and arg1.  */
	  tree op0 = gimple_assign_rhs1 (def1);
	  tree op1 = gimple_assign_rhs2 (def1);
	  if ((operand_equal_for_phi_arg_p (arg0, op0)
	       && operand_equal_for_phi_arg_p (arg1, op1))
	      || (operand_equal_for_phi_arg_p (arg0, op1)
		  && operand_equal_for_phi_arg_p (arg1, op0)))
	    {
	      /* We will perform the optimization.  */
	      *code = gimple_assign_rhs_code (def1);
	      return true;
	    }
	}
    }
  return false;
}

/* Return TRUE if arg0/arg1 are equal to the rhs/lhs or lhs/rhs of COND.

   Also return TRUE if arg0/arg1 are equal to the source arguments of an
   EQ comparison feeding a BIT_AND_EXPR which feeds COND.

   Return FALSE otherwise.  */

static bool
operand_equal_for_value_replacement (const_tree arg0, const_tree arg1,
				     enum tree_code *code, gimple *cond)
{
  gimple *def;
  tree lhs = gimple_cond_lhs (cond);
  tree rhs = gimple_cond_rhs (cond);

  if ((operand_equal_for_phi_arg_p (arg0, lhs)
       && operand_equal_for_phi_arg_p (arg1, rhs))
      || (operand_equal_for_phi_arg_p (arg1, lhs)
	  && operand_equal_for_phi_arg_p (arg0, rhs)))
    return true;

  /* Now handle more complex case where we have an EQ comparison
     which feeds a BIT_AND_EXPR which feeds COND.

     First verify that COND is of the form SSA_NAME NE 0.  */
  if (*code != NE_EXPR || !integer_zerop (rhs)
      || TREE_CODE (lhs) != SSA_NAME)
    return false;

  /* Now ensure that SSA_NAME is set by a BIT_AND_EXPR.  */
  def = SSA_NAME_DEF_STMT (lhs);
  if (!is_gimple_assign (def) || gimple_assign_rhs_code (def) != BIT_AND_EXPR)
    return false;

  /* Now verify arg0/arg1 correspond to the source arguments of an
     EQ comparison feeding the BIT_AND_EXPR.  */

  tree tmp = gimple_assign_rhs1 (def);
  if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
    return true;

  tmp = gimple_assign_rhs2 (def);
  if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
    return true;

  return false;
}

/* Returns true if ARG is a neutral element for operation CODE
   on the RIGHT side.  */

static bool
neutral_element_p (tree_code code, tree arg, bool right)
{
  switch (code)
    {
    case PLUS_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      return integer_zerop (arg);

    case LROTATE_EXPR:
    case RROTATE_EXPR:
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      return right && integer_zerop (arg);

    case MULT_EXPR:
      return integer_onep (arg);

    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
      return right && integer_onep (arg);

    case BIT_AND_EXPR:
      return integer_all_onesp (arg);

    default:
      return false;
    }
}

/* Returns true if ARG is an absorbing element for operation CODE.
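   RIGHT is true if ARG is the right operand, RVAL is the other
   operand.  For example, x * 0 and x & 0 are 0 whichever side the
   zero is on, while 0 << x, 0 / x and 0 % x are 0 only with the zero
   on the left, and for division and modulus only when x is known to
   be non-zero (otherwise the unconditional 0 / x could trap).  */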

static bool
absorbing_element_p (tree_code code, tree arg, bool right, tree rval)
{
  switch (code)
    {
    case BIT_IOR_EXPR:
      return integer_all_onesp (arg);

    case MULT_EXPR:
    case BIT_AND_EXPR:
      return integer_zerop (arg);

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      return !right && integer_zerop (arg);

    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case TRUNC_MOD_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
      return (!right
	      && integer_zerop (arg)
	      && tree_single_nonzero_warnv_p (rval, NULL));

    default:
      return false;
    }
}

/* The function value_replacement does the main work of doing the value
   replacement.  Return non-zero if the replacement is done.  Otherwise return
   0.  If we remove the middle basic block, return 2.
   BB is the basic block where the replacement is going to be done on.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.  */

static int
value_replacement (basic_block cond_bb, basic_block middle_bb,
		   edge e0, edge e1, gimple *phi,
		   tree arg0, tree arg1)
{
  gimple_stmt_iterator gsi;
  gimple *cond;
  edge true_edge, false_edge;
  enum tree_code code;
  bool empty_or_with_defined_p = true;

  /* If the type says honor signed zeros we cannot do this
     optimization.  */
  if (HONOR_SIGNED_ZEROS (arg1))
    return 0;

  /* If there is a statement in MIDDLE_BB that defines one of the PHI
     arguments, then adjust arg0 or arg1.  */
  gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
  while (!gsi_end_p (gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      tree lhs;
      gsi_next_nondebug (&gsi);
      if (!is_gimple_assign (stmt))
	{
	  if (gimple_code (stmt) != GIMPLE_PREDICT
	      && gimple_code (stmt) != GIMPLE_NOP)
	    empty_or_with_defined_p = false;
	  continue;
	}
      /* Now try to adjust arg0 or arg1 according to the computation
	 in the statement.  */
      lhs = gimple_assign_lhs (stmt);
      if (!(lhs == arg0
	    && jump_function_from_stmt (&arg0, stmt))
	  || (lhs == arg1
	      && jump_function_from_stmt (&arg1, stmt)))
	empty_or_with_defined_p = false;
    }

  cond = last_stmt (cond_bb);
  code = gimple_cond_code (cond);

  /* This transformation is only valid for equality comparisons.  */
  if (code != NE_EXPR && code != EQ_EXPR)
    return 0;

  /* We need to know which is the true edge and which is the false
     edge so that we know which PHI argument flows along which path.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* At this point we know we have a COND_EXPR with two successors.
     One successor is BB, the other successor is an empty block which
     falls through into BB.

     The condition for the COND_EXPR is known to be NE_EXPR or EQ_EXPR.

     There is a single PHI node at the join point (BB) with two arguments.

     We now need to verify that the two arguments in the PHI node match
     the two arguments to the equality comparison.  */

  if (operand_equal_for_value_replacement (arg0, arg1, &code, cond))
    {
      edge e;
      tree arg;

      /* For NE_EXPR, we want to build an assignment result = arg where
	 arg is the PHI argument associated with the true edge.
	 For EQ_EXPR we want the PHI argument associated with the false
	 edge.  */
      e = (code == NE_EXPR ? true_edge : false_edge);

      /* Unfortunately, E may not reach BB (it may instead have gone to
	 OTHER_BLOCK).  If that is the case, then we want the single outgoing
	 edge from OTHER_BLOCK which reaches BB and represents the desired
	 path from COND_BLOCK.  */
      if (e->dest == middle_bb)
	e = single_succ_edge (e->dest);

      /* Now we know the incoming edge to BB that has the argument for the
	 RHS of our new assignment statement.  */
      if (e0 == e)
	arg = arg0;
      else
	arg = arg1;

      /* If the middle basic block was empty or only defines the PHI
	 arguments, and this is the single PHI whose arguments differ
	 for the edges e0 and e1, then we can remove the middle basic
	 block.  */
      if (empty_or_with_defined_p
	  && single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)),
						 e0, e1) == phi)
	{
	  replace_phi_edge_with_variable (cond_bb, e1, phi, arg);
	  /* Note that we optimized this PHI.  */
	  return 2;
	}
      else
	{
	  /* Replace the PHI arguments with arg.  */
	  SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
	  SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "PHI ");
	      print_generic_expr (dump_file, gimple_phi_result (phi));
	      fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
		       cond_bb->index);
	      print_generic_expr (dump_file, arg);
	      fprintf (dump_file, ".\n");
	    }
	  return 1;
	}
    }

  /* Now optimize (x != 0) ? x + y : y to just x + y.  */
  gsi = gsi_last_nondebug_bb (middle_bb);
  if (gsi_end_p (gsi))
    return 0;

  gimple *assign = gsi_stmt (gsi);
  if (!is_gimple_assign (assign)
      || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
      || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
	  && !POINTER_TYPE_P (TREE_TYPE (arg0))))
    return 0;

  /* Punt if there are (degenerate) PHIs in middle_bb, there should not be.  */
  if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
    return 0;

  /* Allow up to 2 cheap preparation statements that prepare argument
     for assign, e.g.:
      if (y_4 != 0)
	goto <bb 3>;
      else
	goto <bb 4>;
     <bb 3>:
      _1 = (int) y_4;
      iftmp.0_6 = x_5(D) r<< _1;
     <bb 4>:
      # iftmp.0_2 = PHI <iftmp.0_6(3), x_5(D)(2)>
     or:
      if (y_3(D) == 0)
	goto <bb 4>;
      else
	goto <bb 3>;
     <bb 3>:
      y_4 = y_3(D) & 31;
      _1 = (int) y_4;
      _6 = x_5(D) r<< _1;
     <bb 4>:
      # _2 = PHI <x_5(D)(2), _6(3)>  */
  gimple *prep_stmt[2] = { NULL, NULL };
  int prep_cnt;
  for (prep_cnt = 0; ; prep_cnt++)
    {
      gsi_prev_nondebug (&gsi);
      if (gsi_end_p (gsi))
	break;

      gimple *g = gsi_stmt (gsi);
      if (gimple_code (g) == GIMPLE_LABEL)
	break;

      if (prep_cnt == 2 || !is_gimple_assign (g))
	return 0;

      tree lhs = gimple_assign_lhs (g);
      tree rhs1 = gimple_assign_rhs1 (g);
      use_operand_p use_p;
      gimple *use_stmt;
      if (TREE_CODE (lhs) != SSA_NAME
	  || TREE_CODE (rhs1) != SSA_NAME
	  || !INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
	  || !single_imm_use (lhs, &use_p, &use_stmt)
	  || use_stmt != (prep_cnt ? prep_stmt[prep_cnt - 1] : assign))
	return 0;
      switch (gimple_assign_rhs_code (g))
	{
	CASE_CONVERT:
	  break;
	case PLUS_EXPR:
	case BIT_AND_EXPR:
	case BIT_IOR_EXPR:
	case BIT_XOR_EXPR:
	  if (TREE_CODE (gimple_assign_rhs2 (g)) != INTEGER_CST)
	    return 0;
	  break;
	default:
	  return 0;
	}
      prep_stmt[prep_cnt] = g;
    }

  /* Only transform if it removes the condition.  */
  if (!single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)), e0, e1))
    return 0;

  /* Size-wise, this is always profitable.  */
  if (optimize_bb_for_speed_p (cond_bb)
      /* The special case is useless if it has a low probability.  */
      && profile_status_for_fn (cfun) != PROFILE_ABSENT
      && EDGE_PRED (middle_bb, 0)->probability < profile_probability::even ()
      /* If assign is cheap, there is no point avoiding it.  */
      && estimate_num_insns_seq (bb_seq (middle_bb), &eni_time_weights)
	 >= 3 * estimate_num_insns (cond, &eni_time_weights))
    return 0;

  tree lhs = gimple_assign_lhs (assign);
  tree rhs1 = gimple_assign_rhs1 (assign);
  tree rhs2 = gimple_assign_rhs2 (assign);
  enum tree_code code_def = gimple_assign_rhs_code (assign);
  tree cond_lhs = gimple_cond_lhs (cond);
  tree cond_rhs = gimple_cond_rhs (cond);

  /* Propagate the cond_rhs constant through preparation stmts,
     make sure UB isn't invoked while doing that.  */
  for (int i = prep_cnt - 1; i >= 0; --i)
    {
      gimple *g = prep_stmt[i];
      tree grhs1 = gimple_assign_rhs1 (g);
      if (!operand_equal_for_phi_arg_p (cond_lhs, grhs1))
	return 0;
      cond_lhs = gimple_assign_lhs (g);
      cond_rhs = fold_convert (TREE_TYPE (grhs1), cond_rhs);
      if (TREE_CODE (cond_rhs) != INTEGER_CST
	  || TREE_OVERFLOW (cond_rhs))
	return 0;
      if (gimple_assign_rhs_class (g) == GIMPLE_BINARY_RHS)
	{
	  cond_rhs = int_const_binop (gimple_assign_rhs_code (g), cond_rhs,
				      gimple_assign_rhs2 (g));
	  if (TREE_OVERFLOW (cond_rhs))
	    return 0;
	}
      cond_rhs = fold_convert (TREE_TYPE (cond_lhs), cond_rhs);
      if (TREE_CODE (cond_rhs) != INTEGER_CST
	  || TREE_OVERFLOW (cond_rhs))
	return 0;
    }

  if (((code == NE_EXPR && e1 == false_edge)
       || (code == EQ_EXPR && e1 == true_edge))
      && arg0 == lhs
      && ((arg1 == rhs1
	   && operand_equal_for_phi_arg_p (rhs2, cond_lhs)
	   && neutral_element_p (code_def, cond_rhs, true))
	  || (arg1 == rhs2
	      && operand_equal_for_phi_arg_p (rhs1, cond_lhs)
	      && neutral_element_p (code_def, cond_rhs, false))
	  || (operand_equal_for_phi_arg_p (arg1, cond_rhs)
	      && ((operand_equal_for_phi_arg_p (rhs2, cond_lhs)
		   && absorbing_element_p (code_def, cond_rhs, true, rhs2))
		  || (operand_equal_for_phi_arg_p (rhs1, cond_lhs)
		      && absorbing_element_p (code_def,
					      cond_rhs, false, rhs2))))))
    {
      gsi = gsi_for_stmt (cond);
      /* Moving ASSIGN might change VR of lhs, e.g. when moving u_6
	 def-stmt in:
	   if (n_5 != 0)
	     goto <bb 3>;
	   else
	     goto <bb 4>;

	   <bb 3>:
	   # RANGE [0, 4294967294]
	   u_6 = n_5 + 4294967295;

	   <bb 4>:
	   # u_3 = PHI <u_6(3), 4294967295(2)>  */
      reset_flow_sensitive_info (lhs);
      if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
	{
	  /* If available, we can use VR of phi result at least.
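	     In the example above, the range recorded for u_3 (which
	     would be [0, 4294967295] there) stays correct for the
	     moved u_6, whereas u_6's own narrower range does not,
	     since u_6 now also produces the value that previously
	     came from the constant PHI argument.  */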
	  tree phires = gimple_phi_result (phi);
	  struct range_info_def *phires_range_info
	    = SSA_NAME_RANGE_INFO (phires);
	  if (phires_range_info)
	    duplicate_ssa_name_range_info (lhs, SSA_NAME_RANGE_TYPE (phires),
					   phires_range_info);
	}
      gimple_stmt_iterator gsi_from;
      for (int i = prep_cnt - 1; i >= 0; --i)
	{
	  tree plhs = gimple_assign_lhs (prep_stmt[i]);
	  reset_flow_sensitive_info (plhs);
	  gsi_from = gsi_for_stmt (prep_stmt[i]);
	  gsi_move_before (&gsi_from, &gsi);
	}
      gsi_from = gsi_for_stmt (assign);
      gsi_move_before (&gsi_from, &gsi);
      replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
      return 2;
    }

  return 0;
}

/* The function minmax_replacement does the main work of doing the minmax
   replacement.  Return true if the replacement is done.  Otherwise return
   false.
   BB is the basic block where the replacement is going to be done on.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.  */

static bool
minmax_replacement (basic_block cond_bb, basic_block middle_bb,
		    edge e0, edge e1, gimple *phi,
		    tree arg0, tree arg1)
{
  tree result, type, rhs;
  gcond *cond;
  gassign *new_stmt;
  edge true_edge, false_edge;
  enum tree_code cmp, minmax, ass_code;
  tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false;
  gimple_stmt_iterator gsi, gsi_from;

  type = TREE_TYPE (PHI_RESULT (phi));

  /* The optimization may be unsafe due to NaNs.  */
  if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
    return false;

  cond = as_a <gcond *> (last_stmt (cond_bb));
  cmp = gimple_cond_code (cond);
  rhs = gimple_cond_rhs (cond);

  /* Turn EQ/NE of extreme values to order comparisons.  */
  if ((cmp == NE_EXPR || cmp == EQ_EXPR)
      && TREE_CODE (rhs) == INTEGER_CST
      && INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
    {
      if (wi::eq_p (wi::to_wide (rhs), wi::min_value (TREE_TYPE (rhs))))
	{
	  cmp = (cmp == EQ_EXPR) ? LT_EXPR : GE_EXPR;
	  rhs = wide_int_to_tree (TREE_TYPE (rhs),
				  wi::min_value (TREE_TYPE (rhs)) + 1);
	}
      else if (wi::eq_p (wi::to_wide (rhs), wi::max_value (TREE_TYPE (rhs))))
	{
	  cmp = (cmp == EQ_EXPR) ? GT_EXPR : LE_EXPR;
	  rhs = wide_int_to_tree (TREE_TYPE (rhs),
				  wi::max_value (TREE_TYPE (rhs)) - 1);
	}
    }

  /* This transformation is only valid for order comparisons.  Record which
     operand is smaller/larger if the result of the comparison is true.  */
  alt_smaller = NULL_TREE;
  alt_larger = NULL_TREE;
  if (cmp == LT_EXPR || cmp == LE_EXPR)
    {
      smaller = gimple_cond_lhs (cond);
      larger = rhs;
      /* If we have smaller < CST it is equivalent to smaller <= CST-1.
	 Likewise smaller <= CST is equivalent to smaller < CST+1.  */
      if (TREE_CODE (larger) == INTEGER_CST
	  && INTEGRAL_TYPE_P (TREE_TYPE (larger)))
	{
	  if (cmp == LT_EXPR)
	    {
	      wi::overflow_type overflow;
	      wide_int alt = wi::sub (wi::to_wide (larger), 1,
				      TYPE_SIGN (TREE_TYPE (larger)),
				      &overflow);
	      if (! overflow)
		alt_larger = wide_int_to_tree (TREE_TYPE (larger), alt);
	    }
	  else
	    {
	      wi::overflow_type overflow;
	      wide_int alt = wi::add (wi::to_wide (larger), 1,
				      TYPE_SIGN (TREE_TYPE (larger)),
				      &overflow);
	      if (! overflow)
		alt_larger = wide_int_to_tree (TREE_TYPE (larger), alt);
	    }
	}
    }
  else if (cmp == GT_EXPR || cmp == GE_EXPR)
    {
      smaller = rhs;
      larger = gimple_cond_lhs (cond);
      /* If we have larger > CST it is equivalent to larger >= CST+1.
	 Likewise larger >= CST is equivalent to larger > CST-1.  */
      if (TREE_CODE (smaller) == INTEGER_CST
	  && INTEGRAL_TYPE_P (TREE_TYPE (smaller)))
	{
	  wi::overflow_type overflow;
	  if (cmp == GT_EXPR)
	    {
	      wide_int alt = wi::add (wi::to_wide (smaller), 1,
				      TYPE_SIGN (TREE_TYPE (smaller)),
				      &overflow);
	      if (! overflow)
		alt_smaller = wide_int_to_tree (TREE_TYPE (smaller), alt);
	    }
	  else
	    {
	      wide_int alt = wi::sub (wi::to_wide (smaller), 1,
				      TYPE_SIGN (TREE_TYPE (smaller)),
				      &overflow);
	      if (! overflow)
		alt_smaller = wide_int_to_tree (TREE_TYPE (smaller), alt);
	    }
	}
    }
  else
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we know which PHI argument corresponds to which
     path through the CFG.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* Forward the edges over the middle basic block.  */
  if (true_edge->dest == middle_bb)
    true_edge = EDGE_SUCC (true_edge->dest, 0);
  if (false_edge->dest == middle_bb)
    false_edge = EDGE_SUCC (false_edge->dest, 0);

  if (true_edge == e0)
    {
      gcc_assert (false_edge == e1);
      arg_true = arg0;
      arg_false = arg1;
    }
  else
    {
      gcc_assert (false_edge == e0);
      gcc_assert (true_edge == e1);
      arg_true = arg1;
      arg_false = arg0;
    }

  if (empty_block_p (middle_bb))
    {
      if ((operand_equal_for_phi_arg_p (arg_true, smaller)
	   || (alt_smaller
	       && operand_equal_for_phi_arg_p (arg_true, alt_smaller)))
	  && (operand_equal_for_phi_arg_p (arg_false, larger)
	      || (alt_larger
		  && operand_equal_for_phi_arg_p (arg_false, alt_larger))))
	{
	  /* Case

	     if (smaller < larger)
	       rslt = smaller;
	     else
	       rslt = larger;  */
	  minmax = MIN_EXPR;
	}
      else if ((operand_equal_for_phi_arg_p (arg_false, smaller)
		|| (alt_smaller
		    && operand_equal_for_phi_arg_p (arg_false, alt_smaller)))
	       && (operand_equal_for_phi_arg_p (arg_true, larger)
		   || (alt_larger
		       && operand_equal_for_phi_arg_p (arg_true, alt_larger))))
	minmax = MAX_EXPR;
      else
	return false;
    }
  else
    {
      /* Recognize the following case, assuming d <= u:

	   if (a <= u)
	     b = MAX (a, d);
	   x = PHI <b, u>

	 This is equivalent to

	   b = MAX (a, d);
	   x = MIN (b, u);  */

      gimple *assign = last_and_only_stmt (middle_bb);
      tree lhs, op0, op1, bound;

      if (!assign
	  || gimple_code (assign) != GIMPLE_ASSIGN)
	return false;

      lhs = gimple_assign_lhs (assign);
      ass_code = gimple_assign_rhs_code (assign);
      if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
	return false;
      op0 = gimple_assign_rhs1 (assign);
      op1 = gimple_assign_rhs2 (assign);

      if (true_edge->src == middle_bb)
	{
	  /* We got here if the condition is true, i.e., SMALLER < LARGER.
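	     In the d <= u sketch above this is the path through the
	     middle block, so ARG_TRUE must be the middle block's
	     result b = MAX (a, d).  */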
	  if (!operand_equal_for_phi_arg_p (lhs, arg_true))
	    return false;

	  if (operand_equal_for_phi_arg_p (arg_false, larger)
	      || (alt_larger
		  && operand_equal_for_phi_arg_p (arg_false, alt_larger)))
	    {
	      /* Case

		 if (smaller < larger)
		   {
		     r' = MAX_EXPR (smaller, bound)
		   }
		 r = PHI <r', larger>  --> to be turned to MIN_EXPR.  */
	      if (ass_code != MAX_EXPR)
		return false;

	      minmax = MIN_EXPR;
	      if (operand_equal_for_phi_arg_p (op0, smaller)
		  || (alt_smaller
		      && operand_equal_for_phi_arg_p (op0, alt_smaller)))
		bound = op1;
	      else if (operand_equal_for_phi_arg_p (op1, smaller)
		       || (alt_smaller
			   && operand_equal_for_phi_arg_p (op1, alt_smaller)))
		bound = op0;
	      else
		return false;

	      /* We need BOUND <= LARGER.  */
	      if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
						  bound, larger)))
		return false;
	    }
	  else if (operand_equal_for_phi_arg_p (arg_false, smaller)
		   || (alt_smaller
		       && operand_equal_for_phi_arg_p (arg_false, alt_smaller)))
	    {
	      /* Case

		 if (smaller < larger)
		   {
		     r' = MIN_EXPR (larger, bound)
		   }
		 r = PHI <r', smaller>  --> to be turned to MAX_EXPR.  */
	      if (ass_code != MIN_EXPR)
		return false;

	      minmax = MAX_EXPR;
	      if (operand_equal_for_phi_arg_p (op0, larger)
		  || (alt_larger
		      && operand_equal_for_phi_arg_p (op0, alt_larger)))
		bound = op1;
	      else if (operand_equal_for_phi_arg_p (op1, larger)
		       || (alt_larger
			   && operand_equal_for_phi_arg_p (op1, alt_larger)))
		bound = op0;
	      else
		return false;

	      /* We need BOUND >= SMALLER.  */
	      if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
						  bound, smaller)))
		return false;
	    }
	  else
	    return false;
	}
      else
	{
	  /* We got here if the condition is false, i.e., SMALLER > LARGER.  */
	  if (!operand_equal_for_phi_arg_p (lhs, arg_false))
	    return false;

	  if (operand_equal_for_phi_arg_p (arg_true, larger)
	      || (alt_larger
		  && operand_equal_for_phi_arg_p (arg_true, alt_larger)))
	    {
	      /* Case

		 if (smaller > larger)
		   {
		     r' = MIN_EXPR (smaller, bound)
		   }
		 r = PHI <r', larger>  --> to be turned to MAX_EXPR.  */
	      if (ass_code != MIN_EXPR)
		return false;

	      minmax = MAX_EXPR;
	      if (operand_equal_for_phi_arg_p (op0, smaller)
		  || (alt_smaller
		      && operand_equal_for_phi_arg_p (op0, alt_smaller)))
		bound = op1;
	      else if (operand_equal_for_phi_arg_p (op1, smaller)
		       || (alt_smaller
			   && operand_equal_for_phi_arg_p (op1, alt_smaller)))
		bound = op0;
	      else
		return false;

	      /* We need BOUND >= LARGER.  */
	      if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
						  bound, larger)))
		return false;
	    }
	  else if (operand_equal_for_phi_arg_p (arg_true, smaller)
		   || (alt_smaller
		       && operand_equal_for_phi_arg_p (arg_true, alt_smaller)))
	    {
	      /* Case

		 if (smaller > larger)
		   {
		     r' = MAX_EXPR (larger, bound)
		   }
		 r = PHI <r', smaller>  --> to be turned to MIN_EXPR.  */
	      if (ass_code != MAX_EXPR)
		return false;

	      minmax = MIN_EXPR;
	      if (operand_equal_for_phi_arg_p (op0, larger))
		bound = op1;
	      else if (operand_equal_for_phi_arg_p (op1, larger))
		bound = op0;
	      else
		return false;

	      /* We need BOUND <= SMALLER.
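		 That way MAX (larger, bound) can never exceed SMALLER
		 on the false path (where larger <= smaller already
		 holds), so wrapping the result in MIN (r', smaller)
		 leaves r' unchanged there.  */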
	      if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
						  bound, smaller)))
		return false;
	    }
	  else
	    return false;
	}

      /* Move the statement from the middle block.  */
      gsi = gsi_last_bb (cond_bb);
      gsi_from = gsi_last_nondebug_bb (middle_bb);
      reset_flow_sensitive_info (SINGLE_SSA_TREE_OPERAND (gsi_stmt (gsi_from),
							  SSA_OP_DEF));
      gsi_move_before (&gsi_from, &gsi);
    }

  /* Create an SSA var to hold the min/max result.  If we're the only
     things setting the target PHI, then we can clone the PHI
     variable.  Otherwise we must create a new one.  */
  result = PHI_RESULT (phi);
  if (EDGE_COUNT (gimple_bb (phi)->preds) == 2)
    result = duplicate_ssa_name (result, NULL);
  else
    result = make_ssa_name (TREE_TYPE (result));

  /* Emit the statement to compute min/max.  */
  new_stmt = gimple_build_assign (result, minmax, arg0, arg1);
  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  replace_phi_edge_with_variable (cond_bb, e1, phi, result);

  return true;
}

/* Convert

   <bb 2>
   if (b_4(D) != 0)
     goto <bb 3>
   else
     goto <bb 4>

   <bb 3>
   _2 = (unsigned long) b_4(D);
   _9 = __builtin_popcountl (_2);
   OR
   _9 = __builtin_popcountl (b_4(D));

   <bb 4>
   c_12 = PHI <0(2), _9(3)>

   Into
   <bb 2>
   _2 = (unsigned long) b_4(D);
   _9 = __builtin_popcountl (_2);
   OR
   _9 = __builtin_popcountl (b_4(D));

   <bb 4>
   c_12 = PHI <_9(2)>  */

static bool
cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
				  edge e1, edge e2,
				  gimple *phi, tree arg0, tree arg1)
{
  gimple *cond;
  gimple_stmt_iterator gsi, gsi_from;
  gimple *popcount;
  gimple *cast = NULL;
  tree lhs, arg;

  /* Check that
     _2 = (unsigned long) b_4(D);
     _9 = __builtin_popcountl (_2);
     OR
     _9 = __builtin_popcountl (b_4(D));
     are the only stmts in the middle_bb.  */

  gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
  if (gsi_end_p (gsi))
    return false;
  cast = gsi_stmt (gsi);
  gsi_next_nondebug (&gsi);
  if (!gsi_end_p (gsi))
    {
      popcount = gsi_stmt (gsi);
      gsi_next_nondebug (&gsi);
      if (!gsi_end_p (gsi))
	return false;
    }
  else
    {
      popcount = cast;
      cast = NULL;
    }

  /* Check that we have a popcount builtin.  */
  if (!is_gimple_call (popcount))
    return false;
  combined_fn cfn = gimple_call_combined_fn (popcount);
  switch (cfn)
    {
    CASE_CFN_POPCOUNT:
      break;
    default:
      return false;
    }

  arg = gimple_call_arg (popcount, 0);
  lhs = gimple_get_lhs (popcount);

  if (cast)
    {
      /* We have a cast stmt feeding popcount builtin.  */
      /* Check that we have a cast prior to that.  */
      if (gimple_code (cast) != GIMPLE_ASSIGN
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (cast)))
	return false;
      /* Result of the cast stmt is the argument to the builtin.  */
      if (arg != gimple_assign_lhs (cast))
	return false;
      arg = gimple_assign_rhs1 (cast);
    }

  cond = last_stmt (cond_bb);

  /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount
     builtin.
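     The transformation is safe because popcount (0) is 0: on the
     path where b_4 == 0 the PHI argument is the constant 0, which is
     exactly what the now-unconditional popcount call computes.  */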
  if (gimple_code (cond) != GIMPLE_COND
      || (gimple_cond_code (cond) != NE_EXPR
	  && gimple_cond_code (cond) != EQ_EXPR)
      || !integer_zerop (gimple_cond_rhs (cond))
      || arg != gimple_cond_lhs (cond))
    return false;

  /* Canonicalize.  */
  if ((e2->flags & EDGE_TRUE_VALUE
       && gimple_cond_code (cond) == NE_EXPR)
      || (e1->flags & EDGE_TRUE_VALUE
	  && gimple_cond_code (cond) == EQ_EXPR))
    {
      std::swap (arg0, arg1);
      std::swap (e1, e2);
    }

  /* Check PHI arguments.  */
  if (lhs != arg0 || !integer_zerop (arg1))
    return false;

  /* And insert the popcount builtin and cast stmt before the cond_bb.  */
  gsi = gsi_last_bb (cond_bb);
  if (cast)
    {
      gsi_from = gsi_for_stmt (cast);
      gsi_move_before (&gsi_from, &gsi);
      reset_flow_sensitive_info (gimple_get_lhs (cast));
    }
  gsi_from = gsi_for_stmt (popcount);
  gsi_move_before (&gsi_from, &gsi);
  reset_flow_sensitive_info (gimple_get_lhs (popcount));

  /* Now update the PHI and remove unneeded bbs.  */
  replace_phi_edge_with_variable (cond_bb, e2, phi, lhs);
  return true;
}

/* The function abs_replacement does the main work of doing the absolute
   replacement.  Return true if the replacement is done.  Otherwise return
   false.
   BB is the basic block where the replacement is going to be done on.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.  */

static bool
abs_replacement (basic_block cond_bb, basic_block middle_bb,
		 edge e0 ATTRIBUTE_UNUSED, edge e1,
		 gimple *phi, tree arg0, tree arg1)
{
  tree result;
  gassign *new_stmt;
  gimple *cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  gimple *assign;
  edge e;
  tree rhs, lhs;
  bool negate;
  enum tree_code cond_code;

  /* If the type says honor signed zeros we cannot do this
     optimization.  */
  if (HONOR_SIGNED_ZEROS (arg1))
    return false;

  /* OTHER_BLOCK must have only one executable statement which must have the
     form arg0 = -arg1 or arg1 = -arg0.  */

  assign = last_and_only_stmt (middle_bb);
  /* If we did not find the proper negation assignment, then we cannot
     optimize.  */
  if (assign == NULL)
    return false;

  /* If we got here, then we have found the only executable statement
     in OTHER_BLOCK.  If it is anything other than arg0 = -arg1 or
     arg1 = -arg0, then we cannot optimize.  */
  if (gimple_code (assign) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (assign);

  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
    return false;

  rhs = gimple_assign_rhs1 (assign);

  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
  if (!(lhs == arg0 && rhs == arg1)
      && !(lhs == arg1 && rhs == arg0))
    return false;

  cond = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* Only relationals comparing arg[01] against zero are interesting.  */
  cond_code = gimple_cond_code (cond);
  if (cond_code != GT_EXPR && cond_code != GE_EXPR
      && cond_code != LT_EXPR && cond_code != LE_EXPR)
    return false;

  /* Make sure the conditional is arg[01] OP y.  */
  if (gimple_cond_lhs (cond) != rhs)
    return false;

  if (FLOAT_TYPE_P (TREE_TYPE (gimple_cond_rhs (cond)))
real_zerop (gimple_cond_rhs (cond))
1922 : integer_zerop (gimple_cond_rhs (cond)))
1923 ;
1924 else
1925 return false;
1926
1927 /* We need to know which is the true edge and which is the false
1928 edge so that we know whether we have abs or negative abs. */
1929 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
1930
1931 /* For GT_EXPR/GE_EXPR, if the true edge goes to MIDDLE_BB, then we
1932 will need to negate the result. Similarly for LT_EXPR/LE_EXPR if
1933 the false edge goes to MIDDLE_BB. */
1934 if (cond_code == GT_EXPR || cond_code == GE_EXPR)
1935 e = true_edge;
1936 else
1937 e = false_edge;
1938
1939 if (e->dest == middle_bb)
1940 negate = true;
1941 else
1942 negate = false;
1943
1944 /* If the code negates only if positive, then make sure to not
1945 introduce undefined behavior when negating or computing the absolute.
1946 ??? We could use range info if present to check for arg1 == INT_MIN. */
1947 if (negate
1948 && (ANY_INTEGRAL_TYPE_P (TREE_TYPE (arg1))
1949 && ! TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg1))))
1950 return false;
1951
1952 result = duplicate_ssa_name (result, NULL);
1953
1954 if (negate)
1955 lhs = make_ssa_name (TREE_TYPE (result));
1956 else
1957 lhs = result;
1958
1959 /* Build the modify expression with the ABS_EXPR. */
1960 new_stmt = gimple_build_assign (lhs, ABS_EXPR, rhs);
1961
1962 gsi = gsi_last_bb (cond_bb);
1963 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
1964
1965 if (negate)
1966 {
1967 /* Get the right GSI. We want to insert after the recently
1968 added ABS_EXPR statement (which we know is the first statement
1969 in the block). */
1970 new_stmt = gimple_build_assign (result, NEGATE_EXPR, lhs);
1971
1972 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
1973 }
1974
1975 replace_phi_edge_with_variable (cond_bb, e1, phi, result);
1976
1977 /* Note that we optimized this PHI. */
1978 return true;
1979 }
1980
1981 /* Auxiliary functions to determine the set of memory accesses which
1982 can't trap because they are preceded by accesses to the same memory
1983 portion. We do that for MEM_REFs, so we only need to track
1984 the SSA_NAME of the pointer indirectly referenced. The algorithm
1985 simply is a walk over all instructions in dominator order. When
1986 we see a MEM_REF we determine if we've already seen the same
1987 ref anywhere up to the root of the dominator tree. If we have,
1988 the current access can't trap. If we don't see any dominating access,
1989 the current access might trap, but might also make later accesses
1990 non-trapping, so we remember it. We need to be careful with loads
1991 and stores: for instance a load might not trap, while a store would,
1992 so if we see a dominating read access, this doesn't mean that a later
1993 write access would not trap. Hence we also need to differentiate the
1994 type of access(es) seen.
1995
1996 ??? We currently are very conservative and assume that a load might
1997 trap even if a store doesn't (write-only memory). This probably is
1998 overly conservative. */
1999
2000 /* A hash-table of SSA_NAMEs, recording in which basic block a MEM_REF
2001 through each name was seen, which would constitute a no-trap region for
2002 identical accesses. */
2003 struct name_to_bb
2004 {
2005 unsigned int ssa_name_ver;
2006 unsigned int phase;
2007 bool store;
2008 HOST_WIDE_INT offset, size;
2009 basic_block bb;
2010 };
2011
2012 /* Hashtable helpers.
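
   As a hedged illustration of what an entry means (hypothetical code,
   not from a testcase): in

     void f (int *p, int flag)
     {
       *p = 0;        <- remembered: a store through p might trap
       if (flag)
         *p = 1;      <- same access, dominated by the one above,
                         hence known not to trap
     }

   both stores map to the same hash entry, keyed on the SSA_NAME
   version of P, the store flag, the offset and the access size.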
*/ 2013 2014 struct ssa_names_hasher : free_ptr_hash <name_to_bb> 2015 { 2016 static inline hashval_t hash (const name_to_bb *); 2017 static inline bool equal (const name_to_bb *, const name_to_bb *); 2018 }; 2019 2020 /* Used for quick clearing of the hash-table when we see calls. 2021 Hash entries with phase < nt_call_phase are invalid. */ 2022 static unsigned int nt_call_phase; 2023 2024 /* The hash function. */ 2025 2026 inline hashval_t 2027 ssa_names_hasher::hash (const name_to_bb *n) 2028 { 2029 return n->ssa_name_ver ^ (((hashval_t) n->store) << 31) 2030 ^ (n->offset << 6) ^ (n->size << 3); 2031 } 2032 2033 /* The equality function of *P1 and *P2. */ 2034 2035 inline bool 2036 ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2) 2037 { 2038 return n1->ssa_name_ver == n2->ssa_name_ver 2039 && n1->store == n2->store 2040 && n1->offset == n2->offset 2041 && n1->size == n2->size; 2042 } 2043 2044 class nontrapping_dom_walker : public dom_walker 2045 { 2046 public: 2047 nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps) 2048 : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {} 2049 2050 virtual edge before_dom_children (basic_block); 2051 virtual void after_dom_children (basic_block); 2052 2053 private: 2054 2055 /* We see the expression EXP in basic block BB. If it's an interesting 2056 expression (an MEM_REF through an SSA_NAME) possibly insert the 2057 expression into the set NONTRAP or the hash table of seen expressions. 2058 STORE is true if this expression is on the LHS, otherwise it's on 2059 the RHS. */ 2060 void add_or_mark_expr (basic_block, tree, bool); 2061 2062 hash_set<tree> *m_nontrapping; 2063 2064 /* The hash table for remembering what we've seen. */ 2065 hash_table<ssa_names_hasher> m_seen_ssa_names; 2066 }; 2067 2068 /* Called by walk_dominator_tree, when entering the block BB. */ 2069 edge 2070 nontrapping_dom_walker::before_dom_children (basic_block bb) 2071 { 2072 edge e; 2073 edge_iterator ei; 2074 gimple_stmt_iterator gsi; 2075 2076 /* If we haven't seen all our predecessors, clear the hash-table. */ 2077 FOR_EACH_EDGE (e, ei, bb->preds) 2078 if ((((size_t)e->src->aux) & 2) == 0) 2079 { 2080 nt_call_phase++; 2081 break; 2082 } 2083 2084 /* Mark this BB as being on the path to dominator root and as visited. */ 2085 bb->aux = (void*)(1 | 2); 2086 2087 /* And walk the statements in order. */ 2088 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 2089 { 2090 gimple *stmt = gsi_stmt (gsi); 2091 2092 if ((gimple_code (stmt) == GIMPLE_ASM && gimple_vdef (stmt)) 2093 || (is_gimple_call (stmt) 2094 && (!nonfreeing_call_p (stmt) || !nonbarrier_call_p (stmt)))) 2095 nt_call_phase++; 2096 else if (gimple_assign_single_p (stmt) && !gimple_has_volatile_ops (stmt)) 2097 { 2098 add_or_mark_expr (bb, gimple_assign_lhs (stmt), true); 2099 add_or_mark_expr (bb, gimple_assign_rhs1 (stmt), false); 2100 } 2101 } 2102 return NULL; 2103 } 2104 2105 /* Called by walk_dominator_tree, when basic block BB is exited. */ 2106 void 2107 nontrapping_dom_walker::after_dom_children (basic_block bb) 2108 { 2109 /* This BB isn't on the path to dominator root anymore. */ 2110 bb->aux = (void*)2; 2111 } 2112 2113 /* We see the expression EXP in basic block BB. If it's an interesting 2114 expression (an MEM_REF through an SSA_NAME) possibly insert the 2115 expression into the set NONTRAP or the hash table of seen expressions. 2116 STORE is true if this expression is on the LHS, otherwise it's on 2117 the RHS. 
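
   For instance (hypothetical SSA names and values), a store to
   MEM[(int *)p_7 + 4B] of an int is keyed on SSA_NAME_VERSION (p_7),
   store = true, offset = 4 and size = 4; a later access with an equal
   key that is dominated by this one is then added to NONTRAP.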
*/
2118 void
2119 nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)
2120 {
2121 HOST_WIDE_INT size;
2122
2123 if (TREE_CODE (exp) == MEM_REF
2124 && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
2125 && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
2126 && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
2127 {
2128 tree name = TREE_OPERAND (exp, 0);
2129 struct name_to_bb map;
2130 name_to_bb **slot;
2131 struct name_to_bb *n2bb;
2132 basic_block found_bb = 0;
2133
2134 /* Try to find the last seen MEM_REF through the same
2135 SSA_NAME, which can trap. */
2136 map.ssa_name_ver = SSA_NAME_VERSION (name);
2137 map.phase = 0;
2138 map.bb = 0;
2139 map.store = store;
2140 map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
2141 map.size = size;
2142
2143 slot = m_seen_ssa_names.find_slot (&map, INSERT);
2144 n2bb = *slot;
2145 if (n2bb && n2bb->phase >= nt_call_phase)
2146 found_bb = n2bb->bb;
2147
2148 /* If we've found a trapping MEM_REF, _and_ it dominates EXP
2149 (it's in a basic block on the path from us to the dominator root)
2150 then we can't trap. */
2151 if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
2152 {
2153 m_nontrapping->add (exp);
2154 }
2155 else
2156 {
2157 /* EXP might trap, so insert it into the hash table. */
2158 if (n2bb)
2159 {
2160 n2bb->phase = nt_call_phase;
2161 n2bb->bb = bb;
2162 }
2163 else
2164 {
2165 n2bb = XNEW (struct name_to_bb);
2166 n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
2167 n2bb->phase = nt_call_phase;
2168 n2bb->bb = bb;
2169 n2bb->store = store;
2170 n2bb->offset = map.offset;
2171 n2bb->size = size;
2172 *slot = n2bb;
2173 }
2174 }
2175 }
2176 }
2177
2178 /* This is the entry point for gathering non-trapping memory accesses.
2179 It will do a dominator walk over the whole function, and it will
2180 make use of the bb->aux pointers. It returns a set of trees
2181 (the MEM_REFs themselves) which can't trap. */
2182 static hash_set<tree> *
2183 get_non_trapping (void)
2184 {
2185 nt_call_phase = 0;
2186 hash_set<tree> *nontrap = new hash_set<tree>;
2187 /* We're going to do a dominator walk, so ensure that we have
2188 dominance information. */
2189 calculate_dominance_info (CDI_DOMINATORS);
2190
2191 nontrapping_dom_walker (CDI_DOMINATORS, nontrap)
2192 .walk (cfun->cfg->x_entry_block_ptr);
2193
2194 clear_aux_for_blocks ();
2195 return nontrap;
2196 }
2197
2198 /* Do the main work of conditional store replacement. We already know
2199 that the recognized pattern looks like this:
2200
2201 split:
2202 if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1)
2203 MIDDLE_BB:
2204 something
2205 fallthrough (edge E0)
2206 JOIN_BB:
2207 some more
2208
2209 We check that MIDDLE_BB contains only one store, that that store
2210 doesn't trap (not via NOTRAP, but via checking if an access to the same
2211 memory location dominates us, or the store is to a local addressable
2212 object) and that the store has a "simple" RHS. */
2213
2214 static bool
2215 cond_store_replacement (basic_block middle_bb, basic_block join_bb,
2216 edge e0, edge e1, hash_set<tree> *nontrap)
2217 {
2218 gimple *assign = last_and_only_stmt (middle_bb);
2219 tree lhs, rhs, name, name2;
2220 gphi *newphi;
2221 gassign *new_stmt;
2222 gimple_stmt_iterator gsi;
2223 location_t locus;
2224
2225 /* Check if middle_bb contains only one store.
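     For instance (hypothetical GIMPLE), a MIDDLE_BB consisting solely
     of

       *p_5 = x_4;

     qualifies; a block that additionally contains a PHI node, a
     volatile access, or any second statement is rejected by the
     checks below.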
*/
2226 if (!assign
2227 || !gimple_assign_single_p (assign)
2228 || gimple_has_volatile_ops (assign))
2229 return false;
2230
2231 /* And no PHI nodes, so all uses in the single stmt are also
2232 available where we insert to. */
2233 if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
2234 return false;
2235
2236 locus = gimple_location (assign);
2237 lhs = gimple_assign_lhs (assign);
2238 rhs = gimple_assign_rhs1 (assign);
2239 if ((TREE_CODE (lhs) != MEM_REF
2240 && TREE_CODE (lhs) != ARRAY_REF
2241 && TREE_CODE (lhs) != COMPONENT_REF)
2242 || !is_gimple_reg_type (TREE_TYPE (lhs)))
2243 return false;
2244
2245 /* Prove that we can move the store down. We could also check
2246 TREE_THIS_NOTRAP here, but in that case we could also move stores
2247 whose value is not readily available, which we want to avoid. */
2248 if (!nontrap->contains (lhs))
2249 {
2250 /* If LHS is an access to a local variable whose address is not
2251 taken (or when we allow data races) and which is known not to
2252 trap, we can always safely move the store down. */
2253 tree base = get_base_address (lhs);
2254 if (!auto_var_p (base)
2255 || (TREE_ADDRESSABLE (base) && !flag_store_data_races)
2256 || tree_could_trap_p (lhs))
2257 return false;
2258 }
2259
2260 /* Now we've checked the constraints, so do the transformation:
2261 1) Remove the single store. */
2262 gsi = gsi_for_stmt (assign);
2263 unlink_stmt_vdef (assign);
2264 gsi_remove (&gsi, true);
2265 release_defs (assign);
2266
2267 /* Make both store and load use alias-set zero as we have to
2268 deal with the case of the store being a conditional change
2269 of the dynamic type. */
2270 lhs = unshare_expr (lhs);
2271 tree *basep = &lhs;
2272 while (handled_component_p (*basep))
2273 basep = &TREE_OPERAND (*basep, 0);
2274 if (TREE_CODE (*basep) == MEM_REF
2275 || TREE_CODE (*basep) == TARGET_MEM_REF)
2276 TREE_OPERAND (*basep, 1)
2277 = fold_convert (ptr_type_node, TREE_OPERAND (*basep, 1));
2278 else
2279 *basep = build2 (MEM_REF, TREE_TYPE (*basep),
2280 build_fold_addr_expr (*basep),
2281 build_zero_cst (ptr_type_node));
2282
2283 /* 2) Insert a load of the stored memory location into a temporary
2284 on the edge which did not contain the store. */
2285 name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
2286 new_stmt = gimple_build_assign (name, lhs);
2287 gimple_set_location (new_stmt, locus);
2288 lhs = unshare_expr (lhs);
2289 /* Set TREE_NO_WARNING on the rhs of the load to avoid uninit
2290 warnings. */
2291 TREE_NO_WARNING (gimple_assign_rhs1 (new_stmt)) = 1;
2292 gsi_insert_on_edge (e1, new_stmt);
2293
2294 /* 3) Create a PHI node at the join block, with one argument
2295 holding the old RHS, and the other holding the temporary
2296 where we stored the old memory contents. */
2297 name2 = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
2298 newphi = create_phi_node (name2, join_bb);
2299 add_phi_arg (newphi, rhs, e0, locus);
2300 add_phi_arg (newphi, name, e1, locus);
2301
2302 new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));
2303
2304 /* 4) Insert the store that consumes the PHI result.
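     If JOIN_BB contains no statements after its labels,
     gsi_after_labels yields the end iterator and we append instead;
     either way the new store, e.g. (hypothetical names)

       *p_5 = cstore_9;

     ends up as the first executable statement of JOIN_BB.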
*/
2305 gsi = gsi_after_labels (join_bb);
2306 if (gsi_end_p (gsi))
2307 {
2308 gsi = gsi_last_bb (join_bb);
2309 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
2310 }
2311 else
2312 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
2313
2314 if (dump_file && (dump_flags & TDF_DETAILS))
2315 {
2316 fprintf (dump_file, "\nConditional store replacement happened!");
2317 fprintf (dump_file, "\nReplaced the store with a load.");
2318 fprintf (dump_file, "\nInserted a new PHI statement in join block:\n");
2319 print_gimple_stmt (dump_file, new_stmt, 0, TDF_VOPS|TDF_MEMSYMS);
2320 }
2321
2322 return true;
2323 }
2324
2325 /* Do the main work of conditional store replacement for a pair of
stores to the same location in THEN_BB and ELSE_BB. */
2326
2327 static bool
2328 cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
2329 basic_block join_bb, gimple *then_assign,
2330 gimple *else_assign)
2331 {
2332 tree lhs_base, lhs, then_rhs, else_rhs, name;
2333 location_t then_locus, else_locus;
2334 gimple_stmt_iterator gsi;
2335 gphi *newphi;
2336 gassign *new_stmt;
2337
2338 if (then_assign == NULL
2339 || !gimple_assign_single_p (then_assign)
2340 || gimple_clobber_p (then_assign)
2341 || gimple_has_volatile_ops (then_assign)
2342 || else_assign == NULL
2343 || !gimple_assign_single_p (else_assign)
2344 || gimple_clobber_p (else_assign)
2345 || gimple_has_volatile_ops (else_assign))
2346 return false;
2347
2348 lhs = gimple_assign_lhs (then_assign);
2349 if (!is_gimple_reg_type (TREE_TYPE (lhs))
2350 || !operand_equal_p (lhs, gimple_assign_lhs (else_assign), 0))
2351 return false;
2352
2353 lhs_base = get_base_address (lhs);
2354 if (lhs_base == NULL_TREE
2355 || (!DECL_P (lhs_base) && TREE_CODE (lhs_base) != MEM_REF))
2356 return false;
2357
2358 then_rhs = gimple_assign_rhs1 (then_assign);
2359 else_rhs = gimple_assign_rhs1 (else_assign);
2360 then_locus = gimple_location (then_assign);
2361 else_locus = gimple_location (else_assign);
2362
2363 /* Now we've checked the constraints, so do the transformation:
2364 1) Remove the stores. */
2365 gsi = gsi_for_stmt (then_assign);
2366 unlink_stmt_vdef (then_assign);
2367 gsi_remove (&gsi, true);
2368 release_defs (then_assign);
2369
2370 gsi = gsi_for_stmt (else_assign);
2371 unlink_stmt_vdef (else_assign);
2372 gsi_remove (&gsi, true);
2373 release_defs (else_assign);
2374
2375 /* 2) Create a PHI node at the join block, with the arguments
2376 holding the RHS values stored in the THEN and ELSE blocks
2377 respectively. */
2378 name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
2379 newphi = create_phi_node (name, join_bb);
2380 add_phi_arg (newphi, then_rhs, EDGE_SUCC (then_bb, 0), then_locus);
2381 add_phi_arg (newphi, else_rhs, EDGE_SUCC (else_bb, 0), else_locus);
2382
2383 new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));
2384
2385 /* 3) Insert the store that consumes the PHI result. */
2386 gsi = gsi_after_labels (join_bb);
2387 if (gsi_end_p (gsi))
2388 {
2389 gsi = gsi_last_bb (join_bb);
2390 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
2391 }
2392 else
2393 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
2394
2395 return true;
2396 }
2397
2398 /* Return the single store in BB whose virtual definition is VDEF, or
2399 NULL if there are other stores in the BB or loads following the store.
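
   For instance (hypothetical SSA names), in a block of the form

     # .MEM_3 = VDEF <.MEM_2>
     *p_1 = x_5;
     _6 = *q_2;

   the trailing load uses .MEM_3 in the same block, so NULL is
   returned: sinking the store past a potentially aliasing load could
   change the value that load sees.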
*/ 2400 2401 static gimple * 2402 single_trailing_store_in_bb (basic_block bb, tree vdef) 2403 { 2404 if (SSA_NAME_IS_DEFAULT_DEF (vdef)) 2405 return NULL; 2406 gimple *store = SSA_NAME_DEF_STMT (vdef); 2407 if (gimple_bb (store) != bb 2408 || gimple_code (store) == GIMPLE_PHI) 2409 return NULL; 2410 2411 /* Verify there is no other store in this BB. */ 2412 if (!SSA_NAME_IS_DEFAULT_DEF (gimple_vuse (store)) 2413 && gimple_bb (SSA_NAME_DEF_STMT (gimple_vuse (store))) == bb 2414 && gimple_code (SSA_NAME_DEF_STMT (gimple_vuse (store))) != GIMPLE_PHI) 2415 return NULL; 2416 2417 /* Verify there is no load or store after the store. */ 2418 use_operand_p use_p; 2419 imm_use_iterator imm_iter; 2420 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_vdef (store)) 2421 if (USE_STMT (use_p) != store 2422 && gimple_bb (USE_STMT (use_p)) == bb) 2423 return NULL; 2424 2425 return store; 2426 } 2427 2428 /* Conditional store replacement. We already know 2429 that the recognized pattern looks like so: 2430 2431 split: 2432 if (cond) goto THEN_BB; else goto ELSE_BB (edge E1) 2433 THEN_BB: 2434 ... 2435 X = Y; 2436 ... 2437 goto JOIN_BB; 2438 ELSE_BB: 2439 ... 2440 X = Z; 2441 ... 2442 fallthrough (edge E0) 2443 JOIN_BB: 2444 some more 2445 2446 We check that it is safe to sink the store to JOIN_BB by verifying that 2447 there are no read-after-write or write-after-write dependencies in 2448 THEN_BB and ELSE_BB. */ 2449 2450 static bool 2451 cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb, 2452 basic_block join_bb) 2453 { 2454 vec<data_reference_p> then_datarefs, else_datarefs; 2455 vec<ddr_p> then_ddrs, else_ddrs; 2456 gimple *then_store, *else_store; 2457 bool found, ok = false, res; 2458 struct data_dependence_relation *ddr; 2459 data_reference_p then_dr, else_dr; 2460 int i, j; 2461 tree then_lhs, else_lhs; 2462 basic_block blocks[3]; 2463 2464 /* Handle the case with single store in THEN_BB and ELSE_BB. That is 2465 cheap enough to always handle as it allows us to elide dependence 2466 checking. */ 2467 gphi *vphi = NULL; 2468 for (gphi_iterator si = gsi_start_phis (join_bb); !gsi_end_p (si); 2469 gsi_next (&si)) 2470 if (virtual_operand_p (gimple_phi_result (si.phi ()))) 2471 { 2472 vphi = si.phi (); 2473 break; 2474 } 2475 if (!vphi) 2476 return false; 2477 tree then_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (then_bb)); 2478 tree else_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (else_bb)); 2479 gimple *then_assign = single_trailing_store_in_bb (then_bb, then_vdef); 2480 if (then_assign) 2481 { 2482 gimple *else_assign = single_trailing_store_in_bb (else_bb, else_vdef); 2483 if (else_assign) 2484 return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb, 2485 then_assign, else_assign); 2486 } 2487 2488 /* If either vectorization or if-conversion is disabled then do 2489 not sink any stores. */ 2490 if (param_max_stores_to_sink == 0 2491 || (!flag_tree_loop_vectorize && !flag_tree_slp_vectorize) 2492 || !flag_tree_loop_if_convert) 2493 return false; 2494 2495 /* Find data references. */ 2496 then_datarefs.create (1); 2497 else_datarefs.create (1); 2498 if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs) 2499 == chrec_dont_know) 2500 || !then_datarefs.length () 2501 || (find_data_references_in_bb (NULL, else_bb, &else_datarefs) 2502 == chrec_dont_know) 2503 || !else_datarefs.length ()) 2504 { 2505 free_data_refs (then_datarefs); 2506 free_data_refs (else_datarefs); 2507 return false; 2508 } 2509 2510 /* Find pairs of stores with equal LHS. 
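     For example (hypothetical GIMPLE), given

       THEN_BB:  a.x = _5;  a.y = _6;
       ELSE_BB:  a.y = _7;  a.x = _8;

     the pairs (a.x = _5, a.x = _8) and (a.y = _6, a.y = _7) are
     collected here; whether they may be sunk is decided by the
     dependence checks below.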
*/ 2511 auto_vec<gimple *, 1> then_stores, else_stores; 2512 FOR_EACH_VEC_ELT (then_datarefs, i, then_dr) 2513 { 2514 if (DR_IS_READ (then_dr)) 2515 continue; 2516 2517 then_store = DR_STMT (then_dr); 2518 then_lhs = gimple_get_lhs (then_store); 2519 if (then_lhs == NULL_TREE) 2520 continue; 2521 found = false; 2522 2523 FOR_EACH_VEC_ELT (else_datarefs, j, else_dr) 2524 { 2525 if (DR_IS_READ (else_dr)) 2526 continue; 2527 2528 else_store = DR_STMT (else_dr); 2529 else_lhs = gimple_get_lhs (else_store); 2530 if (else_lhs == NULL_TREE) 2531 continue; 2532 2533 if (operand_equal_p (then_lhs, else_lhs, 0)) 2534 { 2535 found = true; 2536 break; 2537 } 2538 } 2539 2540 if (!found) 2541 continue; 2542 2543 then_stores.safe_push (then_store); 2544 else_stores.safe_push (else_store); 2545 } 2546 2547 /* No pairs of stores found. */ 2548 if (!then_stores.length () 2549 || then_stores.length () > (unsigned) param_max_stores_to_sink) 2550 { 2551 free_data_refs (then_datarefs); 2552 free_data_refs (else_datarefs); 2553 return false; 2554 } 2555 2556 /* Compute and check data dependencies in both basic blocks. */ 2557 then_ddrs.create (1); 2558 else_ddrs.create (1); 2559 if (!compute_all_dependences (then_datarefs, &then_ddrs, 2560 vNULL, false) 2561 || !compute_all_dependences (else_datarefs, &else_ddrs, 2562 vNULL, false)) 2563 { 2564 free_dependence_relations (then_ddrs); 2565 free_dependence_relations (else_ddrs); 2566 free_data_refs (then_datarefs); 2567 free_data_refs (else_datarefs); 2568 return false; 2569 } 2570 blocks[0] = then_bb; 2571 blocks[1] = else_bb; 2572 blocks[2] = join_bb; 2573 renumber_gimple_stmt_uids_in_blocks (blocks, 3); 2574 2575 /* Check that there are no read-after-write or write-after-write dependencies 2576 in THEN_BB. */ 2577 FOR_EACH_VEC_ELT (then_ddrs, i, ddr) 2578 { 2579 struct data_reference *dra = DDR_A (ddr); 2580 struct data_reference *drb = DDR_B (ddr); 2581 2582 if (DDR_ARE_DEPENDENT (ddr) != chrec_known 2583 && ((DR_IS_READ (dra) && DR_IS_WRITE (drb) 2584 && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb))) 2585 || (DR_IS_READ (drb) && DR_IS_WRITE (dra) 2586 && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra))) 2587 || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)))) 2588 { 2589 free_dependence_relations (then_ddrs); 2590 free_dependence_relations (else_ddrs); 2591 free_data_refs (then_datarefs); 2592 free_data_refs (else_datarefs); 2593 return false; 2594 } 2595 } 2596 2597 /* Check that there are no read-after-write or write-after-write dependencies 2598 in ELSE_BB. */ 2599 FOR_EACH_VEC_ELT (else_ddrs, i, ddr) 2600 { 2601 struct data_reference *dra = DDR_A (ddr); 2602 struct data_reference *drb = DDR_B (ddr); 2603 2604 if (DDR_ARE_DEPENDENT (ddr) != chrec_known 2605 && ((DR_IS_READ (dra) && DR_IS_WRITE (drb) 2606 && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb))) 2607 || (DR_IS_READ (drb) && DR_IS_WRITE (dra) 2608 && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra))) 2609 || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)))) 2610 { 2611 free_dependence_relations (then_ddrs); 2612 free_dependence_relations (else_ddrs); 2613 free_data_refs (then_datarefs); 2614 free_data_refs (else_datarefs); 2615 return false; 2616 } 2617 } 2618 2619 /* Sink stores with same LHS. 
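     Continuing the example above, each pair is handed to
     cond_if_else_store_replacement_1, so the two stores to a.x become
     (hypothetical SSA names)

       cstore_9 = PHI <_5 (THEN_BB), _8 (ELSE_BB)>
       a.x = cstore_9;

     in JOIN_BB.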
*/ 2620 FOR_EACH_VEC_ELT (then_stores, i, then_store) 2621 { 2622 else_store = else_stores[i]; 2623 res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb, 2624 then_store, else_store); 2625 ok = ok || res; 2626 } 2627 2628 free_dependence_relations (then_ddrs); 2629 free_dependence_relations (else_ddrs); 2630 free_data_refs (then_datarefs); 2631 free_data_refs (else_datarefs); 2632 2633 return ok; 2634 } 2635 2636 /* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB. */ 2637 2638 static bool 2639 local_mem_dependence (gimple *stmt, basic_block bb) 2640 { 2641 tree vuse = gimple_vuse (stmt); 2642 gimple *def; 2643 2644 if (!vuse) 2645 return false; 2646 2647 def = SSA_NAME_DEF_STMT (vuse); 2648 return (def && gimple_bb (def) == bb); 2649 } 2650 2651 /* Given a "diamond" control-flow pattern where BB0 tests a condition, 2652 BB1 and BB2 are "then" and "else" blocks dependent on this test, 2653 and BB3 rejoins control flow following BB1 and BB2, look for 2654 opportunities to hoist loads as follows. If BB3 contains a PHI of 2655 two loads, one each occurring in BB1 and BB2, and the loads are 2656 provably of adjacent fields in the same structure, then move both 2657 loads into BB0. Of course this can only be done if there are no 2658 dependencies preventing such motion. 2659 2660 One of the hoisted loads will always be speculative, so the 2661 transformation is currently conservative: 2662 2663 - The fields must be strictly adjacent. 2664 - The two fields must occupy a single memory block that is 2665 guaranteed to not cross a page boundary. 2666 2667 The last is difficult to prove, as such memory blocks should be 2668 aligned on the minimum of the stack alignment boundary and the 2669 alignment guaranteed by heap allocation interfaces. Thus we rely 2670 on a parameter for the alignment value. 2671 2672 Provided a good value is used for the last case, the first 2673 restriction could possibly be relaxed. */ 2674 2675 static void 2676 hoist_adjacent_loads (basic_block bb0, basic_block bb1, 2677 basic_block bb2, basic_block bb3) 2678 { 2679 int param_align = param_l1_cache_line_size; 2680 unsigned param_align_bits = (unsigned) (param_align * BITS_PER_UNIT); 2681 gphi_iterator gsi; 2682 2683 /* Walk the phis in bb3 looking for an opportunity. We are looking 2684 for phis of two SSA names, one each of which is defined in bb1 and 2685 bb2. */ 2686 for (gsi = gsi_start_phis (bb3); !gsi_end_p (gsi); gsi_next (&gsi)) 2687 { 2688 gphi *phi_stmt = gsi.phi (); 2689 gimple *def1, *def2; 2690 tree arg1, arg2, ref1, ref2, field1, field2; 2691 tree tree_offset1, tree_offset2, tree_size2, next; 2692 int offset1, offset2, size2; 2693 unsigned align1; 2694 gimple_stmt_iterator gsi2; 2695 basic_block bb_for_def1, bb_for_def2; 2696 2697 if (gimple_phi_num_args (phi_stmt) != 2 2698 || virtual_operand_p (gimple_phi_result (phi_stmt))) 2699 continue; 2700 2701 arg1 = gimple_phi_arg_def (phi_stmt, 0); 2702 arg2 = gimple_phi_arg_def (phi_stmt, 1); 2703 2704 if (TREE_CODE (arg1) != SSA_NAME 2705 || TREE_CODE (arg2) != SSA_NAME 2706 || SSA_NAME_IS_DEFAULT_DEF (arg1) 2707 || SSA_NAME_IS_DEFAULT_DEF (arg2)) 2708 continue; 2709 2710 def1 = SSA_NAME_DEF_STMT (arg1); 2711 def2 = SSA_NAME_DEF_STMT (arg2); 2712 2713 if ((gimple_bb (def1) != bb1 || gimple_bb (def2) != bb2) 2714 && (gimple_bb (def2) != bb1 || gimple_bb (def1) != bb2)) 2715 continue; 2716 2717 /* Check the mode of the arguments to be sure a conditional move 2718 can be generated for it. 
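     E.g. if movcc_optab has no handler for the mode of the PHI
     arguments (say, a target that can only emit conditional moves in
     word_mode while the arguments are QImode), the PHI is skipped;
     the supported modes are entirely target-specific.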
*/ 2719 if (optab_handler (movcc_optab, TYPE_MODE (TREE_TYPE (arg1))) 2720 == CODE_FOR_nothing) 2721 continue; 2722 2723 /* Both statements must be assignments whose RHS is a COMPONENT_REF. */ 2724 if (!gimple_assign_single_p (def1) 2725 || !gimple_assign_single_p (def2) 2726 || gimple_has_volatile_ops (def1) 2727 || gimple_has_volatile_ops (def2)) 2728 continue; 2729 2730 ref1 = gimple_assign_rhs1 (def1); 2731 ref2 = gimple_assign_rhs1 (def2); 2732 2733 if (TREE_CODE (ref1) != COMPONENT_REF 2734 || TREE_CODE (ref2) != COMPONENT_REF) 2735 continue; 2736 2737 /* The zeroth operand of the two component references must be 2738 identical. It is not sufficient to compare get_base_address of 2739 the two references, because this could allow for different 2740 elements of the same array in the two trees. It is not safe to 2741 assume that the existence of one array element implies the 2742 existence of a different one. */ 2743 if (!operand_equal_p (TREE_OPERAND (ref1, 0), TREE_OPERAND (ref2, 0), 0)) 2744 continue; 2745 2746 field1 = TREE_OPERAND (ref1, 1); 2747 field2 = TREE_OPERAND (ref2, 1); 2748 2749 /* Check for field adjacency, and ensure field1 comes first. */ 2750 for (next = DECL_CHAIN (field1); 2751 next && TREE_CODE (next) != FIELD_DECL; 2752 next = DECL_CHAIN (next)) 2753 ; 2754 2755 if (next != field2) 2756 { 2757 for (next = DECL_CHAIN (field2); 2758 next && TREE_CODE (next) != FIELD_DECL; 2759 next = DECL_CHAIN (next)) 2760 ; 2761 2762 if (next != field1) 2763 continue; 2764 2765 std::swap (field1, field2); 2766 std::swap (def1, def2); 2767 } 2768 2769 bb_for_def1 = gimple_bb (def1); 2770 bb_for_def2 = gimple_bb (def2); 2771 2772 /* Check for proper alignment of the first field. */ 2773 tree_offset1 = bit_position (field1); 2774 tree_offset2 = bit_position (field2); 2775 tree_size2 = DECL_SIZE (field2); 2776 2777 if (!tree_fits_uhwi_p (tree_offset1) 2778 || !tree_fits_uhwi_p (tree_offset2) 2779 || !tree_fits_uhwi_p (tree_size2)) 2780 continue; 2781 2782 offset1 = tree_to_uhwi (tree_offset1); 2783 offset2 = tree_to_uhwi (tree_offset2); 2784 size2 = tree_to_uhwi (tree_size2); 2785 align1 = DECL_ALIGN (field1) % param_align_bits; 2786 2787 if (offset1 % BITS_PER_UNIT != 0) 2788 continue; 2789 2790 /* For profitability, the two field references should fit within 2791 a single cache line. */ 2792 if (align1 + offset2 - offset1 + size2 > param_align_bits) 2793 continue; 2794 2795 /* The two expressions cannot be dependent upon vdefs defined 2796 in bb1/bb2. */ 2797 if (local_mem_dependence (def1, bb_for_def1) 2798 || local_mem_dependence (def2, bb_for_def2)) 2799 continue; 2800 2801 /* The conditions are satisfied; hoist the loads from bb1 and bb2 into 2802 bb0. We hoist the first one first so that a cache miss is handled 2803 efficiently regardless of hardware cache-fill policy. */ 2804 gsi2 = gsi_for_stmt (def1); 2805 gsi_move_to_bb_end (&gsi2, bb0); 2806 gsi2 = gsi_for_stmt (def2); 2807 gsi_move_to_bb_end (&gsi2, bb0); 2808 2809 if (dump_file && (dump_flags & TDF_DETAILS)) 2810 { 2811 fprintf (dump_file, 2812 "\nHoisting adjacent loads from %d and %d into %d: \n", 2813 bb_for_def1->index, bb_for_def2->index, bb0->index); 2814 print_gimple_stmt (dump_file, def1, 0, TDF_VOPS|TDF_MEMSYMS); 2815 print_gimple_stmt (dump_file, def2, 0, TDF_VOPS|TDF_MEMSYMS); 2816 } 2817 } 2818 } 2819 2820 /* Determine whether we should attempt to hoist adjacent loads out of 2821 diamond patterns in pass_phiopt. 
Always hoist loads if
2822 -fhoist-adjacent-loads is specified and the target machine has
2823 both a conditional move instruction and a defined cache line size. */
2824
2825 static bool
2826 gate_hoist_loads (void)
2827 {
2828 return (flag_hoist_adjacent_loads == 1
2829 && param_l1_cache_line_size
2830 && HAVE_conditional_move);
2831 }
2832
2833 /* This pass tries to replace an if-then-else block with an
2834 assignment. We have four kinds of transformations. Some of these
2835 transformations are also performed by the ifcvt RTL optimizer.
2836
2837 Conditional Replacement
2838 -----------------------
2839
2840 This transformation, implemented in conditional_replacement,
2841 replaces
2842
2843 bb0:
2844 if (cond) goto bb2; else goto bb1;
2845 bb1:
2846 bb2:
2847 x = PHI <0 (bb1), 1 (bb0), ...>;
2848
2849 with
2850
2851 bb0:
2852 x' = cond;
2853 goto bb2;
2854 bb2:
2855 x = PHI <x' (bb0), ...>;
2856
2857 We remove bb1 as it becomes unreachable. This often occurs due to
2858 gimplification of conditionals.
2859
2860 Value Replacement
2861 -----------------
2862
2863 This transformation, implemented in value_replacement, replaces
2864
2865 bb0:
2866 if (a != b) goto bb2; else goto bb1;
2867 bb1:
2868 bb2:
2869 x = PHI <a (bb1), b (bb0), ...>;
2870
2871 with
2872
2873 bb0:
2874 bb2:
2875 x = PHI <b (bb0), ...>;
2876
2877 This opportunity can sometimes occur as a result of other
2878 optimizations.
2879
2880
2881 Another case caught by value replacement looks like this:
2882
2883 bb0:
2884 t1 = a == CONST;
2885 t2 = b > c;
2886 t3 = t1 & t2;
2887 if (t3 != 0) goto bb1; else goto bb2;
2888 bb1:
2889 bb2:
2890 x = PHI <CONST, a>
2891
2892 This gets replaced with:
2893 bb0:
2894 bb2:
2895 t1 = a == CONST;
2896 t2 = b > c;
2897 t3 = t1 & t2;
2898 x = a;
2899
2900 ABS Replacement
2901 ---------------
2902
2903 This transformation, implemented in abs_replacement, replaces
2904
2905 bb0:
2906 if (a >= 0) goto bb2; else goto bb1;
2907 bb1:
2908 x = -a;
2909 bb2:
2910 x = PHI <x (bb1), a (bb0), ...>;
2911
2912 with
2913
2914 bb0:
2915 x' = ABS_EXPR< a >;
2916 bb2:
2917 x = PHI <x' (bb0), ...>;
2918
2919 MIN/MAX Replacement
2920 -------------------
2921
2922 This transformation, implemented in minmax_replacement, replaces
2923
2924 bb0:
2925 if (a <= b) goto bb2; else goto bb1;
2926 bb1:
2927 bb2:
2928 x = PHI <b (bb1), a (bb0), ...>;
2929
2930 with
2931
2932 bb0:
2933 x' = MIN_EXPR (a, b)
2934 bb2:
2935 x = PHI <x' (bb0), ...>;
2936
2937 A similar transformation is done for MAX_EXPR.
2938
2939
2940 This pass also performs a fifth transformation of a slightly different
2941 flavor.
2942
2943 Factor conversion in COND_EXPR
2944 ------------------------------
2945
2946 This transformation factors the conversion out of COND_EXPR with
2947 factor_out_conditional_conversion.
2948
2949 For example:
2950 if (a <= CST) goto <bb 3>; else goto <bb 4>;
2951 <bb 3>:
2952 tmp = (int) a;
2953 <bb 4>:
2954 tmp = PHI <tmp, CST>
2955
2956 Into:
2957 if (a <= CST) goto <bb 3>; else goto <bb 4>;
2958 <bb 3>:
2959 <bb 4>:
2960 a = PHI <a, CST>
2961 tmp = (int) a;
2962
2963 Adjacent Load Hoisting
2964 ----------------------
2965
2966 This transformation replaces
2967
2968 bb0:
2969 if (...) goto bb2; else goto bb1;
2970 bb1:
2971 x1 = (<expr>).field1;
2972 goto bb3;
2973 bb2:
2974 x2 = (<expr>).field2;
2975 bb3:
2976 # x = PHI <x1, x2>;
2977
2978 with
2979
2980 bb0:
2981 x1 = (<expr>).field1;
2982 x2 = (<expr>).field2;
2983 if (...)
goto bb2; else goto bb1; 2984 bb1: 2985 goto bb3; 2986 bb2: 2987 bb3: 2988 # x = PHI <x1, x2>; 2989 2990 The purpose of this transformation is to enable generation of conditional 2991 move instructions such as Intel CMOVE or PowerPC ISEL. Because one of 2992 the loads is speculative, the transformation is restricted to very 2993 specific cases to avoid introducing a page fault. We are looking for 2994 the common idiom: 2995 2996 if (...) 2997 x = y->left; 2998 else 2999 x = y->right; 3000 3001 where left and right are typically adjacent pointers in a tree structure. */ 3002 3003 namespace { 3004 3005 const pass_data pass_data_phiopt = 3006 { 3007 GIMPLE_PASS, /* type */ 3008 "phiopt", /* name */ 3009 OPTGROUP_NONE, /* optinfo_flags */ 3010 TV_TREE_PHIOPT, /* tv_id */ 3011 ( PROP_cfg | PROP_ssa ), /* properties_required */ 3012 0, /* properties_provided */ 3013 0, /* properties_destroyed */ 3014 0, /* todo_flags_start */ 3015 0, /* todo_flags_finish */ 3016 }; 3017 3018 class pass_phiopt : public gimple_opt_pass 3019 { 3020 public: 3021 pass_phiopt (gcc::context *ctxt) 3022 : gimple_opt_pass (pass_data_phiopt, ctxt), early_p (false) 3023 {} 3024 3025 /* opt_pass methods: */ 3026 opt_pass * clone () { return new pass_phiopt (m_ctxt); } 3027 void set_pass_param (unsigned n, bool param) 3028 { 3029 gcc_assert (n == 0); 3030 early_p = param; 3031 } 3032 virtual bool gate (function *) { return flag_ssa_phiopt; } 3033 virtual unsigned int execute (function *) 3034 { 3035 return tree_ssa_phiopt_worker (false, 3036 !early_p ? gate_hoist_loads () : false, 3037 early_p); 3038 } 3039 3040 private: 3041 bool early_p; 3042 }; // class pass_phiopt 3043 3044 } // anon namespace 3045 3046 gimple_opt_pass * 3047 make_pass_phiopt (gcc::context *ctxt) 3048 { 3049 return new pass_phiopt (ctxt); 3050 } 3051 3052 namespace { 3053 3054 const pass_data pass_data_cselim = 3055 { 3056 GIMPLE_PASS, /* type */ 3057 "cselim", /* name */ 3058 OPTGROUP_NONE, /* optinfo_flags */ 3059 TV_TREE_PHIOPT, /* tv_id */ 3060 ( PROP_cfg | PROP_ssa ), /* properties_required */ 3061 0, /* properties_provided */ 3062 0, /* properties_destroyed */ 3063 0, /* todo_flags_start */ 3064 0, /* todo_flags_finish */ 3065 }; 3066 3067 class pass_cselim : public gimple_opt_pass 3068 { 3069 public: 3070 pass_cselim (gcc::context *ctxt) 3071 : gimple_opt_pass (pass_data_cselim, ctxt) 3072 {} 3073 3074 /* opt_pass methods: */ 3075 virtual bool gate (function *) { return flag_tree_cselim; } 3076 virtual unsigned int execute (function *) { return tree_ssa_cs_elim (); } 3077 3078 }; // class pass_cselim 3079 3080 } // anon namespace 3081 3082 gimple_opt_pass * 3083 make_pass_cselim (gcc::context *ctxt) 3084 { 3085 return new pass_cselim (ctxt); 3086 } 3087