1 /* Loop header copying on trees. 2 Copyright (C) 2004-2022 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify it 7 under the terms of the GNU General Public License as published by the 8 Free Software Foundation; either version 3, or (at your option) any 9 later version. 10 11 GCC is distributed in the hope that it will be useful, but WITHOUT 12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 #include "config.h" 21 #include "system.h" 22 #include "coretypes.h" 23 #include "backend.h" 24 #include "tree.h" 25 #include "gimple.h" 26 #include "cfghooks.h" 27 #include "tree-pass.h" 28 #include "gimple-ssa.h" 29 #include "gimple-iterator.h" 30 #include "tree-cfg.h" 31 #include "tree-into-ssa.h" 32 #include "cfgloop.h" 33 #include "tree-inline.h" 34 #include "tree-ssa-threadedge.h" 35 #include "tree-ssa-sccvn.h" 36 #include "tree-phinodes.h" 37 #include "ssa-iterators.h" 38 #include "value-range.h" 39 #include "gimple-range.h" 40 #include "gimple-range-path.h" 41 #include "cfganal.h" 42 43 /* Duplicates headers of loops if they are small enough, so that the statements 44 in the loop body are always executed when the loop is entered. This 45 increases effectiveness of code motion optimizations, and reduces the need 46 for loop preconditioning. */ 47 48 /* Return true if the condition on the first iteration of the loop can 49 be statically determined. */ 50 51 static bool 52 entry_loop_condition_is_static (class loop *l, path_range_query *query) 53 { 54 edge e = loop_preheader_edge (l); 55 gcond *last = safe_dyn_cast <gcond *> (last_stmt (e->dest)); 56 57 if (!last 58 || !irange::supports_type_p (TREE_TYPE (gimple_cond_lhs (last)))) 59 return false; 60 61 edge true_e, false_e; 62 extract_true_false_edges_from_block (e->dest, &true_e, &false_e); 63 64 /* If neither edge is the exit edge, this is not a case we'd like to 65 special-case. */ 66 if (!loop_exit_edge_p (l, true_e) && !loop_exit_edge_p (l, false_e)) 67 return false; 68 69 tree desired_static_value; 70 if (loop_exit_edge_p (l, true_e)) 71 desired_static_value = boolean_false_node; 72 else 73 desired_static_value = boolean_true_node; 74 75 int_range<2> r; 76 query->compute_ranges (e); 77 query->range_of_stmt (r, last); 78 return r == int_range<2> (desired_static_value, desired_static_value); 79 } 80 81 /* Check whether we should duplicate HEADER of LOOP. At most *LIMIT 82 instructions should be duplicated, limit is decreased by the actual 83 amount. */ 84 85 static bool 86 should_duplicate_loop_header_p (basic_block header, class loop *loop, 87 int *limit) 88 { 89 gimple_stmt_iterator bsi; 90 91 gcc_assert (!header->aux); 92 93 gcc_assert (EDGE_COUNT (header->succs) > 0); 94 if (single_succ_p (header)) 95 { 96 if (dump_file && (dump_flags & TDF_DETAILS)) 97 fprintf (dump_file, 98 " Not duplicating bb %i: it is single succ.\n", 99 header->index); 100 return false; 101 } 102 103 if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest) 104 && flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 1)->dest)) 105 { 106 if (dump_file && (dump_flags & TDF_DETAILS)) 107 fprintf (dump_file, 108 " Not duplicating bb %i: both successors are in loop.\n", 109 loop->num); 110 return false; 111 } 112 113 /* If this is not the original loop header, we want it to have just 114 one predecessor in order to match the && pattern. */ 115 if (header != loop->header && !single_pred_p (header)) 116 { 117 if (dump_file && (dump_flags & TDF_DETAILS)) 118 fprintf (dump_file, 119 " Not duplicating bb %i: it has mutiple predecestors.\n", 120 header->index); 121 return false; 122 } 123 124 gcond *last = safe_dyn_cast <gcond *> (last_stmt (header)); 125 if (!last) 126 { 127 if (dump_file && (dump_flags & TDF_DETAILS)) 128 fprintf (dump_file, 129 " Not duplicating bb %i: it does not end by conditional.\n", 130 header->index); 131 return false; 132 } 133 134 for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (psi); 135 gsi_next (&psi)) 136 { 137 gphi *phi = psi.phi (); 138 tree res = gimple_phi_result (phi); 139 if (INTEGRAL_TYPE_P (TREE_TYPE (res)) 140 || POINTER_TYPE_P (TREE_TYPE (res))) 141 gimple_set_uid (phi, 1 /* IV */); 142 else 143 gimple_set_uid (phi, 0); 144 } 145 146 /* Count number of instructions and punt on calls. 147 Populate stmts INV/IV flag to later apply heuristics to the 148 kind of conditions we want to copy. */ 149 for (bsi = gsi_start_bb (header); !gsi_end_p (bsi); gsi_next (&bsi)) 150 { 151 gimple *last = gsi_stmt (bsi); 152 153 if (gimple_code (last) == GIMPLE_LABEL) 154 continue; 155 156 if (is_gimple_debug (last)) 157 continue; 158 159 if (gimple_code (last) == GIMPLE_CALL 160 && (!gimple_inexpensive_call_p (as_a <gcall *> (last)) 161 /* IFN_LOOP_DIST_ALIAS means that inner loop is distributed 162 at current loop's header. Don't copy in this case. */ 163 || gimple_call_internal_p (last, IFN_LOOP_DIST_ALIAS))) 164 { 165 if (dump_file && (dump_flags & TDF_DETAILS)) 166 fprintf (dump_file, 167 " Not duplicating bb %i: it contains call.\n", 168 header->index); 169 return false; 170 } 171 172 *limit -= estimate_num_insns (last, &eni_size_weights); 173 if (*limit < 0) 174 { 175 if (dump_file && (dump_flags & TDF_DETAILS)) 176 fprintf (dump_file, 177 " Not duplicating bb %i contains too many insns.\n", 178 header->index); 179 return false; 180 } 181 182 /* Classify the stmt based on whether its computation is based 183 on a IV or whether it is invariant in the loop. */ 184 gimple_set_uid (last, 0); 185 if (!gimple_vuse (last)) 186 { 187 bool inv = true; 188 bool iv = false; 189 ssa_op_iter i; 190 tree op; 191 FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE) 192 if (!SSA_NAME_IS_DEFAULT_DEF (op) 193 && flow_bb_inside_loop_p (loop, 194 gimple_bb (SSA_NAME_DEF_STMT (op)))) 195 { 196 if (!(gimple_uid (SSA_NAME_DEF_STMT (op)) & 2 /* INV */)) 197 inv = false; 198 if (gimple_uid (SSA_NAME_DEF_STMT (op)) & 1 /* IV */) 199 iv = true; 200 } 201 gimple_set_uid (last, (iv ? 1 : 0) | (inv ? 2 : 0)); 202 } 203 } 204 205 /* If the condition tests a non-IV loop variant we do not want to rotate 206 the loop further. Unless this is the original loop header. */ 207 tree lhs = gimple_cond_lhs (last); 208 tree rhs = gimple_cond_rhs (last); 209 if (header != loop->header 210 && ((TREE_CODE (lhs) == SSA_NAME 211 && !SSA_NAME_IS_DEFAULT_DEF (lhs) 212 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (lhs))) 213 && gimple_uid (SSA_NAME_DEF_STMT (lhs)) == 0) 214 || (TREE_CODE (rhs) == SSA_NAME 215 && !SSA_NAME_IS_DEFAULT_DEF (rhs) 216 && flow_bb_inside_loop_p (loop, 217 gimple_bb (SSA_NAME_DEF_STMT (rhs))) 218 && gimple_uid (SSA_NAME_DEF_STMT (rhs)) == 0))) 219 { 220 if (dump_file && (dump_flags & TDF_DETAILS)) 221 fprintf (dump_file, 222 " Not duplicating bb %i: condition based on non-IV loop" 223 " variant.\n", header->index); 224 return false; 225 } 226 227 return true; 228 } 229 230 /* Checks whether LOOP is a do-while style loop. */ 231 232 static bool 233 do_while_loop_p (class loop *loop) 234 { 235 gimple *stmt = last_stmt (loop->latch); 236 237 /* If the latch of the loop is not empty, it is not a do-while loop. */ 238 if (stmt 239 && gimple_code (stmt) != GIMPLE_LABEL) 240 { 241 if (dump_file && (dump_flags & TDF_DETAILS)) 242 fprintf (dump_file, 243 "Loop %i is not do-while loop: latch is not empty.\n", 244 loop->num); 245 return false; 246 } 247 248 /* If the latch does not have a single predecessor, it is not a 249 do-while loop. */ 250 if (!single_pred_p (loop->latch)) 251 { 252 if (dump_file && (dump_flags & TDF_DETAILS)) 253 fprintf (dump_file, 254 "Loop %i is not do-while loop: latch has multiple " 255 "predecessors.\n", loop->num); 256 return false; 257 } 258 259 /* If the latch predecessor doesn't exit the loop, it is not a 260 do-while loop. */ 261 if (!loop_exits_from_bb_p (loop, single_pred (loop->latch))) 262 { 263 if (dump_file && (dump_flags & TDF_DETAILS)) 264 fprintf (dump_file, 265 "Loop %i is not do-while loop: latch predecessor " 266 "does not exit loop.\n", loop->num); 267 return false; 268 } 269 270 if (dump_file && (dump_flags & TDF_DETAILS)) 271 fprintf (dump_file, "Loop %i is do-while loop\n", loop->num); 272 273 return true; 274 } 275 276 namespace { 277 278 /* Common superclass for both header-copying phases. */ 279 class ch_base : public gimple_opt_pass 280 { 281 protected: 282 ch_base (pass_data data, gcc::context *ctxt) 283 : gimple_opt_pass (data, ctxt) 284 {} 285 286 /* Copies headers of all loops in FUN for which process_loop_p is true. */ 287 unsigned int copy_headers (function *fun); 288 289 /* Return true to copy headers of LOOP or false to skip. */ 290 virtual bool process_loop_p (class loop *loop) = 0; 291 }; 292 293 const pass_data pass_data_ch = 294 { 295 GIMPLE_PASS, /* type */ 296 "ch", /* name */ 297 OPTGROUP_LOOP, /* optinfo_flags */ 298 TV_TREE_CH, /* tv_id */ 299 ( PROP_cfg | PROP_ssa ), /* properties_required */ 300 0, /* properties_provided */ 301 0, /* properties_destroyed */ 302 0, /* todo_flags_start */ 303 0, /* todo_flags_finish */ 304 }; 305 306 class pass_ch : public ch_base 307 { 308 public: 309 pass_ch (gcc::context *ctxt) 310 : ch_base (pass_data_ch, ctxt) 311 {} 312 313 /* opt_pass methods: */ 314 virtual bool gate (function *) { return flag_tree_ch != 0; } 315 316 /* Initialize and finalize loop structures, copying headers inbetween. */ 317 virtual unsigned int execute (function *); 318 319 opt_pass * clone () { return new pass_ch (m_ctxt); } 320 321 protected: 322 /* ch_base method: */ 323 virtual bool process_loop_p (class loop *loop); 324 }; // class pass_ch 325 326 const pass_data pass_data_ch_vect = 327 { 328 GIMPLE_PASS, /* type */ 329 "ch_vect", /* name */ 330 OPTGROUP_LOOP, /* optinfo_flags */ 331 TV_TREE_CH, /* tv_id */ 332 ( PROP_cfg | PROP_ssa ), /* properties_required */ 333 0, /* properties_provided */ 334 0, /* properties_destroyed */ 335 0, /* todo_flags_start */ 336 0, /* todo_flags_finish */ 337 }; 338 339 /* This is a more aggressive version of the same pass, designed to run just 340 before if-conversion and vectorization, to put more loops into the form 341 required for those phases. */ 342 class pass_ch_vect : public ch_base 343 { 344 public: 345 pass_ch_vect (gcc::context *ctxt) 346 : ch_base (pass_data_ch_vect, ctxt) 347 {} 348 349 /* opt_pass methods: */ 350 virtual bool gate (function *fun) 351 { 352 return flag_tree_ch != 0 353 && (flag_tree_loop_vectorize != 0 || fun->has_force_vectorize_loops); 354 } 355 356 /* Just copy headers, no initialization/finalization of loop structures. */ 357 virtual unsigned int execute (function *); 358 359 protected: 360 /* ch_base method: */ 361 virtual bool process_loop_p (class loop *loop); 362 }; // class pass_ch_vect 363 364 /* For all loops, copy the condition at the end of the loop body in front 365 of the loop. This is beneficial since it increases efficiency of 366 code motion optimizations. It also saves one jump on entry to the loop. */ 367 368 unsigned int 369 ch_base::copy_headers (function *fun) 370 { 371 basic_block header; 372 edge exit, entry; 373 basic_block *bbs, *copied_bbs; 374 unsigned n_bbs; 375 unsigned bbs_size; 376 bool changed = false; 377 378 if (number_of_loops (fun) <= 1) 379 return 0; 380 381 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); 382 copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); 383 bbs_size = n_basic_blocks_for_fn (fun); 384 385 auto_vec<loop_p> candidates; 386 auto_vec<std::pair<edge, loop_p> > copied; 387 388 mark_dfs_back_edges (); 389 path_range_query *query = new path_range_query; 390 for (auto loop : loops_list (cfun, 0)) 391 { 392 int initial_limit = param_max_loop_header_insns; 393 int remaining_limit = initial_limit; 394 if (dump_file && (dump_flags & TDF_DETAILS)) 395 fprintf (dump_file, 396 "Analyzing loop %i\n", loop->num); 397 398 header = loop->header; 399 400 /* If the loop is already a do-while style one (either because it was 401 written as such, or because jump threading transformed it into one), 402 we might be in fact peeling the first iteration of the loop. This 403 in general is not a good idea. Also avoid touching infinite loops. */ 404 if (!loop_has_exit_edges (loop) 405 || !process_loop_p (loop)) 406 continue; 407 408 /* Avoid loop header copying when optimizing for size unless we can 409 determine that the loop condition is static in the first 410 iteration. */ 411 if (optimize_loop_for_size_p (loop) 412 && !loop->force_vectorize 413 && !entry_loop_condition_is_static (loop, query)) 414 { 415 if (dump_file && (dump_flags & TDF_DETAILS)) 416 fprintf (dump_file, 417 " Not duplicating bb %i: optimizing for size.\n", 418 header->index); 419 continue; 420 } 421 422 if (should_duplicate_loop_header_p (header, loop, &remaining_limit)) 423 candidates.safe_push (loop); 424 } 425 /* Do not use ranger after we change the IL and not have updated SSA. */ 426 delete query; 427 428 for (auto loop : candidates) 429 { 430 int initial_limit = param_max_loop_header_insns; 431 int remaining_limit = initial_limit; 432 if (dump_file && (dump_flags & TDF_DETAILS)) 433 fprintf (dump_file, 434 "Copying headers of loop %i\n", loop->num); 435 436 header = loop->header; 437 438 /* Iterate the header copying up to limit; this takes care of the cases 439 like while (a && b) {...}, where we want to have both of the conditions 440 copied. TODO -- handle while (a || b) - like cases, by not requiring 441 the header to have just a single successor and copying up to 442 postdominator. */ 443 444 exit = NULL; 445 n_bbs = 0; 446 while (should_duplicate_loop_header_p (header, loop, &remaining_limit)) 447 { 448 if (dump_file && (dump_flags & TDF_DETAILS)) 449 fprintf (dump_file, " Will duplicate bb %i\n", header->index); 450 451 /* Find a successor of header that is inside a loop; i.e. the new 452 header after the condition is copied. */ 453 if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)) 454 exit = EDGE_SUCC (header, 0); 455 else 456 exit = EDGE_SUCC (header, 1); 457 bbs[n_bbs++] = header; 458 gcc_assert (bbs_size > n_bbs); 459 header = exit->dest; 460 } 461 462 if (!exit) 463 continue; 464 465 if (dump_file && (dump_flags & TDF_DETAILS)) 466 fprintf (dump_file, 467 "Duplicating header of the loop %d up to edge %d->%d," 468 " %i insns.\n", 469 loop->num, exit->src->index, exit->dest->index, 470 initial_limit - remaining_limit); 471 472 /* Ensure that the header will have just the latch as a predecessor 473 inside the loop. */ 474 if (!single_pred_p (exit->dest)) 475 exit = single_pred_edge (split_edge (exit)); 476 477 entry = loop_preheader_edge (loop); 478 479 propagate_threaded_block_debug_into (exit->dest, entry->dest); 480 if (!gimple_duplicate_sese_region (entry, exit, bbs, n_bbs, copied_bbs, 481 true)) 482 { 483 if (dump_file && (dump_flags & TDF_DETAILS)) 484 fprintf (dump_file, "Duplication failed.\n"); 485 continue; 486 } 487 copied.safe_push (std::make_pair (entry, loop)); 488 489 /* If the loop has the form "for (i = j; i < j + 10; i++)" then 490 this copying can introduce a case where we rely on undefined 491 signed overflow to eliminate the preheader condition, because 492 we assume that "j < j + 10" is true. We don't want to warn 493 about that case for -Wstrict-overflow, because in general we 494 don't warn about overflow involving loops. Prevent the 495 warning by setting the no_warning flag in the condition. */ 496 if (warn_strict_overflow > 0) 497 { 498 unsigned int i; 499 500 for (i = 0; i < n_bbs; ++i) 501 { 502 gimple_stmt_iterator bsi; 503 504 for (bsi = gsi_start_bb (copied_bbs[i]); 505 !gsi_end_p (bsi); 506 gsi_next (&bsi)) 507 { 508 gimple *stmt = gsi_stmt (bsi); 509 if (gimple_code (stmt) == GIMPLE_COND) 510 { 511 tree lhs = gimple_cond_lhs (stmt); 512 if (gimple_cond_code (stmt) != EQ_EXPR 513 && gimple_cond_code (stmt) != NE_EXPR 514 && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 515 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))) 516 suppress_warning (stmt, OPT_Wstrict_overflow_); 517 } 518 else if (is_gimple_assign (stmt)) 519 { 520 enum tree_code rhs_code = gimple_assign_rhs_code (stmt); 521 tree rhs1 = gimple_assign_rhs1 (stmt); 522 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison 523 && rhs_code != EQ_EXPR 524 && rhs_code != NE_EXPR 525 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) 526 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1))) 527 suppress_warning (stmt, OPT_Wstrict_overflow_); 528 } 529 } 530 } 531 } 532 533 /* Ensure that the latch and the preheader is simple (we know that they 534 are not now, since there was the loop exit condition. */ 535 split_edge (loop_preheader_edge (loop)); 536 split_edge (loop_latch_edge (loop)); 537 538 if (dump_file && (dump_flags & TDF_DETAILS)) 539 { 540 if (do_while_loop_p (loop)) 541 fprintf (dump_file, "Loop %d is now do-while loop.\n", loop->num); 542 else 543 fprintf (dump_file, "Loop %d is still not do-while loop.\n", 544 loop->num); 545 } 546 547 changed = true; 548 } 549 550 if (changed) 551 { 552 update_ssa (TODO_update_ssa); 553 /* After updating SSA form perform CSE on the loop header 554 copies. This is esp. required for the pass before 555 vectorization since nothing cleans up copied exit tests 556 that can now be simplified. CSE from the entry of the 557 region we copied till all loop exit blocks but not 558 entering the loop itself. */ 559 for (unsigned i = 0; i < copied.length (); ++i) 560 { 561 edge entry = copied[i].first; 562 loop_p loop = copied[i].second; 563 auto_vec<edge> exit_edges = get_loop_exit_edges (loop); 564 bitmap exit_bbs = BITMAP_ALLOC (NULL); 565 for (unsigned j = 0; j < exit_edges.length (); ++j) 566 bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index); 567 bitmap_set_bit (exit_bbs, loop->header->index); 568 do_rpo_vn (cfun, entry, exit_bbs); 569 BITMAP_FREE (exit_bbs); 570 } 571 } 572 free (bbs); 573 free (copied_bbs); 574 575 return changed ? TODO_cleanup_cfg : 0; 576 } 577 578 /* Initialize the loop structures we need, and finalize after. */ 579 580 unsigned int 581 pass_ch::execute (function *fun) 582 { 583 loop_optimizer_init (LOOPS_HAVE_PREHEADERS 584 | LOOPS_HAVE_SIMPLE_LATCHES 585 | LOOPS_HAVE_RECORDED_EXITS); 586 587 unsigned int res = copy_headers (fun); 588 589 loop_optimizer_finalize (); 590 return res; 591 } 592 593 /* Assume an earlier phase has already initialized all the loop structures that 594 we need here (and perhaps others too), and that these will be finalized by 595 a later phase. */ 596 597 unsigned int 598 pass_ch_vect::execute (function *fun) 599 { 600 return copy_headers (fun); 601 } 602 603 /* Apply header copying according to a very simple test of do-while shape. */ 604 605 bool 606 pass_ch::process_loop_p (class loop *loop) 607 { 608 return !do_while_loop_p (loop); 609 } 610 611 /* Apply header-copying to loops where we might enable vectorization. */ 612 613 bool 614 pass_ch_vect::process_loop_p (class loop *loop) 615 { 616 if (!flag_tree_loop_vectorize && !loop->force_vectorize) 617 return false; 618 619 if (loop->dont_vectorize) 620 return false; 621 622 /* The vectorizer won't handle anything with multiple exits, so skip. */ 623 edge exit = single_exit (loop); 624 if (!exit) 625 return false; 626 627 if (!do_while_loop_p (loop)) 628 return true; 629 630 return false; 631 } 632 633 } // anon namespace 634 635 gimple_opt_pass * 636 make_pass_ch_vect (gcc::context *ctxt) 637 { 638 return new pass_ch_vect (ctxt); 639 } 640 641 gimple_opt_pass * 642 make_pass_ch (gcc::context *ctxt) 643 { 644 return new pass_ch (ctxt); 645 } 646