1 /* Loop header copying on trees. 2 Copyright (C) 2004-2020 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify it 7 under the terms of the GNU General Public License as published by the 8 Free Software Foundation; either version 3, or (at your option) any 9 later version. 10 11 GCC is distributed in the hope that it will be useful, but WITHOUT 12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 #include "config.h" 21 #include "system.h" 22 #include "coretypes.h" 23 #include "backend.h" 24 #include "tree.h" 25 #include "gimple.h" 26 #include "cfghooks.h" 27 #include "tree-pass.h" 28 #include "gimple-ssa.h" 29 #include "gimple-iterator.h" 30 #include "tree-cfg.h" 31 #include "tree-into-ssa.h" 32 #include "cfgloop.h" 33 #include "tree-inline.h" 34 #include "tree-ssa-scopedtables.h" 35 #include "tree-ssa-threadedge.h" 36 #include "tree-ssa-sccvn.h" 37 #include "tree-phinodes.h" 38 #include "ssa-iterators.h" 39 40 /* Duplicates headers of loops if they are small enough, so that the statements 41 in the loop body are always executed when the loop is entered. This 42 increases effectiveness of code motion optimizations, and reduces the need 43 for loop preconditioning. */ 44 45 /* Check whether we should duplicate HEADER of LOOP. At most *LIMIT 46 instructions should be duplicated, limit is decreased by the actual 47 amount. */ 48 49 static bool 50 should_duplicate_loop_header_p (basic_block header, class loop *loop, 51 int *limit) 52 { 53 gimple_stmt_iterator bsi; 54 55 gcc_assert (!header->aux); 56 57 /* Loop header copying usually increases size of the code. This used not to 58 be true, since quite often it is possible to verify that the condition is 59 satisfied in the first iteration and therefore to eliminate it. Jump 60 threading handles these cases now. */ 61 if (optimize_loop_for_size_p (loop) 62 && !loop->force_vectorize) 63 { 64 if (dump_file && (dump_flags & TDF_DETAILS)) 65 fprintf (dump_file, 66 " Not duplicating bb %i: optimizing for size.\n", 67 header->index); 68 return false; 69 } 70 71 gcc_assert (EDGE_COUNT (header->succs) > 0); 72 if (single_succ_p (header)) 73 { 74 if (dump_file && (dump_flags & TDF_DETAILS)) 75 fprintf (dump_file, 76 " Not duplicating bb %i: it is single succ.\n", 77 header->index); 78 return false; 79 } 80 81 if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest) 82 && flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 1)->dest)) 83 { 84 if (dump_file && (dump_flags & TDF_DETAILS)) 85 fprintf (dump_file, 86 " Not duplicating bb %i: both successors are in loop.\n", 87 loop->num); 88 return false; 89 } 90 91 /* If this is not the original loop header, we want it to have just 92 one predecessor in order to match the && pattern. */ 93 if (header != loop->header && !single_pred_p (header)) 94 { 95 if (dump_file && (dump_flags & TDF_DETAILS)) 96 fprintf (dump_file, 97 " Not duplicating bb %i: it has mutiple predecestors.\n", 98 header->index); 99 return false; 100 } 101 102 gcond *last = safe_dyn_cast <gcond *> (last_stmt (header)); 103 if (!last) 104 { 105 if (dump_file && (dump_flags & TDF_DETAILS)) 106 fprintf (dump_file, 107 " Not duplicating bb %i: it does not end by conditional.\n", 108 header->index); 109 return false; 110 } 111 112 for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (psi); 113 gsi_next (&psi)) 114 { 115 gphi *phi = psi.phi (); 116 tree res = gimple_phi_result (phi); 117 if (INTEGRAL_TYPE_P (TREE_TYPE (res)) 118 || POINTER_TYPE_P (TREE_TYPE (res))) 119 gimple_set_uid (phi, 1 /* IV */); 120 else 121 gimple_set_uid (phi, 0); 122 } 123 124 /* Count number of instructions and punt on calls. 125 Populate stmts INV/IV flag to later apply heuristics to the 126 kind of conditions we want to copy. */ 127 for (bsi = gsi_start_bb (header); !gsi_end_p (bsi); gsi_next (&bsi)) 128 { 129 gimple *last = gsi_stmt (bsi); 130 131 if (gimple_code (last) == GIMPLE_LABEL) 132 continue; 133 134 if (is_gimple_debug (last)) 135 continue; 136 137 if (gimple_code (last) == GIMPLE_CALL 138 && (!gimple_inexpensive_call_p (as_a <gcall *> (last)) 139 /* IFN_LOOP_DIST_ALIAS means that inner loop is distributed 140 at current loop's header. Don't copy in this case. */ 141 || gimple_call_internal_p (last, IFN_LOOP_DIST_ALIAS))) 142 { 143 if (dump_file && (dump_flags & TDF_DETAILS)) 144 fprintf (dump_file, 145 " Not duplicating bb %i: it contains call.\n", 146 header->index); 147 return false; 148 } 149 150 *limit -= estimate_num_insns (last, &eni_size_weights); 151 if (*limit < 0) 152 { 153 if (dump_file && (dump_flags & TDF_DETAILS)) 154 fprintf (dump_file, 155 " Not duplicating bb %i contains too many insns.\n", 156 header->index); 157 return false; 158 } 159 160 /* Classify the stmt based on whether its computation is based 161 on a IV or whether it is invariant in the loop. */ 162 gimple_set_uid (last, 0); 163 if (!gimple_vuse (last)) 164 { 165 bool inv = true; 166 bool iv = false; 167 ssa_op_iter i; 168 tree op; 169 FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE) 170 if (!SSA_NAME_IS_DEFAULT_DEF (op) 171 && flow_bb_inside_loop_p (loop, 172 gimple_bb (SSA_NAME_DEF_STMT (op)))) 173 { 174 if (!(gimple_uid (SSA_NAME_DEF_STMT (op)) & 2 /* INV */)) 175 inv = false; 176 if (gimple_uid (SSA_NAME_DEF_STMT (op)) & 1 /* IV */) 177 iv = true; 178 } 179 gimple_set_uid (last, (iv ? 1 : 0) | (inv ? 2 : 0)); 180 } 181 } 182 183 /* If the condition tests a non-IV loop variant we do not want to rotate 184 the loop further. Unless this is the original loop header. */ 185 tree lhs = gimple_cond_lhs (last); 186 tree rhs = gimple_cond_rhs (last); 187 if (header != loop->header 188 && ((TREE_CODE (lhs) == SSA_NAME 189 && !SSA_NAME_IS_DEFAULT_DEF (lhs) 190 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (lhs))) 191 && gimple_uid (SSA_NAME_DEF_STMT (lhs)) == 0) 192 || (TREE_CODE (rhs) == SSA_NAME 193 && !SSA_NAME_IS_DEFAULT_DEF (rhs) 194 && flow_bb_inside_loop_p (loop, 195 gimple_bb (SSA_NAME_DEF_STMT (rhs))) 196 && gimple_uid (SSA_NAME_DEF_STMT (rhs)) == 0))) 197 { 198 if (dump_file && (dump_flags & TDF_DETAILS)) 199 fprintf (dump_file, 200 " Not duplicating bb %i: condition based on non-IV loop" 201 " variant.\n", header->index); 202 return false; 203 } 204 205 if (dump_file && (dump_flags & TDF_DETAILS)) 206 fprintf (dump_file, " Will duplicate bb %i\n", header->index); 207 return true; 208 } 209 210 /* Checks whether LOOP is a do-while style loop. */ 211 212 static bool 213 do_while_loop_p (class loop *loop) 214 { 215 gimple *stmt = last_stmt (loop->latch); 216 217 /* If the latch of the loop is not empty, it is not a do-while loop. */ 218 if (stmt 219 && gimple_code (stmt) != GIMPLE_LABEL) 220 { 221 if (dump_file && (dump_flags & TDF_DETAILS)) 222 fprintf (dump_file, 223 "Loop %i is not do-while loop: latch is not empty.\n", 224 loop->num); 225 return false; 226 } 227 228 /* If the latch does not have a single predecessor, it is not a 229 do-while loop. */ 230 if (!single_pred_p (loop->latch)) 231 { 232 if (dump_file && (dump_flags & TDF_DETAILS)) 233 fprintf (dump_file, 234 "Loop %i is not do-while loop: latch has multiple " 235 "predecessors.\n", loop->num); 236 return false; 237 } 238 239 /* If the latch predecessor doesn't exit the loop, it is not a 240 do-while loop. */ 241 if (!loop_exits_from_bb_p (loop, single_pred (loop->latch))) 242 { 243 if (dump_file && (dump_flags & TDF_DETAILS)) 244 fprintf (dump_file, 245 "Loop %i is not do-while loop: latch predecessor " 246 "does not exit loop.\n", loop->num); 247 return false; 248 } 249 250 if (dump_file && (dump_flags & TDF_DETAILS)) 251 fprintf (dump_file, "Loop %i is do-while loop\n", loop->num); 252 253 return true; 254 } 255 256 namespace { 257 258 /* Common superclass for both header-copying phases. */ 259 class ch_base : public gimple_opt_pass 260 { 261 protected: 262 ch_base (pass_data data, gcc::context *ctxt) 263 : gimple_opt_pass (data, ctxt) 264 {} 265 266 /* Copies headers of all loops in FUN for which process_loop_p is true. */ 267 unsigned int copy_headers (function *fun); 268 269 /* Return true to copy headers of LOOP or false to skip. */ 270 virtual bool process_loop_p (class loop *loop) = 0; 271 }; 272 273 const pass_data pass_data_ch = 274 { 275 GIMPLE_PASS, /* type */ 276 "ch", /* name */ 277 OPTGROUP_LOOP, /* optinfo_flags */ 278 TV_TREE_CH, /* tv_id */ 279 ( PROP_cfg | PROP_ssa ), /* properties_required */ 280 0, /* properties_provided */ 281 0, /* properties_destroyed */ 282 0, /* todo_flags_start */ 283 0, /* todo_flags_finish */ 284 }; 285 286 class pass_ch : public ch_base 287 { 288 public: 289 pass_ch (gcc::context *ctxt) 290 : ch_base (pass_data_ch, ctxt) 291 {} 292 293 /* opt_pass methods: */ 294 virtual bool gate (function *) { return flag_tree_ch != 0; } 295 296 /* Initialize and finalize loop structures, copying headers inbetween. */ 297 virtual unsigned int execute (function *); 298 299 opt_pass * clone () { return new pass_ch (m_ctxt); } 300 301 protected: 302 /* ch_base method: */ 303 virtual bool process_loop_p (class loop *loop); 304 }; // class pass_ch 305 306 const pass_data pass_data_ch_vect = 307 { 308 GIMPLE_PASS, /* type */ 309 "ch_vect", /* name */ 310 OPTGROUP_LOOP, /* optinfo_flags */ 311 TV_TREE_CH, /* tv_id */ 312 ( PROP_cfg | PROP_ssa ), /* properties_required */ 313 0, /* properties_provided */ 314 0, /* properties_destroyed */ 315 0, /* todo_flags_start */ 316 0, /* todo_flags_finish */ 317 }; 318 319 /* This is a more aggressive version of the same pass, designed to run just 320 before if-conversion and vectorization, to put more loops into the form 321 required for those phases. */ 322 class pass_ch_vect : public ch_base 323 { 324 public: 325 pass_ch_vect (gcc::context *ctxt) 326 : ch_base (pass_data_ch_vect, ctxt) 327 {} 328 329 /* opt_pass methods: */ 330 virtual bool gate (function *fun) 331 { 332 return flag_tree_ch != 0 333 && (flag_tree_loop_vectorize != 0 || fun->has_force_vectorize_loops); 334 } 335 336 /* Just copy headers, no initialization/finalization of loop structures. */ 337 virtual unsigned int execute (function *); 338 339 protected: 340 /* ch_base method: */ 341 virtual bool process_loop_p (class loop *loop); 342 }; // class pass_ch_vect 343 344 /* For all loops, copy the condition at the end of the loop body in front 345 of the loop. This is beneficial since it increases efficiency of 346 code motion optimizations. It also saves one jump on entry to the loop. */ 347 348 unsigned int 349 ch_base::copy_headers (function *fun) 350 { 351 class loop *loop; 352 basic_block header; 353 edge exit, entry; 354 basic_block *bbs, *copied_bbs; 355 unsigned n_bbs; 356 unsigned bbs_size; 357 bool changed = false; 358 359 if (number_of_loops (fun) <= 1) 360 return 0; 361 362 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); 363 copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); 364 bbs_size = n_basic_blocks_for_fn (fun); 365 366 auto_vec<std::pair<edge, loop_p> > copied; 367 368 FOR_EACH_LOOP (loop, 0) 369 { 370 int initial_limit = param_max_loop_header_insns; 371 int remaining_limit = initial_limit; 372 if (dump_file && (dump_flags & TDF_DETAILS)) 373 fprintf (dump_file, 374 "Analyzing loop %i\n", loop->num); 375 376 header = loop->header; 377 378 /* If the loop is already a do-while style one (either because it was 379 written as such, or because jump threading transformed it into one), 380 we might be in fact peeling the first iteration of the loop. This 381 in general is not a good idea. Also avoid touching infinite loops. */ 382 if (!loop_has_exit_edges (loop) 383 || !process_loop_p (loop)) 384 continue; 385 386 /* Iterate the header copying up to limit; this takes care of the cases 387 like while (a && b) {...}, where we want to have both of the conditions 388 copied. TODO -- handle while (a || b) - like cases, by not requiring 389 the header to have just a single successor and copying up to 390 postdominator. */ 391 392 exit = NULL; 393 n_bbs = 0; 394 while (should_duplicate_loop_header_p (header, loop, &remaining_limit)) 395 { 396 /* Find a successor of header that is inside a loop; i.e. the new 397 header after the condition is copied. */ 398 if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)) 399 exit = EDGE_SUCC (header, 0); 400 else 401 exit = EDGE_SUCC (header, 1); 402 bbs[n_bbs++] = header; 403 gcc_assert (bbs_size > n_bbs); 404 header = exit->dest; 405 } 406 407 if (!exit) 408 continue; 409 410 if (dump_file && (dump_flags & TDF_DETAILS)) 411 fprintf (dump_file, 412 "Duplicating header of the loop %d up to edge %d->%d," 413 " %i insns.\n", 414 loop->num, exit->src->index, exit->dest->index, 415 initial_limit - remaining_limit); 416 417 /* Ensure that the header will have just the latch as a predecessor 418 inside the loop. */ 419 if (!single_pred_p (exit->dest)) 420 exit = single_pred_edge (split_edge (exit)); 421 422 entry = loop_preheader_edge (loop); 423 424 propagate_threaded_block_debug_into (exit->dest, entry->dest); 425 if (!gimple_duplicate_sese_region (entry, exit, bbs, n_bbs, copied_bbs, 426 true)) 427 { 428 fprintf (dump_file, "Duplication failed.\n"); 429 continue; 430 } 431 copied.safe_push (std::make_pair (entry, loop)); 432 433 /* If the loop has the form "for (i = j; i < j + 10; i++)" then 434 this copying can introduce a case where we rely on undefined 435 signed overflow to eliminate the preheader condition, because 436 we assume that "j < j + 10" is true. We don't want to warn 437 about that case for -Wstrict-overflow, because in general we 438 don't warn about overflow involving loops. Prevent the 439 warning by setting the no_warning flag in the condition. */ 440 if (warn_strict_overflow > 0) 441 { 442 unsigned int i; 443 444 for (i = 0; i < n_bbs; ++i) 445 { 446 gimple_stmt_iterator bsi; 447 448 for (bsi = gsi_start_bb (copied_bbs[i]); 449 !gsi_end_p (bsi); 450 gsi_next (&bsi)) 451 { 452 gimple *stmt = gsi_stmt (bsi); 453 if (gimple_code (stmt) == GIMPLE_COND) 454 { 455 tree lhs = gimple_cond_lhs (stmt); 456 if (gimple_cond_code (stmt) != EQ_EXPR 457 && gimple_cond_code (stmt) != NE_EXPR 458 && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 459 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))) 460 gimple_set_no_warning (stmt, true); 461 } 462 else if (is_gimple_assign (stmt)) 463 { 464 enum tree_code rhs_code = gimple_assign_rhs_code (stmt); 465 tree rhs1 = gimple_assign_rhs1 (stmt); 466 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison 467 && rhs_code != EQ_EXPR 468 && rhs_code != NE_EXPR 469 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) 470 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1))) 471 gimple_set_no_warning (stmt, true); 472 } 473 } 474 } 475 } 476 477 /* Ensure that the latch and the preheader is simple (we know that they 478 are not now, since there was the loop exit condition. */ 479 split_edge (loop_preheader_edge (loop)); 480 split_edge (loop_latch_edge (loop)); 481 482 if (dump_file && (dump_flags & TDF_DETAILS)) 483 { 484 if (do_while_loop_p (loop)) 485 fprintf (dump_file, "Loop %d is now do-while loop.\n", loop->num); 486 else 487 fprintf (dump_file, "Loop %d is still not do-while loop.\n", 488 loop->num); 489 } 490 491 changed = true; 492 } 493 494 if (changed) 495 { 496 update_ssa (TODO_update_ssa); 497 /* After updating SSA form perform CSE on the loop header 498 copies. This is esp. required for the pass before 499 vectorization since nothing cleans up copied exit tests 500 that can now be simplified. CSE from the entry of the 501 region we copied till all loop exit blocks but not 502 entering the loop itself. */ 503 for (unsigned i = 0; i < copied.length (); ++i) 504 { 505 edge entry = copied[i].first; 506 loop_p loop = copied[i].second; 507 vec<edge> exit_edges = get_loop_exit_edges (loop); 508 bitmap exit_bbs = BITMAP_ALLOC (NULL); 509 for (unsigned j = 0; j < exit_edges.length (); ++j) 510 bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index); 511 bitmap_set_bit (exit_bbs, loop->header->index); 512 do_rpo_vn (cfun, entry, exit_bbs); 513 BITMAP_FREE (exit_bbs); 514 exit_edges.release (); 515 } 516 } 517 free (bbs); 518 free (copied_bbs); 519 520 return changed ? TODO_cleanup_cfg : 0; 521 } 522 523 /* Initialize the loop structures we need, and finalize after. */ 524 525 unsigned int 526 pass_ch::execute (function *fun) 527 { 528 loop_optimizer_init (LOOPS_HAVE_PREHEADERS 529 | LOOPS_HAVE_SIMPLE_LATCHES 530 | LOOPS_HAVE_RECORDED_EXITS); 531 532 unsigned int res = copy_headers (fun); 533 534 loop_optimizer_finalize (); 535 return res; 536 } 537 538 /* Assume an earlier phase has already initialized all the loop structures that 539 we need here (and perhaps others too), and that these will be finalized by 540 a later phase. */ 541 542 unsigned int 543 pass_ch_vect::execute (function *fun) 544 { 545 return copy_headers (fun); 546 } 547 548 /* Apply header copying according to a very simple test of do-while shape. */ 549 550 bool 551 pass_ch::process_loop_p (class loop *loop) 552 { 553 return !do_while_loop_p (loop); 554 } 555 556 /* Apply header-copying to loops where we might enable vectorization. */ 557 558 bool 559 pass_ch_vect::process_loop_p (class loop *loop) 560 { 561 if (!flag_tree_loop_vectorize && !loop->force_vectorize) 562 return false; 563 564 if (loop->dont_vectorize) 565 return false; 566 567 /* The vectorizer won't handle anything with multiple exits, so skip. */ 568 edge exit = single_exit (loop); 569 if (!exit) 570 return false; 571 572 if (!do_while_loop_p (loop)) 573 return true; 574 575 return false; 576 } 577 578 } // anon namespace 579 580 gimple_opt_pass * 581 make_pass_ch_vect (gcc::context *ctxt) 582 { 583 return new pass_ch_vect (ctxt); 584 } 585 586 gimple_opt_pass * 587 make_pass_ch (gcc::context *ctxt) 588 { 589 return new pass_ch (ctxt); 590 } 591