1 /* Loop header copying on trees. 2 Copyright (C) 2004-2019 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify it 7 under the terms of the GNU General Public License as published by the 8 Free Software Foundation; either version 3, or (at your option) any 9 later version. 10 11 GCC is distributed in the hope that it will be useful, but WITHOUT 12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 #include "config.h" 21 #include "system.h" 22 #include "coretypes.h" 23 #include "backend.h" 24 #include "tree.h" 25 #include "gimple.h" 26 #include "cfghooks.h" 27 #include "tree-pass.h" 28 #include "gimple-ssa.h" 29 #include "gimple-iterator.h" 30 #include "tree-cfg.h" 31 #include "tree-into-ssa.h" 32 #include "cfgloop.h" 33 #include "tree-inline.h" 34 #include "tree-ssa-scopedtables.h" 35 #include "tree-ssa-threadedge.h" 36 #include "tree-ssa-sccvn.h" 37 #include "tree-phinodes.h" 38 #include "ssa-iterators.h" 39 #include "params.h" 40 41 /* Duplicates headers of loops if they are small enough, so that the statements 42 in the loop body are always executed when the loop is entered. This 43 increases effectiveness of code motion optimizations, and reduces the need 44 for loop preconditioning. */ 45 46 /* Check whether we should duplicate HEADER of LOOP. At most *LIMIT 47 instructions should be duplicated, limit is decreased by the actual 48 amount. */ 49 50 static bool 51 should_duplicate_loop_header_p (basic_block header, struct loop *loop, 52 int *limit) 53 { 54 gimple_stmt_iterator bsi; 55 56 gcc_assert (!header->aux); 57 58 /* Loop header copying usually increases size of the code. This used not to 59 be true, since quite often it is possible to verify that the condition is 60 satisfied in the first iteration and therefore to eliminate it. Jump 61 threading handles these cases now. */ 62 if (optimize_loop_for_size_p (loop) 63 && !loop->force_vectorize) 64 { 65 if (dump_file && (dump_flags & TDF_DETAILS)) 66 fprintf (dump_file, 67 " Not duplicating bb %i: optimizing for size.\n", 68 header->index); 69 return false; 70 } 71 72 gcc_assert (EDGE_COUNT (header->succs) > 0); 73 if (single_succ_p (header)) 74 { 75 if (dump_file && (dump_flags & TDF_DETAILS)) 76 fprintf (dump_file, 77 " Not duplicating bb %i: it is single succ.\n", 78 header->index); 79 return false; 80 } 81 82 if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest) 83 && flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 1)->dest)) 84 { 85 if (dump_file && (dump_flags & TDF_DETAILS)) 86 fprintf (dump_file, 87 " Not duplicating bb %i: both sucessors are in loop.\n", 88 loop->num); 89 return false; 90 } 91 92 /* If this is not the original loop header, we want it to have just 93 one predecessor in order to match the && pattern. */ 94 if (header != loop->header && !single_pred_p (header)) 95 { 96 if (dump_file && (dump_flags & TDF_DETAILS)) 97 fprintf (dump_file, 98 " Not duplicating bb %i: it has mutiple predecestors.\n", 99 header->index); 100 return false; 101 } 102 103 gcond *last = safe_dyn_cast <gcond *> (last_stmt (header)); 104 if (!last) 105 { 106 if (dump_file && (dump_flags & TDF_DETAILS)) 107 fprintf (dump_file, 108 " Not duplicating bb %i: it does not end by conditional.\n", 109 header->index); 110 return false; 111 } 112 113 for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (psi); 114 gsi_next (&psi)) 115 { 116 gphi *phi = psi.phi (); 117 tree res = gimple_phi_result (phi); 118 if (INTEGRAL_TYPE_P (TREE_TYPE (res)) 119 || POINTER_TYPE_P (TREE_TYPE (res))) 120 gimple_set_uid (phi, 1 /* IV */); 121 else 122 gimple_set_uid (phi, 0); 123 } 124 125 /* Count number of instructions and punt on calls. 126 Populate stmts INV/IV flag to later apply heuristics to the 127 kind of conditions we want to copy. */ 128 for (bsi = gsi_start_bb (header); !gsi_end_p (bsi); gsi_next (&bsi)) 129 { 130 gimple *last = gsi_stmt (bsi); 131 132 if (gimple_code (last) == GIMPLE_LABEL) 133 continue; 134 135 if (is_gimple_debug (last)) 136 continue; 137 138 if (gimple_code (last) == GIMPLE_CALL 139 && (!gimple_inexpensive_call_p (as_a <gcall *> (last)) 140 /* IFN_LOOP_DIST_ALIAS means that inner loop is distributed 141 at current loop's header. Don't copy in this case. */ 142 || gimple_call_internal_p (last, IFN_LOOP_DIST_ALIAS))) 143 { 144 if (dump_file && (dump_flags & TDF_DETAILS)) 145 fprintf (dump_file, 146 " Not duplicating bb %i: it contains call.\n", 147 header->index); 148 return false; 149 } 150 151 *limit -= estimate_num_insns (last, &eni_size_weights); 152 if (*limit < 0) 153 { 154 if (dump_file && (dump_flags & TDF_DETAILS)) 155 fprintf (dump_file, 156 " Not duplicating bb %i contains too many insns.\n", 157 header->index); 158 return false; 159 } 160 161 /* Classify the stmt based on whether its computation is based 162 on a IV or whether it is invariant in the loop. */ 163 gimple_set_uid (last, 0); 164 if (!gimple_vuse (last)) 165 { 166 bool inv = true; 167 bool iv = false; 168 ssa_op_iter i; 169 tree op; 170 FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE) 171 if (!SSA_NAME_IS_DEFAULT_DEF (op) 172 && flow_bb_inside_loop_p (loop, 173 gimple_bb (SSA_NAME_DEF_STMT (op)))) 174 { 175 if (!(gimple_uid (SSA_NAME_DEF_STMT (op)) & 2 /* INV */)) 176 inv = false; 177 if (gimple_uid (SSA_NAME_DEF_STMT (op)) & 1 /* IV */) 178 iv = true; 179 } 180 gimple_set_uid (last, (iv ? 1 : 0) | (inv ? 2 : 0)); 181 } 182 } 183 184 /* If the condition tests a non-IV loop variant we do not want to rotate 185 the loop further. Unless this is the original loop header. */ 186 tree lhs = gimple_cond_lhs (last); 187 tree rhs = gimple_cond_rhs (last); 188 if (header != loop->header 189 && ((TREE_CODE (lhs) == SSA_NAME 190 && !SSA_NAME_IS_DEFAULT_DEF (lhs) 191 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (lhs))) 192 && gimple_uid (SSA_NAME_DEF_STMT (lhs)) == 0) 193 || (TREE_CODE (rhs) == SSA_NAME 194 && !SSA_NAME_IS_DEFAULT_DEF (rhs) 195 && flow_bb_inside_loop_p (loop, 196 gimple_bb (SSA_NAME_DEF_STMT (rhs))) 197 && gimple_uid (SSA_NAME_DEF_STMT (rhs)) == 0))) 198 { 199 if (dump_file && (dump_flags & TDF_DETAILS)) 200 fprintf (dump_file, 201 " Not duplicating bb %i: condition based on non-IV loop" 202 "variant.\n", header->index); 203 return false; 204 } 205 206 if (dump_file && (dump_flags & TDF_DETAILS)) 207 fprintf (dump_file, " Will duplicate bb %i\n", header->index); 208 return true; 209 } 210 211 /* Checks whether LOOP is a do-while style loop. */ 212 213 static bool 214 do_while_loop_p (struct loop *loop) 215 { 216 gimple *stmt = last_stmt (loop->latch); 217 218 /* If the latch of the loop is not empty, it is not a do-while loop. */ 219 if (stmt 220 && gimple_code (stmt) != GIMPLE_LABEL) 221 { 222 if (dump_file && (dump_flags & TDF_DETAILS)) 223 fprintf (dump_file, 224 "Loop %i is not do-while loop: latch is not empty.\n", 225 loop->num); 226 return false; 227 } 228 229 /* If the latch does not have a single predecessor, it is not a 230 do-while loop. */ 231 if (!single_pred_p (loop->latch)) 232 { 233 if (dump_file && (dump_flags & TDF_DETAILS)) 234 fprintf (dump_file, 235 "Loop %i is not do-while loop: latch has multiple " 236 "predecessors.\n", loop->num); 237 return false; 238 } 239 240 /* If the latch predecessor doesn't exit the loop, it is not a 241 do-while loop. */ 242 if (!loop_exits_from_bb_p (loop, single_pred (loop->latch))) 243 { 244 if (dump_file && (dump_flags & TDF_DETAILS)) 245 fprintf (dump_file, 246 "Loop %i is not do-while loop: latch predecessor " 247 "does not exit loop.\n", loop->num); 248 return false; 249 } 250 251 if (dump_file && (dump_flags & TDF_DETAILS)) 252 fprintf (dump_file, "Loop %i is do-while loop\n", loop->num); 253 254 return true; 255 } 256 257 namespace { 258 259 /* Common superclass for both header-copying phases. */ 260 class ch_base : public gimple_opt_pass 261 { 262 protected: 263 ch_base (pass_data data, gcc::context *ctxt) 264 : gimple_opt_pass (data, ctxt) 265 {} 266 267 /* Copies headers of all loops in FUN for which process_loop_p is true. */ 268 unsigned int copy_headers (function *fun); 269 270 /* Return true to copy headers of LOOP or false to skip. */ 271 virtual bool process_loop_p (struct loop *loop) = 0; 272 }; 273 274 const pass_data pass_data_ch = 275 { 276 GIMPLE_PASS, /* type */ 277 "ch", /* name */ 278 OPTGROUP_LOOP, /* optinfo_flags */ 279 TV_TREE_CH, /* tv_id */ 280 ( PROP_cfg | PROP_ssa ), /* properties_required */ 281 0, /* properties_provided */ 282 0, /* properties_destroyed */ 283 0, /* todo_flags_start */ 284 0, /* todo_flags_finish */ 285 }; 286 287 class pass_ch : public ch_base 288 { 289 public: 290 pass_ch (gcc::context *ctxt) 291 : ch_base (pass_data_ch, ctxt) 292 {} 293 294 /* opt_pass methods: */ 295 virtual bool gate (function *) { return flag_tree_ch != 0; } 296 297 /* Initialize and finalize loop structures, copying headers inbetween. */ 298 virtual unsigned int execute (function *); 299 300 opt_pass * clone () { return new pass_ch (m_ctxt); } 301 302 protected: 303 /* ch_base method: */ 304 virtual bool process_loop_p (struct loop *loop); 305 }; // class pass_ch 306 307 const pass_data pass_data_ch_vect = 308 { 309 GIMPLE_PASS, /* type */ 310 "ch_vect", /* name */ 311 OPTGROUP_LOOP, /* optinfo_flags */ 312 TV_TREE_CH, /* tv_id */ 313 ( PROP_cfg | PROP_ssa ), /* properties_required */ 314 0, /* properties_provided */ 315 0, /* properties_destroyed */ 316 0, /* todo_flags_start */ 317 0, /* todo_flags_finish */ 318 }; 319 320 /* This is a more aggressive version of the same pass, designed to run just 321 before if-conversion and vectorization, to put more loops into the form 322 required for those phases. */ 323 class pass_ch_vect : public ch_base 324 { 325 public: 326 pass_ch_vect (gcc::context *ctxt) 327 : ch_base (pass_data_ch_vect, ctxt) 328 {} 329 330 /* opt_pass methods: */ 331 virtual bool gate (function *fun) 332 { 333 return flag_tree_ch != 0 334 && (flag_tree_loop_vectorize != 0 || fun->has_force_vectorize_loops); 335 } 336 337 /* Just copy headers, no initialization/finalization of loop structures. */ 338 virtual unsigned int execute (function *); 339 340 protected: 341 /* ch_base method: */ 342 virtual bool process_loop_p (struct loop *loop); 343 }; // class pass_ch_vect 344 345 /* For all loops, copy the condition at the end of the loop body in front 346 of the loop. This is beneficial since it increases efficiency of 347 code motion optimizations. It also saves one jump on entry to the loop. */ 348 349 unsigned int 350 ch_base::copy_headers (function *fun) 351 { 352 struct loop *loop; 353 basic_block header; 354 edge exit, entry; 355 basic_block *bbs, *copied_bbs; 356 unsigned n_bbs; 357 unsigned bbs_size; 358 bool changed = false; 359 360 if (number_of_loops (fun) <= 1) 361 return 0; 362 363 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); 364 copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); 365 bbs_size = n_basic_blocks_for_fn (fun); 366 367 auto_vec<std::pair<edge, loop_p> > copied; 368 369 FOR_EACH_LOOP (loop, 0) 370 { 371 int initial_limit = PARAM_VALUE (PARAM_MAX_LOOP_HEADER_INSNS); 372 int remaining_limit = initial_limit; 373 if (dump_file && (dump_flags & TDF_DETAILS)) 374 fprintf (dump_file, 375 "Analyzing loop %i\n", loop->num); 376 377 header = loop->header; 378 379 /* If the loop is already a do-while style one (either because it was 380 written as such, or because jump threading transformed it into one), 381 we might be in fact peeling the first iteration of the loop. This 382 in general is not a good idea. Also avoid touching infinite loops. */ 383 if (!loop_has_exit_edges (loop) 384 || !process_loop_p (loop)) 385 continue; 386 387 /* Iterate the header copying up to limit; this takes care of the cases 388 like while (a && b) {...}, where we want to have both of the conditions 389 copied. TODO -- handle while (a || b) - like cases, by not requiring 390 the header to have just a single successor and copying up to 391 postdominator. */ 392 393 exit = NULL; 394 n_bbs = 0; 395 while (should_duplicate_loop_header_p (header, loop, &remaining_limit)) 396 { 397 /* Find a successor of header that is inside a loop; i.e. the new 398 header after the condition is copied. */ 399 if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)) 400 exit = EDGE_SUCC (header, 0); 401 else 402 exit = EDGE_SUCC (header, 1); 403 bbs[n_bbs++] = header; 404 gcc_assert (bbs_size > n_bbs); 405 header = exit->dest; 406 } 407 408 if (!exit) 409 continue; 410 411 if (dump_file && (dump_flags & TDF_DETAILS)) 412 fprintf (dump_file, 413 "Duplicating header of the loop %d up to edge %d->%d," 414 " %i insns.\n", 415 loop->num, exit->src->index, exit->dest->index, 416 initial_limit - remaining_limit); 417 418 /* Ensure that the header will have just the latch as a predecessor 419 inside the loop. */ 420 if (!single_pred_p (exit->dest)) 421 exit = single_pred_edge (split_edge (exit)); 422 423 entry = loop_preheader_edge (loop); 424 425 propagate_threaded_block_debug_into (exit->dest, entry->dest); 426 if (!gimple_duplicate_sese_region (entry, exit, bbs, n_bbs, copied_bbs, 427 true)) 428 { 429 fprintf (dump_file, "Duplication failed.\n"); 430 continue; 431 } 432 copied.safe_push (std::make_pair (entry, loop)); 433 434 /* If the loop has the form "for (i = j; i < j + 10; i++)" then 435 this copying can introduce a case where we rely on undefined 436 signed overflow to eliminate the preheader condition, because 437 we assume that "j < j + 10" is true. We don't want to warn 438 about that case for -Wstrict-overflow, because in general we 439 don't warn about overflow involving loops. Prevent the 440 warning by setting the no_warning flag in the condition. */ 441 if (warn_strict_overflow > 0) 442 { 443 unsigned int i; 444 445 for (i = 0; i < n_bbs; ++i) 446 { 447 gimple_stmt_iterator bsi; 448 449 for (bsi = gsi_start_bb (copied_bbs[i]); 450 !gsi_end_p (bsi); 451 gsi_next (&bsi)) 452 { 453 gimple *stmt = gsi_stmt (bsi); 454 if (gimple_code (stmt) == GIMPLE_COND) 455 { 456 tree lhs = gimple_cond_lhs (stmt); 457 if (gimple_cond_code (stmt) != EQ_EXPR 458 && gimple_cond_code (stmt) != NE_EXPR 459 && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 460 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))) 461 gimple_set_no_warning (stmt, true); 462 } 463 else if (is_gimple_assign (stmt)) 464 { 465 enum tree_code rhs_code = gimple_assign_rhs_code (stmt); 466 tree rhs1 = gimple_assign_rhs1 (stmt); 467 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison 468 && rhs_code != EQ_EXPR 469 && rhs_code != NE_EXPR 470 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) 471 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1))) 472 gimple_set_no_warning (stmt, true); 473 } 474 } 475 } 476 } 477 478 /* Ensure that the latch and the preheader is simple (we know that they 479 are not now, since there was the loop exit condition. */ 480 split_edge (loop_preheader_edge (loop)); 481 split_edge (loop_latch_edge (loop)); 482 483 if (dump_file && (dump_flags & TDF_DETAILS)) 484 { 485 if (do_while_loop_p (loop)) 486 fprintf (dump_file, "Loop %d is now do-while loop.\n", loop->num); 487 else 488 fprintf (dump_file, "Loop %d is still not do-while loop.\n", 489 loop->num); 490 } 491 492 changed = true; 493 } 494 495 if (changed) 496 { 497 update_ssa (TODO_update_ssa); 498 /* After updating SSA form perform CSE on the loop header 499 copies. This is esp. required for the pass before 500 vectorization since nothing cleans up copied exit tests 501 that can now be simplified. CSE from the entry of the 502 region we copied till all loop exit blocks but not 503 entering the loop itself. */ 504 for (unsigned i = 0; i < copied.length (); ++i) 505 { 506 edge entry = copied[i].first; 507 loop_p loop = copied[i].second; 508 vec<edge> exit_edges = get_loop_exit_edges (loop); 509 bitmap exit_bbs = BITMAP_ALLOC (NULL); 510 for (unsigned j = 0; j < exit_edges.length (); ++j) 511 bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index); 512 bitmap_set_bit (exit_bbs, loop->header->index); 513 do_rpo_vn (cfun, entry, exit_bbs); 514 BITMAP_FREE (exit_bbs); 515 exit_edges.release (); 516 } 517 } 518 free (bbs); 519 free (copied_bbs); 520 521 return changed ? TODO_cleanup_cfg : 0; 522 } 523 524 /* Initialize the loop structures we need, and finalize after. */ 525 526 unsigned int 527 pass_ch::execute (function *fun) 528 { 529 loop_optimizer_init (LOOPS_HAVE_PREHEADERS 530 | LOOPS_HAVE_SIMPLE_LATCHES 531 | LOOPS_HAVE_RECORDED_EXITS); 532 533 unsigned int res = copy_headers (fun); 534 535 loop_optimizer_finalize (); 536 return res; 537 } 538 539 /* Assume an earlier phase has already initialized all the loop structures that 540 we need here (and perhaps others too), and that these will be finalized by 541 a later phase. */ 542 543 unsigned int 544 pass_ch_vect::execute (function *fun) 545 { 546 return copy_headers (fun); 547 } 548 549 /* Apply header copying according to a very simple test of do-while shape. */ 550 551 bool 552 pass_ch::process_loop_p (struct loop *loop) 553 { 554 return !do_while_loop_p (loop); 555 } 556 557 /* Apply header-copying to loops where we might enable vectorization. */ 558 559 bool 560 pass_ch_vect::process_loop_p (struct loop *loop) 561 { 562 if (!flag_tree_loop_vectorize && !loop->force_vectorize) 563 return false; 564 565 if (loop->dont_vectorize) 566 return false; 567 568 /* The vectorizer won't handle anything with multiple exits, so skip. */ 569 edge exit = single_exit (loop); 570 if (!exit) 571 return false; 572 573 if (!do_while_loop_p (loop)) 574 return true; 575 576 return false; 577 } 578 579 } // anon namespace 580 581 gimple_opt_pass * 582 make_pass_ch_vect (gcc::context *ctxt) 583 { 584 return new pass_ch_vect (ctxt); 585 } 586 587 gimple_opt_pass * 588 make_pass_ch (gcc::context *ctxt) 589 { 590 return new pass_ch (ctxt); 591 } 592