/* Loop unrolling.
   Copyright (C) 2002-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "cfghooks.h"
#include "memmodel.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "profile.h"
#include "cfgrtl.h"
#include "cfgloop.h"
#include "params.h"
#include "dojump.h"
#include "expr.h"
#include "dumpfile.h"

/* This pass performs loop unrolling.  We only perform this
   optimization on innermost loops (with a single exception) because
   the impact on performance is greatest here, and we want to avoid
   unnecessary code size growth.  The gain comes from greater sequentiality
   of code, better opportunities for further optimization passes and, in
   some cases, from fewer tests of exit conditions.  The main problem is
   code growth, which impacts performance negatively due to its effect
   on caches.

   What we do:

   -- unrolling of loops that roll a constant number of times; this is
      almost always a win, as we get rid of the exit condition tests
   -- unrolling of loops that roll a number of times that we can compute
      at runtime; we also get rid of exit condition tests here, but there
      is the extra expense of calculating the number of iterations
   -- simple unrolling of the remaining loops; this is performed only if
      we are asked to, as the gain is questionable in this case and often
      it may even slow down the code
   For a more detailed description of each of those, see the comments at
   the appropriate functions below.

   There are a lot of parameters (defined and described in params.def) that
   control how much we unroll.

   ??? A great problem is that we don't have a good way to determine
   how many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance by several percent.  */
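/* As an informal illustration (ours, not from the original comments), the
   three cases above correspond to loop shapes like:

     for (i = 0; i < 100; i++)  ...  -- rolls a constant number of times
     for (i = 0; i < n; i++)  ...    -- the number of iterations is
					computable at runtime
     while (*p++ != 0)  ...          -- neither; only "stupid" unrolling,
					which keeps all exit tests, applies.  */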
/* Information about induction variables to split.  */

struct iv_to_split
{
  rtx_insn *insn;	     /* The insn in which the induction variable
				occurs.  */
  rtx orig_var;		     /* The variable (register) for the IV before
				split.  */
  rtx base_var;		     /* The variable on which the values in the
				further iterations are based.  */
  rtx step;		     /* Step of the induction variable.  */
  struct iv_to_split *next;  /* Next entry in walking order.  */
};

/* Information about accumulators to expand.  */

struct var_to_expand
{
  rtx_insn *insn;	      /* The insn in which the variable expansion
				 occurs.  */
  rtx reg;		      /* The accumulator which is expanded.  */
  vec<rtx> var_expansions;    /* The copies of the accumulator which is
				 expanded.  */
  struct var_to_expand *next; /* Next entry in walking order.  */
  enum rtx_code op;	      /* The type of the accumulation - addition,
				 subtraction or multiplication.  */
  int expansion_count;	      /* Count the number of expansions generated
				 so far.  */
  int reuse_expansion;	      /* The expansion we intend to reuse to expand
				 the accumulator.  If REUSE_EXPANSION is 0
				 reuse the original accumulator.  Else use
				 var_expansions[REUSE_EXPANSION - 1].  */
};

/* Hashtable helper for iv_to_split.  */

struct iv_split_hasher : free_ptr_hash <iv_to_split>
{
  static inline hashval_t hash (const iv_to_split *);
  static inline bool equal (const iv_to_split *, const iv_to_split *);
};


/* A hash function for information about insns to split.  */

inline hashval_t
iv_split_hasher::hash (const iv_to_split *ivts)
{
  return (hashval_t) INSN_UID (ivts->insn);
}

/* An equality function for information about insns to split.  */

inline bool
iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
{
  return i1->insn == i2->insn;
}

/* Hashtable helper for var_to_expand.  */

struct var_expand_hasher : free_ptr_hash <var_to_expand>
{
  static inline hashval_t hash (const var_to_expand *);
  static inline bool equal (const var_to_expand *, const var_to_expand *);
};

/* Return a hash for VES.  */

inline hashval_t
var_expand_hasher::hash (const var_to_expand *ves)
{
  return (hashval_t) INSN_UID (ves->insn);
}

/* Return true if I1 and I2 refer to the same instruction.  */

inline bool
var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
{
  return i1->insn == i2->insn;
}

/* Information about optimization applied in
   the unrolled loop.  */

struct opt_info
{
  hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
						  split.  */
  struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
  struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
  hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
					insns with accumulators to expand.  */
  struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
  struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
  unsigned first_new_block;	/* The first basic block that was
				   duplicated.  */
  basic_block loop_exit;	/* The loop exit basic block.  */
  basic_block loop_preheader;	/* The loop preheader basic block.  */
};
static void decide_unroll_stupid (struct loop *, int);
static void decide_unroll_constant_iterations (struct loop *, int);
static void decide_unroll_runtime_iterations (struct loop *, int);
static void unroll_loop_stupid (struct loop *);
static void decide_unrolling (int);
static void unroll_loop_constant_iterations (struct loop *);
static void unroll_loop_runtime_iterations (struct loop *);
static struct opt_info *analyze_insns_in_loop (struct loop *);
static void opt_info_start_duplication (struct opt_info *);
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
static void free_opt_info (struct opt_info *);
static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
static void insert_var_expansion_initialization (struct var_to_expand *,
						 basic_block);
static void combine_var_copies_in_loop_exit (struct var_to_expand *,
					     basic_block);
static rtx get_expansion (struct var_to_expand *);

/* Emit a message summarizing the unroll that will be
   performed for LOOP, along with the loop's location LOCUS, if
   appropriate given the dump or -fopt-info settings.  */

static void
report_unroll (struct loop *loop, dump_location_t locus)
{
  dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;

  if (loop->lpt_decision.decision == LPT_NONE)
    return;

  if (!dump_enabled_p ())
    return;

  dump_metadata_t metadata (report_flags, locus.get_impl_location ());
  dump_printf_loc (metadata, locus.get_user_location (),
		   "loop unrolled %d times",
		   loop->lpt_decision.times);
  if (profile_info && loop->header->count.initialized_p ())
    dump_printf (metadata,
		 " (header execution count %d)",
		 (int)loop->header->count.to_gcov_type ());

  dump_printf (metadata, "\n");
}
/* Decide whether to unroll loops and how much.  */
static void
decide_unrolling (int flags)
{
  struct loop *loop;

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;
      dump_user_location_t locus = get_loop_location (loop);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, locus,
			 "considering unrolling loop %d at BB %d\n",
			 loop->num, loop->header->index);

      if (loop->unroll == 1)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not unrolling loop, user didn't want it unrolled\n");
	  continue;
	}

      /* Do not peel cold areas.  */
      if (optimize_loop_for_size_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, cold area\n");
	  continue;
	}

      /* Can the loop be manipulated?  */
      if (!can_duplicate_loop_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, cannot duplicate\n");
	  continue;
	}

      /* Skip non-innermost loops.  */
      if (loop->inner)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
	  continue;
	}

      loop->ninsns = num_loop_insns (loop);
      loop->av_ninsns = average_num_loop_insns (loop);

      /* Try transformations one by one in decreasing order of priority.  */
      decide_unroll_constant_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_runtime_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_stupid (loop, flags);

      report_unroll (loop, locus);
    }
}

/* Unroll LOOPS.  */
void
unroll_loops (int flags)
{
  struct loop *loop;
  bool changed = false;

  /* Now decide the rest of the unrolling.  */
  decide_unrolling (flags);

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      /* And perform the appropriate transformations.  */
      switch (loop->lpt_decision.decision)
	{
	case LPT_UNROLL_CONSTANT:
	  unroll_loop_constant_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_RUNTIME:
	  unroll_loop_runtime_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_STUPID:
	  unroll_loop_stupid (loop);
	  changed = true;
	  break;
	case LPT_NONE:
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  if (changed)
    {
      calculate_dominance_info (CDI_DOMINATORS);
      fix_loop_structure (NULL);
    }

  iv_analysis_done ();
}

/* Check whether the exit of the LOOP is at the end of the loop body.  */

static bool
loop_exit_at_end_p (struct loop *loop)
{
  struct niter_desc *desc = get_simple_loop_desc (loop);
  rtx_insn *insn;

  /* We should never have a conditional in the latch block.  */
  gcc_assert (desc->in_edge->dest != loop->header);

  if (desc->in_edge->dest != loop->latch)
    return false;

  /* Check that the latch is empty.  */
  FOR_BB_INSNS (loop->latch, insn)
    {
      if (INSN_P (insn) && active_insn_p (insn))
	return false;
    }

  return true;
}
/* Decide whether to unroll LOOP iterating a constant number of times
   and how much.  */

static void
decide_unroll_constant_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
  struct niter_desc *desc;
  widest_int iterations;

  /* If we were not asked to unroll this loop, just return silently.  */
  if (!(flags & UAP_UNROLL) && !loop->unroll)
    return;

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "considering unrolling loop with constant "
		 "number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop body
     once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check the number of iterations.  */
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  /* Check for an explicit unrolling factor.  */
  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
    {
      /* However we cannot unroll completely at the RTL level a loop with
	 a constant number of iterations; it should have been peeled
	 instead.  */
      if (desc->niter == 0 || (unsigned) loop->unroll > desc->niter - 1)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Loop should have been peeled\n");
	}
      else
	{
	  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
	  loop->lpt_decision.times = loop->unroll - 1;
	}
      return;
    }

  /* Check whether the loop rolls enough to consider.
     Consult also the loop bounds and profile; in case the loop has more
     than one exit it may well loop fewer times than the determined maximal
     number of iterations.  */
  if (desc->niter < 2 * nunroll
      || ((get_estimated_loop_iterations (loop, &iterations)
	   || get_likely_max_loop_iterations (loop, &iterations))
	  && wi::ltu_p (iterations, 2 * nunroll)))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now compute the number of iterations to unroll.  We alter
     nunroll so that as few copies of the loop body as possible are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  */
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
  if (i > desc->niter - 2)
    i = desc->niter - 2;

  for (; i >= nunroll - 1; i--)
    {
      unsigned exit_mod = desc->niter % (i + 1);

      if (!loop_exit_at_end_p (loop))
	n_copies = exit_mod + i + 1;
      else if (exit_mod != (unsigned) i
	       || desc->noloop_assumptions != NULL_RTX)
	n_copies = exit_mod + i + 2;
      else
	n_copies = i + 1;

      if (n_copies < best_copies)
	{
	  best_copies = n_copies;
	  best_unroll = i;
	}
    }

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;
}
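/* A worked example of the cost search above (illustrative numbers only):
   with desc->niter == 100 and the exit not at the end, candidate i == 9
   (10 copies in the unrolled body) gives exit_mod == 100 % 10 == 0 and
   n_copies == 0 + 9 + 1 == 10, while candidate i == 7 gives
   exit_mod == 100 % 8 == 4 and n_copies == 4 + 7 + 1 == 12.  The first
   candidate therefore wins: it needs no peeled copies in front of the
   loop and still unrolls more.  */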
/* Unroll LOOP with a constant number of iterations LOOP->LPT_DECISION.TIMES
   times.  The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_constant_iterations (struct loop *loop)
{
  unsigned HOST_WIDE_INT niter;
  unsigned exit_mod;
  unsigned i;
  edge e;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  niter = desc->niter;

  /* Should not get here (such a loop should be peeled instead).  */
  gcc_assert (niter > max_unroll + 1);

  exit_mod = niter % (max_unroll + 1);

  auto_sbitmap wont_exit (max_unroll + 2);
  bitmap_ones (wont_exit);

  auto_vec<edge> remove_edges;
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave the exit test
	 in the first copy, so that loops that start with a test of the
	 exit condition have a contiguous body after unrolling.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at beginning of loop.\n");

      /* Peel exit_mod iterations.  */
      bitmap_clear_bit (wont_exit, 0);
      if (desc->noloop_assumptions)
	bitmap_clear_bit (wont_exit, 1);

      if (exit_mod)
	{
	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 1
						 ? DLTHE_RECORD_COPY_NUMBER
						 : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 1)
	    apply_opt_in_copies (opt_info, exit_mod, false, false);

	  desc->noloop_assumptions = NULL_RTX;
	  desc->niter -= exit_mod;
	  loop->nb_iterations_upper_bound -= exit_mod;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod;
	  else
	    loop->any_estimate = false;
	  if (loop->any_likely_upper_bound
	      && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound))
	    loop->nb_iterations_likely_upper_bound -= exit_mod;
	  else
	    loop->any_likely_upper_bound = false;
	}

      bitmap_set_bit (wont_exit, 1);
    }
  else
    {
      /* Leave the exit test in the last copy, for the same reason as above
	 if the loop tests the condition at the end of the loop body.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at end of loop.\n");

      /* We know that niter >= max_unroll + 2; so we do not need to care
	 about the case where we would exit before reaching the loop.  So
	 just peel exit_mod + 1 iterations.  */
      if (exit_mod != max_unroll
	  || desc->noloop_assumptions)
	{
	  bitmap_clear_bit (wont_exit, 0);
	  if (desc->noloop_assumptions)
	    bitmap_clear_bit (wont_exit, 1);

	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod + 1,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 0
						 ? DLTHE_RECORD_COPY_NUMBER
						 : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 0)
	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);

	  desc->niter -= exit_mod + 1;
	  loop->nb_iterations_upper_bound -= exit_mod + 1;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod + 1;
	  else
	    loop->any_estimate = false;
	  if (loop->any_likely_upper_bound
	      && wi::leu_p (exit_mod + 1,
			    loop->nb_iterations_likely_upper_bound))
	    loop->nb_iterations_likely_upper_bound -= exit_mod + 1;
	  else
	    loop->any_likely_upper_bound = false;
	  desc->noloop_assumptions = NULL_RTX;

	  bitmap_set_bit (wont_exit, 0);
	  bitmap_set_bit (wont_exit, 1);
	}

      bitmap_clear_bit (wont_exit, max_unroll);
    }

  /* Now unroll the loop.  */

  opt_info_start_duplication (opt_info);
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  desc->niter /= max_unroll + 1;
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (loop->any_likely_upper_bound)
    loop->nb_iterations_likely_upper_bound
      = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound,
			max_unroll + 1);
  desc->niter_expr = gen_int_mode (desc->niter, desc->mode);

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
	     max_unroll, num_loop_insns (loop));
}
/* Decide whether to unroll LOOP iterating a runtime-computable number of
   times and how much.  */
static void
decide_unroll_runtime_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  /* If we were not asked to unroll this loop, just return silently.  */
  if (!(flags & UAP_UNROLL) && !loop->unroll)
    return;

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "considering unrolling loop with runtime-"
		 "computable number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop body
     once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
    nunroll = loop->unroll;

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (!desc->simple_p || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the number of iterations "
		 "can be counted in runtime\n");
      return;
    }

  if (desc->const_iter)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_likely_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now force nunroll to be a power of 2, as the code generation
     requires it: we are unable to cope with overflows in the computation
     of the number of iterations.  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
  loop->lpt_decision.times = i - 1;
}
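/* For instance (illustrative values): with nunroll == 6 the loop above
   leaves i == 4, the largest power of 2 not exceeding 6, so
   lpt_decision.times becomes 3, i.e. four copies of the body in the
   unrolled loop.  */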
/* Splits edge E and inserts the sequence of instructions INSNS on it, and
   returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
   and NULL is returned instead.  */

basic_block
split_edge_and_insert (edge e, rtx_insn *insns)
{
  basic_block bb;

  if (!insns)
    return NULL;
  bb = split_edge (e);
  emit_insn_after (insns, BB_END (bb));

  /* ??? We used to assume that INSNS can contain control flow insns, and
     that we had to try to find sub basic blocks in BB to maintain a valid
     CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
     and call break_superblocks when going out of cfglayout mode.  But it
     turns out that this never happens; and that if it does ever happen,
     the verify_flow_info at the end of the RTL loop passes would fail.

     There are two reasons why we expected we could have control flow insns
     in INSNS.  The first is when a comparison has to be done in parts, and
     the second is when the number of iterations is computed for loops with
     the number of iterations known at runtime.  In both cases, test cases
     to get control flow in INSNS appear to be impossible to construct:

       * If do_compare_rtx_and_jump needs several branches to do comparison
	 in a mode that needs comparison by parts, we cannot analyze the
	 number of iterations of the loop, and we never get to unrolling it.

       * The code in expand_divmod that was suspected to cause creation of
	 branching code seems to be only accessed for signed division.  The
	 divisions used by # of iterations analysis are always unsigned.
	 Problems might arise on architectures that emit branching code
	 for some operations that may appear in the unroller (especially
	 for division), but we have no such architectures.

     Considering all this, it was decided that we should for now assume
     that INSNS can in theory contain control flow insns, but in practice
     it never does.  So we don't handle the theoretical case, and should
     a real failure ever show up, we have a pretty good clue for how to
     fix it.  */

  return bb;
}

/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL
   if true, with probability PROB.  If CINSN is not NULL, it is the insn to
   copy in order to create a jump.  */

static rtx_insn *
compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
		      rtx_code_label *label, profile_probability prob,
		      rtx_insn *cinsn)
{
  rtx_insn *seq;
  rtx_jump_insn *jump;
  rtx cond;
  machine_mode mode;

  mode = GET_MODE (op0);
  if (mode == VOIDmode)
    mode = GET_MODE (op1);

  start_sequence ();
  if (GET_MODE_CLASS (mode) == MODE_CC)
    {
      /* A hack -- there seems to be no easy generic way to make a
	 conditional jump from a ccmode comparison.  */
      gcc_assert (cinsn);
      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
      gcc_assert (GET_CODE (cond) == comp);
      gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
      gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
      emit_jump_insn (copy_insn (PATTERN (cinsn)));
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
      LABEL_NUSES (JUMP_LABEL (jump))++;
      redirect_jump (jump, label, 0);
    }
  else
    {
      gcc_assert (!cinsn);

      op0 = force_operand (op0, NULL_RTX);
      op1 = force_operand (op1, NULL_RTX);
      do_compare_rtx_and_jump (op0, op1, comp, 0,
			       mode, NULL_RTX, NULL, label,
			       profile_probability::uninitialized ());
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      jump->set_jump_target (label);
      LABEL_NUSES (label)++;
    }
  if (prob.initialized_p ())
    add_reg_br_prob_note (jump, prob);

  seq = get_insns ();
  end_sequence ();

  return seq;
}
/* Unroll LOOP for which we are able to count the number of iterations at
   runtime LOOP->LPT_DECISION.TIMES times.  The times value must be a
   power of two.  The transformation does this (with some extra care
   for the case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
	 body; i++;
       case 2:
	 body; i++;
       case 1:
	 body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_runtime_iterations (struct loop *loop)
{
  rtx old_niter, niter, tmp;
  rtx_insn *init_code, *branch_code;
  unsigned i, j;
  profile_probability p;
  basic_block preheader, *body, swtch, ezc_swtch = NULL;
  int may_exit_copy;
  profile_count iter_count, new_count;
  unsigned n_peel;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  auto_vec<basic_block> dom_bbs;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      vec<basic_block> ldom;
      basic_block bb;

      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      FOR_EACH_VEC_ELT (ldom, j, bb)
	if (!flow_bb_inside_loop_p (loop, bb))
	  dom_bbs.safe_push (bb);

      ldom.release ();
    }
  free (body);

  if (!exit_at_end)
    {
      /* Leave the exit in the first copy (for explanation why see comment
	 in unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave the exit in the last copy (for explanation why see comment
	 in unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }
  /* Get the expression for the number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* For loops that exit at the end and whose number of iterations is
     reliable, add one to niter to account for the first pass through the
     loop body before reaching the exit test.  */
  if (exit_at_end && !desc->noloop_assumptions)
    {
      niter = expand_simple_binop (desc->mode, PLUS,
				   niter, const1_rtx,
				   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      old_niter = niter;
    }

  /* Count the modulo by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  */
  niter = expand_simple_binop (desc->mode, AND,
			       niter, gen_int_mode (max_unroll, desc->mode),
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);
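  /* E.g. (our illustration): with LOOP->LPT_DECISION.TIMES == 3 the body is
     replicated 4 times and max_unroll == 3 == 0b11, so the AND above
     computes niter & 3 == niter % 4.  Because 4 divides 2^GET_MODE_BITSIZE,
     this still gives the mathematically correct residue even when the PLUS
     above wrapped around to 0.  */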
  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  auto_vec<edge> remove_edges;

  auto_sbitmap wont_exit (max_unroll + 2);

  if (extra_zero_check || desc->noloop_assumptions)
    {
      /* Peel the first copy of the loop body.  Leave the exit test if the
	 number of iterations is not reliable.  Also record the place of
	 the extra zero check.  */
      bitmap_clear (wont_exit);
      if (!desc->noloop_assumptions)
	bitmap_set_bit (wont_exit, 1);
      ezc_swtch = loop_preheader_edge (loop)->src;
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);
    }

  /* Record the place where the switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  /* Compute count increments for each switch block and initialize the
     innermost switch block.  Switch blocks and peeled loop copies are
     built from innermost outward.  */
  iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1);
  swtch->count = new_count;

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  */
      bitmap_clear (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
	bitmap_set_bit (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create the item for the switch.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      p = profile_probability::always ().apply_scale (1, i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      /* Add in the count of the edge from the switch block.  */
      preheader->count += iter_count;
      branch_code = compare_and_jump_seq (copy_rtx (niter),
					  gen_int_mode (j, desc->mode), EQ,
					  block_label (preheader), p, NULL);

      /* We rely on the fact that the compare and jump cannot be optimized
	 out, and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = p.invert ();
      new_count += iter_count;
      swtch->count = new_count;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add the branch for zero iterations.  */
      p = profile_probability::always ().apply_scale (1, max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      /* Recompute count adjustments since the initial peel copy may
	 have exited and reduced those values that were computed above.  */
      iter_count = swtch->count.apply_scale (1, max_unroll + 1);
      /* Add in the count of the edge from the switch block.  */
      preheader->count += iter_count;
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
					  block_label (preheader), p,
					  NULL);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = p.invert ();
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll the loop.  */

  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);
  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
			 gen_int_mode (max_unroll + 1, desc->mode));
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (loop->any_likely_upper_bound)
    loop->nb_iterations_likely_upper_bound
      = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound,
			max_unroll + 1);
  if (exit_at_end)
    {
      desc->niter_expr =
	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      --loop->nb_iterations_upper_bound;
      if (loop->any_estimate
	  && loop->nb_iterations_estimate != 0)
	--loop->nb_iterations_estimate;
      else
	loop->any_estimate = false;
      if (loop->any_likely_upper_bound
	  && loop->nb_iterations_likely_upper_bound != 0)
	--loop->nb_iterations_likely_upper_bound;
      else
	loop->any_likely_upper_bound = false;
    }

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, counting # of iterations "
	     "in runtime, %i insns\n",
	     max_unroll, num_loop_insns (loop));
}

/* Decide whether to unroll LOOP stupidly and how much.  */
static void
decide_unroll_stupid (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  /* If we were not asked to unroll this loop, just return silently.  */
  if (!(flags & UAP_UNROLL_ALL) && !loop->unroll)
    return;

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "considering unrolling loop stupidly\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop body
     once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
    nunroll = loop->unroll;

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (desc->simple_p && !desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop is simple\n");
      return;
    }

  /* Do not unroll loops with branches inside -- it increases the number
     of mispredicts.
     TODO: this heuristic needs tuning; a call inside the loop body
     is also a relatively good reason not to unroll.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling, contains branches\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_likely_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success.  Now force nunroll to be a power of 2, as it seems that this
     improves results (partially because of better alignments, partially
     because of some dark magic).  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
  loop->lpt_decision.times = i - 1;
}
/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does
   this:

   while (cond)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   while (cond)
     {
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
     }
   */
static void
unroll_loop_stupid (struct loop *loop)
{
  unsigned nunroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  auto_sbitmap wont_exit (nunroll + 1);
  bitmap_clear (wont_exit);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      nunroll, wont_exit,
				      NULL, NULL,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, nunroll, true, true);
      free_opt_info (opt_info);
    }

  if (desc->simple_p)
    {
      /* We may indeed get here, provided that there are nontrivial
	 assumptions for the loop to be really simple.  We could update
	 the counts, but the problem is that we are unable to decide
	 which exit will be taken (not really true in case the number of
	 iterations is constant, but no one will do anything with this
	 information, so we do not worry about it).  */
      desc->simple_p = false;
    }

  if (dump_file)
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
	     nunroll, num_loop_insns (loop));
}

/* Returns true if REG is referenced in one nondebug insn in LOOP.
   Set *DEBUG_USES to the number of debug insns that reference the
   variable.  */

static bool
referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
				  int *debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  int count_ref = 0;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!rtx_referenced_p (reg, insn))
	  continue;
	else if (DEBUG_INSN_P (insn))
	  ++*debug_uses;
	else if (++count_ref > 1)
	  break;
    }
  free (body);
  return (count_ref == 1);
}
/* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */

static void
reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; debug_uses && i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
	  continue;
	else
	  {
	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
	    if (!--debug_uses)
	      break;
	  }
    }
  free (body);
}

/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += a[i];

   ==>

   sum += a[i]
   ....
   i = i+1;
   sum1 += a[i]
   ....
   i = i+1
   sum2 += a[i];
   ....

   Return NULL if INSN contains no opportunity for expansion of the
   accumulator.  Otherwise, allocate a VAR_TO_EXPAND structure, fill it
   with the relevant information and return a pointer to it.  */

static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
{
  rtx set, dest, src;
  struct var_to_expand *ves;
  unsigned accum_pos;
  enum rtx_code code;
  int debug_uses = 0;

  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);
  code = GET_CODE (src);

  if (code != PLUS && code != MINUS && code != MULT && code != FMA)
    return NULL;

  if (FLOAT_MODE_P (GET_MODE (dest)))
    {
      if (!flag_associative_math)
	return NULL;
      /* In the case of FMA, we're also changing the rounding.  */
      if (code == FMA && !flag_unsafe_math_optimizations)
	return NULL;
    }

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we cannot
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (code, GET_MODE (src)))
    return NULL;

  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
	   && REG_P (SUBREG_REG (dest))))
    return NULL;

  /* Find the accumulator use within the operation.  */
  if (code == FMA)
    {
      /* We only support accumulation via FMA in the ADD position.  */
      if (!rtx_equal_p (dest, XEXP (src, 2)))
	return NULL;
      accum_pos = 2;
    }
  else if (rtx_equal_p (dest, XEXP (src, 0)))
    accum_pos = 0;
  else if (rtx_equal_p (dest, XEXP (src, 1)))
    {
      /* The method of expansion that we are using, which includes the
	 initialization of the expansions with zero and the summation of
	 the expansions at the end of the computation, will yield wrong
	 results for (x = something - x); thus avoid using it in that
	 case.  */
      if (code == MINUS)
	return NULL;
      accum_pos = 1;
    }
  else
    return NULL;
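  /* To illustrate the cases above (examples are ours, not from the
     original comments): "sum = sum + x" gives accum_pos 0,
     "sum = x + sum" gives accum_pos 1, and "sum = fma (a, b, sum)" gives
     accum_pos 2, while "sum = x - sum" is rejected because initializing
     the extra copies with zero and summing them at the end would flip
     the sign of some terms.  */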
  /* It must not otherwise be used.  */
  if (code == FMA)
    {
      if (rtx_referenced_p (dest, XEXP (src, 0))
	  || rtx_referenced_p (dest, XEXP (src, 1)))
	return NULL;
    }
  else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
    return NULL;

  /* It must be used in exactly one insn.  */
  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  if (debug_uses)
    /* Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  */
    reset_debug_uses_in_loop (loop, dest, debug_uses);

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions.create (1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;
  return ves;
}

/* Determine whether there is an induction variable in INSN that
   we would like to split during unrolling.

   I.e. replace chains of the form

   i = i + 1;
   ...
   i = i + 1;
   ...
   i = i + 1;
   ...

   by

   i0 = i + 1
   ...
   i = i0 + 1
   ...
   i = i0 + 2
   ...

   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
   an IV_TO_SPLIT structure, fill it with the relevant information and
   return a pointer to it.  */

static struct iv_to_split *
analyze_iv_to_split_insn (rtx_insn *insn)
{
  rtx set, dest;
  struct rtx_iv iv;
  struct iv_to_split *ivts;
  scalar_int_mode mode;
  bool ok;

  /* For now we just split the basic induction variables.  Later this may be
     extended for example by selecting also addresses of memory references.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  if (!REG_P (dest) || !is_a <scalar_int_mode> (GET_MODE (dest), &mode))
    return NULL;

  if (!biv_p (insn, mode, dest))
    return NULL;

  ok = iv_analyze_result (insn, dest, &iv);

  /* This used to be an assert under the assumption that if biv_p returns
     true that iv_analyze_result must also return true.  However, that
     assumption is not strictly correct as evidenced by pr25569.

     Returning NULL when iv_analyze_result returns false is safe and
     avoids the problems in pr25569 until the iv_analyze_* routines
     can be fixed, which is apparently hard and time consuming
     according to their author.  */
  if (! ok)
    return NULL;

  if (iv.step == const0_rtx
      || iv.mode != iv.extend_mode)
    return NULL;

  /* Record the insn to split.  */
  ivts = XNEW (struct iv_to_split);
  ivts->insn = insn;
  ivts->orig_var = dest;
  ivts->base_var = NULL_RTX;
  ivts->step = iv.step;
  ivts->next = NULL;

  return ivts;
}
/* Determine which insns in LOOP can be optimized.
   Return an OPT_INFO struct with the relevant hash tables filled
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
   is undefined for the return value.  */

static struct opt_info *
analyze_insns_in_loop (struct loop *loop)
{
  basic_block *body, bb;
  unsigned i;
  struct opt_info *opt_info = XCNEW (struct opt_info);
  rtx_insn *insn;
  struct iv_to_split *ivts = NULL;
  struct var_to_expand *ves = NULL;
  iv_to_split **slot1;
  var_to_expand **slot2;
  vec<edge> edges = get_loop_exit_edges (loop);
  edge exit;
  bool can_apply = false;

  iv_analysis_loop_init (loop);

  body = get_loop_body (loop);

  if (flag_split_ivs_in_unroller)
    {
      opt_info->insns_to_split
	= new hash_table<iv_split_hasher> (5 * loop->num_nodes);
      opt_info->iv_to_split_head = NULL;
      opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
    }

  /* Record the loop exit bb and loop preheader before the unrolling.  */
  opt_info->loop_preheader = loop_preheader_edge (loop)->src;

  if (edges.length () == 1)
    {
      exit = edges[0];
      if (!(exit->flags & EDGE_COMPLEX))
	{
	  opt_info->loop_exit = split_edge (exit);
	  can_apply = true;
	}
    }

  if (flag_variable_expansion_in_unroller
      && can_apply)
    {
      opt_info->insns_with_var_to_expand
	= new hash_table<var_expand_hasher> (5 * loop->num_nodes);
      opt_info->var_to_expand_head = NULL;
      opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	continue;

      FOR_BB_INSNS (bb, insn)
	{
	  if (!INSN_P (insn))
	    continue;

	  if (opt_info->insns_to_split)
	    ivts = analyze_iv_to_split_insn (insn);

	  if (ivts)
	    {
	      slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
	      gcc_assert (*slot1 == NULL);
	      *slot1 = ivts;
	      *opt_info->iv_to_split_tail = ivts;
	      opt_info->iv_to_split_tail = &ivts->next;
	      continue;
	    }

	  if (opt_info->insns_with_var_to_expand)
	    ves = analyze_insn_to_expand_var (loop, insn);

	  if (ves)
	    {
	      slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
	      gcc_assert (*slot2 == NULL);
	      *slot2 = ves;
	      *opt_info->var_to_expand_tail = ves;
	      opt_info->var_to_expand_tail = &ves->next;
	    }
	}
    }

  edges.release ();
  free (body);
  return opt_info;
}

/* Called just before loop duplication.  Records the start of the
   duplicated area in OPT_INFO.  */

static void
opt_info_start_duplication (struct opt_info *opt_info)
{
  if (opt_info)
    opt_info->first_new_block = last_basic_block_for_fn (cfun);
}

/* Determine the number of iterations between initialization of the base
   variable and the current copy (N_COPY).  N_COPIES is the total number
   of newly created copies.  UNROLLING is true if we are unrolling
   (not peeling) the loop.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  if (unrolling)
    {
      /* If we are unrolling, initialization is done in the original loop
	 body (number 0).  */
      return n_copy;
    }
  else
    {
      /* If we are peeling, the copy in which the initialization occurs has
	 number 1.  The original loop (number 0) is the last.  */
      if (n_copy)
	return n_copy - 1;
      else
	return n_copies;
    }
}
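/* For example (an illustration, not part of the original comments): when
   peeling with n_copies == 3, the copies run in the order 1, 2, 3 and then
   the original body (number 0); the deltas returned above are 0, 1, 2 for
   the peeled copies and 3 for the original body, which executes last.
   When unrolling, copy number and delta simply coincide.  */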
/* Allocate basic variable for the induction variable chain.  */

static void
allocate_basic_variable (struct iv_to_split *ivts)
{
  rtx expr = SET_SRC (single_set (ivts->insn));

  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
}

/* Insert initialization of basic variable of IVTS before INSN, taking
   the initial value from INSN.  */

static void
insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
{
  rtx expr = copy_rtx (SET_SRC (single_set (insn)));
  rtx_insn *seq;

  start_sequence ();
  expr = force_operand (expr, ivts->base_var);
  if (expr != ivts->base_var)
    emit_move_insn (ivts->base_var, expr);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
}

/* Replace the use of the induction variable described in IVTS in INSN
   by base variable + DELTA * step.  */

static void
split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
{
  rtx expr, *loc, incr, var;
  rtx_insn *seq;
  machine_mode mode = GET_MODE (ivts->base_var);
  rtx src, dest, set;

  /* Construct base + DELTA * step.  */
  if (!delta)
    expr = ivts->base_var;
  else
    {
      incr = simplify_gen_binary (MULT, mode,
				  copy_rtx (ivts->step),
				  gen_int_mode (delta, mode));
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
				  ivts->base_var, incr);
    }

  /* Figure out where to do the replacement.  */
  loc = &SET_SRC (single_set (insn));

  /* If we can make the replacement right away, we're done.  */
  if (validate_change (insn, loc, expr, 0))
    return;

  /* Otherwise, force EXPR into a register and try again.  */
  start_sequence ();
  var = gen_reg_rtx (mode);
  expr = force_operand (expr, var);
  if (expr != var)
    emit_move_insn (var, expr);
  seq = get_insns ();
  end_sequence ();
  emit_insn_before (seq, insn);

  if (validate_change (insn, loc, var, 0))
    return;

  /* The last chance.  Try recreating the assignment in insn
     completely from scratch.  */
  set = single_set (insn);
  gcc_assert (set);

  start_sequence ();
  *loc = var;
  src = copy_rtx (SET_SRC (set));
  dest = copy_rtx (SET_DEST (set));
  src = force_operand (src, dest);
  if (src != dest)
    emit_move_insn (dest, src);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
  delete_insn (insn);
}


/* Return one expansion of the accumulator recorded in struct VE.  */

static rtx
get_expansion (struct var_to_expand *ve)
{
  rtx reg;

  if (ve->reuse_expansion == 0)
    reg = ve->reg;
  else
    reg = ve->var_expansions[ve->reuse_expansion - 1];

  if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
    ve->reuse_expansion = 0;
  else
    ve->reuse_expansion++;

  return reg;
}
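/* E.g. (our illustration): with two recorded expansions, successive calls
   above return ve->reg, var_expansions[0], var_expansions[1], ve->reg,
   ... -- i.e. once the expansion limit is reached, insns in further
   copies reuse the original accumulator and the expansions in a
   round-robin fashion.  */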
/* Given INSN replace the uses of the accumulator recorded in VE
   with a new register.  */

static void
expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
{
  rtx new_reg, set;
  bool really_new_expansion = false;

  set = single_set (insn);
  gcc_assert (set);

  /* Generate a new register only if the expansion limit has not been
     reached.  Else reuse an already existing expansion.  */
  if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
    {
      really_new_expansion = true;
      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
    }
  else
    new_reg = get_expansion (ve);

  validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
  if (apply_change_group ())
    if (really_new_expansion)
      {
	ve->var_expansions.safe_push (new_reg);
	ve->expansion_count++;
      }
}

/* Initialize the variable expansions in the loop preheader.  PLACE is the
   loop-preheader basic block where the initialization of the
   expansions should take place.  The expansions are initialized with
   (-0) when the operation is plus or minus to honor signed zero.  This
   way we can prevent cases where the sign of the final result is
   affected by the sign of the expansion.  Here is an example to
   demonstrate this:

   for (i = 0 ; i < n; i++)
     sum += something;

   ==>

   sum += something
   ....
   i = i+1;
   sum1 += something
   ....
   i = i+1
   sum2 += something;
   ....

   When SUM is initialized with -zero and SOMETHING is also -zero, the
   final result of sum should be -zero, thus the expansions sum1 and sum2
   should be initialized with -zero as well (otherwise we will get +zero
   as the final result).  */

static void
insert_var_expansion_initialization (struct var_to_expand *ve,
				     basic_block place)
{
  rtx_insn *seq;
  rtx var, zero_init;
  unsigned i;
  machine_mode mode = GET_MODE (ve->reg);
  bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);

  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	{
	  if (honor_signed_zero_p)
	    zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
	  else
	    zero_init = CONST0_RTX (mode);
	  emit_move_insn (var, zero_init);
	}
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	{
	  zero_init = CONST1_RTX (GET_MODE (var));
	  emit_move_insn (var, zero_init);
	}
      break;

    default:
      gcc_unreachable ();
    }

  seq = get_insns ();
  end_sequence ();

  emit_insn_after (seq, BB_END (place));
}
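/* The IEEE 754 arithmetic behind the -zero initialization above (a short
   reminder of ours, not from the original comments): -0.0 + -0.0 == -0.0,
   but -0.0 + +0.0 == +0.0.  So if sum1 or sum2 started at +0.0, combining
   them with a -0.0 accumulator at the loop exit would turn a -0.0 result
   into +0.0.  */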
/* Combine the variable expansions at the loop exit.  PLACE is the
   loop exit basic block where the summation of the expansions should
   take place.  */

static void
combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
{
  rtx sum = ve->reg;
  rtx expr, var;
  rtx_insn *seq, *insn;
  unsigned i;

  if (ve->var_expansions.length () == 0)
    return;

  /* ve->reg might be SUBREG or some other non-shareable RTL, and we use
     it both here and as the destination of the assignment.  */
  sum = copy_rtx (sum);
  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
      break;

    default:
      gcc_unreachable ();
    }

  expr = force_operand (sum, ve->reg);
  if (expr != ve->reg)
    emit_move_insn (ve->reg, expr);
  seq = get_insns ();
  end_sequence ();

  insn = BB_HEAD (place);
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
    insn = NEXT_INSN (insn);

  emit_insn_after (seq, insn);
}
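/* For instance (a sketch, continuing the example above), with
   ve->op == PLUS and two expansions, the sequence emitted just after
   the basic-block note of the exit block computes

     sum = sum2 + (sum1 + sum);

   i.e. the partial accumulators are folded back into the original one
   by one via simplify_gen_binary.  */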
/* Strip away REG_EQUAL notes for IVs we're splitting.

   Updating REG_EQUAL notes for IVs we split is tricky: We
   cannot tell until after unrolling, DF-rescanning, and liveness
   updating, whether an EQ_USE is reached by the split IV while
   the IV reg is still live.  See PR55006.

   ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
   because RTL loop-iv requires us to defer rescanning insns and
   any notes attached to them.  So resort to old techniques...  */

static void
maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
{
  struct iv_to_split *ivts;
  rtx note = find_reg_equal_equiv_note (insn);
  if (! note)
    return;
  for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
    if (reg_mentioned_p (ivts->orig_var, note))
      {
	remove_note (insn, note);
	return;
      }
}

/* Apply loop optimizations in loop copies using the
   data gathered during the unrolling.  Structure
   OPT_INFO records that data.

   UNROLLING is true if we unrolled (not peeled) the loop.
   REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body
   of the loop (as it should happen in complete unrolling, but not in
   ordinary peeling of the loop).  */

static void
apply_opt_in_copies (struct opt_info *opt_info,
		     unsigned n_copies, bool unrolling,
		     bool rewrite_original_loop)
{
  unsigned i, delta;
  basic_block bb, orig_bb;
  rtx_insn *insn, *orig_insn, *next;
  struct iv_to_split ivts_templ, *ivts;
  struct var_to_expand ve_templ, *ves;

  /* Sanity check -- we need to put initialization in the original loop
     body.  */
  gcc_assert (!unrolling || rewrite_original_loop);

  /* Allocate the basic variables (i0).  */
  if (opt_info->insns_to_split)
    for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
      allocate_basic_variable (ivts);

  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);

      /* bb->aux holds position in copy sequence initialized by
	 duplicate_loop_to_header_edge.  */
      delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
					unrolling);
      bb->aux = 0;
      orig_insn = BB_HEAD (orig_bb);
      FOR_BB_INSNS_SAFE (bb, insn, next)
	{
	  if (!INSN_P (insn)
	      || (DEBUG_BIND_INSN_P (insn)
		  && INSN_VAR_LOCATION_DECL (insn)
		  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
	    continue;

	  while (!INSN_P (orig_insn)
		 || (DEBUG_BIND_INSN_P (orig_insn)
		     && INSN_VAR_LOCATION_DECL (orig_insn)
		     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
			 == LABEL_DECL)))
	    orig_insn = NEXT_INSN (orig_insn);

	  ivts_templ.insn = orig_insn;
	  ve_templ.insn = orig_insn;

	  /* Apply splitting iv optimization.  */
	  if (opt_info->insns_to_split)
	    {
	      maybe_strip_eq_note_for_split_iv (opt_info, insn);

	      ivts = opt_info->insns_to_split->find (&ivts_templ);

	      if (ivts)
		{
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));

		  if (!delta)
		    insert_base_initialization (ivts, insn);
		  split_iv (ivts, insn, delta);
		}
	    }
	  /* Apply variable expansion optimization.  */
	  if (unrolling && opt_info->insns_with_var_to_expand)
	    {
	      ves = (struct var_to_expand *)
		opt_info->insns_with_var_to_expand->find (&ve_templ);
	      if (ves)
		{
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));
		  expand_var_during_unrolling (ves, insn);
		}
	    }
	  orig_insn = NEXT_INSN (orig_insn);
	}
    }

  if (!rewrite_original_loop)
    return;

  /* Initialize the variable expansions in the loop preheader
     and take care of combining them at the loop exit.  */
  if (opt_info->insns_with_var_to_expand)
    {
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	insert_var_expansion_initialization (ves, opt_info->loop_preheader);
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
    }

  /* Also rewrite the original loop body.  Find its blocks as the
     originals of the blocks in the last copied iteration, i.e. those
     that have get_bb_copy (get_bb_original (bb)) == bb.  */
  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);
      if (get_bb_copy (orig_bb) != bb)
	continue;

      delta = determine_split_iv_delta (0, n_copies, unrolling);
      for (orig_insn = BB_HEAD (orig_bb);
	   orig_insn != NEXT_INSN (BB_END (bb));
	   orig_insn = next)
	{
	  next = NEXT_INSN (orig_insn);

	  if (!INSN_P (orig_insn))
	    continue;

	  ivts_templ.insn = orig_insn;
	  if (opt_info->insns_to_split)
	    {
	      maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);

	      ivts = (struct iv_to_split *)
		opt_info->insns_to_split->find (&ivts_templ);
	      if (ivts)
		{
		  if (!delta)
		    insert_base_initialization (ivts, orig_insn);
		  split_iv (ivts, orig_insn, delta);
		  continue;
		}
	    }
	}
    }
}
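/* For orientation, the calling sequence around apply_opt_in_copies is
   roughly the following (a sketch only; the exact call sites are the
   unroll_loop_* routines elsewhere in this file):

     opt_info = analyze_insns_in_loop (loop);
     opt_info_start_duplication (opt_info);
     duplicate_loop_to_header_edge (loop, ...);		makes the copies
     apply_opt_in_copies (opt_info, n_copies, true, true);
     free_opt_info (opt_info);
*/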
/* Release OPT_INFO.  */

static void
free_opt_info (struct opt_info *opt_info)
{
  delete opt_info->insns_to_split;
  opt_info->insns_to_split = NULL;
  if (opt_info->insns_with_var_to_expand)
    {
      struct var_to_expand *ves;

      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	ves->var_expansions.release ();
      delete opt_info->insns_with_var_to_expand;
      opt_info->insns_with_var_to_expand = NULL;
    }
  free (opt_info);
}