1 /* Perform doloop optimizations 2 Copyright (C) 2004-2015 Free Software Foundation, Inc. 3 Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz) 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it under 8 the terms of the GNU General Public License as published by the Free 9 Software Foundation; either version 3, or (at your option) any later 10 version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #include "config.h" 22 #include "system.h" 23 #include "coretypes.h" 24 #include "tm.h" 25 #include "rtl.h" 26 #include "flags.h" 27 #include "symtab.h" 28 #include "hashtab.h" 29 #include "hash-set.h" 30 #include "vec.h" 31 #include "machmode.h" 32 #include "hard-reg-set.h" 33 #include "input.h" 34 #include "function.h" 35 #include "statistics.h" 36 #include "double-int.h" 37 #include "real.h" 38 #include "fixed-value.h" 39 #include "alias.h" 40 #include "wide-int.h" 41 #include "inchash.h" 42 #include "tree.h" 43 #include "insn-config.h" 44 #include "expmed.h" 45 #include "dojump.h" 46 #include "explow.h" 47 #include "calls.h" 48 #include "emit-rtl.h" 49 #include "varasm.h" 50 #include "stmt.h" 51 #include "expr.h" 52 #include "diagnostic-core.h" 53 #include "tm_p.h" 54 #include "predict.h" 55 #include "dominance.h" 56 #include "cfg.h" 57 #include "cfgloop.h" 58 #include "cfgrtl.h" 59 #include "basic-block.h" 60 #include "params.h" 61 #include "target.h" 62 #include "dumpfile.h" 63 #include "loop-unroll.h" 64 65 /* This module is used to modify loops with a determinable number of 66 iterations to use special low-overhead looping instructions. 

   It first validates whether the loop is well behaved and has a
   determinable number of iterations (either at compile or run-time).
   It then modifies the loop to use a low-overhead looping pattern as
   follows:

   1. A pseudo register is allocated as the loop iteration counter.

   2. The number of loop iterations is calculated and is stored
      in the loop counter.

   3. At the end of the loop, the jump insn is replaced by the
      doloop_end pattern.  The compare must remain because it might be
      used elsewhere.  If the loop-variable or condition register are
      used elsewhere, they will be eliminated by flow.

   4. An optional doloop_begin pattern is inserted at the top of the
      loop.

   TODO The optimization should only be performed when either the biv used for
   the exit condition is unused at all except for the exit test, or if we do
   not have to change its value, since otherwise we have to add a new
   induction variable, which usually will not pay up (unless the cost of the
   doloop pattern is somehow extremely lower than the cost of compare & jump,
   or unless the bct register cannot be used for anything else but doloop
   -- ??? detect these cases).  */

#ifdef HAVE_doloop_end

/* Return the loop termination condition for the jump insn DOLOOP_PAT,
   or zero if it is not a decrement-and-branch jump insn of a shape this
   pass recognizes.  On success the returned condition is always
   rewritten to the canonical form (ne (plus (reg) (const_int -1))
   (const_int 1)) -- see the comment before the final return.  */

rtx
doloop_condition_get (rtx doloop_pat)
{
  rtx cmp;
  rtx inc;
  rtx reg;
  rtx inc_src;
  rtx condition;
  rtx pattern;
  rtx cc_reg = NULL_RTX;    /* Condition-code register; set only for form 3.  */
  rtx reg_orig = NULL_RTX;  /* Counter reg as seen in form 3's compare.  */

  /* The canonical doloop pattern we expect has one of the following
     forms:

     1) (parallel [(set (pc) (if_then_else (condition)
                                           (label_ref (label))
                                           (pc)))
                   (set (reg) (plus (reg) (const_int -1)))
                   (additional clobbers and uses)])

     The branch must be the first entry of the parallel (also required
     by jump.c), and the second entry of the parallel must be a set of
     the loop counter register.  Some targets (IA-64) wrap the set of
     the loop counter in an if_then_else too.

     2) (set (reg) (plus (reg) (const_int -1))
        (set (pc) (if_then_else (reg != 0)
                                (label_ref (label))
                                (pc))).

     Some targets (ARM) do the comparison before the branch, as in the
     following form:

     3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
                   (set (reg) (plus (reg) (const_int -1)))])
        (set (pc) (if_then_else (cc == NE)
                                (label_ref (label))
                                (pc))) */

  pattern = PATTERN (doloop_pat);

  if (GET_CODE (pattern) != PARALLEL)
    {
      rtx cond;
      rtx prev_insn = prev_nondebug_insn (doloop_pat);
      rtx cmp_arg1, cmp_arg2;
      rtx cmp_orig;

      /* In case the pattern is not PARALLEL we expect two forms
	 of doloop which are cases 2) and 3) above: in case 2) the
	 decrement immediately precedes the branch, while in case 3)
	 the compare and decrement instructions immediately precede
	 the branch.  */

      if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
	return 0;

      cmp = pattern;
      if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
	{
	  /* The third case: the compare and decrement instructions
	     immediately precede the branch.  Validate that the first
	     element of the parallel is (set (cc) (compare (plus (reg) -1)
	     0)) and remember both the cc register and the counter reg.  */
	  cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
	  if (GET_CODE (cmp_orig) != SET)
	    return 0;
	  if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
	    return 0;
	  cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
	  cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
	  if (cmp_arg2 != const0_rtx
	      || GET_CODE (cmp_arg1) != PLUS)
	    return 0;
	  reg_orig = XEXP (cmp_arg1, 0);
	  /* NOTE(review): this compares against GEN_INT (-1) by pointer
	     identity where the code below uses the shared constm1_rtx;
	     presumably equivalent because small CONST_INTs are cached and
	     shared -- confirm against the rtl documentation.  */
	  if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
	      || !REG_P (reg_orig))
	    return 0;
	  cc_reg = SET_DEST (cmp_orig);

	  inc = XVECEXP (PATTERN (prev_insn), 0, 1);
	}
      else
	inc = PATTERN (prev_insn);
      if (GET_CODE (cmp) == SET && GET_CODE (SET_SRC (cmp)) == IF_THEN_ELSE)
	{
	  /* We expect the condition to be of the form (reg != 0)  */
	  cond = XEXP (SET_SRC (cmp), 0);
	  if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
	    return 0;
	}
    }
  else
    {
      /* Form 1: branch and decrement in one PARALLEL, branch first.  */
      cmp = XVECEXP (pattern, 0, 0);
      inc = XVECEXP (pattern, 0, 1);
    }

  /* Check for (set (reg) (something)).  */
  if (GET_CODE (inc) != SET)
    return 0;
  reg = SET_DEST (inc);
  if (! REG_P (reg))
    return 0;

  /* Check if something = (plus (reg) (const_int -1)).
     On IA-64, this decrement is wrapped in an if_then_else.  */
  inc_src = SET_SRC (inc);
  if (GET_CODE (inc_src) == IF_THEN_ELSE)
    inc_src = XEXP (inc_src, 1);
  if (GET_CODE (inc_src) != PLUS
      || XEXP (inc_src, 0) != reg
      || XEXP (inc_src, 1) != constm1_rtx)
    return 0;

  /* Check for (set (pc) (if_then_else (condition)
                                        (label_ref (label))
                                        (pc))).  */
  if (GET_CODE (cmp) != SET
      || SET_DEST (cmp) != pc_rtx
      || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
      || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
      || XEXP (SET_SRC (cmp), 2) != pc_rtx)
    return 0;

  /* Extract loop termination condition.  */
  condition = XEXP (SET_SRC (cmp), 0);

  /* We expect a GE or NE comparison with 0 or 1.  */
  if ((GET_CODE (condition) != GE
       && GET_CODE (condition) != NE)
      || (XEXP (condition, 1) != const0_rtx
          && XEXP (condition, 1) != const1_rtx))
    return 0;

  if ((XEXP (condition, 0) == reg)
      /* For the third case:  */
      || ((cc_reg != NULL_RTX)
	  && (XEXP (condition, 0) == cc_reg)
	  && (reg_orig == reg))
      || (GET_CODE (XEXP (condition, 0)) == PLUS
	  && XEXP (XEXP (condition, 0), 0) == reg))
   {
     if (GET_CODE (pattern) != PARALLEL)
     /*  For the second form we expect:

         (set (reg) (plus (reg) (const_int -1))
         (set (pc) (if_then_else (reg != 0)
                                 (label_ref (label))
                                 (pc))).

         is equivalent to the following:

         (parallel [(set (pc) (if_then_else (reg != 1)
                                            (label_ref (label))
                                            (pc)))
                     (set (reg) (plus (reg) (const_int -1)))
                     (additional clobbers and uses)])

        For the third form we expect:

        (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
                   (set (reg) (plus (reg) (const_int -1)))])
        (set (pc) (if_then_else (cc == NE)
                                (label_ref (label))
                                (pc)))

        which is equivalent to the following:

        (parallel [(set (cc) (compare (reg,  1))
                   (set (reg) (plus (reg) (const_int -1)))
                   (set (pc) (if_then_else (NE == cc)
                                           (label_ref (label))
                                           (pc))))])

        So we return the second form instead for the two cases.

     */
        condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);

    return condition;
   }

  /* ??? If a machine uses a funny comparison, we could return a
     canonicalized form here.  */

  return 0;
}

/* Return nonzero if the loop specified by LOOP is suitable for
   the use of special low-overhead looping instructions.  DESC
   describes the number of iterations of the loop.
*/

static bool
doloop_valid_p (struct loop *loop, struct niter_desc *desc)
{
  basic_block *body = get_loop_body (loop), bb;
  rtx_insn *insn;
  unsigned i;
  bool result = true;

  /* Check for loops that may not terminate under special conditions.  */
  if (!desc->simple_p
      || desc->assumptions
      || desc->infinite)
    {
      /* There are some cases that would require a special attention.
	 For example if the comparison is LEU and the comparison value
	 is UINT_MAX then the loop will not terminate.  Similarly, if the
	 comparison code is GEU and the comparison value is 0, the
	 loop will not terminate.

	 If the absolute increment is not 1, the loop can be infinite
	 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)

	 ??? We could compute these conditions at run-time and have a
	 additional jump around the loop to ensure an infinite loop.
	 However, it is very unlikely that this is the intended
	 behavior of the loop and checking for these rare boundary
	 conditions would pessimize all other code.

	 If the loop is executed only a few times an extra check to
	 restart the loop could use up most of the benefits of using a
	 count register loop.  Note however, that normally, this
	 restart branch would never execute, so it could be predicted
	 well by the CPU.  We should generate the pessimistic code by
	 default, and have an option, e.g. -funsafe-loops that would
	 enable count-register loops in this case.  */
      if (dump_file)
	fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
      result = false;
      goto cleanup;
    }

  /* Walk every insn of every block in the loop body and let the target
     veto insns it cannot tolerate inside a low-overhead loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      for (insn = BB_HEAD (bb);
	   insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	{
	  /* Different targets have different necessities for low-overhead
	     looping.  Call the back end for each instruction within the loop
	     to let it decide whether the insn prohibits a low-overhead loop.
	     It will then return the cause for it to emit to the dump file.  */
	  const char * invalid = targetm.invalid_within_doloop (insn);
	  if (invalid)
	    {
	      if (dump_file)
		fprintf (dump_file, "Doloop: %s\n", invalid);
	      result = false;
	      goto cleanup;
	    }
	}
    }
  result = true;

 cleanup:
  free (body);

  return result;
}

/* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
   edge.  If the condition is always false, do not do anything.  If it is always
   true, redirect E to DEST and return false.  In all other cases, true is
   returned.  */

static bool
add_test (rtx cond, edge *e, basic_block dest)
{
  rtx_insn *seq, *jump;
  rtx label;
  machine_mode mode;
  rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
  enum rtx_code code = GET_CODE (cond);
  basic_block bb;

  /* Prefer the mode of the first operand; fall back to the second when
     the first is a mode-less constant.  */
  mode = GET_MODE (XEXP (cond, 0));
  if (mode == VOIDmode)
    mode = GET_MODE (XEXP (cond, 1));

  start_sequence ();
  op0 = force_operand (op0, NULL_RTX);
  op1 = force_operand (op1, NULL_RTX);
  label = block_label (dest);
  do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX,
			   NULL_RTX, label, -1);

  jump = get_last_insn ();
  if (!jump || !JUMP_P (jump))
    {
      /* The condition is always false and the jump was optimized out.  */
      end_sequence ();
      return true;
    }

  seq = get_insns ();
  end_sequence ();

  /* There always is at least the jump insn in the sequence.  */
  gcc_assert (seq != NULL_RTX);

  bb = split_edge_and_insert (*e, seq);
  *e = single_succ_edge (bb);

  if (any_uncondjump_p (jump))
    {
      /* The condition is always true.
*/
      delete_insn (jump);
      redirect_edge_and_branch_force (*e, dest);
      return false;
    }

  JUMP_LABEL (jump) = label;

  /* The jump is supposed to handle an unlikely special case.  */
  add_int_reg_note (jump, REG_BR_PROB, 0);

  LABEL_NUSES (label)++;

  make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
  return true;
}

/* Modify the loop to use the low-overhead looping insn where LOOP
   describes the loop, DESC describes the number of iterations of the
   loop, and DOLOOP_SEQ is the low-overhead looping insn (sequence) to
   emit at the end of the loop.  CONDITION is the condition separated
   from the DOLOOP_SEQ.  COUNT is the number of iterations of the LOOP.  */

static void
doloop_modify (struct loop *loop, struct niter_desc *desc,
	       rtx doloop_seq, rtx condition, rtx count)
{
  rtx counter_reg;
  rtx tmp, noloop = NULL_RTX;   /* Counter value meaning "do not loop".  */
  rtx_insn *sequence;
  rtx_insn *jump_insn;
  rtx jump_label;
  int nonneg = 0;               /* Nonzero if the counter stays non-negative.  */
  bool increment_count;
  basic_block loop_end = desc->out_edge->src;
  machine_mode mode;
  rtx true_prob_val;
  widest_int iterations;

  jump_insn = BB_END (loop_end);

  if (dump_file)
    {
      fprintf (dump_file, "Doloop: Inserting doloop pattern (");
      if (desc->const_iter)
	fprintf (dump_file, "%"PRId64, desc->niter);
      else
	fputs ("runtime", dump_file);
      fputs (" iterations).\n", dump_file);
    }

  /* Get the probability of the original branch. If it exists we would
     need to update REG_BR_PROB of the new jump_insn.  */
  true_prob_val = find_reg_note (jump_insn, REG_BR_PROB, NULL_RTX);

  /* Discard original jump to continue loop.  The original compare
     result may still be live, so it cannot be discarded explicitly.  */
  delete_insn (jump_insn);

  /* CONDITION is either (ne (reg) (const)) or compares (plus (reg) -1);
     either way the counter register is the first leaf.  */
  counter_reg = XEXP (condition, 0);
  if (GET_CODE (counter_reg) == PLUS)
    counter_reg = XEXP (counter_reg, 0);
  mode = GET_MODE (counter_reg);

  increment_count = false;
  switch (GET_CODE (condition))
    {
    case NE:
      /* Currently only NE tests against zero and one are supported.
	 A test against one means the counter is decremented before the
	 test, so the initial value must be bumped by one.  */
      noloop = XEXP (condition, 1);
      if (noloop != const0_rtx)
	{
	  gcc_assert (noloop == const1_rtx);
	  increment_count = true;
	}
      break;

    case GE:
      /* Currently only GE tests against zero are supported.  */
      gcc_assert (XEXP (condition, 1) == const0_rtx);

      noloop = constm1_rtx;

      /* The iteration count does not need incrementing for a GE test.  */
      increment_count = false;

      /* Determine if the iteration counter will be non-negative.
	 Note that the maximum value loaded is iterations_max - 1.  */
      if (get_max_loop_iterations (loop, &iterations)
	  && wi::leu_p (iterations,
			wi::set_bit_in_zero <widest_int>
			(GET_MODE_PRECISION (mode) - 1)))
	nonneg = 1;
      break;

      /* Abort if an invalid doloop pattern has been generated.  */
    default:
      gcc_unreachable ();
    }

  if (increment_count)
    count = simplify_gen_binary (PLUS, mode, count, const1_rtx);

  /* Insert initialization of the count register into the loop header.  */
  start_sequence ();
  tmp = force_operand (count, counter_reg);
  convert_move (counter_reg, tmp, 1);
  sequence = get_insns ();
  end_sequence ();
  emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));

  if (desc->noloop_assumptions)
    {
      rtx ass = copy_rtx (desc->noloop_assumptions);
      basic_block preheader = loop_preheader_edge (loop)->src;
      basic_block set_zero
	= split_edge (loop_preheader_edge (loop));
      basic_block new_preheader
	= split_edge (loop_preheader_edge (loop));
      edge te;

      /* Expand the condition testing the assumptions and if it does not pass,
	 reset the count register to 0.  */
      redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);

      set_zero->count = 0;
      set_zero->frequency = 0;

      te = single_succ_edge (preheader);
      for (; ass; ass = XEXP (ass, 1))
	if (!add_test (XEXP (ass, 0), &te, set_zero))
	  break;

      if (ass)
	{
	  /* We reached a condition that is always true.  This is very hard to
	     reproduce (such a loop does not roll, and thus it would most
	     likely get optimized out by some of the preceding optimizations).
	     In fact, I do not have any testcase for it.  However, it would
	     also be very hard to show that it is impossible, so we must
	     handle this case.  */
	  set_zero->count = preheader->count;
	  set_zero->frequency = preheader->frequency;
	}

      if (EDGE_COUNT (set_zero->preds) == 0)
	{
	  /* All the conditions were simplified to false, remove the
	     unreachable set_zero block.  */
	  delete_basic_block (set_zero);
	}
      else
	{
	  /* Reset the counter to zero in the set_zero block.
	     NOTE(review): the value moved is NOLOOP, which is constm1_rtx
	     for the GE case rather than zero -- presumably intentional for
	     a GE-against-zero exit test; confirm.  */
	  start_sequence ();
	  convert_move (counter_reg, noloop, 0);
	  sequence = get_insns ();
	  end_sequence ();
	  emit_insn_after (sequence, BB_END (set_zero));

	  set_immediate_dominator (CDI_DOMINATORS, set_zero,
				   recompute_dominator (CDI_DOMINATORS,
							set_zero));
	}

      set_immediate_dominator (CDI_DOMINATORS, new_preheader,
			       recompute_dominator (CDI_DOMINATORS,
						    new_preheader));
    }

  /* Some targets (eg, C4x) need to initialize special looping
     registers.  */
#ifdef HAVE_doloop_begin
  {
    rtx init;

    init = gen_doloop_begin (counter_reg, doloop_seq);
    if (init)
      {
	start_sequence ();
	emit_insn (init);
	sequence = get_insns ();
	end_sequence ();
	emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
      }
  }
#endif

  /* Insert the new low-overhead looping insn.  */
  emit_jump_insn_after (doloop_seq, BB_END (loop_end));
  jump_insn = BB_END (loop_end);
  jump_label = block_label (desc->in_edge->dest);
  JUMP_LABEL (jump_insn) = jump_label;
  LABEL_NUSES (jump_label)++;

  /* Ensure the right fallthru edge is marked, for case we have reversed
     the condition.  */
  desc->in_edge->flags &= ~EDGE_FALLTHRU;
  desc->out_edge->flags |= EDGE_FALLTHRU;

  /* Add a REG_NONNEG note if the actual or estimated maximum number
     of iterations is non-negative.  */
  if (nonneg)
    add_reg_note (jump_insn, REG_NONNEG, NULL_RTX);

  /* Update the REG_BR_PROB note.  */
  if (true_prob_val)
    {
      /* Seems safer to use the branch probability.  */
      add_int_reg_note (jump_insn, REG_BR_PROB, desc->in_edge->probability);
    }
}

/* Process loop described by LOOP validating that the loop is suitable for
   conversion to use a low overhead looping instruction, replacing the jump
   insn where suitable.  Returns true if the loop was successfully
   modified.
*/

static bool
doloop_optimize (struct loop *loop)
{
  machine_mode mode;
  rtx doloop_seq, doloop_pat, doloop_reg;
  rtx count;
  widest_int iterations, iterations_max;
  rtx start_label;
  rtx condition;
  unsigned level, est_niter;
  int max_cost;
  struct niter_desc *desc;
  unsigned word_mode_size;
  unsigned HOST_WIDE_INT word_mode_max;
  int entered_at_top;

  if (dump_file)
    fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);

  iv_analysis_loop_init (loop);

  /* Find the simple exit of a LOOP.  */
  desc = get_simple_loop_desc (loop);

  /* Check that loop is a candidate for a low-overhead looping insn.  */
  if (!doloop_valid_p (loop, desc))
    {
      if (dump_file)
	fprintf (dump_file,
		 "Doloop: The loop is not suitable.\n");
      return false;
    }
  mode = desc->mode;

  /* Estimate the iteration count: a constant count if known, otherwise
     a default of 3 (just enough to pass the profitability check below).  */
  est_niter = 3;
  if (desc->const_iter)
    est_niter = desc->niter;
  /* If the estimate on number of iterations is reliable (comes from profile
     feedback), use it.  Do not use it normally, since the expected number
     of iterations of an unrolled loop is 2.  */
  if (loop->header->count)
    est_niter = expected_loop_iterations (loop);

  if (est_niter < 3)
    {
      if (dump_file)
	fprintf (dump_file,
		 "Doloop: Too few iterations (%u) to be profitable.\n",
		 est_niter);
      return false;
    }

  max_cost
    = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
  if (set_src_cost (desc->niter_expr, optimize_loop_for_speed_p (loop))
      > max_cost)
    {
      if (dump_file)
	fprintf (dump_file,
		 "Doloop: number of iterations too costly to compute.\n");
      return false;
    }

  /* Let the target veto the transformation based on the (maximum)
     iteration count, loop nesting level and loop shape.  */
  if (desc->const_iter)
    iterations = widest_int::from (std::make_pair (desc->niter_expr, mode),
				   UNSIGNED);
  else
    iterations = 0;
  if (!get_max_loop_iterations (loop, &iterations_max))
    iterations_max = 0;
  level = get_loop_level (loop) + 1;
  entered_at_top = (loop->latch == desc->in_edge->dest
		    && contains_no_active_insn_p (loop->latch));
  if (!targetm.can_use_doloop_p (iterations, iterations_max, level,
				 entered_at_top))
    {
      if (dump_file)
	fprintf (dump_file, "Loop rejected by can_use_doloop_p.\n");
      return false;
    }

  /* Generate looping insn.  If the pattern FAILs then give up trying
     to modify the loop since there is some aspect the back-end does
     not like.  */
  count = copy_rtx (desc->niter_expr);
  start_label = block_label (desc->in_edge->dest);
  doloop_reg = gen_reg_rtx (mode);
  doloop_seq = gen_doloop_end (doloop_reg, start_label);

  word_mode_size = GET_MODE_PRECISION (word_mode);
  word_mode_max
    = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1;
  if (! doloop_seq
      && mode != word_mode
      /* Before trying mode different from the one in that # of iterations is
	 computed, we must be sure that the number of iterations fits into
	 the new mode.  */
      && (word_mode_size >= GET_MODE_PRECISION (mode)
	  || wi::leu_p (iterations_max, word_mode_max)))
    {
      /* Retry with the counter widened (or narrowed) to word_mode.  */
      if (word_mode_size > GET_MODE_PRECISION (mode))
	count = simplify_gen_unary (ZERO_EXTEND, word_mode, count, mode);
      else
	count = lowpart_subreg (word_mode, count, mode);
      PUT_MODE (doloop_reg, word_mode);
      doloop_seq = gen_doloop_end (doloop_reg, start_label);
    }
  if (! doloop_seq)
    {
      if (dump_file)
	fprintf (dump_file,
		 "Doloop: Target unwilling to use doloop pattern!\n");
      return false;
    }

  /* If multiple instructions were created, the last must be the
     jump instruction.  Also, a raw define_insn may yield a plain
     pattern.  */
  doloop_pat = doloop_seq;
  if (INSN_P (doloop_pat))
    {
      rtx_insn *doloop_insn = as_a <rtx_insn *> (doloop_pat);
      while (NEXT_INSN (doloop_insn) != NULL_RTX)
	doloop_insn = NEXT_INSN (doloop_insn);
      if (!JUMP_P (doloop_insn))
	doloop_insn = NULL;
      doloop_pat = doloop_insn;
    }

  if (! doloop_pat
      || ! (condition = doloop_condition_get (doloop_pat)))
    {
      if (dump_file)
	fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
      return false;
    }

  doloop_modify (loop, desc, doloop_seq, condition, count);
  return true;
}

/* This is the main entry point.  Process all loops using doloop_optimize.  */

void
doloop_optimize_loops (void)
{
  struct loop *loop;

  FOR_EACH_LOOP (loop, 0)
    {
      doloop_optimize (loop);
    }

  iv_analysis_done ();

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif
}
#endif /* HAVE_doloop_end */
