1 /* Code to analyze doloop loops in order for targets to perform late 2 optimizations converting doloops to other forms of hardware loops. 3 Copyright (C) 2011-2013 Free Software Foundation, Inc. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it under 8 the terms of the GNU General Public License as published by the Free 9 Software Foundation; either version 3, or (at your option) any later 10 version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #include "config.h" 22 #include "system.h" 23 #include "coretypes.h" 24 #include "tm.h" 25 #include "rtl.h" 26 #include "flags.h" 27 #include "expr.h" 28 #include "hard-reg-set.h" 29 #include "regs.h" 30 #include "basic-block.h" 31 #include "tm_p.h" 32 #include "df.h" 33 #include "cfgloop.h" 34 #include "recog.h" 35 #include "target.h" 36 #include "hw-doloop.h" 37 #include "dumpfile.h" 38 39 #ifdef HAVE_doloop_end 40 41 /* Dump information collected in LOOPS. */ 42 static void 43 dump_hwloops (hwloop_info loops) 44 { 45 hwloop_info loop; 46 47 for (loop = loops; loop; loop = loop->next) 48 { 49 hwloop_info i; 50 basic_block b; 51 unsigned ix; 52 53 fprintf (dump_file, ";; loop %d: ", loop->loop_no); 54 if (loop->bad) 55 fprintf (dump_file, "(bad) "); 56 fprintf (dump_file, "{head:%d, depth:%d, reg:%u}", 57 loop->head == NULL ? -1 : loop->head->index, 58 loop->depth, REGNO (loop->iter_reg)); 59 60 fprintf (dump_file, " blocks: [ "); 61 for (ix = 0; loop->blocks.iterate (ix, &b); ix++) 62 fprintf (dump_file, "%d ", b->index); 63 fprintf (dump_file, "] "); 64 65 fprintf (dump_file, " inner loops: [ "); 66 for (ix = 0; loop->loops.iterate (ix, &i); ix++) 67 fprintf (dump_file, "%d ", i->loop_no); 68 fprintf (dump_file, "]\n"); 69 } 70 fprintf (dump_file, "\n"); 71 } 72 73 /* Return true if BB is part of LOOP. */ 74 static bool 75 bb_in_loop_p (hwloop_info loop, basic_block bb) 76 { 77 return bitmap_bit_p (loop->block_bitmap, bb->index); 78 } 79 80 /* Scan the blocks of LOOP (and its inferiors), and record things such as 81 hard registers set, jumps out of and within the loop. */ 82 static void 83 scan_loop (hwloop_info loop) 84 { 85 unsigned ix; 86 basic_block bb; 87 88 if (loop->bad) 89 return; 90 91 if (REGNO_REG_SET_P (df_get_live_in (loop->successor), 92 REGNO (loop->iter_reg))) 93 loop->iter_reg_used_outside = true; 94 95 for (ix = 0; loop->blocks.iterate (ix, &bb); ix++) 96 { 97 rtx insn; 98 edge e; 99 edge_iterator ei; 100 101 if (bb != loop->tail) 102 FOR_EACH_EDGE (e, ei, bb->succs) 103 { 104 if (bb_in_loop_p (loop, e->dest)) 105 { 106 if (!(e->flags & EDGE_FALLTHRU)) 107 loop->jumps_within = true; 108 } 109 else 110 { 111 loop->jumps_outof = true; 112 if (!loop->bad) 113 gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest), 114 REGNO (loop->iter_reg))); 115 } 116 } 117 118 for (insn = BB_HEAD (bb); 119 insn != NEXT_INSN (BB_END (bb)); 120 insn = NEXT_INSN (insn)) 121 { 122 df_ref *def_rec; 123 HARD_REG_SET set_this_insn; 124 125 if (!NONDEBUG_INSN_P (insn)) 126 continue; 127 128 if (recog_memoized (insn) < 0 129 && (GET_CODE (PATTERN (insn)) == ASM_INPUT 130 || asm_noperands (PATTERN (insn)) >= 0)) 131 loop->has_asm = true; 132 133 CLEAR_HARD_REG_SET (set_this_insn); 134 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) 135 { 136 rtx dreg = DF_REF_REG (*def_rec); 137 138 if (!REG_P (dreg)) 139 continue; 140 141 add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg), 142 REGNO (dreg)); 143 } 144 145 if (insn == loop->loop_end) 146 CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg)); 147 else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn))) 148 loop->iter_reg_used = true; 149 IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn); 150 } 151 } 152 } 153 154 /* Compute the incoming_dest and incoming_src members of LOOP by 155 identifying the edges that jump into the loop. If there is more 156 than one block that jumps into the loop, incoming_src will be set 157 to NULL; likewise, if there is more than one block in the loop that 158 is the destination of an incoming edge, incoming_dest will be NULL. 159 160 Return true if either of these two fields is nonnull, false 161 otherwise. */ 162 static bool 163 process_incoming_edges (hwloop_info loop) 164 { 165 edge e; 166 edge_iterator ei; 167 bool first = true; 168 169 FOR_EACH_EDGE (e, ei, loop->incoming) 170 { 171 if (first) 172 { 173 loop->incoming_src = e->src; 174 loop->incoming_dest = e->dest; 175 first = false; 176 } 177 else 178 { 179 if (e->dest != loop->incoming_dest) 180 loop->incoming_dest = NULL; 181 if (e->src != loop->incoming_src) 182 loop->incoming_src = NULL; 183 } 184 } 185 if (loop->incoming_src == NULL && loop->incoming_dest == NULL) 186 return false; 187 188 return true; 189 } 190 191 /* Try to identify a forwarder block that jump into LOOP, and add it to 192 the set of blocks in the loop, updating the vector of incoming blocks as 193 well. This transformation gives a second chance to loops we couldn't 194 otherwise handle by increasing the chance that we'll end up with one 195 incoming_src block. 196 Return true if we made a change, false otherwise. */ 197 static bool 198 add_forwarder_blocks (hwloop_info loop) 199 { 200 edge e; 201 edge_iterator ei; 202 203 FOR_EACH_EDGE (e, ei, loop->incoming) 204 { 205 if (forwarder_block_p (e->src)) 206 { 207 edge e2; 208 edge_iterator ei2; 209 210 if (dump_file) 211 fprintf (dump_file, 212 ";; Adding forwarder block %d to loop %d and retrying\n", 213 e->src->index, loop->loop_no); 214 loop->blocks.safe_push (e->src); 215 bitmap_set_bit (loop->block_bitmap, e->src->index); 216 FOR_EACH_EDGE (e2, ei2, e->src->preds) 217 vec_safe_push (loop->incoming, e2); 218 loop->incoming->unordered_remove (ei.index); 219 return true; 220 } 221 } 222 return false; 223 } 224 225 /* Called from reorg_loops when a potential loop end is found. LOOP is 226 a newly set up structure describing the loop, it is this function's 227 responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the 228 loop_end insn and its enclosing basic block. REG is the loop counter 229 register. 230 For our purposes, a loop is defined by the set of blocks reachable from 231 the loop head in which the loop counter register is live. This matches 232 the expected use; targets that call into this code usually replace the 233 loop counter with a different special register. */ 234 static void 235 discover_loop (hwloop_info loop, basic_block tail_bb, rtx tail_insn, rtx reg) 236 { 237 bool found_tail; 238 unsigned dwork = 0; 239 basic_block bb; 240 vec<basic_block> works; 241 242 loop->tail = tail_bb; 243 loop->loop_end = tail_insn; 244 loop->iter_reg = reg; 245 vec_alloc (loop->incoming, 2); 246 loop->start_label = JUMP_LABEL (tail_insn); 247 248 if (EDGE_COUNT (tail_bb->succs) != 2) 249 { 250 loop->bad = true; 251 return; 252 } 253 loop->head = BRANCH_EDGE (tail_bb)->dest; 254 loop->successor = FALLTHRU_EDGE (tail_bb)->dest; 255 256 works.create (20); 257 works.safe_push (loop->head); 258 259 found_tail = false; 260 for (dwork = 0; works.iterate (dwork, &bb); dwork++) 261 { 262 edge e; 263 edge_iterator ei; 264 if (bb == EXIT_BLOCK_PTR) 265 { 266 /* We've reached the exit block. The loop must be bad. */ 267 if (dump_file) 268 fprintf (dump_file, 269 ";; Loop is bad - reached exit block while scanning\n"); 270 loop->bad = true; 271 break; 272 } 273 274 if (bitmap_bit_p (loop->block_bitmap, bb->index)) 275 continue; 276 277 /* We've not seen this block before. Add it to the loop's 278 list and then add each successor to the work list. */ 279 280 loop->blocks.safe_push (bb); 281 bitmap_set_bit (loop->block_bitmap, bb->index); 282 283 if (bb == tail_bb) 284 found_tail = true; 285 else 286 { 287 FOR_EACH_EDGE (e, ei, bb->succs) 288 { 289 basic_block succ = EDGE_SUCC (bb, ei.index)->dest; 290 if (REGNO_REG_SET_P (df_get_live_in (succ), 291 REGNO (loop->iter_reg))) 292 works.safe_push (succ); 293 } 294 } 295 } 296 297 if (!found_tail) 298 loop->bad = true; 299 300 /* Find the predecessor, and make sure nothing else jumps into this loop. */ 301 if (!loop->bad) 302 { 303 FOR_EACH_VEC_ELT (loop->blocks, dwork, bb) 304 { 305 edge e; 306 edge_iterator ei; 307 FOR_EACH_EDGE (e, ei, bb->preds) 308 { 309 basic_block pred = e->src; 310 311 if (!bb_in_loop_p (loop, pred)) 312 { 313 if (dump_file) 314 fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n", 315 loop->loop_no, pred->index, 316 e->dest->index); 317 vec_safe_push (loop->incoming, e); 318 } 319 } 320 } 321 322 if (!process_incoming_edges (loop)) 323 { 324 if (dump_file) 325 fprintf (dump_file, 326 ";; retrying loop %d with forwarder blocks\n", 327 loop->loop_no); 328 if (!add_forwarder_blocks (loop)) 329 { 330 if (dump_file) 331 fprintf (dump_file, ";; No forwarder blocks found\n"); 332 loop->bad = true; 333 } 334 else if (!process_incoming_edges (loop)) 335 { 336 if (dump_file) 337 fprintf (dump_file, 338 ";; can't find suitable entry for loop %d\n", 339 loop->loop_no); 340 } 341 } 342 } 343 344 works.release (); 345 } 346 347 /* Analyze the structure of the loops in the current function. Use 348 LOOP_STACK for bitmap allocations. Returns all the valid candidates for 349 hardware loops found in this function. HOOKS is the argument 350 passed to reorg_loops, used here to find the iteration registers 351 from a loop_end pattern. */ 352 static hwloop_info 353 discover_loops (bitmap_obstack *loop_stack, struct hw_doloop_hooks *hooks) 354 { 355 hwloop_info loops = NULL; 356 hwloop_info loop; 357 basic_block bb; 358 int nloops = 0; 359 360 /* Find all the possible loop tails. This means searching for every 361 loop_end instruction. For each one found, create a hwloop_info 362 structure and add the head block to the work list. */ 363 FOR_EACH_BB (bb) 364 { 365 rtx tail = BB_END (bb); 366 rtx insn, reg; 367 368 while (tail && GET_CODE (tail) == NOTE && tail != BB_HEAD (bb)) 369 tail = PREV_INSN (tail); 370 371 if (tail == NULL_RTX) 372 continue; 373 374 if (!JUMP_P (tail)) 375 continue; 376 reg = hooks->end_pattern_reg (tail); 377 if (reg == NULL_RTX) 378 continue; 379 380 /* A possible loop end */ 381 382 /* There's a degenerate case we can handle - an empty loop consisting 383 of only a back branch. Handle that by deleting the branch. */ 384 insn = JUMP_LABEL (tail); 385 while (insn && !NONDEBUG_INSN_P (insn)) 386 insn = NEXT_INSN (insn); 387 if (insn == tail) 388 { 389 basic_block succ = FALLTHRU_EDGE (bb)->dest; 390 if (dump_file) 391 { 392 fprintf (dump_file, ";; degenerate loop ending at\n"); 393 print_rtl_single (dump_file, tail); 394 } 395 if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg))) 396 { 397 if (dump_file) 398 fprintf (dump_file, ";; deleting it\n"); 399 delete_insn_and_edges (tail); 400 continue; 401 } 402 } 403 404 loop = XCNEW (struct hwloop_info_d); 405 loop->next = loops; 406 loops = loop; 407 loop->loop_no = nloops++; 408 loop->blocks.create (20); 409 loop->block_bitmap = BITMAP_ALLOC (loop_stack); 410 411 if (dump_file) 412 { 413 fprintf (dump_file, ";; potential loop %d ending at\n", 414 loop->loop_no); 415 print_rtl_single (dump_file, tail); 416 } 417 418 discover_loop (loop, bb, tail, reg); 419 } 420 421 /* Compute loop nestings. Given two loops A and B, either the sets 422 of their blocks don't intersect at all, or one is the subset of 423 the other, or the blocks don't form a good nesting structure. */ 424 for (loop = loops; loop; loop = loop->next) 425 { 426 hwloop_info other; 427 428 if (loop->bad) 429 continue; 430 431 for (other = loops; other; other = other->next) 432 { 433 if (other->bad) 434 continue; 435 436 if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap)) 437 continue; 438 if (!bitmap_intersect_compl_p (other->block_bitmap, 439 loop->block_bitmap)) 440 loop->loops.safe_push (other); 441 else if (!bitmap_intersect_compl_p (loop->block_bitmap, 442 other->block_bitmap)) 443 other->loops.safe_push (loop); 444 else 445 { 446 if (dump_file) 447 fprintf (dump_file, 448 ";; can't find suitable nesting for loops %d and %d\n", 449 loop->loop_no, other->loop_no); 450 loop->bad = other->bad = true; 451 } 452 } 453 } 454 455 if (dump_file) 456 dump_hwloops (loops); 457 458 return loops; 459 } 460 461 /* Free up the loop structures in LOOPS. */ 462 static void 463 free_loops (hwloop_info loops) 464 { 465 while (loops) 466 { 467 hwloop_info loop = loops; 468 loops = loop->next; 469 loop->loops.release (); 470 loop->blocks.release (); 471 BITMAP_FREE (loop->block_bitmap); 472 XDELETE (loop); 473 } 474 } 475 476 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux) 477 478 /* Initialize the aux fields to give ascending indices to basic blocks. */ 479 static void 480 set_bb_indices (void) 481 { 482 basic_block bb; 483 intptr_t index; 484 485 index = 0; 486 FOR_EACH_BB (bb) 487 bb->aux = (void *) index++; 488 } 489 490 /* The taken-branch edge from the loop end can actually go forward. 491 If the target's hardware loop support requires that the loop end be 492 after the loop start, try to reorder a loop's basic blocks when we 493 find such a case. 494 This is not very aggressive; it only moves at most one block. It 495 does not introduce new branches into loops; it may remove them, or 496 it may switch fallthru/jump edges. */ 497 static void 498 reorder_loops (hwloop_info loops) 499 { 500 basic_block bb; 501 hwloop_info loop; 502 503 cfg_layout_initialize (0); 504 505 set_bb_indices (); 506 507 for (loop = loops; loop; loop = loop->next) 508 { 509 edge e; 510 edge_iterator ei; 511 512 if (loop->bad) 513 continue; 514 515 if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail)) 516 continue; 517 518 FOR_EACH_EDGE (e, ei, loop->head->succs) 519 { 520 if (bitmap_bit_p (loop->block_bitmap, e->dest->index) 521 && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail)) 522 { 523 basic_block start_bb = e->dest; 524 basic_block start_prev_bb = start_bb->prev_bb; 525 526 if (dump_file) 527 fprintf (dump_file, ";; Moving block %d before block %d\n", 528 loop->head->index, start_bb->index); 529 loop->head->prev_bb->next_bb = loop->head->next_bb; 530 loop->head->next_bb->prev_bb = loop->head->prev_bb; 531 532 loop->head->prev_bb = start_prev_bb; 533 loop->head->next_bb = start_bb; 534 start_prev_bb->next_bb = start_bb->prev_bb = loop->head; 535 536 set_bb_indices (); 537 break; 538 } 539 } 540 loops = loops->next; 541 } 542 543 FOR_EACH_BB (bb) 544 { 545 if (bb->next_bb != EXIT_BLOCK_PTR) 546 bb->aux = bb->next_bb; 547 else 548 bb->aux = NULL; 549 } 550 cfg_layout_finalize (); 551 clear_aux_for_blocks (); 552 df_analyze (); 553 } 554 555 /* Call the OPT function for LOOP and all of its sub-loops. This is 556 done in a depth-first search; innermost loops are visited first. 557 OPTIMIZE and FAIL are the functions passed to reorg_loops by the 558 target's reorg pass. */ 559 static void 560 optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks) 561 { 562 int ix; 563 hwloop_info inner; 564 int inner_depth = 0; 565 566 if (loop->visited) 567 return; 568 569 loop->visited = 1; 570 571 if (loop->bad) 572 { 573 if (dump_file) 574 fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no); 575 goto bad_loop; 576 } 577 578 /* Every loop contains in its list of inner loops every loop nested inside 579 it, even if there are intermediate loops. This works because we're doing 580 a depth-first search here and never visit a loop more than once. 581 Recursion depth is effectively limited by the number of available 582 hardware registers. */ 583 for (ix = 0; loop->loops.iterate (ix, &inner); ix++) 584 { 585 optimize_loop (inner, hooks); 586 587 if (!inner->bad && inner_depth < inner->depth) 588 inner_depth = inner->depth; 589 /* The set of registers may be changed while optimizing the inner 590 loop. */ 591 IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop); 592 } 593 594 loop->depth = inner_depth + 1; 595 596 if (hooks->opt (loop)) 597 return; 598 599 bad_loop: 600 if (dump_file) 601 fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no); 602 603 loop->bad = true; 604 hooks->fail (loop); 605 } 606 607 /* This function can be used from a port's machine_dependent_reorg to 608 find and analyze loops that end in loop_end instructions. It uses 609 a set of function pointers in HOOKS to call back into the 610 target-specific functions to perform the actual machine-specific 611 transformations. 612 613 Such transformations typically involve additional set-up 614 instructions before the loop, to define loop bounds or set up a 615 special loop counter register. 616 617 DO_REORDER should be set to true if we should try to use the 618 reorder_loops function to ensure the loop end occurs after the loop 619 start. This is for use by targets where the loop hardware requires 620 this condition. 621 622 HOOKS is used to pass in target specific hooks; see 623 hw-doloop.h. */ 624 void 625 reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks) 626 { 627 hwloop_info loops = NULL; 628 hwloop_info loop; 629 bitmap_obstack loop_stack; 630 631 df_live_add_problem (); 632 df_live_set_all_dirty (); 633 df_analyze (); 634 635 bitmap_obstack_initialize (&loop_stack); 636 637 if (dump_file) 638 fprintf (dump_file, ";; Find loops, first pass\n\n"); 639 640 loops = discover_loops (&loop_stack, hooks); 641 642 if (do_reorder) 643 { 644 reorder_loops (loops); 645 free_loops (loops); 646 647 if (dump_file) 648 fprintf (dump_file, ";; Find loops, second pass\n\n"); 649 650 loops = discover_loops (&loop_stack, hooks); 651 } 652 653 for (loop = loops; loop; loop = loop->next) 654 scan_loop (loop); 655 656 /* Now apply the optimizations. */ 657 for (loop = loops; loop; loop = loop->next) 658 optimize_loop (loop, hooks); 659 660 if (dump_file) 661 { 662 fprintf (dump_file, ";; After hardware loops optimization:\n\n"); 663 dump_hwloops (loops); 664 } 665 666 free_loops (loops); 667 668 if (dump_file) 669 print_rtl (dump_file, get_insns ()); 670 } 671 #endif 672