/* RTL-level loop invariant motion.
   Copyright (C) 2004-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This implements the loop invariant motion pass.  It is very simple
   (no calls, no loads/stores, etc.).  This should be sufficient to cleanup
   things like address arithmetics -- other more complicated invariants should
   be eliminated on GIMPLE either in tree-ssa-loop-im.c or in tree-ssa-pre.c.

   We proceed loop by loop -- it is simpler than trying to handle things
   globally and should not lose much.  First we inspect all sets inside loop
   and create a dependency graph on insns (saying "to move this insn, you must
   also move the following insns").

   We then need to determine what to move.  We estimate the number of registers
   used and move as many invariants as possible while we still have enough free
   registers.  We prefer the expensive invariants.

   Then we move the selected invariants out of the loop, creating new
   temporaries for them if necessary.
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hard-reg-set.h"
#include "rtl.h"
#include "tm_p.h"
#include "obstack.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "symtab.h"
#include "flags.h"
#include "statistics.h"
#include "double-int.h"
#include "real.h"
#include "fixed-value.h"
#include "alias.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "recog.h"
#include "target.h"
#include "df.h"
#include "hash-table.h"
#include "except.h"
#include "params.h"
#include "regs.h"
#include "ira.h"
#include "dumpfile.h"

/* The data stored for the loop.  Hung off loop->aux and accessed through
   the LOOP_DATA macro below.  */

struct loop_data
{
  struct loop *outermost_exit;	/* The outermost exit of the loop.  */
  bool has_call;		/* True if the loop contains a call.  */
  /* Maximal register pressure inside loop for given register class
     (defined only for the pressure classes).  */
  int max_reg_pressure[N_REG_CLASSES];
  /* Loop regs referenced and live pseudo-registers.  */
  bitmap_head regs_ref;
  bitmap_head regs_live;
};

#define LOOP_DATA(LOOP) ((struct loop_data *) (LOOP)->aux)

/* The description of an use.  */

struct use
{
  rtx *pos;			/* Position of the use.  */
  rtx_insn *insn;		/* The insn in that the use occurs.  */
  unsigned addr_use_p;		/* Whether the use occurs in an address.  */
  struct use *next;		/* Next use in the list.  */
};

/* The description of a def.
*/

struct def
{
  struct use *uses;		/* The list of uses that are uniquely reached
				   by it.  */
  unsigned n_uses;		/* Number of such uses.  */
  unsigned n_addr_uses;		/* Number of uses in addresses.  */
  unsigned invno;		/* The corresponding invariant.  */
};

/* The data stored for each invariant.  */

struct invariant
{
  /* The number of the invariant.  */
  unsigned invno;

  /* The number of the invariant with the same value.  */
  unsigned eqto;

  /* The number of invariants which eqto this.  */
  unsigned eqno;

  /* If we moved the invariant out of the loop, the register that contains its
     value.  */
  rtx reg;

  /* If we moved the invariant out of the loop, the original regno
     that contained its value.  */
  int orig_regno;

  /* The definition of the invariant.  */
  struct def *def;

  /* The insn in that it is defined.  */
  rtx_insn *insn;

  /* Whether it is always executed.  */
  bool always_executed;

  /* Whether to move the invariant.  */
  bool move;

  /* Whether the invariant is cheap when used as an address.  */
  bool cheap_address;

  /* Cost of the invariant.  */
  unsigned cost;

  /* The invariants it depends on.  */
  bitmap depends_on;

  /* Used for detecting already visited invariants during determining
     costs of movements.  */
  unsigned stamp;
};

/* Currently processed loop.  */
static struct loop *curr_loop;

/* Table of invariants indexed by the df_ref uid field.  */

static unsigned int invariant_table_size = 0;
static struct invariant ** invariant_table;

/* Entry for hash table of invariant expressions.  */

struct invariant_expr_entry
{
  /* The invariant.  */
  struct invariant *inv;

  /* Its value.  */
  rtx expr;

  /* Its mode.  */
  machine_mode mode;

  /* Its hash.  */
  hashval_t hash;
};

/* The actual stamp for marking already visited invariants during determining
   costs of movements.  */

static unsigned actual_stamp;

typedef struct invariant *invariant_p;


/* The invariants.  */

static vec<invariant_p> invariants;

/* Check the size of the invariant table and realloc if necessary.  */

static void
check_invariant_table_size (void)
{
  if (invariant_table_size < DF_DEFS_TABLE_SIZE ())
    {
      /* Grow 25% past the current df defs table size to amortize
	 repeated resizing; new slots are zeroed.  */
      unsigned int new_size = DF_DEFS_TABLE_SIZE () + (DF_DEFS_TABLE_SIZE () / 4);
      invariant_table = XRESIZEVEC (struct invariant *, invariant_table, new_size);
      memset (&invariant_table[invariant_table_size], 0,
	      (new_size - invariant_table_size) * sizeof (struct invariant *));
      invariant_table_size = new_size;
    }
}

/* Test for possibility of invariantness of X.  */

static bool
check_maybe_invariant (rtx x)
{
  enum rtx_code code = GET_CODE (x);
  int i, j;
  const char *fmt;

  switch (code)
    {
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      return true;

    case PC:
    case CC0:
    case UNSPEC_VOLATILE:
    case CALL:
      return false;

    case REG:
      return true;

    case MEM:
      /* Load/store motion is done elsewhere.  ??? Perhaps also add it here?
	 It should not be hard, and might be faster than "elsewhere".  */

      /* Just handle the most trivial case where we load from an unchanging
	 location (most importantly, pic tables).  */
      if (MEM_READONLY_P (x) && !MEM_VOLATILE_P (x))
	break;

      return false;

    case ASM_OPERANDS:
      /* Don't mess with insns declared volatile.  */
      if (MEM_VOLATILE_P (x))
	return false;
      break;

    default:
      break;
    }

  /* Recurse into every rtx ('e') and rtvec ('E') operand; all
     sub-expressions must themselves be possibly invariant.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  if (!check_maybe_invariant (XEXP (x, i)))
	    return false;
	}
      else if (fmt[i] == 'E')
	{
	  for (j = 0; j < XVECLEN (x, i); j++)
	    if (!check_maybe_invariant (XVECEXP (x, i, j)))
	      return false;
	}
    }

  return true;
}

/* Returns the invariant definition for USE, or NULL if USE is not
   invariant.  */

static struct invariant *
invariant_for_use (df_ref use)
{
  struct df_link *defs;
  df_ref def;
  basic_block bb = DF_REF_BB (use), def_bb;

  /* A read-write use is never treated as an invariant use.  */
  if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
    return NULL;

  /* Require exactly one reaching definition.  */
  defs = DF_REF_CHAIN (use);
  if (!defs || defs->next)
    return NULL;
  def = defs->ref;
  check_invariant_table_size ();
  if (!invariant_table[DF_REF_ID (def)])
    return NULL;

  /* The definition must dominate the use.  */
  def_bb = DF_REF_BB (def);
  if (!dominated_by_p (CDI_DOMINATORS, bb, def_bb))
    return NULL;
  return invariant_table[DF_REF_ID (def)];
}

/* Computes hash value for invariant expression X in INSN.
*/

static hashval_t
hash_invariant_expr_1 (rtx_insn *insn, rtx x)
{
  enum rtx_code code = GET_CODE (x);
  int i, j;
  const char *fmt;
  hashval_t val = code;
  int do_not_record_p;
  df_ref use;
  struct invariant *inv;

  switch (code)
    {
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false);

    case REG:
      /* A register whose value is a known invariant hashes to that
	 invariant's equivalence class number (eqto), so equivalent
	 computations get equal hashes.  */
      use = df_find_use (insn, x);
      if (!use)
	return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false);
      inv = invariant_for_use (use);
      if (!inv)
	return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false);

      gcc_assert (inv->eqto != ~0u);
      return inv->eqto;

    default:
      break;
    }

  /* Combine sub-expression hashes (and integer operands) by xor.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	val ^= hash_invariant_expr_1 (insn, XEXP (x, i));
      else if (fmt[i] == 'E')
	{
	  for (j = 0; j < XVECLEN (x, i); j++)
	    val ^= hash_invariant_expr_1 (insn, XVECEXP (x, i, j));
	}
      else if (fmt[i] == 'i' || fmt[i] == 'n')
	val ^= XINT (x, i);
    }

  return val;
}

/* Returns true if the invariant expressions E1 and E2 used in insns INSN1
   and INSN2 have always the same value.  */

static bool
invariant_expr_equal_p (rtx_insn *insn1, rtx e1, rtx_insn *insn2, rtx e2)
{
  enum rtx_code code = GET_CODE (e1);
  int i, j;
  const char *fmt;
  df_ref use1, use2;
  struct invariant *inv1 = NULL, *inv2 = NULL;
  rtx sub1, sub2;

  /* If mode of only one of the operands is VOIDmode, it is not equivalent to
     the other one.  If both are VOIDmode, we rely on the caller of this
     function to verify that their modes are the same.  */
  if (code != GET_CODE (e2) || GET_MODE (e1) != GET_MODE (e2))
    return false;

  switch (code)
    {
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      return rtx_equal_p (e1, e2);

    case REG:
      /* Two registers are equal when both carry invariants from the same
	 equivalence class; otherwise fall back to plain rtx equality only
	 when neither carries an invariant.  */
      use1 = df_find_use (insn1, e1);
      use2 = df_find_use (insn2, e2);
      if (use1)
	inv1 = invariant_for_use (use1);
      if (use2)
	inv2 = invariant_for_use (use2);

      if (!inv1 && !inv2)
	return rtx_equal_p (e1, e2);

      if (!inv1 || !inv2)
	return false;

      gcc_assert (inv1->eqto != ~0u);
      gcc_assert (inv2->eqto != ~0u);
      return inv1->eqto == inv2->eqto;

    default:
      break;
    }

  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  sub1 = XEXP (e1, i);
	  sub2 = XEXP (e2, i);

	  if (!invariant_expr_equal_p (insn1, sub1, insn2, sub2))
	    return false;
	}

      else if (fmt[i] == 'E')
	{
	  if (XVECLEN (e1, i) != XVECLEN (e2, i))
	    return false;

	  for (j = 0; j < XVECLEN (e1, i); j++)
	    {
	      sub1 = XVECEXP (e1, i, j);
	      sub2 = XVECEXP (e2, i, j);

	      if (!invariant_expr_equal_p (insn1, sub1, insn2, sub2))
		return false;
	    }
	}
      else if (fmt[i] == 'i' || fmt[i] == 'n')
	{
	  if (XINT (e1, i) != XINT (e2, i))
	    return false;
	}
      /* Unhandled type of subexpression, we fail conservatively.  */
      else
	return false;
    }

  return true;
}

/* Hash-table traits for invariant_expr_entry; entries are freed with
   free () on removal (typed_free_remove).  */

struct invariant_expr_hasher : typed_free_remove <invariant_expr_entry>
{
  typedef invariant_expr_entry value_type;
  typedef invariant_expr_entry compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Returns hash value for invariant expression entry ENTRY.
*/ 465 466 inline hashval_t 467 invariant_expr_hasher::hash (const value_type *entry) 468 { 469 return entry->hash; 470 } 471 472 /* Compares invariant expression entries ENTRY1 and ENTRY2. */ 473 474 inline bool 475 invariant_expr_hasher::equal (const value_type *entry1, 476 const compare_type *entry2) 477 { 478 if (entry1->mode != entry2->mode) 479 return 0; 480 481 return invariant_expr_equal_p (entry1->inv->insn, entry1->expr, 482 entry2->inv->insn, entry2->expr); 483 } 484 485 typedef hash_table<invariant_expr_hasher> invariant_htab_type; 486 487 /* Checks whether invariant with value EXPR in machine mode MODE is 488 recorded in EQ. If this is the case, return the invariant. Otherwise 489 insert INV to the table for this expression and return INV. */ 490 491 static struct invariant * 492 find_or_insert_inv (invariant_htab_type *eq, rtx expr, machine_mode mode, 493 struct invariant *inv) 494 { 495 hashval_t hash = hash_invariant_expr_1 (inv->insn, expr); 496 struct invariant_expr_entry *entry; 497 struct invariant_expr_entry pentry; 498 invariant_expr_entry **slot; 499 500 pentry.expr = expr; 501 pentry.inv = inv; 502 pentry.mode = mode; 503 slot = eq->find_slot_with_hash (&pentry, hash, INSERT); 504 entry = *slot; 505 506 if (entry) 507 return entry->inv; 508 509 entry = XNEW (struct invariant_expr_entry); 510 entry->inv = inv; 511 entry->expr = expr; 512 entry->mode = mode; 513 entry->hash = hash; 514 *slot = entry; 515 516 return inv; 517 } 518 519 /* Finds invariants identical to INV and records the equivalence. EQ is the 520 hash table of the invariants. 
*/ 521 522 static void 523 find_identical_invariants (invariant_htab_type *eq, struct invariant *inv) 524 { 525 unsigned depno; 526 bitmap_iterator bi; 527 struct invariant *dep; 528 rtx expr, set; 529 machine_mode mode; 530 struct invariant *tmp; 531 532 if (inv->eqto != ~0u) 533 return; 534 535 EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, depno, bi) 536 { 537 dep = invariants[depno]; 538 find_identical_invariants (eq, dep); 539 } 540 541 set = single_set (inv->insn); 542 expr = SET_SRC (set); 543 mode = GET_MODE (expr); 544 if (mode == VOIDmode) 545 mode = GET_MODE (SET_DEST (set)); 546 547 tmp = find_or_insert_inv (eq, expr, mode, inv); 548 inv->eqto = tmp->invno; 549 550 if (tmp->invno != inv->invno && inv->always_executed) 551 tmp->eqno++; 552 553 if (dump_file && inv->eqto != inv->invno) 554 fprintf (dump_file, 555 "Invariant %d is equivalent to invariant %d.\n", 556 inv->invno, inv->eqto); 557 } 558 559 /* Find invariants with the same value and record the equivalences. */ 560 561 static void 562 merge_identical_invariants (void) 563 { 564 unsigned i; 565 struct invariant *inv; 566 invariant_htab_type eq (invariants.length ()); 567 568 FOR_EACH_VEC_ELT (invariants, i, inv) 569 find_identical_invariants (&eq, inv); 570 } 571 572 /* Determines the basic blocks inside LOOP that are always executed and 573 stores their bitmap to ALWAYS_REACHED. MAY_EXIT is a bitmap of 574 basic blocks that may either exit the loop, or contain the call that 575 does not have to return. BODY is body of the loop obtained by 576 get_loop_body_in_dom_order. */ 577 578 static void 579 compute_always_reached (struct loop *loop, basic_block *body, 580 bitmap may_exit, bitmap always_reached) 581 { 582 unsigned i; 583 584 for (i = 0; i < loop->num_nodes; i++) 585 { 586 if (dominated_by_p (CDI_DOMINATORS, loop->latch, body[i])) 587 bitmap_set_bit (always_reached, i); 588 589 if (bitmap_bit_p (may_exit, i)) 590 return; 591 } 592 } 593 594 /* Finds exits out of the LOOP with body BODY. 
   Marks blocks in that we may
   exit the loop by cfg edge to HAS_EXIT and MAY_EXIT.  In MAY_EXIT
   additionally mark blocks that may exit due to a call.  */

static void
find_exits (struct loop *loop, basic_block *body,
	    bitmap may_exit, bitmap has_exit)
{
  unsigned i;
  edge_iterator ei;
  edge e;
  struct loop *outermost_exit = loop, *aexit;
  bool has_call = false;
  rtx_insn *insn;

  for (i = 0; i < loop->num_nodes; i++)
    {
      /* Blocks belonging directly to LOOP are scanned insn by insn;
	 blocks of subloops are handled via the subloop's LOOP_DATA
	 below.  */
      if (body[i]->loop_father == loop)
	{
	  FOR_BB_INSNS (body[i], insn)
	    {
	      /* A call that is not const/pure (or is a looping
		 const/pure call) may never return, so the block may
		 effectively exit the loop.  */
	      if (CALL_P (insn)
		  && (RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
		      || !RTL_CONST_OR_PURE_CALL_P (insn)))
		{
		  has_call = true;
		  bitmap_set_bit (may_exit, i);
		  break;
		}
	    }

	  FOR_EACH_EDGE (e, ei, body[i]->succs)
	    {
	      if (! flow_bb_inside_loop_p (loop, e->dest))
		{
		  bitmap_set_bit (may_exit, i);
		  bitmap_set_bit (has_exit, i);
		  outermost_exit = find_common_loop (outermost_exit,
						     e->dest->loop_father);
		}
	      /* If we enter a subloop that might never terminate treat
		 it like a possible exit.  */
	      if (flow_loop_nested_p (loop, e->dest->loop_father))
		bitmap_set_bit (may_exit, i);
	    }
	  continue;
	}

      /* Use the data stored for the subloop to decide whether we may exit
	 through it.  It is sufficient to do this for header of the loop,
	 as other basic blocks inside it must be dominated by it.  */
      if (body[i]->loop_father->header != body[i])
	continue;

      if (LOOP_DATA (body[i]->loop_father)->has_call)
	{
	  has_call = true;
	  bitmap_set_bit (may_exit, i);
	}
      aexit = LOOP_DATA (body[i]->loop_father)->outermost_exit;
      if (aexit != loop)
	{
	  bitmap_set_bit (may_exit, i);
	  bitmap_set_bit (has_exit, i);

	  if (flow_loop_nested_p (aexit, outermost_exit))
	    outermost_exit = aexit;
	}
    }

  /* Allocate and fill LOOP's own loop_data so enclosing loops can use
     it the same way above.  */
  if (loop->aux == NULL)
    {
      loop->aux = xcalloc (1, sizeof (struct loop_data));
      bitmap_initialize (&LOOP_DATA (loop)->regs_ref, &reg_obstack);
      bitmap_initialize (&LOOP_DATA (loop)->regs_live, &reg_obstack);
    }
  LOOP_DATA (loop)->outermost_exit = outermost_exit;
  LOOP_DATA (loop)->has_call = has_call;
}

/* Check whether we may assign a value to X from a register.  */

static bool
may_assign_reg_p (rtx x)
{
  return (GET_MODE (x) != VOIDmode
	  && GET_MODE (x) != BLKmode
	  && can_copy_p (GET_MODE (x))
	  && (!REG_P (x)
	      || !HARD_REGISTER_P (x)
	      || REGNO_REG_CLASS (REGNO (x)) != NO_REGS));
}

/* Finds definitions that may correspond to invariants in LOOP with body
   BODY.  */

static void
find_defs (struct loop *loop)
{
  if (dump_file)
    {
      fprintf (dump_file,
	       "*****starting processing of loop %d ******\n",
	       loop->num);
    }

  /* (Re)build use-def chains restricted to LOOP, pruning dead defs.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
  df_chain_add_problem (DF_UD_CHAIN);
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_analyze_loop (loop);
  check_invariant_table_size ();

  if (dump_file)
    {
      df_dump_region (dump_file);
      fprintf (dump_file,
	       "*****ending processing of loop %d ******\n",
	       loop->num);
    }
}

/* Creates a new invariant for definition DEF in INSN, depending on invariants
   in DEPENDS_ON.  ALWAYS_EXECUTED is true if the insn is always executed,
   unless the program ends due to a function call.
The newly created invariant 719 is returned. */ 720 721 static struct invariant * 722 create_new_invariant (struct def *def, rtx_insn *insn, bitmap depends_on, 723 bool always_executed) 724 { 725 struct invariant *inv = XNEW (struct invariant); 726 rtx set = single_set (insn); 727 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); 728 729 inv->def = def; 730 inv->always_executed = always_executed; 731 inv->depends_on = depends_on; 732 733 /* If the set is simple, usually by moving it we move the whole store out of 734 the loop. Otherwise we save only cost of the computation. */ 735 if (def) 736 { 737 inv->cost = set_rtx_cost (set, speed); 738 /* ??? Try to determine cheapness of address computation. Unfortunately 739 the address cost is only a relative measure, we can't really compare 740 it with any absolute number, but only with other address costs. 741 But here we don't have any other addresses, so compare with a magic 742 number anyway. It has to be large enough to not regress PR33928 743 (by avoiding to move reg+8,reg+16,reg+24 invariants), but small 744 enough to not regress 410.bwaves either (by still moving reg+reg 745 invariants). 746 See http://gcc.gnu.org/ml/gcc-patches/2009-10/msg01210.html . */ 747 inv->cheap_address = address_cost (SET_SRC (set), word_mode, 748 ADDR_SPACE_GENERIC, speed) < 3; 749 } 750 else 751 { 752 inv->cost = set_src_cost (SET_SRC (set), speed); 753 inv->cheap_address = false; 754 } 755 756 inv->move = false; 757 inv->reg = NULL_RTX; 758 inv->orig_regno = -1; 759 inv->stamp = 0; 760 inv->insn = insn; 761 762 inv->invno = invariants.length (); 763 inv->eqto = ~0u; 764 765 /* Itself. 
*/ 766 inv->eqno = 1; 767 768 if (def) 769 def->invno = inv->invno; 770 invariants.safe_push (inv); 771 772 if (dump_file) 773 { 774 fprintf (dump_file, 775 "Set in insn %d is invariant (%d), cost %d, depends on ", 776 INSN_UID (insn), inv->invno, inv->cost); 777 dump_bitmap (dump_file, inv->depends_on); 778 } 779 780 return inv; 781 } 782 783 /* Record USE at DEF. */ 784 785 static void 786 record_use (struct def *def, df_ref use) 787 { 788 struct use *u = XNEW (struct use); 789 790 u->pos = DF_REF_REAL_LOC (use); 791 u->insn = DF_REF_INSN (use); 792 u->addr_use_p = (DF_REF_TYPE (use) == DF_REF_REG_MEM_LOAD 793 || DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE); 794 u->next = def->uses; 795 def->uses = u; 796 def->n_uses++; 797 if (u->addr_use_p) 798 def->n_addr_uses++; 799 } 800 801 /* Finds the invariants USE depends on and store them to the DEPENDS_ON 802 bitmap. Returns true if all dependencies of USE are known to be 803 loop invariants, false otherwise. */ 804 805 static bool 806 check_dependency (basic_block bb, df_ref use, bitmap depends_on) 807 { 808 df_ref def; 809 basic_block def_bb; 810 struct df_link *defs; 811 struct def *def_data; 812 struct invariant *inv; 813 814 if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE) 815 return false; 816 817 defs = DF_REF_CHAIN (use); 818 if (!defs) 819 { 820 unsigned int regno = DF_REF_REGNO (use); 821 822 /* If this is the use of an uninitialized argument register that is 823 likely to be spilled, do not move it lest this might extend its 824 lifetime and cause reload to die. This can occur for a call to 825 a function taking complex number arguments and moving the insns 826 preparing the arguments without moving the call itself wouldn't 827 gain much in practice. 
*/ 828 if ((DF_REF_FLAGS (use) & DF_HARD_REG_LIVE) 829 && FUNCTION_ARG_REGNO_P (regno) 830 && targetm.class_likely_spilled_p (REGNO_REG_CLASS (regno))) 831 return false; 832 833 return true; 834 } 835 836 if (defs->next) 837 return false; 838 839 def = defs->ref; 840 check_invariant_table_size (); 841 inv = invariant_table[DF_REF_ID (def)]; 842 if (!inv) 843 return false; 844 845 def_data = inv->def; 846 gcc_assert (def_data != NULL); 847 848 def_bb = DF_REF_BB (def); 849 /* Note that in case bb == def_bb, we know that the definition 850 dominates insn, because def has invariant_table[DF_REF_ID(def)] 851 defined and we process the insns in the basic block bb 852 sequentially. */ 853 if (!dominated_by_p (CDI_DOMINATORS, bb, def_bb)) 854 return false; 855 856 bitmap_set_bit (depends_on, def_data->invno); 857 return true; 858 } 859 860 861 /* Finds the invariants INSN depends on and store them to the DEPENDS_ON 862 bitmap. Returns true if all dependencies of INSN are known to be 863 loop invariants, false otherwise. */ 864 865 static bool 866 check_dependencies (rtx_insn *insn, bitmap depends_on) 867 { 868 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); 869 df_ref use; 870 basic_block bb = BLOCK_FOR_INSN (insn); 871 872 FOR_EACH_INSN_INFO_USE (use, insn_info) 873 if (!check_dependency (bb, use, depends_on)) 874 return false; 875 FOR_EACH_INSN_INFO_EQ_USE (use, insn_info) 876 if (!check_dependency (bb, use, depends_on)) 877 return false; 878 879 return true; 880 } 881 882 /* Pre-check candidate DEST to skip the one which can not make a valid insn 883 during move_invariant_reg. SIMPLE is to skip HARD_REGISTER. 
*/
static bool
pre_check_invariant_p (bool simple, rtx dest)
{
  /* Only worth checking for a simple pseudo destination that has more
     than one definition in the function.  */
  if (simple && REG_P (dest) && DF_REG_DEF_COUNT (REGNO (dest)) > 1)
    {
      df_ref use;
      rtx ref;
      unsigned int i = REGNO (dest);
      struct df_insn_info *insn_info;
      df_ref def_rec;

      /* Walk all uses of the register; reject if any using insn also
	 defines it.  */
      for (use = DF_REG_USE_CHAIN (i); use; use = DF_REF_NEXT_REG (use))
	{
	  ref = DF_REF_INSN (use);
	  insn_info = DF_INSN_INFO_GET (ref);

	  FOR_EACH_INSN_INFO_DEF (def_rec, insn_info)
	    if (DF_REF_REGNO (def_rec) == i)
	      {
		/* Multi definitions at this stage, most likely are due to
		   instruction constraints, which requires both read and write
		   on the same register.  Since move_invariant_reg is not
		   powerful enough to handle such cases, just ignore the INV
		   and leave the chance to others.  */
		return false;
	      }
	}
    }
  return true;
}

/* Finds invariant in INSN.  ALWAYS_REACHED is true if the insn is always
   executed.  ALWAYS_EXECUTED is true if the insn is always executed,
   unless the program ends due to a function call.  */

static void
find_invariant_insn (rtx_insn *insn, bool, bool always_executed)
{
  df_ref ref;
  struct def *def;
  bitmap depends_on;
  rtx set, dest;
  bool simple = true;
  struct invariant *inv;

#ifdef HAVE_cc0
  /* We can't move a CC0 setter without the user.  */
  if (sets_cc0_p (insn))
    return;
#endif

  set = single_set (insn);
  if (!set)
    return;
  dest = SET_DEST (set);

  /* Hard-register or non-register destinations are handled as
     "non-simple": the invariant is recorded but gets no struct def and
     no invariant_table entry.  */
  if (!REG_P (dest)
      || HARD_REGISTER_P (dest))
    simple = false;

  if (!may_assign_reg_p (dest)
      || !pre_check_invariant_p (simple, dest)
      || !check_maybe_invariant (SET_SRC (set)))
    return;

  /* If the insn can throw exception, we cannot move it at all without changing
     cfg.  */
  if (can_throw_internal (insn))
    return;

  /* We cannot make trapping insn executed.  */
  if (may_trap_or_fault_p (PATTERN (insn)))
    return;

  depends_on = BITMAP_ALLOC (NULL);
  if (!check_dependencies (insn, depends_on))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (simple)
    def = XCNEW (struct def);
  else
    def = NULL;

  inv = create_new_invariant (def, insn, depends_on, always_executed);

  /* Register the definition in invariant_table so later uses of DEST
     can be mapped back to this invariant.  */
  if (simple)
    {
      ref = df_find_def (insn, dest);
      check_invariant_table_size ();
      invariant_table[DF_REF_ID (ref)] = inv;
    }
}

/* Record registers used in INSN that have a unique invariant definition.  */

static void
record_uses (rtx_insn *insn)
{
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  struct invariant *inv;

  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      inv = invariant_for_use (use);
      if (inv)
	record_use (inv->def, use);
    }
  FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
    {
      inv = invariant_for_use (use);
      if (inv)
	record_use (inv->def, use);
    }
}

/* Finds invariants in INSN.  ALWAYS_REACHED is true if the insn is always
   executed.  ALWAYS_EXECUTED is true if the insn is always executed,
   unless the program ends due to a function call.  */

static void
find_invariants_insn (rtx_insn *insn, bool always_reached, bool always_executed)
{
  find_invariant_insn (insn, always_reached, always_executed);
  record_uses (insn);
}

/* Finds invariants in basic block BB.  ALWAYS_REACHED is true if the
   basic block is always executed.  ALWAYS_EXECUTED is true if the basic
   block is always executed, unless the program ends due to a function
   call.
*/

static void
find_invariants_bb (basic_block bb, bool always_reached, bool always_executed)
{
  rtx_insn *insn;

  FOR_BB_INSNS (bb, insn)
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      find_invariants_insn (insn, always_reached, always_executed);

      /* After a call that may not return, the rest of the block is no
	 longer always reached.  */
      if (always_reached
	  && CALL_P (insn)
	  && (RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
	      || ! RTL_CONST_OR_PURE_CALL_P (insn)))
	always_reached = false;
    }
}

/* Finds invariants in LOOP with body BODY.  ALWAYS_REACHED is the bitmap of
   basic blocks in BODY that are always executed.  ALWAYS_EXECUTED is the
   bitmap of basic blocks in BODY that are always executed unless the program
   ends due to a function call.  */

static void
find_invariants_body (struct loop *loop, basic_block *body,
		      bitmap always_reached, bitmap always_executed)
{
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_invariants_bb (body[i],
			bitmap_bit_p (always_reached, i),
			bitmap_bit_p (always_executed, i));
}

/* Finds invariants in LOOP.  */

static void
find_invariants (struct loop *loop)
{
  bitmap may_exit = BITMAP_ALLOC (NULL);
  bitmap always_reached = BITMAP_ALLOC (NULL);
  bitmap has_exit = BITMAP_ALLOC (NULL);
  bitmap always_executed = BITMAP_ALLOC (NULL);
  basic_block *body = get_loop_body_in_dom_order (loop);

  /* Classify blocks, then derive the always-reached/always-executed
     sets from the two exit bitmaps.  */
  find_exits (loop, body, may_exit, has_exit);
  compute_always_reached (loop, body, may_exit, always_reached);
  compute_always_reached (loop, body, has_exit, always_executed);

  find_defs (loop);
  find_invariants_body (loop, body, always_reached, always_executed);
  merge_identical_invariants ();

  BITMAP_FREE (always_reached);
  BITMAP_FREE (always_executed);
  BITMAP_FREE (may_exit);
  BITMAP_FREE (has_exit);
  free (body);
}

/* Frees a list of uses USE.
*/

static void
free_use_list (struct use *use)
{
  struct use *next;

  for (; use; use = next)
    {
      next = use->next;
      free (use);
    }
}

/* Return pressure class and number of hard registers (through *NREGS)
   for destination of INSN.  */
static enum reg_class
get_pressure_class_and_nregs (rtx_insn *insn, int *nregs)
{
  rtx reg;
  enum reg_class pressure_class;
  rtx set = single_set (insn);

  /* Considered invariant insns have only one set.  */
  gcc_assert (set != NULL_RTX);
  reg = SET_DEST (set);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  if (MEM_P (reg))
    {
      /* A store needs no new register.  */
      *nregs = 0;
      pressure_class = NO_REGS;
    }
  else
    {
      if (! REG_P (reg))
	reg = NULL_RTX;
      if (reg == NULL_RTX)
	pressure_class = GENERAL_REGS;
      else
	{
	  pressure_class = reg_allocno_class (REGNO (reg));
	  pressure_class = ira_pressure_class_translate[pressure_class];
	}
      *nregs
	= ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))];
    }
  return pressure_class;
}

/* Calculates cost and number of registers needed for moving invariant INV
   out of the loop and stores them to *COST and *REGS_NEEDED.  *CL will be
   the REG_CLASS of INV.  Return
   -1: if INV is invalid.
    0: if INV and its depends_on have same reg_class
    1: if INV and its depends_on have different reg_classes.  */

static int
get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed,
	      enum reg_class *cl)
{
  int i, acomp_cost;
  unsigned aregs_needed[N_REG_CLASSES];
  unsigned depno;
  struct invariant *dep;
  bitmap_iterator bi;
  int ret = 1;

  /* Find the representative of the class of the equivalent invariants.  */
  inv = invariants[inv->eqto];

  *comp_cost = 0;
  /* Without -fira-loop-pressure only slot 0 of regs_needed is used;
     with it, one slot per IRA pressure class.  */
  if (! flag_ira_loop_pressure)
    regs_needed[0] = 0;
  else
    {
      for (i = 0; i < ira_pressure_classes_num; i++)
	regs_needed[ira_pressure_classes[i]] = 0;
    }

  /* Already moved, or already visited in this costing pass.  */
  if (inv->move
      || inv->stamp == actual_stamp)
    return -1;
  inv->stamp = actual_stamp;

  if (! flag_ira_loop_pressure)
    regs_needed[0]++;
  else
    {
      int nregs;
      enum reg_class pressure_class;

      pressure_class = get_pressure_class_and_nregs (inv->insn, &nregs);
      regs_needed[pressure_class] += nregs;
      *cl = pressure_class;
      ret = 0;
    }

  /* Cheap addresses used only in addresses contribute no gain.  The
     cost counts once per always-executed equivalent (eqno).  */
  if (!inv->cheap_address
      || inv->def->n_addr_uses < inv->def->n_uses)
    (*comp_cost) += inv->cost * inv->eqno;

#ifdef STACK_REGS
  {
    /* Hoisting constant pool constants into stack regs may cost more than
       just single register.  On x87, the balance is affected both by the
       small number of FP registers, and by its register stack organization,
       that forces us to add compensation code in and around the loop to
       shuffle the operands to the top of stack before use, and pop them
       from the stack after the loop finishes.

       To model this effect, we increase the number of registers needed for
       stack registers by two: one register push, and one register pop.
       This usually has the effect that FP constant loads from the constant
       pool are not moved out of the loop.

       Note that this also means that dependent invariants can not be moved.
       However, the primary purpose of this pass is to move loop invariant
       address arithmetic out of loops, and address arithmetic that depends
       on floating point constants is unlikely to ever occur.  */
    rtx set = single_set (inv->insn);
    if (set
	&& IS_STACK_MODE (GET_MODE (SET_SRC (set)))
	&& constant_pool_constant_p (SET_SRC (set)))
      {
	if (flag_ira_loop_pressure)
	  regs_needed[ira_stack_reg_pressure_class] += 2;
	else
	  regs_needed[0] += 2;
      }
  }
#endif

  /* Accumulate the costs and register needs of unmoved dependencies.  */
  EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, depno, bi)
    {
      bool check_p;
      enum reg_class dep_cl = ALL_REGS;
      int dep_ret;

      dep = invariants[depno];

      /* If DEP is moved out of the loop, it is not a depends_on any more.  */
      if (dep->move)
	continue;

      dep_ret = get_inv_cost (dep, &acomp_cost, aregs_needed, &dep_cl);

      if (! flag_ira_loop_pressure)
	check_p = aregs_needed[0] != 0;
      else
	{
	  for (i = 0; i < ira_pressure_classes_num; i++)
	    if (aregs_needed[ira_pressure_classes[i]] != 0)
	      break;
	  check_p = i < ira_pressure_classes_num;

	  /* If the dependency's class differs (or is itself mixed),
	     the combined class degrades to ALL_REGS.  */
	  if ((dep_ret == 1) || ((dep_ret == 0) && (*cl != dep_cl)))
	    {
	      *cl = ALL_REGS;
	      ret = 1;
	    }
	}
      if (check_p
	  /* We need to check always_executed, since if the original value of
	     the invariant may be preserved, we may need to keep it in a
	     separate register.  TODO check whether the register has an
	     use outside of the loop.  */
	  && dep->always_executed
	  && !dep->def->uses->next)
	{
	  /* If this is a single use, after moving the dependency we will not
	     need a new register.  */
	  if (! flag_ira_loop_pressure)
	    aregs_needed[0]--;
	  else
	    {
	      int nregs;
	      enum reg_class pressure_class;

	      /* NOTE(review): this queries INV's insn, not DEP's, although
		 the subtraction is applied to DEP's register needs
		 (aregs_needed) -- verify whether dep->insn was intended.  */
	      pressure_class = get_pressure_class_and_nregs (inv->insn, &nregs);
	      aregs_needed[pressure_class] -= nregs;
	    }
	}

      if (! flag_ira_loop_pressure)
	regs_needed[0] += aregs_needed[0];
      else
	{
	  for (i = 0; i < ira_pressure_classes_num; i++)
	    regs_needed[ira_pressure_classes[i]]
	      += aregs_needed[ira_pressure_classes[i]];
	}
      (*comp_cost) += acomp_cost;
    }
  return ret;
}

/* Calculates gain for eliminating invariant INV.  REGS_USED is the number
   of registers used in the loop, NEW_REGS is the number of new variables
   already added due to the invariant motion.  The number of registers needed
   for it is stored in *REGS_NEEDED.  SPEED and CALL_P are flags passed
   through to estimate_reg_pressure_cost.  */

static int
gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
		    unsigned *new_regs, unsigned regs_used,
		    bool speed, bool call_p)
{
  int comp_cost, size_cost;
  /* Workaround -Wmaybe-uninitialized false positive during
     profiledbootstrap by initializing it.  */
  enum reg_class cl = NO_REGS;
  int ret;

  actual_stamp++;

  ret = get_inv_cost (inv, &comp_cost, regs_needed, &cl);

  if (! flag_ira_loop_pressure)
    {
      size_cost = (estimate_reg_pressure_cost (new_regs[0] + regs_needed[0],
					       regs_used, speed, call_p)
		   - estimate_reg_pressure_cost (new_regs[0],
						 regs_used, speed, call_p));
    }
  else if (ret < 0)
    return -1;
  else if ((ret == 0) && (cl == NO_REGS))
    /* Hoist it anyway since it does not impact register pressure.
*/ 1311 return 1; 1312 else 1313 { 1314 int i; 1315 enum reg_class pressure_class; 1316 1317 for (i = 0; i < ira_pressure_classes_num; i++) 1318 { 1319 pressure_class = ira_pressure_classes[i]; 1320 1321 if (!reg_classes_intersect_p (pressure_class, cl)) 1322 continue; 1323 1324 if ((int) new_regs[pressure_class] 1325 + (int) regs_needed[pressure_class] 1326 + LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class] 1327 + IRA_LOOP_RESERVED_REGS 1328 > ira_class_hard_regs_num[pressure_class]) 1329 break; 1330 } 1331 if (i < ira_pressure_classes_num) 1332 /* There will be register pressure excess and we want not to 1333 make this loop invariant motion. All loop invariants with 1334 non-positive gains will be rejected in function 1335 find_invariants_to_move. Therefore we return the negative 1336 number here. 1337 1338 One could think that this rejects also expensive loop 1339 invariant motions and this will hurt code performance. 1340 However numerous experiments with different heuristics 1341 taking invariant cost into account did not confirm this 1342 assumption. There are possible explanations for this 1343 result: 1344 o probably all expensive invariants were already moved out 1345 of the loop by PRE and gimple invariant motion pass. 1346 o expensive invariant execution will be hidden by insn 1347 scheduling or OOO processor hardware because usually such 1348 invariants have a lot of freedom to be executed 1349 out-of-order. 1350 Another reason for ignoring invariant cost vs spilling cost 1351 heuristics is also in difficulties to evaluate accurately 1352 spill cost at this stage. */ 1353 return -1; 1354 else 1355 size_cost = 0; 1356 } 1357 1358 return comp_cost - size_cost; 1359 } 1360 1361 /* Finds invariant with best gain for moving. Returns the gain, stores 1362 the invariant in *BEST and number of registers needed for it to 1363 *REGS_NEEDED. REGS_USED is the number of registers used in the loop. 
1364 NEW_REGS is the number of new variables already added due to invariant 1365 motion. */ 1366 1367 static int 1368 best_gain_for_invariant (struct invariant **best, unsigned *regs_needed, 1369 unsigned *new_regs, unsigned regs_used, 1370 bool speed, bool call_p) 1371 { 1372 struct invariant *inv; 1373 int i, gain = 0, again; 1374 unsigned aregs_needed[N_REG_CLASSES], invno; 1375 1376 FOR_EACH_VEC_ELT (invariants, invno, inv) 1377 { 1378 if (inv->move) 1379 continue; 1380 1381 /* Only consider the "representatives" of equivalent invariants. */ 1382 if (inv->eqto != inv->invno) 1383 continue; 1384 1385 again = gain_for_invariant (inv, aregs_needed, new_regs, regs_used, 1386 speed, call_p); 1387 if (again > gain) 1388 { 1389 gain = again; 1390 *best = inv; 1391 if (! flag_ira_loop_pressure) 1392 regs_needed[0] = aregs_needed[0]; 1393 else 1394 { 1395 for (i = 0; i < ira_pressure_classes_num; i++) 1396 regs_needed[ira_pressure_classes[i]] 1397 = aregs_needed[ira_pressure_classes[i]]; 1398 } 1399 } 1400 } 1401 1402 return gain; 1403 } 1404 1405 /* Marks invariant INVNO and all its dependencies for moving. */ 1406 1407 static void 1408 set_move_mark (unsigned invno, int gain) 1409 { 1410 struct invariant *inv = invariants[invno]; 1411 bitmap_iterator bi; 1412 1413 /* Find the representative of the class of the equivalent invariants. */ 1414 inv = invariants[inv->eqto]; 1415 1416 if (inv->move) 1417 return; 1418 inv->move = true; 1419 1420 if (dump_file) 1421 { 1422 if (gain >= 0) 1423 fprintf (dump_file, "Decided to move invariant %d -- gain %d\n", 1424 invno, gain); 1425 else 1426 fprintf (dump_file, "Decided to move dependent invariant %d\n", 1427 invno); 1428 }; 1429 1430 EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, invno, bi) 1431 { 1432 set_move_mark (invno, -1); 1433 } 1434 } 1435 1436 /* Determines which invariants to move. 
*/ 1437 1438 static void 1439 find_invariants_to_move (bool speed, bool call_p) 1440 { 1441 int gain; 1442 unsigned i, regs_used, regs_needed[N_REG_CLASSES], new_regs[N_REG_CLASSES]; 1443 struct invariant *inv = NULL; 1444 1445 if (!invariants.length ()) 1446 return; 1447 1448 if (flag_ira_loop_pressure) 1449 /* REGS_USED is actually never used when the flag is on. */ 1450 regs_used = 0; 1451 else 1452 /* We do not really do a good job in estimating number of 1453 registers used; we put some initial bound here to stand for 1454 induction variables etc. that we do not detect. */ 1455 { 1456 unsigned int n_regs = DF_REG_SIZE (df); 1457 1458 regs_used = 2; 1459 1460 for (i = 0; i < n_regs; i++) 1461 { 1462 if (!DF_REGNO_FIRST_DEF (i) && DF_REGNO_LAST_USE (i)) 1463 { 1464 /* This is a value that is used but not changed inside loop. */ 1465 regs_used++; 1466 } 1467 } 1468 } 1469 1470 if (! flag_ira_loop_pressure) 1471 new_regs[0] = regs_needed[0] = 0; 1472 else 1473 { 1474 for (i = 0; (int) i < ira_pressure_classes_num; i++) 1475 new_regs[ira_pressure_classes[i]] = 0; 1476 } 1477 while ((gain = best_gain_for_invariant (&inv, regs_needed, 1478 new_regs, regs_used, 1479 speed, call_p)) > 0) 1480 { 1481 set_move_mark (inv->invno, gain); 1482 if (! flag_ira_loop_pressure) 1483 new_regs[0] += regs_needed[0]; 1484 else 1485 { 1486 for (i = 0; (int) i < ira_pressure_classes_num; i++) 1487 new_regs[ira_pressure_classes[i]] 1488 += regs_needed[ira_pressure_classes[i]]; 1489 } 1490 } 1491 } 1492 1493 /* Replace the uses, reached by the definition of invariant INV, by REG. 1494 1495 IN_GROUP is nonzero if this is part of a group of changes that must be 1496 performed as a group. In that case, the changes will be stored. The 1497 function `apply_change_group' will validate and apply the changes. */ 1498 1499 static int 1500 replace_uses (struct invariant *inv, rtx reg, bool in_group) 1501 { 1502 /* Replace the uses we know to be dominated. 
It saves work for copy 1503 propagation, and also it is necessary so that dependent invariants 1504 are computed right. */ 1505 if (inv->def) 1506 { 1507 struct use *use; 1508 for (use = inv->def->uses; use; use = use->next) 1509 validate_change (use->insn, use->pos, reg, true); 1510 1511 /* If we aren't part of a larger group, apply the changes now. */ 1512 if (!in_group) 1513 return apply_change_group (); 1514 } 1515 1516 return 1; 1517 } 1518 1519 /* Move invariant INVNO out of the LOOP. Returns true if this succeeds, false 1520 otherwise. */ 1521 1522 static bool 1523 move_invariant_reg (struct loop *loop, unsigned invno) 1524 { 1525 struct invariant *inv = invariants[invno]; 1526 struct invariant *repr = invariants[inv->eqto]; 1527 unsigned i; 1528 basic_block preheader = loop_preheader_edge (loop)->src; 1529 rtx reg, set, dest, note; 1530 bitmap_iterator bi; 1531 int regno = -1; 1532 1533 if (inv->reg) 1534 return true; 1535 if (!repr->move) 1536 return false; 1537 1538 /* If this is a representative of the class of equivalent invariants, 1539 really move the invariant. Otherwise just replace its use with 1540 the register used for the representative. */ 1541 if (inv == repr) 1542 { 1543 if (inv->depends_on) 1544 { 1545 EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, i, bi) 1546 { 1547 if (!move_invariant_reg (loop, i)) 1548 goto fail; 1549 } 1550 } 1551 1552 /* Move the set out of the loop. If the set is always executed (we could 1553 omit this condition if we know that the register is unused outside of 1554 the loop, but it does not seem worth finding out) and it has no uses 1555 that would not be dominated by it, we may just move it (TODO). 1556 Otherwise we need to create a temporary register. 
*/ 1557 set = single_set (inv->insn); 1558 reg = dest = SET_DEST (set); 1559 if (GET_CODE (reg) == SUBREG) 1560 reg = SUBREG_REG (reg); 1561 if (REG_P (reg)) 1562 regno = REGNO (reg); 1563 1564 reg = gen_reg_rtx_and_attrs (dest); 1565 1566 /* Try replacing the destination by a new pseudoregister. */ 1567 validate_change (inv->insn, &SET_DEST (set), reg, true); 1568 1569 /* As well as all the dominated uses. */ 1570 replace_uses (inv, reg, true); 1571 1572 /* And validate all the changes. */ 1573 if (!apply_change_group ()) 1574 goto fail; 1575 1576 emit_insn_after (gen_move_insn (dest, reg), inv->insn); 1577 reorder_insns (inv->insn, inv->insn, BB_END (preheader)); 1578 1579 /* If there is a REG_EQUAL note on the insn we just moved, and the 1580 insn is in a basic block that is not always executed or the note 1581 contains something for which we don't know the invariant status, 1582 the note may no longer be valid after we move the insn. Note that 1583 uses in REG_EQUAL notes are taken into account in the computation 1584 of invariants, so it is safe to retain the note even if it contains 1585 register references for which we know the invariant status. */ 1586 if ((note = find_reg_note (inv->insn, REG_EQUAL, NULL_RTX)) 1587 && (!inv->always_executed 1588 || !check_maybe_invariant (XEXP (note, 0)))) 1589 remove_note (inv->insn, note); 1590 } 1591 else 1592 { 1593 if (!move_invariant_reg (loop, repr->invno)) 1594 goto fail; 1595 reg = repr->reg; 1596 regno = repr->orig_regno; 1597 if (!replace_uses (inv, reg, false)) 1598 goto fail; 1599 set = single_set (inv->insn); 1600 emit_insn_after (gen_move_insn (SET_DEST (set), reg), inv->insn); 1601 delete_insn (inv->insn); 1602 } 1603 1604 inv->reg = reg; 1605 inv->orig_regno = regno; 1606 1607 return true; 1608 1609 fail: 1610 /* If we failed, clear move flag, so that we do not try to move inv 1611 again. 
*/ 1612 if (dump_file) 1613 fprintf (dump_file, "Failed to move invariant %d\n", invno); 1614 inv->move = false; 1615 inv->reg = NULL_RTX; 1616 inv->orig_regno = -1; 1617 1618 return false; 1619 } 1620 1621 /* Move selected invariant out of the LOOP. Newly created regs are marked 1622 in TEMPORARY_REGS. */ 1623 1624 static void 1625 move_invariants (struct loop *loop) 1626 { 1627 struct invariant *inv; 1628 unsigned i; 1629 1630 FOR_EACH_VEC_ELT (invariants, i, inv) 1631 move_invariant_reg (loop, i); 1632 if (flag_ira_loop_pressure && resize_reg_info ()) 1633 { 1634 FOR_EACH_VEC_ELT (invariants, i, inv) 1635 if (inv->reg != NULL_RTX) 1636 { 1637 if (inv->orig_regno >= 0) 1638 setup_reg_classes (REGNO (inv->reg), 1639 reg_preferred_class (inv->orig_regno), 1640 reg_alternate_class (inv->orig_regno), 1641 reg_allocno_class (inv->orig_regno)); 1642 else 1643 setup_reg_classes (REGNO (inv->reg), 1644 GENERAL_REGS, NO_REGS, GENERAL_REGS); 1645 } 1646 } 1647 } 1648 1649 /* Initializes invariant motion data. */ 1650 1651 static void 1652 init_inv_motion_data (void) 1653 { 1654 actual_stamp = 1; 1655 1656 invariants.create (100); 1657 } 1658 1659 /* Frees the data allocated by invariant motion. */ 1660 1661 static void 1662 free_inv_motion_data (void) 1663 { 1664 unsigned i; 1665 struct def *def; 1666 struct invariant *inv; 1667 1668 check_invariant_table_size (); 1669 for (i = 0; i < DF_DEFS_TABLE_SIZE (); i++) 1670 { 1671 inv = invariant_table[i]; 1672 if (inv) 1673 { 1674 def = inv->def; 1675 gcc_assert (def != NULL); 1676 1677 free_use_list (def->uses); 1678 free (def); 1679 invariant_table[i] = NULL; 1680 } 1681 } 1682 1683 FOR_EACH_VEC_ELT (invariants, i, inv) 1684 { 1685 BITMAP_FREE (inv->depends_on); 1686 free (inv); 1687 } 1688 invariants.release (); 1689 } 1690 1691 /* Move the invariants out of the LOOP. 
*/ 1692 1693 static void 1694 move_single_loop_invariants (struct loop *loop) 1695 { 1696 init_inv_motion_data (); 1697 1698 find_invariants (loop); 1699 find_invariants_to_move (optimize_loop_for_speed_p (loop), 1700 LOOP_DATA (loop)->has_call); 1701 move_invariants (loop); 1702 1703 free_inv_motion_data (); 1704 } 1705 1706 /* Releases the auxiliary data for LOOP. */ 1707 1708 static void 1709 free_loop_data (struct loop *loop) 1710 { 1711 struct loop_data *data = LOOP_DATA (loop); 1712 if (!data) 1713 return; 1714 1715 bitmap_clear (&LOOP_DATA (loop)->regs_ref); 1716 bitmap_clear (&LOOP_DATA (loop)->regs_live); 1717 free (data); 1718 loop->aux = NULL; 1719 } 1720 1721 1722 1723 /* Registers currently living. */ 1724 static bitmap_head curr_regs_live; 1725 1726 /* Current reg pressure for each pressure class. */ 1727 static int curr_reg_pressure[N_REG_CLASSES]; 1728 1729 /* Record all regs that are set in any one insn. Communication from 1730 mark_reg_{store,clobber} and global_conflicts. Asm can refer to 1731 all hard-registers. */ 1732 static rtx regs_set[(FIRST_PSEUDO_REGISTER > MAX_RECOG_OPERANDS 1733 ? FIRST_PSEUDO_REGISTER : MAX_RECOG_OPERANDS) * 2]; 1734 /* Number of regs stored in the previous array. */ 1735 static int n_regs_set; 1736 1737 /* Return pressure class and number of needed hard registers (through 1738 *NREGS) of register REGNO. */ 1739 static enum reg_class 1740 get_regno_pressure_class (int regno, int *nregs) 1741 { 1742 if (regno >= FIRST_PSEUDO_REGISTER) 1743 { 1744 enum reg_class pressure_class; 1745 1746 pressure_class = reg_allocno_class (regno); 1747 pressure_class = ira_pressure_class_translate[pressure_class]; 1748 *nregs 1749 = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)]; 1750 return pressure_class; 1751 } 1752 else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno) 1753 && ! 
TEST_HARD_REG_BIT (eliminable_regset, regno)) 1754 { 1755 *nregs = 1; 1756 return ira_pressure_class_translate[REGNO_REG_CLASS (regno)]; 1757 } 1758 else 1759 { 1760 *nregs = 0; 1761 return NO_REGS; 1762 } 1763 } 1764 1765 /* Increase (if INCR_P) or decrease current register pressure for 1766 register REGNO. */ 1767 static void 1768 change_pressure (int regno, bool incr_p) 1769 { 1770 int nregs; 1771 enum reg_class pressure_class; 1772 1773 pressure_class = get_regno_pressure_class (regno, &nregs); 1774 if (! incr_p) 1775 curr_reg_pressure[pressure_class] -= nregs; 1776 else 1777 { 1778 curr_reg_pressure[pressure_class] += nregs; 1779 if (LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class] 1780 < curr_reg_pressure[pressure_class]) 1781 LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class] 1782 = curr_reg_pressure[pressure_class]; 1783 } 1784 } 1785 1786 /* Mark REGNO birth. */ 1787 static void 1788 mark_regno_live (int regno) 1789 { 1790 struct loop *loop; 1791 1792 for (loop = curr_loop; 1793 loop != current_loops->tree_root; 1794 loop = loop_outer (loop)) 1795 bitmap_set_bit (&LOOP_DATA (loop)->regs_live, regno); 1796 if (!bitmap_set_bit (&curr_regs_live, regno)) 1797 return; 1798 change_pressure (regno, true); 1799 } 1800 1801 /* Mark REGNO death. */ 1802 static void 1803 mark_regno_death (int regno) 1804 { 1805 if (! bitmap_clear_bit (&curr_regs_live, regno)) 1806 return; 1807 change_pressure (regno, false); 1808 } 1809 1810 /* Mark setting register REG. */ 1811 static void 1812 mark_reg_store (rtx reg, const_rtx setter ATTRIBUTE_UNUSED, 1813 void *data ATTRIBUTE_UNUSED) 1814 { 1815 int regno; 1816 1817 if (GET_CODE (reg) == SUBREG) 1818 reg = SUBREG_REG (reg); 1819 1820 if (! 
REG_P (reg)) 1821 return; 1822 1823 regs_set[n_regs_set++] = reg; 1824 1825 regno = REGNO (reg); 1826 1827 if (regno >= FIRST_PSEUDO_REGISTER) 1828 mark_regno_live (regno); 1829 else 1830 { 1831 int last = regno + hard_regno_nregs[regno][GET_MODE (reg)]; 1832 1833 while (regno < last) 1834 { 1835 mark_regno_live (regno); 1836 regno++; 1837 } 1838 } 1839 } 1840 1841 /* Mark clobbering register REG. */ 1842 static void 1843 mark_reg_clobber (rtx reg, const_rtx setter, void *data) 1844 { 1845 if (GET_CODE (setter) == CLOBBER) 1846 mark_reg_store (reg, setter, data); 1847 } 1848 1849 /* Mark register REG death. */ 1850 static void 1851 mark_reg_death (rtx reg) 1852 { 1853 int regno = REGNO (reg); 1854 1855 if (regno >= FIRST_PSEUDO_REGISTER) 1856 mark_regno_death (regno); 1857 else 1858 { 1859 int last = regno + hard_regno_nregs[regno][GET_MODE (reg)]; 1860 1861 while (regno < last) 1862 { 1863 mark_regno_death (regno); 1864 regno++; 1865 } 1866 } 1867 } 1868 1869 /* Mark occurrence of registers in X for the current loop. */ 1870 static void 1871 mark_ref_regs (rtx x) 1872 { 1873 RTX_CODE code; 1874 int i; 1875 const char *fmt; 1876 1877 if (!x) 1878 return; 1879 1880 code = GET_CODE (x); 1881 if (code == REG) 1882 { 1883 struct loop *loop; 1884 1885 for (loop = curr_loop; 1886 loop != current_loops->tree_root; 1887 loop = loop_outer (loop)) 1888 bitmap_set_bit (&LOOP_DATA (loop)->regs_ref, REGNO (x)); 1889 return; 1890 } 1891 1892 fmt = GET_RTX_FORMAT (code); 1893 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) 1894 if (fmt[i] == 'e') 1895 mark_ref_regs (XEXP (x, i)); 1896 else if (fmt[i] == 'E') 1897 { 1898 int j; 1899 1900 for (j = 0; j < XVECLEN (x, i); j++) 1901 mark_ref_regs (XVECEXP (x, i, j)); 1902 } 1903 } 1904 1905 /* Calculate register pressure in the loops. 
*/ 1906 static void 1907 calculate_loop_reg_pressure (void) 1908 { 1909 int i; 1910 unsigned int j; 1911 bitmap_iterator bi; 1912 basic_block bb; 1913 rtx_insn *insn; 1914 rtx link; 1915 struct loop *loop, *parent; 1916 1917 FOR_EACH_LOOP (loop, 0) 1918 if (loop->aux == NULL) 1919 { 1920 loop->aux = xcalloc (1, sizeof (struct loop_data)); 1921 bitmap_initialize (&LOOP_DATA (loop)->regs_ref, ®_obstack); 1922 bitmap_initialize (&LOOP_DATA (loop)->regs_live, ®_obstack); 1923 } 1924 ira_setup_eliminable_regset (); 1925 bitmap_initialize (&curr_regs_live, ®_obstack); 1926 FOR_EACH_BB_FN (bb, cfun) 1927 { 1928 curr_loop = bb->loop_father; 1929 if (curr_loop == current_loops->tree_root) 1930 continue; 1931 1932 for (loop = curr_loop; 1933 loop != current_loops->tree_root; 1934 loop = loop_outer (loop)) 1935 bitmap_ior_into (&LOOP_DATA (loop)->regs_live, DF_LR_IN (bb)); 1936 1937 bitmap_copy (&curr_regs_live, DF_LR_IN (bb)); 1938 for (i = 0; i < ira_pressure_classes_num; i++) 1939 curr_reg_pressure[ira_pressure_classes[i]] = 0; 1940 EXECUTE_IF_SET_IN_BITMAP (&curr_regs_live, 0, j, bi) 1941 change_pressure (j, true); 1942 1943 FOR_BB_INSNS (bb, insn) 1944 { 1945 if (! NONDEBUG_INSN_P (insn)) 1946 continue; 1947 1948 mark_ref_regs (PATTERN (insn)); 1949 n_regs_set = 0; 1950 note_stores (PATTERN (insn), mark_reg_clobber, NULL); 1951 1952 /* Mark any registers dead after INSN as dead now. */ 1953 1954 for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) 1955 if (REG_NOTE_KIND (link) == REG_DEAD) 1956 mark_reg_death (XEXP (link, 0)); 1957 1958 /* Mark any registers set in INSN as live, 1959 and mark them as conflicting with all other live regs. 1960 Clobbers are processed again, so they conflict with 1961 the registers that are set. 
*/ 1962 1963 note_stores (PATTERN (insn), mark_reg_store, NULL); 1964 1965 #ifdef AUTO_INC_DEC 1966 for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) 1967 if (REG_NOTE_KIND (link) == REG_INC) 1968 mark_reg_store (XEXP (link, 0), NULL_RTX, NULL); 1969 #endif 1970 while (n_regs_set-- > 0) 1971 { 1972 rtx note = find_regno_note (insn, REG_UNUSED, 1973 REGNO (regs_set[n_regs_set])); 1974 if (! note) 1975 continue; 1976 1977 mark_reg_death (XEXP (note, 0)); 1978 } 1979 } 1980 } 1981 bitmap_clear (&curr_regs_live); 1982 if (flag_ira_region == IRA_REGION_MIXED 1983 || flag_ira_region == IRA_REGION_ALL) 1984 FOR_EACH_LOOP (loop, 0) 1985 { 1986 EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi) 1987 if (! bitmap_bit_p (&LOOP_DATA (loop)->regs_ref, j)) 1988 { 1989 enum reg_class pressure_class; 1990 int nregs; 1991 1992 pressure_class = get_regno_pressure_class (j, &nregs); 1993 LOOP_DATA (loop)->max_reg_pressure[pressure_class] -= nregs; 1994 } 1995 } 1996 if (dump_file == NULL) 1997 return; 1998 FOR_EACH_LOOP (loop, 0) 1999 { 2000 parent = loop_outer (loop); 2001 fprintf (dump_file, "\n Loop %d (parent %d, header bb%d, depth %d)\n", 2002 loop->num, (parent == NULL ? -1 : parent->num), 2003 loop->header->index, loop_depth (loop)); 2004 fprintf (dump_file, "\n ref. 
regnos:"); 2005 EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_ref, 0, j, bi) 2006 fprintf (dump_file, " %d", j); 2007 fprintf (dump_file, "\n live regnos:"); 2008 EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi) 2009 fprintf (dump_file, " %d", j); 2010 fprintf (dump_file, "\n Pressure:"); 2011 for (i = 0; (int) i < ira_pressure_classes_num; i++) 2012 { 2013 enum reg_class pressure_class; 2014 2015 pressure_class = ira_pressure_classes[i]; 2016 if (LOOP_DATA (loop)->max_reg_pressure[pressure_class] == 0) 2017 continue; 2018 fprintf (dump_file, " %s=%d", reg_class_names[pressure_class], 2019 LOOP_DATA (loop)->max_reg_pressure[pressure_class]); 2020 } 2021 fprintf (dump_file, "\n"); 2022 } 2023 } 2024 2025 2026 2027 /* Move the invariants out of the loops. */ 2028 2029 void 2030 move_loop_invariants (void) 2031 { 2032 struct loop *loop; 2033 2034 if (flag_ira_loop_pressure) 2035 { 2036 df_analyze (); 2037 regstat_init_n_sets_and_refs (); 2038 ira_set_pseudo_classes (true, dump_file); 2039 calculate_loop_reg_pressure (); 2040 regstat_free_n_sets_and_refs (); 2041 } 2042 df_set_flags (DF_EQ_NOTES + DF_DEFER_INSN_RESCAN); 2043 /* Process the loops, innermost first. */ 2044 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) 2045 { 2046 curr_loop = loop; 2047 /* move_single_loop_invariants for very large loops 2048 is time consuming and might need a lot of memory. */ 2049 if (loop->num_nodes <= (unsigned) LOOP_INVARIANT_MAX_BBS_IN_LOOP) 2050 move_single_loop_invariants (loop); 2051 } 2052 2053 FOR_EACH_LOOP (loop, 0) 2054 { 2055 free_loop_data (loop); 2056 } 2057 2058 if (flag_ira_loop_pressure) 2059 /* There is no sense to keep this info because it was most 2060 probably outdated by subsequent passes. */ 2061 free_reg_info (); 2062 free (invariant_table); 2063 invariant_table = NULL; 2064 invariant_table_size = 0; 2065 2066 #ifdef ENABLE_CHECKING 2067 verify_flow_info (); 2068 #endif 2069 } 2070