1 /* Perform simple optimizations to clean up the result of reload. 2 Copyright (C) 1987-2015 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify it under 7 the terms of the GNU General Public License as published by the Free 8 Software Foundation; either version 3, or (at your option) any later 9 version. 10 11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 12 WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 #include "config.h" 21 #include "system.h" 22 #include "coretypes.h" 23 #include "tm.h" 24 25 #include "machmode.h" 26 #include "hard-reg-set.h" 27 #include "rtl.h" 28 #include "tm_p.h" 29 #include "obstack.h" 30 #include "insn-config.h" 31 #include "flags.h" 32 #include "hashtab.h" 33 #include "hash-set.h" 34 #include "vec.h" 35 #include "input.h" 36 #include "function.h" 37 #include "symtab.h" 38 #include "statistics.h" 39 #include "double-int.h" 40 #include "real.h" 41 #include "fixed-value.h" 42 #include "alias.h" 43 #include "wide-int.h" 44 #include "inchash.h" 45 #include "tree.h" 46 #include "expmed.h" 47 #include "dojump.h" 48 #include "explow.h" 49 #include "calls.h" 50 #include "emit-rtl.h" 51 #include "varasm.h" 52 #include "stmt.h" 53 #include "expr.h" 54 #include "insn-codes.h" 55 #include "optabs.h" 56 #include "regs.h" 57 #include "predict.h" 58 #include "dominance.h" 59 #include "cfg.h" 60 #include "cfgrtl.h" 61 #include "cfgbuild.h" 62 #include "cfgcleanup.h" 63 #include "basic-block.h" 64 #include "reload.h" 65 #include "recog.h" 66 #include "cselib.h" 67 #include "diagnostic-core.h" 68 #include "except.h" 69 #include "target.h" 70 #include "tree-pass.h" 71 #include "df.h" 72 #include "dbgcnt.h" 73 74 static int reload_cse_noop_set_p (rtx); 75 static bool reload_cse_simplify (rtx_insn *, rtx); 76 static void reload_cse_regs_1 (void); 77 static int reload_cse_simplify_set (rtx, rtx_insn *); 78 static int reload_cse_simplify_operands (rtx_insn *, rtx); 79 80 static void reload_combine (void); 81 static void reload_combine_note_use (rtx *, rtx_insn *, int, rtx); 82 static void reload_combine_note_store (rtx, const_rtx, void *); 83 84 static bool reload_cse_move2add (rtx_insn *); 85 static void move2add_note_store (rtx, const_rtx, void *); 86 87 /* Call cse / combine like post-reload optimization phases. 88 FIRST is the first instruction. */ 89 90 static void 91 reload_cse_regs (rtx_insn *first ATTRIBUTE_UNUSED) 92 { 93 bool moves_converted; 94 reload_cse_regs_1 (); 95 reload_combine (); 96 moves_converted = reload_cse_move2add (first); 97 if (flag_expensive_optimizations) 98 { 99 if (moves_converted) 100 reload_combine (); 101 reload_cse_regs_1 (); 102 } 103 } 104 105 /* See whether a single set SET is a noop. */ 106 static int 107 reload_cse_noop_set_p (rtx set) 108 { 109 if (cselib_reg_set_mode (SET_DEST (set)) != GET_MODE (SET_DEST (set))) 110 return 0; 111 112 return rtx_equal_for_cselib_p (SET_DEST (set), SET_SRC (set)); 113 } 114 115 /* Try to simplify INSN. Return true if the CFG may have changed. 
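The CFG can change when a no-op set is deleted: delete_insn_and_edges may purge edges that have become dead, e.g. an EH edge for a potentially-trapping memory access that turned out to be redundant.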
*/ 116 static bool 117 reload_cse_simplify (rtx_insn *insn, rtx testreg) 118 { 119 rtx body = PATTERN (insn); 120 basic_block insn_bb = BLOCK_FOR_INSN (insn); 121 unsigned insn_bb_succs = EDGE_COUNT (insn_bb->succs); 122 123 /* If NO_FUNCTION_CSE has been set by the target, then we should not try 124 to cse function calls. */ 125 #ifdef NO_FUNCTION_CSE 126 if (CALL_P (insn)) 127 return false; 128 #endif 129 130 if (GET_CODE (body) == SET) 131 { 132 int count = 0; 133 134 /* Simplify even if we may think it is a no-op. 135 We may think a memory load of a value smaller than WORD_SIZE 136 is redundant because we haven't taken into account possible 137 implicit extension. reload_cse_simplify_set() will bring 138 this out, so it's safer to simplify before we delete. */ 139 count += reload_cse_simplify_set (body, insn); 140 141 if (!count && reload_cse_noop_set_p (body)) 142 { 143 rtx value = SET_DEST (body); 144 if (REG_P (value) 145 && ! REG_FUNCTION_VALUE_P (value)) 146 value = 0; 147 if (check_for_inc_dec (insn)) 148 delete_insn_and_edges (insn); 149 /* We're done with this insn. */ 150 goto done; 151 } 152 153 if (count > 0) 154 apply_change_group (); 155 else 156 reload_cse_simplify_operands (insn, testreg); 157 } 158 else if (GET_CODE (body) == PARALLEL) 159 { 160 int i; 161 int count = 0; 162 rtx value = NULL_RTX; 163 164 /* Registers mentioned in the clobber list for an asm cannot be reused 165 within the body of the asm. Invalidate those registers now so that 166 we don't try to substitute values for them. */ 167 if (asm_noperands (body) >= 0) 168 { 169 for (i = XVECLEN (body, 0) - 1; i >= 0; --i) 170 { 171 rtx part = XVECEXP (body, 0, i); 172 if (GET_CODE (part) == CLOBBER && REG_P (XEXP (part, 0))) 173 cselib_invalidate_rtx (XEXP (part, 0)); 174 } 175 } 176 177 /* If every action in a PARALLEL is a noop, we can delete 178 the entire PARALLEL. */ 179 for (i = XVECLEN (body, 0) - 1; i >= 0; --i) 180 { 181 rtx part = XVECEXP (body, 0, i); 182 if (GET_CODE (part) == SET) 183 { 184 if (! reload_cse_noop_set_p (part)) 185 break; 186 if (REG_P (SET_DEST (part)) 187 && REG_FUNCTION_VALUE_P (SET_DEST (part))) 188 { 189 if (value) 190 break; 191 value = SET_DEST (part); 192 } 193 } 194 else if (GET_CODE (part) != CLOBBER) 195 break; 196 } 197 198 if (i < 0) 199 { 200 if (check_for_inc_dec (insn)) 201 delete_insn_and_edges (insn); 202 /* We're done with this insn. */ 203 goto done; 204 } 205 206 /* It's not a no-op, but we can try to simplify it. */ 207 for (i = XVECLEN (body, 0) - 1; i >= 0; --i) 208 if (GET_CODE (XVECEXP (body, 0, i)) == SET) 209 count += reload_cse_simplify_set (XVECEXP (body, 0, i), insn); 210 211 if (count > 0) 212 apply_change_group (); 213 else 214 reload_cse_simplify_operands (insn, testreg); 215 } 216 217 done: 218 return (EDGE_COUNT (insn_bb->succs) != insn_bb_succs); 219 } 220 221 /* Do a very simple CSE pass over the hard registers. 222 223 This function detects no-op moves where we happened to assign two 224 different pseudo-registers to the same hard register, and then 225 copied one to the other. Reload will generate a useless 226 instruction copying a register to itself. 227 228 This function also detects cases where we load a value from memory 229 into two different registers, and (if memory is more expensive than 230 registers) changes it to simply copy the first register into the 231 second register. 232 233 Another optimization is performed that scans the operands of each 234 instruction to see whether the value is already available in a 235 hard register. 
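For example, if (reg:SI 3) and a stack slot are known to hold the same value, an operand that still refers to the stack slot is recognized as equivalent to (reg:SI 3).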
It then replaces the operand with the hard register 236 if possible, much like an optional reload would. */ 237 238 static void 239 reload_cse_regs_1 (void) 240 { 241 bool cfg_changed = false; 242 basic_block bb; 243 rtx_insn *insn; 244 rtx testreg = gen_rtx_REG (VOIDmode, -1); 245 246 cselib_init (CSELIB_RECORD_MEMORY); 247 init_alias_analysis (); 248 249 FOR_EACH_BB_FN (bb, cfun) 250 FOR_BB_INSNS (bb, insn) 251 { 252 if (INSN_P (insn)) 253 cfg_changed |= reload_cse_simplify (insn, testreg); 254 255 cselib_process_insn (insn); 256 } 257 258 /* Clean up. */ 259 end_alias_analysis (); 260 cselib_finish (); 261 if (cfg_changed) 262 cleanup_cfg (0); 263 } 264 265 /* Try to simplify a single SET instruction. SET is the set pattern. 266 INSN is the instruction it came from. 267 This function only handles one case: if we set a register to a value 268 which is not a register, we try to find that value in some other register 269 and change the set into a register copy. */ 270 271 static int 272 reload_cse_simplify_set (rtx set, rtx_insn *insn) 273 { 274 int did_change = 0; 275 int dreg; 276 rtx src; 277 reg_class_t dclass; 278 int old_cost; 279 cselib_val *val; 280 struct elt_loc_list *l; 281 #ifdef LOAD_EXTEND_OP 282 enum rtx_code extend_op = UNKNOWN; 283 #endif 284 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); 285 286 dreg = true_regnum (SET_DEST (set)); 287 if (dreg < 0) 288 return 0; 289 290 src = SET_SRC (set); 291 if (side_effects_p (src) || true_regnum (src) >= 0) 292 return 0; 293 294 dclass = REGNO_REG_CLASS (dreg); 295 296 #ifdef LOAD_EXTEND_OP 297 /* When replacing a memory with a register, we need to honor assumptions 298 that combine made wrt the contents of sign bits. We'll do this by 299 generating an extend instruction instead of a reg->reg copy. Thus 300 the destination must be a register that we can widen. */ 301 if (MEM_P (src) 302 && GET_MODE_BITSIZE (GET_MODE (src)) < BITS_PER_WORD 303 && (extend_op = LOAD_EXTEND_OP (GET_MODE (src))) != UNKNOWN 304 && !REG_P (SET_DEST (set))) 305 return 0; 306 #endif 307 308 val = cselib_lookup (src, GET_MODE (SET_DEST (set)), 0, VOIDmode); 309 if (! val) 310 return 0; 311 312 /* If memory loads are cheaper than register copies, don't change them. */ 313 if (MEM_P (src)) 314 old_cost = memory_move_cost (GET_MODE (src), dclass, true); 315 else if (REG_P (src)) 316 old_cost = register_move_cost (GET_MODE (src), 317 REGNO_REG_CLASS (REGNO (src)), dclass); 318 else 319 old_cost = set_src_cost (src, speed); 320 321 for (l = val->locs; l; l = l->next) 322 { 323 rtx this_rtx = l->loc; 324 int this_cost; 325 326 if (CONSTANT_P (this_rtx) && ! 
references_value_p (this_rtx, 0)) 327 { 328 #ifdef LOAD_EXTEND_OP 329 if (extend_op != UNKNOWN) 330 { 331 wide_int result; 332 333 if (!CONST_SCALAR_INT_P (this_rtx)) 334 continue; 335 336 switch (extend_op) 337 { 338 case ZERO_EXTEND: 339 result = wide_int::from (std::make_pair (this_rtx, 340 GET_MODE (src)), 341 BITS_PER_WORD, UNSIGNED); 342 break; 343 case SIGN_EXTEND: 344 result = wide_int::from (std::make_pair (this_rtx, 345 GET_MODE (src)), 346 BITS_PER_WORD, SIGNED); 347 break; 348 default: 349 gcc_unreachable (); 350 } 351 this_rtx = immed_wide_int_const (result, word_mode); 352 } 353 #endif 354 this_cost = set_src_cost (this_rtx, speed); 355 } 356 else if (REG_P (this_rtx)) 357 { 358 #ifdef LOAD_EXTEND_OP 359 if (extend_op != UNKNOWN) 360 { 361 this_rtx = gen_rtx_fmt_e (extend_op, word_mode, this_rtx); 362 this_cost = set_src_cost (this_rtx, speed); 363 } 364 else 365 #endif 366 this_cost = register_move_cost (GET_MODE (this_rtx), 367 REGNO_REG_CLASS (REGNO (this_rtx)), 368 dclass); 369 } 370 else 371 continue; 372 373 /* If equal costs, prefer registers over anything else. That 374 tends to lead to smaller instructions on some machines. */ 375 if (this_cost < old_cost 376 || (this_cost == old_cost 377 && REG_P (this_rtx) 378 && !REG_P (SET_SRC (set)))) 379 { 380 #ifdef LOAD_EXTEND_OP 381 if (GET_MODE_BITSIZE (GET_MODE (SET_DEST (set))) < BITS_PER_WORD 382 && extend_op != UNKNOWN 383 #ifdef CANNOT_CHANGE_MODE_CLASS 384 && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SET_DEST (set)), 385 word_mode, 386 REGNO_REG_CLASS (REGNO (SET_DEST (set)))) 387 #endif 388 ) 389 { 390 rtx wide_dest = gen_rtx_REG (word_mode, REGNO (SET_DEST (set))); 391 ORIGINAL_REGNO (wide_dest) = ORIGINAL_REGNO (SET_DEST (set)); 392 validate_change (insn, &SET_DEST (set), wide_dest, 1); 393 } 394 #endif 395 396 validate_unshare_change (insn, &SET_SRC (set), this_rtx, 1); 397 old_cost = this_cost, did_change = 1; 398 } 399 } 400 401 return did_change; 402 } 403 404 /* Try to replace operands in INSN with equivalent values that are already 405 in registers. This can be viewed as optional reloading. 406 407 For each non-register operand in the insn, see if any hard regs are 408 known to be equivalent to that operand. Record the alternatives which 409 can accept these hard registers. Among all alternatives, select the 410 ones which are better or equal to the one currently matching, where 411 "better" is in terms of '?' and '!' constraints. Among the remaining 412 alternatives, select the one which replaces most operands with 413 hard registers. */ 414 415 static int 416 reload_cse_simplify_operands (rtx_insn *insn, rtx testreg) 417 { 418 int i, j; 419 420 /* For each operand, all registers that are equivalent to it. */ 421 HARD_REG_SET equiv_regs[MAX_RECOG_OPERANDS]; 422 423 const char *constraints[MAX_RECOG_OPERANDS]; 424 425 /* Vector recording how bad an alternative is. */ 426 int *alternative_reject; 427 /* Vector recording how many registers can be introduced by choosing 428 this alternative. */ 429 int *alternative_nregs; 430 /* Array of vectors recording, for each operand and each alternative, 431 which hard register to substitute, or -1 if the operand should be 432 left as it is. */ 433 int *op_alt_regno[MAX_RECOG_OPERANDS]; 434 /* Array of alternatives, sorted in order of decreasing desirability. 
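Desirability is measured by the reject value computed below, where each '?' in an alternative's constraints counts 3 and each '!' counts 300.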
*/ 435 int *alternative_order; 436 437 extract_constrain_insn (insn); 438 439 if (recog_data.n_alternatives == 0 || recog_data.n_operands == 0) 440 return 0; 441 442 alternative_reject = XALLOCAVEC (int, recog_data.n_alternatives); 443 alternative_nregs = XALLOCAVEC (int, recog_data.n_alternatives); 444 alternative_order = XALLOCAVEC (int, recog_data.n_alternatives); 445 memset (alternative_reject, 0, recog_data.n_alternatives * sizeof (int)); 446 memset (alternative_nregs, 0, recog_data.n_alternatives * sizeof (int)); 447 448 /* For each operand, find out which regs are equivalent. */ 449 for (i = 0; i < recog_data.n_operands; i++) 450 { 451 cselib_val *v; 452 struct elt_loc_list *l; 453 rtx op; 454 455 CLEAR_HARD_REG_SET (equiv_regs[i]); 456 457 /* cselib blows up on CODE_LABELs. Trying to fix that doesn't seem 458 right, so avoid the problem here. Likewise if we have a constant 459 and the insn pattern doesn't tell us the mode we need. */ 460 if (LABEL_P (recog_data.operand[i]) 461 || (CONSTANT_P (recog_data.operand[i]) 462 && recog_data.operand_mode[i] == VOIDmode)) 463 continue; 464 465 op = recog_data.operand[i]; 466 #ifdef LOAD_EXTEND_OP 467 if (MEM_P (op) 468 && GET_MODE_BITSIZE (GET_MODE (op)) < BITS_PER_WORD 469 && LOAD_EXTEND_OP (GET_MODE (op)) != UNKNOWN) 470 { 471 rtx set = single_set (insn); 472 473 /* We might have multiple sets, some of which do implicit 474 extension. Punt on this for now. */ 475 if (! set) 476 continue; 477 /* If the destination is also a MEM or a STRICT_LOW_PART, no 478 extension applies. 479 Also, if there is an explicit extension, we don't have to 480 worry about an implicit one. */ 481 else if (MEM_P (SET_DEST (set)) 482 || GET_CODE (SET_DEST (set)) == STRICT_LOW_PART 483 || GET_CODE (SET_SRC (set)) == ZERO_EXTEND 484 || GET_CODE (SET_SRC (set)) == SIGN_EXTEND) 485 ; /* Continue ordinary processing. */ 486 #ifdef CANNOT_CHANGE_MODE_CLASS 487 /* If the register cannot change mode to word_mode, it follows that 488 it cannot have been used in word_mode. */ 489 else if (REG_P (SET_DEST (set)) 490 && CANNOT_CHANGE_MODE_CLASS (GET_MODE (SET_DEST (set)), 491 word_mode, 492 REGNO_REG_CLASS (REGNO (SET_DEST (set))))) 493 ; /* Continue ordinary processing. */ 494 #endif 495 /* If this is a straight load, make the extension explicit. */ 496 else if (REG_P (SET_DEST (set)) 497 && recog_data.n_operands == 2 498 && SET_SRC (set) == op 499 && SET_DEST (set) == recog_data.operand[1-i]) 500 { 501 validate_change (insn, recog_data.operand_loc[i], 502 gen_rtx_fmt_e (LOAD_EXTEND_OP (GET_MODE (op)), 503 word_mode, op), 504 1); 505 validate_change (insn, recog_data.operand_loc[1-i], 506 gen_rtx_REG (word_mode, REGNO (SET_DEST (set))), 507 1); 508 if (! apply_change_group ()) 509 return 0; 510 return reload_cse_simplify_operands (insn, testreg); 511 } 512 else 513 /* ??? There might be arithmetic operations with memory that are 514 safe to optimize, but is it worth the trouble? */ 515 continue; 516 } 517 #endif /* LOAD_EXTEND_OP */ 518 if (side_effects_p (op)) 519 continue; 520 v = cselib_lookup (op, recog_data.operand_mode[i], 0, VOIDmode); 521 if (! 
v) 522 continue; 523 524 for (l = v->locs; l; l = l->next) 525 if (REG_P (l->loc)) 526 SET_HARD_REG_BIT (equiv_regs[i], REGNO (l->loc)); 527 } 528 529 alternative_mask preferred = get_preferred_alternatives (insn); 530 for (i = 0; i < recog_data.n_operands; i++) 531 { 532 machine_mode mode; 533 int regno; 534 const char *p; 535 536 op_alt_regno[i] = XALLOCAVEC (int, recog_data.n_alternatives); 537 for (j = 0; j < recog_data.n_alternatives; j++) 538 op_alt_regno[i][j] = -1; 539 540 p = constraints[i] = recog_data.constraints[i]; 541 mode = recog_data.operand_mode[i]; 542 543 /* Add the reject values for each alternative given by the constraints 544 for this operand. */ 545 j = 0; 546 while (*p != '\0') 547 { 548 char c = *p++; 549 if (c == ',') 550 j++; 551 else if (c == '?') 552 alternative_reject[j] += 3; 553 else if (c == '!') 554 alternative_reject[j] += 300; 555 } 556 557 /* We won't change operands which are already registers. We 558 also don't want to modify output operands. */ 559 regno = true_regnum (recog_data.operand[i]); 560 if (regno >= 0 561 || constraints[i][0] == '=' 562 || constraints[i][0] == '+') 563 continue; 564 565 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 566 { 567 enum reg_class rclass = NO_REGS; 568 569 if (! TEST_HARD_REG_BIT (equiv_regs[i], regno)) 570 continue; 571 572 SET_REGNO_RAW (testreg, regno); 573 PUT_MODE (testreg, mode); 574 575 /* We found a register equal to this operand. Now look for all 576 alternatives that can accept this register and have not been 577 assigned a register they can use yet. */ 578 j = 0; 579 p = constraints[i]; 580 for (;;) 581 { 582 char c = *p; 583 584 switch (c) 585 { 586 case 'g': 587 rclass = reg_class_subunion[rclass][GENERAL_REGS]; 588 break; 589 590 default: 591 rclass 592 = (reg_class_subunion 593 [rclass] 594 [reg_class_for_constraint (lookup_constraint (p))]); 595 break; 596 597 case ',': case '\0': 598 /* See if REGNO fits this alternative, and set it up as the 599 replacement register if we don't have one for this 600 alternative yet and the operand being replaced is not 601 a cheap CONST_INT. */ 602 if (op_alt_regno[i][j] == -1 603 && TEST_BIT (preferred, j) 604 && reg_fits_class_p (testreg, rclass, 0, mode) 605 && (!CONST_INT_P (recog_data.operand[i]) 606 || (set_src_cost (recog_data.operand[i], 607 optimize_bb_for_speed_p 608 (BLOCK_FOR_INSN (insn))) 609 > set_src_cost (testreg, 610 optimize_bb_for_speed_p 611 (BLOCK_FOR_INSN (insn)))))) 612 { 613 alternative_nregs[j]++; 614 op_alt_regno[i][j] = regno; 615 } 616 j++; 617 rclass = NO_REGS; 618 break; 619 } 620 p += CONSTRAINT_LEN (c, p); 621 622 if (c == '\0') 623 break; 624 } 625 } 626 } 627 628 /* Record all alternatives which are better or equal to the currently 629 matching one in the alternative_order array. */ 630 for (i = j = 0; i < recog_data.n_alternatives; i++) 631 if (alternative_reject[i] <= alternative_reject[which_alternative]) 632 alternative_order[j++] = i; 633 recog_data.n_alternatives = j; 634 635 /* Sort it. Given a small number of alternatives, a dumb algorithm 636 won't hurt too much. 
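We use a simple selection sort, ordering by reject value and breaking ties in favor of the alternative that introduces more hard registers.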
*/ 637 for (i = 0; i < recog_data.n_alternatives - 1; i++) 638 { 639 int best = i; 640 int best_reject = alternative_reject[alternative_order[i]]; 641 int best_nregs = alternative_nregs[alternative_order[i]]; 642 int tmp; 643 644 for (j = i + 1; j < recog_data.n_alternatives; j++) 645 { 646 int this_reject = alternative_reject[alternative_order[j]]; 647 int this_nregs = alternative_nregs[alternative_order[j]]; 648 649 if (this_reject < best_reject 650 || (this_reject == best_reject && this_nregs > best_nregs)) 651 { 652 best = j; 653 best_reject = this_reject; 654 best_nregs = this_nregs; 655 } 656 } 657 658 tmp = alternative_order[best]; 659 alternative_order[best] = alternative_order[i]; 660 alternative_order[i] = tmp; 661 } 662 663 /* Substitute the operands as determined by op_alt_regno for the best 664 alternative. */ 665 j = alternative_order[0]; 666 667 for (i = 0; i < recog_data.n_operands; i++) 668 { 669 machine_mode mode = recog_data.operand_mode[i]; 670 if (op_alt_regno[i][j] == -1) 671 continue; 672 673 validate_change (insn, recog_data.operand_loc[i], 674 gen_rtx_REG (mode, op_alt_regno[i][j]), 1); 675 } 676 677 for (i = recog_data.n_dups - 1; i >= 0; i--) 678 { 679 int op = recog_data.dup_num[i]; 680 machine_mode mode = recog_data.operand_mode[op]; 681 682 if (op_alt_regno[op][j] == -1) 683 continue; 684 685 validate_change (insn, recog_data.dup_loc[i], 686 gen_rtx_REG (mode, op_alt_regno[op][j]), 1); 687 } 688 689 return apply_change_group (); 690 } 691 692 /* If reload couldn't use reg+reg+offset addressing, try to use reg+reg 693 addressing now. 694 This code might also be useful when reload gave up on reg+reg addressing 695 because of clashes between the return register and INDEX_REG_CLASS. */ 696 697 /* The maximum number of uses of a register we can keep track of to 698 replace them with reg+reg addressing. */ 699 #define RELOAD_COMBINE_MAX_USES 16 700 701 /* Describes a recorded use of a register. */ 702 struct reg_use 703 { 704 /* The insn where a register has been used. */ 705 rtx_insn *insn; 706 /* Points to the memory reference enclosing the use, if any, NULL_RTX 707 otherwise. */ 708 rtx containing_mem; 709 /* Location of the register within INSN. */ 710 rtx *usep; 711 /* The reverse uid of the insn. */ 712 int ruid; 713 }; 714 715 /* If the register is used in some unknown fashion, USE_INDEX is negative. 716 If it is dead, USE_INDEX is RELOAD_COMBINE_MAX_USES, and STORE_RUID 717 indicates where it is first set or clobbered. 718 Otherwise, USE_INDEX is the index of the last encountered use of the 719 register (which is first among these we have seen since we scan backwards). 720 USE_RUID indicates the first encountered, i.e. last, of these uses. 721 If ALL_OFFSETS_MATCH is true, all encountered uses were inside a PLUS 722 with a constant offset; OFFSET contains this constant in that case. 723 STORE_RUID is always meaningful if we only want to use a value in a 724 register in a different place: it denotes the next insn in the insn 725 stream (i.e. the last encountered) that sets or clobbers the register. 726 REAL_STORE_RUID is similar, but clobbers are ignored when updating it. */ 727 static struct 728 { 729 struct reg_use reg_use[RELOAD_COMBINE_MAX_USES]; 730 rtx offset; 731 int use_index; 732 int store_ruid; 733 int real_store_ruid; 734 int use_ruid; 735 bool all_offsets_match; 736 } reg_state[FIRST_PSEUDO_REGISTER]; 737 738 /* Reverse linear uid. This is increased in reload_combine while scanning 739 the instructions from last to first. 
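Thus later insns in the stream have smaller ruids than earlier ones.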
It is used to set last_label_ruid 740 and the store_ruid / use_ruid fields in reg_state. */ 741 static int reload_combine_ruid; 742 743 /* The RUID of the last label we encountered in reload_combine. */ 744 static int last_label_ruid; 745 746 /* The RUID of the last jump we encountered in reload_combine. */ 747 static int last_jump_ruid; 748 749 /* The register numbers of the first and last index register. A value of 750 -1 in LAST_INDEX_REG indicates that we've previously computed these 751 values and found no suitable index registers. */ 752 static int first_index_reg = -1; 753 static int last_index_reg; 754 755 #define LABEL_LIVE(LABEL) \ 756 (label_live[CODE_LABEL_NUMBER (LABEL) - min_labelno]) 757 758 /* Subroutine of reload_combine_split_ruids, called to fix up a single 759 ruid pointed to by *PRUID if it is higher than SPLIT_RUID. */ 760 761 static inline void 762 reload_combine_split_one_ruid (int *pruid, int split_ruid) 763 { 764 if (*pruid > split_ruid) 765 (*pruid)++; 766 } 767 768 /* Called when we insert a new insn in a position we've already passed in 769 the scan. Examine all our state, increasing all ruids that are higher 770 than SPLIT_RUID by one in order to make room for a new insn. */ 771 772 static void 773 reload_combine_split_ruids (int split_ruid) 774 { 775 unsigned i; 776 777 reload_combine_split_one_ruid (&reload_combine_ruid, split_ruid); 778 reload_combine_split_one_ruid (&last_label_ruid, split_ruid); 779 reload_combine_split_one_ruid (&last_jump_ruid, split_ruid); 780 781 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 782 { 783 int j, idx = reg_state[i].use_index; 784 reload_combine_split_one_ruid (®_state[i].use_ruid, split_ruid); 785 reload_combine_split_one_ruid (®_state[i].store_ruid, split_ruid); 786 reload_combine_split_one_ruid (®_state[i].real_store_ruid, 787 split_ruid); 788 if (idx < 0) 789 continue; 790 for (j = idx; j < RELOAD_COMBINE_MAX_USES; j++) 791 { 792 reload_combine_split_one_ruid (®_state[i].reg_use[j].ruid, 793 split_ruid); 794 } 795 } 796 } 797 798 /* Called when we are about to rescan a previously encountered insn with 799 reload_combine_note_use after modifying some part of it. This clears all 800 information about uses in that particular insn. */ 801 802 static void 803 reload_combine_purge_insn_uses (rtx_insn *insn) 804 { 805 unsigned i; 806 807 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 808 { 809 int j, k, idx = reg_state[i].use_index; 810 if (idx < 0) 811 continue; 812 j = k = RELOAD_COMBINE_MAX_USES; 813 while (j-- > idx) 814 { 815 if (reg_state[i].reg_use[j].insn != insn) 816 { 817 k--; 818 if (k != j) 819 reg_state[i].reg_use[k] = reg_state[i].reg_use[j]; 820 } 821 } 822 reg_state[i].use_index = k; 823 } 824 } 825 826 /* Called when we need to forget about all uses of REGNO after an insn 827 which is identified by RUID. */ 828 829 static void 830 reload_combine_purge_reg_uses_after_ruid (unsigned regno, int ruid) 831 { 832 int j, k, idx = reg_state[regno].use_index; 833 if (idx < 0) 834 return; 835 j = k = RELOAD_COMBINE_MAX_USES; 836 while (j-- > idx) 837 { 838 if (reg_state[regno].reg_use[j].ruid >= ruid) 839 { 840 k--; 841 if (k != j) 842 reg_state[regno].reg_use[k] = reg_state[regno].reg_use[j]; 843 } 844 } 845 reg_state[regno].use_index = k; 846 } 847 848 /* Find the use of REGNO with the ruid that is highest among those 849 lower than RUID_LIMIT, and return it if it is the only use of this 850 reg in the insn. Return NULL otherwise. 
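We also return NULL if the use found is separated from the current scan position by a label, since our transformations must not cross labels.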
*/ 851 852 static struct reg_use * 853 reload_combine_closest_single_use (unsigned regno, int ruid_limit) 854 { 855 int i, best_ruid = 0; 856 int use_idx = reg_state[regno].use_index; 857 struct reg_use *retval; 858 859 if (use_idx < 0) 860 return NULL; 861 retval = NULL; 862 for (i = use_idx; i < RELOAD_COMBINE_MAX_USES; i++) 863 { 864 struct reg_use *use = reg_state[regno].reg_use + i; 865 int this_ruid = use->ruid; 866 if (this_ruid >= ruid_limit) 867 continue; 868 if (this_ruid > best_ruid) 869 { 870 best_ruid = this_ruid; 871 retval = use; 872 } 873 else if (this_ruid == best_ruid) 874 retval = NULL; 875 } 876 if (last_label_ruid >= best_ruid) 877 return NULL; 878 return retval; 879 } 880 881 /* After we've moved an add insn, fix up any debug insns that occur 882 between the old location of the add and the new location. REG is 883 the destination register of the add insn; REPLACEMENT is the 884 SET_SRC of the add. FROM and TO specify the range in which we 885 should make this change on debug insns. */ 886 887 static void 888 fixup_debug_insns (rtx reg, rtx replacement, rtx_insn *from, rtx_insn *to) 889 { 890 rtx_insn *insn; 891 for (insn = from; insn != to; insn = NEXT_INSN (insn)) 892 { 893 rtx t; 894 895 if (!DEBUG_INSN_P (insn)) 896 continue; 897 898 t = INSN_VAR_LOCATION_LOC (insn); 899 t = simplify_replace_rtx (t, reg, replacement); 900 validate_change (insn, &INSN_VAR_LOCATION_LOC (insn), t, 0); 901 } 902 } 903 904 /* Subroutine of reload_combine_recognize_const_pattern. Try to replace REG 905 with SRC in the insn described by USE, taking costs into account. Return 906 true if we made the replacement. */ 907 908 static bool 909 try_replace_in_use (struct reg_use *use, rtx reg, rtx src) 910 { 911 rtx_insn *use_insn = use->insn; 912 rtx mem = use->containing_mem; 913 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)); 914 915 if (mem != NULL_RTX) 916 { 917 addr_space_t as = MEM_ADDR_SPACE (mem); 918 rtx oldaddr = XEXP (mem, 0); 919 rtx newaddr = NULL_RTX; 920 int old_cost = address_cost (oldaddr, GET_MODE (mem), as, speed); 921 int new_cost; 922 923 newaddr = simplify_replace_rtx (oldaddr, reg, src); 924 if (memory_address_addr_space_p (GET_MODE (mem), newaddr, as)) 925 { 926 XEXP (mem, 0) = newaddr; 927 new_cost = address_cost (newaddr, GET_MODE (mem), as, speed); 928 XEXP (mem, 0) = oldaddr; 929 if (new_cost <= old_cost 930 && validate_change (use_insn, 931 &XEXP (mem, 0), newaddr, 0)) 932 return true; 933 } 934 } 935 else 936 { 937 rtx new_set = single_set (use_insn); 938 if (new_set 939 && REG_P (SET_DEST (new_set)) 940 && GET_CODE (SET_SRC (new_set)) == PLUS 941 && REG_P (XEXP (SET_SRC (new_set), 0)) 942 && CONSTANT_P (XEXP (SET_SRC (new_set), 1))) 943 { 944 rtx new_src; 945 int old_cost = set_src_cost (SET_SRC (new_set), speed); 946 947 gcc_assert (rtx_equal_p (XEXP (SET_SRC (new_set), 0), reg)); 948 new_src = simplify_replace_rtx (SET_SRC (new_set), reg, src); 949 950 if (set_src_cost (new_src, speed) <= old_cost 951 && validate_change (use_insn, &SET_SRC (new_set), 952 new_src, 0)) 953 return true; 954 } 955 } 956 return false; 957 } 958 959 /* Called by reload_combine when scanning INSN. This function tries to detect 960 patterns where a constant is added to a register, and the result is used 961 in an address. 962 Return true if no further processing is needed on INSN; false if it wasn't 963 recognized and should be handled normally. 
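A typical opportunity looks like (set (reg R) (plus (reg B) (const_int N))) followed by a use such as (mem (reg R)); if profitable, the use becomes (mem (plus (reg B) (const_int N))) and the add is moved or deleted.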
*/ 964 965 static bool 966 reload_combine_recognize_const_pattern (rtx_insn *insn) 967 { 968 int from_ruid = reload_combine_ruid; 969 rtx set, pat, reg, src, addreg; 970 unsigned int regno; 971 struct reg_use *use; 972 bool must_move_add; 973 rtx_insn *add_moved_after_insn = NULL; 974 int add_moved_after_ruid = 0; 975 int clobbered_regno = -1; 976 977 set = single_set (insn); 978 if (set == NULL_RTX) 979 return false; 980 981 reg = SET_DEST (set); 982 src = SET_SRC (set); 983 if (!REG_P (reg) 984 || hard_regno_nregs[REGNO (reg)][GET_MODE (reg)] != 1 985 || GET_MODE (reg) != Pmode 986 || reg == stack_pointer_rtx) 987 return false; 988 989 regno = REGNO (reg); 990 991 /* We look for a REG1 = REG2 + CONSTANT insn, followed by either 992 uses of REG1 inside an address, or inside another add insn. If 993 possible and profitable, merge the addition into subsequent 994 uses. */ 995 if (GET_CODE (src) != PLUS 996 || !REG_P (XEXP (src, 0)) 997 || !CONSTANT_P (XEXP (src, 1))) 998 return false; 999 1000 addreg = XEXP (src, 0); 1001 must_move_add = rtx_equal_p (reg, addreg); 1002 1003 pat = PATTERN (insn); 1004 if (must_move_add && set != pat) 1005 { 1006 /* We have to be careful when moving the add; apart from the 1007 single_set there may also be clobbers. Recognize one special 1008 case, that of one clobber alongside the set (likely a clobber 1009 of the CC register). */ 1010 gcc_assert (GET_CODE (PATTERN (insn)) == PARALLEL); 1011 if (XVECLEN (pat, 0) != 2 || XVECEXP (pat, 0, 0) != set 1012 || GET_CODE (XVECEXP (pat, 0, 1)) != CLOBBER 1013 || !REG_P (XEXP (XVECEXP (pat, 0, 1), 0))) 1014 return false; 1015 clobbered_regno = REGNO (XEXP (XVECEXP (pat, 0, 1), 0)); 1016 } 1017 1018 do 1019 { 1020 use = reload_combine_closest_single_use (regno, from_ruid); 1021 1022 if (use) 1023 /* Start the search for the next use from here. */ 1024 from_ruid = use->ruid; 1025 1026 if (use && GET_MODE (*use->usep) == Pmode) 1027 { 1028 bool delete_add = false; 1029 rtx_insn *use_insn = use->insn; 1030 int use_ruid = use->ruid; 1031 1032 /* Avoid moving the add insn past a jump. */ 1033 if (must_move_add && use_ruid <= last_jump_ruid) 1034 break; 1035 1036 /* If the add clobbers another hard reg in parallel, don't move 1037 it past a real set of this hard reg. */ 1038 if (must_move_add && clobbered_regno >= 0 1039 && reg_state[clobbered_regno].real_store_ruid >= use_ruid) 1040 break; 1041 1042 #ifdef HAVE_cc0 1043 /* Do not separate cc0 setter and cc0 user on HAVE_cc0 targets. */ 1044 if (must_move_add && sets_cc0_p (PATTERN (use_insn))) 1045 break; 1046 #endif 1047 1048 gcc_assert (reg_state[regno].store_ruid <= use_ruid); 1049 /* Avoid moving a use of ADDREG past a point where it is stored. */ 1050 if (reg_state[REGNO (addreg)].store_ruid > use_ruid) 1051 break; 1052 1053 /* We also must not move the addition past an insn that sets 1054 the same register, unless we can combine two add insns. 
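Combining works when the later insn is itself an add of a constant into the same register; our constant is folded into it and the original add becomes redundant, which the delete_add handling below takes care of.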
*/ 1055 if (must_move_add && reg_state[regno].store_ruid == use_ruid) 1056 { 1057 if (use->containing_mem == NULL_RTX) 1058 delete_add = true; 1059 else 1060 break; 1061 } 1062 1063 if (try_replace_in_use (use, reg, src)) 1064 { 1065 reload_combine_purge_insn_uses (use_insn); 1066 reload_combine_note_use (&PATTERN (use_insn), use_insn, 1067 use_ruid, NULL_RTX); 1068 1069 if (delete_add) 1070 { 1071 fixup_debug_insns (reg, src, insn, use_insn); 1072 delete_insn (insn); 1073 return true; 1074 } 1075 if (must_move_add) 1076 { 1077 add_moved_after_insn = use_insn; 1078 add_moved_after_ruid = use_ruid; 1079 } 1080 continue; 1081 } 1082 } 1083 /* If we get here, we couldn't handle this use. */ 1084 if (must_move_add) 1085 break; 1086 } 1087 while (use); 1088 1089 if (!must_move_add || add_moved_after_insn == NULL_RTX) 1090 /* Process the add normally. */ 1091 return false; 1092 1093 fixup_debug_insns (reg, src, insn, add_moved_after_insn); 1094 1095 reorder_insns (insn, insn, add_moved_after_insn); 1096 reload_combine_purge_reg_uses_after_ruid (regno, add_moved_after_ruid); 1097 reload_combine_split_ruids (add_moved_after_ruid - 1); 1098 reload_combine_note_use (&PATTERN (insn), insn, 1099 add_moved_after_ruid, NULL_RTX); 1100 reg_state[regno].store_ruid = add_moved_after_ruid; 1101 1102 return true; 1103 } 1104 1105 /* Called by reload_combine when scanning INSN. Try to detect a pattern we 1106 can handle and improve. Return true if no further processing is needed on 1107 INSN; false if it wasn't recognized and should be handled normally. */ 1108 1109 static bool 1110 reload_combine_recognize_pattern (rtx_insn *insn) 1111 { 1112 rtx set, reg, src; 1113 1114 set = single_set (insn); 1115 if (set == NULL_RTX) 1116 return false; 1117 1118 reg = SET_DEST (set); 1119 src = SET_SRC (set); 1120 if (!REG_P (reg) 1121 || hard_regno_nregs[REGNO (reg)][GET_MODE (reg)] != 1) 1122 return false; 1123 1124 unsigned int regno = REGNO (reg); 1125 machine_mode mode = GET_MODE (reg); 1126 1127 if (reg_state[regno].use_index < 0 1128 || reg_state[regno].use_index >= RELOAD_COMBINE_MAX_USES) 1129 return false; 1130 1131 for (int i = reg_state[regno].use_index; 1132 i < RELOAD_COMBINE_MAX_USES; i++) 1133 { 1134 struct reg_use *use = reg_state[regno].reg_use + i; 1135 if (GET_MODE (*use->usep) != mode) 1136 return false; 1137 } 1138 1139 /* Look for (set (REGX) (CONST_INT)) 1140 (set (REGX) (PLUS (REGX) (REGY))) 1141 ... 1142 ... (MEM (REGX)) ... 1143 and convert it to 1144 (set (REGZ) (CONST_INT)) 1145 ... 1146 ... (MEM (PLUS (REGZ) (REGY)))... . 1147 1148 First, check that we have (set (REGX) (PLUS (REGX) (REGY))) 1149 and that we know all uses of REGX before it dies. 1150 Also, explicitly check that REGX != REGY; our life information 1151 does not yet show whether REGY changes in this insn. */ 1152 1153 if (GET_CODE (src) == PLUS 1154 && reg_state[regno].all_offsets_match 1155 && last_index_reg != -1 1156 && REG_P (XEXP (src, 1)) 1157 && rtx_equal_p (XEXP (src, 0), reg) 1158 && !rtx_equal_p (XEXP (src, 1), reg) 1159 && last_label_ruid < reg_state[regno].use_ruid) 1160 { 1161 rtx base = XEXP (src, 1); 1162 rtx_insn *prev = prev_nonnote_nondebug_insn (insn); 1163 rtx prev_set = prev ? 
single_set (prev) : NULL_RTX; 1164 rtx index_reg = NULL_RTX; 1165 rtx reg_sum = NULL_RTX; 1166 int i; 1167 1168 /* Now we need to set INDEX_REG to an index register (denoted as 1169 REGZ in the illustration above) and REG_SUM to the expression 1170 register+register that we want to use to substitute uses of REG 1171 (typically in MEMs) with. First check REG and BASE for being 1172 index registers; we can use them even if they are not dead. */ 1173 if (TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS], regno) 1174 || TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS], 1175 REGNO (base))) 1176 { 1177 index_reg = reg; 1178 reg_sum = src; 1179 } 1180 else 1181 { 1182 /* Otherwise, look for a free index register. Since we have 1183 checked above that neither REG nor BASE are index registers, 1184 if we find anything at all, it will be different from these 1185 two registers. */ 1186 for (i = first_index_reg; i <= last_index_reg; i++) 1187 { 1188 if (TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS], i) 1189 && reg_state[i].use_index == RELOAD_COMBINE_MAX_USES 1190 && reg_state[i].store_ruid <= reg_state[regno].use_ruid 1191 && (call_used_regs[i] || df_regs_ever_live_p (i)) 1192 && (!frame_pointer_needed || i != HARD_FRAME_POINTER_REGNUM) 1193 && !fixed_regs[i] && !global_regs[i] 1194 && hard_regno_nregs[i][GET_MODE (reg)] == 1 1195 && targetm.hard_regno_scratch_ok (i)) 1196 { 1197 index_reg = gen_rtx_REG (GET_MODE (reg), i); 1198 reg_sum = gen_rtx_PLUS (GET_MODE (reg), index_reg, base); 1199 break; 1200 } 1201 } 1202 } 1203 1204 /* Check that PREV_SET is indeed (set (REGX) (CONST_INT)) and that 1205 (REGY), i.e. BASE, is not clobbered before the last use we'll 1206 create. */ 1207 if (reg_sum 1208 && prev_set 1209 && CONST_INT_P (SET_SRC (prev_set)) 1210 && rtx_equal_p (SET_DEST (prev_set), reg) 1211 && (reg_state[REGNO (base)].store_ruid 1212 <= reg_state[regno].use_ruid)) 1213 { 1214 /* Change destination register and, if necessary, the constant 1215 value in PREV, the constant loading instruction. */ 1216 validate_change (prev, &SET_DEST (prev_set), index_reg, 1); 1217 if (reg_state[regno].offset != const0_rtx) 1218 validate_change (prev, 1219 &SET_SRC (prev_set), 1220 GEN_INT (INTVAL (SET_SRC (prev_set)) 1221 + INTVAL (reg_state[regno].offset)), 1222 1); 1223 1224 /* Now for every use of REG that we have recorded, replace REG 1225 with REG_SUM. */ 1226 for (i = reg_state[regno].use_index; 1227 i < RELOAD_COMBINE_MAX_USES; i++) 1228 validate_unshare_change (reg_state[regno].reg_use[i].insn, 1229 reg_state[regno].reg_use[i].usep, 1230 /* Each change must have its own 1231 replacement. */ 1232 reg_sum, 1); 1233 1234 if (apply_change_group ()) 1235 { 1236 struct reg_use *lowest_ruid = NULL; 1237 1238 /* For every new use of REG_SUM, we have to record the use 1239 of BASE therein, i.e. operand 1. */ 1240 for (i = reg_state[regno].use_index; 1241 i < RELOAD_COMBINE_MAX_USES; i++) 1242 { 1243 struct reg_use *use = reg_state[regno].reg_use + i; 1244 reload_combine_note_use (&XEXP (*use->usep, 1), use->insn, 1245 use->ruid, use->containing_mem); 1246 if (lowest_ruid == NULL || use->ruid < lowest_ruid->ruid) 1247 lowest_ruid = use; 1248 } 1249 1250 fixup_debug_insns (reg, reg_sum, insn, lowest_ruid->insn); 1251 1252 /* Delete the reg-reg addition. */ 1253 delete_insn (insn); 1254 1255 if (reg_state[regno].offset != const0_rtx) 1256 /* Previous REG_EQUIV / REG_EQUAL notes for PREV 1257 are now invalid. 
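The constant PREV loads has just been adjusted by the recorded offset, so a note describing its old value would no longer be correct.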
*/ 1258 remove_reg_equal_equiv_notes (prev); 1259 1260 reg_state[regno].use_index = RELOAD_COMBINE_MAX_USES; 1261 return true; 1262 } 1263 } 1264 } 1265 return false; 1266 } 1267 1268 static void 1269 reload_combine (void) 1270 { 1271 rtx_insn *insn, *prev; 1272 basic_block bb; 1273 unsigned int r; 1274 int min_labelno, n_labels; 1275 HARD_REG_SET ever_live_at_start, *label_live; 1276 1277 /* To avoid wasting too much time later searching for an index register, 1278 determine the minimum and maximum index register numbers. */ 1279 if (INDEX_REG_CLASS == NO_REGS) 1280 last_index_reg = -1; 1281 else if (first_index_reg == -1 && last_index_reg == 0) 1282 { 1283 for (r = 0; r < FIRST_PSEUDO_REGISTER; r++) 1284 if (TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS], r)) 1285 { 1286 if (first_index_reg == -1) 1287 first_index_reg = r; 1288 1289 last_index_reg = r; 1290 } 1291 1292 /* If no index register is available, we can quit now. Set LAST_INDEX_REG 1293 to -1 so we'll know to quit early the next time we get here. */ 1294 if (first_index_reg == -1) 1295 { 1296 last_index_reg = -1; 1297 return; 1298 } 1299 } 1300 1301 /* Set up LABEL_LIVE and EVER_LIVE_AT_START. The register lifetime 1302 information is a bit fuzzy immediately after reload, but it's 1303 still good enough to determine which registers are live at a jump 1304 destination. */ 1305 min_labelno = get_first_label_num (); 1306 n_labels = max_label_num () - min_labelno; 1307 label_live = XNEWVEC (HARD_REG_SET, n_labels); 1308 CLEAR_HARD_REG_SET (ever_live_at_start); 1309 1310 FOR_EACH_BB_REVERSE_FN (bb, cfun) 1311 { 1312 insn = BB_HEAD (bb); 1313 if (LABEL_P (insn)) 1314 { 1315 HARD_REG_SET live; 1316 bitmap live_in = df_get_live_in (bb); 1317 1318 REG_SET_TO_HARD_REG_SET (live, live_in); 1319 compute_use_by_pseudos (&live, live_in); 1320 COPY_HARD_REG_SET (LABEL_LIVE (insn), live); 1321 IOR_HARD_REG_SET (ever_live_at_start, live); 1322 } 1323 } 1324 1325 /* Initialize last_label_ruid, reload_combine_ruid and reg_state. */ 1326 last_label_ruid = last_jump_ruid = reload_combine_ruid = 0; 1327 for (r = 0; r < FIRST_PSEUDO_REGISTER; r++) 1328 { 1329 reg_state[r].store_ruid = 0; 1330 reg_state[r].real_store_ruid = 0; 1331 if (fixed_regs[r]) 1332 reg_state[r].use_index = -1; 1333 else 1334 reg_state[r].use_index = RELOAD_COMBINE_MAX_USES; 1335 } 1336 1337 for (insn = get_last_insn (); insn; insn = prev) 1338 { 1339 bool control_flow_insn; 1340 rtx note; 1341 1342 prev = PREV_INSN (insn); 1343 1344 /* We cannot do our optimization across labels. Invalidating all the use 1345 information we have would be costly, so we just note where the label 1346 is and then later disable any optimization that would cross it. */ 1347 if (LABEL_P (insn)) 1348 last_label_ruid = reload_combine_ruid; 1349 else if (BARRIER_P (insn)) 1350 { 1351 /* Crossing a barrier resets all the use information. */ 1352 for (r = 0; r < FIRST_PSEUDO_REGISTER; r++) 1353 if (! fixed_regs[r]) 1354 reg_state[r].use_index = RELOAD_COMBINE_MAX_USES; 1355 } 1356 else if (INSN_P (insn) && volatile_insn_p (PATTERN (insn))) 1357 /* Optimizations across insns being marked as volatile must be 1358 prevented. All the usage information is invalidated 1359 here. */ 1360 for (r = 0; r < FIRST_PSEUDO_REGISTER; r++) 1361 if (! fixed_regs[r] 1362 && reg_state[r].use_index != RELOAD_COMBINE_MAX_USES) 1363 reg_state[r].use_index = -1; 1364 1365 if (! 
NONDEBUG_INSN_P (insn)) 1366 continue; 1367 1368 reload_combine_ruid++; 1369 1370 control_flow_insn = control_flow_insn_p (insn); 1371 if (control_flow_insn) 1372 last_jump_ruid = reload_combine_ruid; 1373 1374 if (reload_combine_recognize_const_pattern (insn) 1375 || reload_combine_recognize_pattern (insn)) 1376 continue; 1377 1378 note_stores (PATTERN (insn), reload_combine_note_store, NULL); 1379 1380 if (CALL_P (insn)) 1381 { 1382 rtx link; 1383 HARD_REG_SET used_regs; 1384 1385 get_call_reg_set_usage (insn, &used_regs, call_used_reg_set); 1386 1387 for (r = 0; r < FIRST_PSEUDO_REGISTER; r++) 1388 if (TEST_HARD_REG_BIT (used_regs, r)) 1389 { 1390 reg_state[r].use_index = RELOAD_COMBINE_MAX_USES; 1391 reg_state[r].store_ruid = reload_combine_ruid; 1392 } 1393 1394 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; 1395 link = XEXP (link, 1)) 1396 { 1397 rtx setuse = XEXP (link, 0); 1398 rtx usage_rtx = XEXP (setuse, 0); 1399 if ((GET_CODE (setuse) == USE || GET_CODE (setuse) == CLOBBER) 1400 && REG_P (usage_rtx)) 1401 { 1402 unsigned int i; 1403 unsigned int start_reg = REGNO (usage_rtx); 1404 unsigned int num_regs 1405 = hard_regno_nregs[start_reg][GET_MODE (usage_rtx)]; 1406 unsigned int end_reg = start_reg + num_regs - 1; 1407 for (i = start_reg; i <= end_reg; i++) 1408 if (GET_CODE (XEXP (link, 0)) == CLOBBER) 1409 { 1410 reg_state[i].use_index = RELOAD_COMBINE_MAX_USES; 1411 reg_state[i].store_ruid = reload_combine_ruid; 1412 } 1413 else 1414 reg_state[i].use_index = -1; 1415 } 1416 } 1417 } 1418 1419 if (control_flow_insn && !ANY_RETURN_P (PATTERN (insn))) 1420 { 1421 /* Non-spill registers might be used at the call destination in 1422 some unknown fashion, so we have to mark the unknown use. */ 1423 HARD_REG_SET *live; 1424 1425 if ((condjump_p (insn) || condjump_in_parallel_p (insn)) 1426 && JUMP_LABEL (insn)) 1427 { 1428 if (ANY_RETURN_P (JUMP_LABEL (insn))) 1429 live = NULL; 1430 else 1431 live = &LABEL_LIVE (JUMP_LABEL (insn)); 1432 } 1433 else 1434 live = &ever_live_at_start; 1435 1436 if (live) 1437 for (r = 0; r < FIRST_PSEUDO_REGISTER; r++) 1438 if (TEST_HARD_REG_BIT (*live, r)) 1439 reg_state[r].use_index = -1; 1440 } 1441 1442 reload_combine_note_use (&PATTERN (insn), insn, reload_combine_ruid, 1443 NULL_RTX); 1444 1445 for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) 1446 { 1447 if (REG_NOTE_KIND (note) == REG_INC && REG_P (XEXP (note, 0))) 1448 { 1449 int regno = REGNO (XEXP (note, 0)); 1450 reg_state[regno].store_ruid = reload_combine_ruid; 1451 reg_state[regno].real_store_ruid = reload_combine_ruid; 1452 reg_state[regno].use_index = -1; 1453 } 1454 } 1455 } 1456 1457 free (label_live); 1458 } 1459 1460 /* Check if DST is a register or a subreg of a register; if it is, 1461 update store_ruid, real_store_ruid and use_index in the reg_state 1462 structure accordingly. Called via note_stores from reload_combine. */ 1463 1464 static void 1465 reload_combine_note_store (rtx dst, const_rtx set, void *data ATTRIBUTE_UNUSED) 1466 { 1467 int regno = 0; 1468 int i; 1469 machine_mode mode = GET_MODE (dst); 1470 1471 if (GET_CODE (dst) == SUBREG) 1472 { 1473 regno = subreg_regno_offset (REGNO (SUBREG_REG (dst)), 1474 GET_MODE (SUBREG_REG (dst)), 1475 SUBREG_BYTE (dst), 1476 GET_MODE (dst)); 1477 dst = SUBREG_REG (dst); 1478 } 1479 1480 /* Some targets do argument pushes without adding REG_INC notes. 
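For example, a push such as (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI 0)) modifies the stack pointer without any REG_INC note, so we look for auto-modified addresses here.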
*/ 1481 1482 if (MEM_P (dst)) 1483 { 1484 dst = XEXP (dst, 0); 1485 if (GET_CODE (dst) == PRE_INC || GET_CODE (dst) == POST_INC 1486 || GET_CODE (dst) == PRE_DEC || GET_CODE (dst) == POST_DEC 1487 || GET_CODE (dst) == PRE_MODIFY || GET_CODE (dst) == POST_MODIFY) 1488 { 1489 regno = REGNO (XEXP (dst, 0)); 1490 mode = GET_MODE (XEXP (dst, 0)); 1491 for (i = hard_regno_nregs[regno][mode] - 1 + regno; i >= regno; i--) 1492 { 1493 /* We could probably do better, but for now mark the register 1494 as used in an unknown fashion and set/clobbered at this 1495 insn. */ 1496 reg_state[i].use_index = -1; 1497 reg_state[i].store_ruid = reload_combine_ruid; 1498 reg_state[i].real_store_ruid = reload_combine_ruid; 1499 } 1500 } 1501 else 1502 return; 1503 } 1504 1505 if (!REG_P (dst)) 1506 return; 1507 regno += REGNO (dst); 1508 1509 /* note_stores might have stripped a STRICT_LOW_PART, so we have to be 1510 careful with registers / register parts that are not full words. 1511 Similarly for ZERO_EXTRACT. */ 1512 if (GET_CODE (SET_DEST (set)) == ZERO_EXTRACT 1513 || GET_CODE (SET_DEST (set)) == STRICT_LOW_PART) 1514 { 1515 for (i = hard_regno_nregs[regno][mode] - 1 + regno; i >= regno; i--) 1516 { 1517 reg_state[i].use_index = -1; 1518 reg_state[i].store_ruid = reload_combine_ruid; 1519 reg_state[i].real_store_ruid = reload_combine_ruid; 1520 } 1521 } 1522 else 1523 { 1524 for (i = hard_regno_nregs[regno][mode] - 1 + regno; i >= regno; i--) 1525 { 1526 reg_state[i].store_ruid = reload_combine_ruid; 1527 if (GET_CODE (set) == SET) 1528 reg_state[i].real_store_ruid = reload_combine_ruid; 1529 reg_state[i].use_index = RELOAD_COMBINE_MAX_USES; 1530 } 1531 } 1532 } 1533 1534 /* XP points to a piece of rtl that has to be checked for any uses of 1535 registers. 1536 *XP is the pattern of INSN, or a part of it. 1537 Called from reload_combine, and recursively by itself. */ 1538 static void 1539 reload_combine_note_use (rtx *xp, rtx_insn *insn, int ruid, rtx containing_mem) 1540 { 1541 rtx x = *xp; 1542 enum rtx_code code = x->code; 1543 const char *fmt; 1544 int i, j; 1545 rtx offset = const0_rtx; /* For the REG case below. */ 1546 1547 switch (code) 1548 { 1549 case SET: 1550 if (REG_P (SET_DEST (x))) 1551 { 1552 reload_combine_note_use (&SET_SRC (x), insn, ruid, NULL_RTX); 1553 return; 1554 } 1555 break; 1556 1557 case USE: 1558 /* If this is the USE of a return value, we can't change it. */ 1559 if (REG_P (XEXP (x, 0)) && REG_FUNCTION_VALUE_P (XEXP (x, 0))) 1560 { 1561 /* Mark the return register as used in an unknown fashion. */ 1562 rtx reg = XEXP (x, 0); 1563 int regno = REGNO (reg); 1564 int nregs = hard_regno_nregs[regno][GET_MODE (reg)]; 1565 1566 while (--nregs >= 0) 1567 reg_state[regno + nregs].use_index = -1; 1568 return; 1569 } 1570 break; 1571 1572 case CLOBBER: 1573 if (REG_P (SET_DEST (x))) 1574 { 1575 /* No spurious CLOBBERs of pseudo registers may remain. */ 1576 gcc_assert (REGNO (SET_DEST (x)) < FIRST_PSEUDO_REGISTER); 1577 return; 1578 } 1579 break; 1580 1581 case PLUS: 1582 /* We are interested in (plus (reg) (const_int)) . */ 1583 if (!REG_P (XEXP (x, 0)) 1584 || !CONST_INT_P (XEXP (x, 1))) 1585 break; 1586 offset = XEXP (x, 1); 1587 x = XEXP (x, 0); 1588 /* Fall through. */ 1589 case REG: 1590 { 1591 int regno = REGNO (x); 1592 int use_index; 1593 int nregs; 1594 1595 /* No spurious USEs of pseudo registers may remain. */ 1596 gcc_assert (regno < FIRST_PSEUDO_REGISTER); 1597 1598 nregs = hard_regno_nregs[regno][GET_MODE (x)]; 1599 1600 /* We can't substitute into multi-hard-reg uses. 
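For example, a DImode use that occupies two word-sized hard registers cannot be rewritten as a reg+reg address, so we just mark every register involved as used in an unknown fashion.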
*/ 1601 if (nregs > 1) 1602 { 1603 while (--nregs >= 0) 1604 reg_state[regno + nregs].use_index = -1; 1605 return; 1606 } 1607 1608 /* We may be called to update uses in previously seen insns. 1609 Don't add uses beyond the last store we saw. */ 1610 if (ruid < reg_state[regno].store_ruid) 1611 return; 1612 1613 /* If this register is already used in some unknown fashion, we 1614 can't do anything. 1615 If we decrement the index from zero to -1, we can't store more 1616 uses, so this register becomes used in an unknown fashion. */ 1617 use_index = --reg_state[regno].use_index; 1618 if (use_index < 0) 1619 return; 1620 1621 if (use_index == RELOAD_COMBINE_MAX_USES - 1) 1622 { 1623 /* This is the first use of this register we have seen since we 1624 marked it as dead. */ 1625 reg_state[regno].offset = offset; 1626 reg_state[regno].all_offsets_match = true; 1627 reg_state[regno].use_ruid = ruid; 1628 } 1629 else 1630 { 1631 if (reg_state[regno].use_ruid > ruid) 1632 reg_state[regno].use_ruid = ruid; 1633 1634 if (! rtx_equal_p (offset, reg_state[regno].offset)) 1635 reg_state[regno].all_offsets_match = false; 1636 } 1637 1638 reg_state[regno].reg_use[use_index].insn = insn; 1639 reg_state[regno].reg_use[use_index].ruid = ruid; 1640 reg_state[regno].reg_use[use_index].containing_mem = containing_mem; 1641 reg_state[regno].reg_use[use_index].usep = xp; 1642 return; 1643 } 1644 1645 case MEM: 1646 containing_mem = x; 1647 break; 1648 1649 default: 1650 break; 1651 } 1652 1653 /* Recursively process the components of X. */ 1654 fmt = GET_RTX_FORMAT (code); 1655 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) 1656 { 1657 if (fmt[i] == 'e') 1658 reload_combine_note_use (&XEXP (x, i), insn, ruid, containing_mem); 1659 else if (fmt[i] == 'E') 1660 { 1661 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 1662 reload_combine_note_use (&XVECEXP (x, i, j), insn, ruid, 1663 containing_mem); 1664 } 1665 } 1666 } 1667 1668 /* See if we can reduce the cost of a constant by replacing a move 1669 with an add. We track situations in which a register is set to a 1670 constant or to a register plus a constant. */ 1671 /* We cannot do our optimization across labels. Invalidating all the 1672 information about register contents we have would be costly, so we 1673 use move2add_last_label_luid to note where the label is and then 1674 later disable any optimization that would cross it. 1675 reg_offset[n] / reg_base_reg[n] / reg_symbol_ref[n] / reg_mode[n] 1676 are only valid if reg_set_luid[n] is greater than 1677 move2add_last_label_luid. 1678 For a set that established a new (potential) base register with 1679 non-constant value, we use move2add_luid from the place where the 1680 setting insn is encountered; registers based off that base then 1681 get the same reg_set_luid. Constants all get 1682 move2add_last_label_luid + 1 as their reg_set_luid. */ 1683 static int reg_set_luid[FIRST_PSEUDO_REGISTER]; 1684 1685 /* If reg_base_reg[n] is negative, register n has been set to 1686 reg_offset[n] or reg_symbol_ref[n] + reg_offset[n] in mode reg_mode[n]. 1687 If reg_base_reg[n] is non-negative, register n has been set to the 1688 sum of reg_offset[n] and the value of register reg_base_reg[n] 1689 before reg_set_luid[n], calculated in mode reg_mode[n] . 1690 For multi-hard-register registers, all but the first one are 1691 recorded as BLKmode in reg_mode. Setting reg_mode to VOIDmode 1692 marks it as invalid. 
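For example, after (set (reg:SI 1) (plus:SI (reg:SI 2) (const_int 8))) we would record reg_base_reg[1] == 2, reg_offset[1] == 8 and reg_mode[1] == SImode.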
*/ 1693 static HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER]; 1694 static int reg_base_reg[FIRST_PSEUDO_REGISTER]; 1695 static rtx reg_symbol_ref[FIRST_PSEUDO_REGISTER]; 1696 static machine_mode reg_mode[FIRST_PSEUDO_REGISTER]; 1697 1698 /* move2add_luid is linearly increased while scanning the instructions 1699 from first to last. It is used to set reg_set_luid in 1700 reload_cse_move2add and move2add_note_store. */ 1701 static int move2add_luid; 1702 1703 /* move2add_last_label_luid is set whenever a label is found. Labels 1704 invalidate all previously collected reg_offset data. */ 1705 static int move2add_last_label_luid; 1706 1707 /* ??? We don't know how zero / sign extension is handled, hence we 1708 can't go from a narrower to a wider mode. */ 1709 #define MODES_OK_FOR_MOVE2ADD(OUTMODE, INMODE) \ 1710 (GET_MODE_SIZE (OUTMODE) == GET_MODE_SIZE (INMODE) \ 1711 || (GET_MODE_SIZE (OUTMODE) <= GET_MODE_SIZE (INMODE) \ 1712 && TRULY_NOOP_TRUNCATION_MODES_P (OUTMODE, INMODE))) 1713 1714 /* Record that REG is being set to a value with the mode of REG. */ 1715 1716 static void 1717 move2add_record_mode (rtx reg) 1718 { 1719 int regno, nregs; 1720 machine_mode mode = GET_MODE (reg); 1721 1722 if (GET_CODE (reg) == SUBREG) 1723 { 1724 regno = subreg_regno (reg); 1725 nregs = subreg_nregs (reg); 1726 } 1727 else if (REG_P (reg)) 1728 { 1729 regno = REGNO (reg); 1730 nregs = hard_regno_nregs[regno][mode]; 1731 } 1732 else 1733 gcc_unreachable (); 1734 for (int i = nregs - 1; i > 0; i--) 1735 reg_mode[regno + i] = BLKmode; 1736 reg_mode[regno] = mode; 1737 } 1738 1739 /* Record that REG is being set to the sum of SYM and OFF. */ 1740 1741 static void 1742 move2add_record_sym_value (rtx reg, rtx sym, rtx off) 1743 { 1744 int regno = REGNO (reg); 1745 1746 move2add_record_mode (reg); 1747 reg_set_luid[regno] = move2add_luid; 1748 reg_base_reg[regno] = -1; 1749 reg_symbol_ref[regno] = sym; 1750 reg_offset[regno] = INTVAL (off); 1751 } 1752 1753 /* Check if REGNO contains a valid value in MODE. */ 1754 1755 static bool 1756 move2add_valid_value_p (int regno, machine_mode mode) 1757 { 1758 if (reg_set_luid[regno] <= move2add_last_label_luid) 1759 return false; 1760 1761 if (mode != reg_mode[regno]) 1762 { 1763 if (!MODES_OK_FOR_MOVE2ADD (mode, reg_mode[regno])) 1764 return false; 1765 /* The value loaded into regno in reg_mode[regno] is also valid in 1766 mode after truncation only if (REG:mode regno) is the lowpart of 1767 (REG:reg_mode[regno] regno). Now, for big endian, the starting 1768 regno of the lowpart might be different. */ 1769 int s_off = subreg_lowpart_offset (mode, reg_mode[regno]); 1770 s_off = subreg_regno_offset (regno, reg_mode[regno], s_off, mode); 1771 if (s_off != 0) 1772 /* We could in principle adjust regno, check reg_mode[regno] to be 1773 BLKmode, and return s_off to the caller (vs. -1 for failure), 1774 but we currently have no callers that could make use of this 1775 information. */ 1776 return false; 1777 } 1778 1779 for (int i = hard_regno_nregs[regno][mode] - 1; i > 0; i--) 1780 if (reg_mode[regno + i] != BLKmode) 1781 return false; 1782 return true; 1783 } 1784 1785 /* This function is called with INSN that sets REG to (SYM + OFF), 1786 while REG is known to already have value (SYM + offset). 1787 This function tries to change INSN into an add instruction 1788 (set (REG) (plus (REG) (OFF - offset))) using the known value. 1789 It also updates the information about REG's known value. 1790 Return true if we made a change. 
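For example, if REG is known to hold (SYM + 4) and OFF is 12, we try to rewrite INSN as (set (REG) (plus (REG) (const_int 8))).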
*/ 1791 1792 static bool 1793 move2add_use_add2_insn (rtx reg, rtx sym, rtx off, rtx_insn *insn) 1794 { 1795 rtx pat = PATTERN (insn); 1796 rtx src = SET_SRC (pat); 1797 int regno = REGNO (reg); 1798 rtx new_src = gen_int_mode (UINTVAL (off) - reg_offset[regno], 1799 GET_MODE (reg)); 1800 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); 1801 bool changed = false; 1802 1803 /* (set (reg) (plus (reg) (const_int 0))) is not canonical; 1804 use (set (reg) (reg)) instead. 1805 We don't delete this insn, nor do we convert it into a 1806 note, to avoid losing register notes or the return 1807 value flag. jump2 already knows how to get rid of 1808 no-op moves. */ 1809 if (new_src == const0_rtx) 1810 { 1811 /* If the constants are different, this is a 1812 truncation, that, if turned into (set (reg) 1813 (reg)), would be discarded. Maybe we should 1814 try a truncMN pattern? */ 1815 if (INTVAL (off) == reg_offset [regno]) 1816 changed = validate_change (insn, &SET_SRC (pat), reg, 0); 1817 } 1818 else 1819 { 1820 struct full_rtx_costs oldcst, newcst; 1821 rtx tem = gen_rtx_PLUS (GET_MODE (reg), reg, new_src); 1822 1823 get_full_set_rtx_cost (pat, &oldcst); 1824 SET_SRC (pat) = tem; 1825 get_full_set_rtx_cost (pat, &newcst); 1826 SET_SRC (pat) = src; 1827 1828 if (costs_lt_p (&newcst, &oldcst, speed) 1829 && have_add2_insn (reg, new_src)) 1830 changed = validate_change (insn, &SET_SRC (pat), tem, 0); 1831 else if (sym == NULL_RTX && GET_MODE (reg) != BImode) 1832 { 1833 machine_mode narrow_mode; 1834 for (narrow_mode = GET_CLASS_NARROWEST_MODE (MODE_INT); 1835 narrow_mode != VOIDmode 1836 && narrow_mode != GET_MODE (reg); 1837 narrow_mode = GET_MODE_WIDER_MODE (narrow_mode)) 1838 { 1839 if (have_insn_for (STRICT_LOW_PART, narrow_mode) 1840 && ((reg_offset[regno] & ~GET_MODE_MASK (narrow_mode)) 1841 == (INTVAL (off) & ~GET_MODE_MASK (narrow_mode)))) 1842 { 1843 rtx narrow_reg = gen_lowpart_common (narrow_mode, reg); 1844 rtx narrow_src = gen_int_mode (INTVAL (off), 1845 narrow_mode); 1846 rtx new_set 1847 = gen_rtx_SET (VOIDmode, 1848 gen_rtx_STRICT_LOW_PART (VOIDmode, 1849 narrow_reg), 1850 narrow_src); 1851 get_full_set_rtx_cost (new_set, &newcst); 1852 if (costs_lt_p (&newcst, &oldcst, speed)) 1853 { 1854 changed = validate_change (insn, &PATTERN (insn), 1855 new_set, 0); 1856 if (changed) 1857 break; 1858 } 1859 } 1860 } 1861 } 1862 } 1863 move2add_record_sym_value (reg, sym, off); 1864 return changed; 1865 } 1866 1867 1868 /* This function is called with INSN that sets REG to (SYM + OFF), 1869 but REG doesn't have known value (SYM + offset). This function 1870 tries to find another register which is known to already have 1871 value (SYM + offset) and change INSN into an add instruction 1872 (set (REG) (plus (the found register) (OFF - offset))) if such 1873 a register is found. It also updates the information about 1874 REG's known value. 1875 Return true iff we made a change. 
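For example, if some register REGZ is known to hold (SYM + 4) and OFF is 12, we try to rewrite INSN as (set (REG) (plus (REGZ) (const_int 8))), provided that is cheaper than the original constant load.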

/* This function is called with INSN that sets REG to (SYM + OFF),
   but REG doesn't have known value (SYM + offset).  This function
   tries to find another register which is known to already have
   value (SYM + offset) and change INSN into an add instruction
   (set (REG) (plus (the found register) (OFF - offset))) if such
   a register is found.  It also updates the information about
   REG's known value.
   Return true iff we made a change.  */

static bool
move2add_use_add3_insn (rtx reg, rtx sym, rtx off, rtx_insn *insn)
{
  rtx pat = PATTERN (insn);
  rtx src = SET_SRC (pat);
  int regno = REGNO (reg);
  int min_regno = 0;
  bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
  int i;
  bool changed = false;
  struct full_rtx_costs oldcst, newcst, mincst;
  rtx plus_expr;

  init_costs_to_max (&mincst);
  get_full_set_rtx_cost (pat, &oldcst);

  plus_expr = gen_rtx_PLUS (GET_MODE (reg), reg, const0_rtx);
  SET_SRC (pat) = plus_expr;

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (move2add_valid_value_p (i, GET_MODE (reg))
        && reg_base_reg[i] < 0
        && reg_symbol_ref[i] != NULL_RTX
        && rtx_equal_p (sym, reg_symbol_ref[i]))
      {
        rtx new_src = gen_int_mode (UINTVAL (off) - reg_offset[i],
                                    GET_MODE (reg));
        /* (set (reg) (plus (reg) (const_int 0))) is not canonical;
           use (set (reg) (reg)) instead.
           We don't delete this insn, nor do we convert it into a
           note, to avoid losing register notes or the return
           value flag.  jump2 already knows how to get rid of
           no-op moves.  */
        if (new_src == const0_rtx)
          {
            init_costs_to_zero (&mincst);
            min_regno = i;
            break;
          }
        else
          {
            XEXP (plus_expr, 1) = new_src;
            get_full_set_rtx_cost (pat, &newcst);

            if (costs_lt_p (&newcst, &mincst, speed))
              {
                mincst = newcst;
                min_regno = i;
              }
          }
      }
  SET_SRC (pat) = src;

  if (costs_lt_p (&mincst, &oldcst, speed))
    {
      rtx tem;

      tem = gen_rtx_REG (GET_MODE (reg), min_regno);
      if (i != min_regno)
        {
          rtx new_src = gen_int_mode (UINTVAL (off) - reg_offset[min_regno],
                                      GET_MODE (reg));
          tem = gen_rtx_PLUS (GET_MODE (reg), tem, new_src);
        }
      if (validate_change (insn, &SET_SRC (pat), tem, 0))
        changed = true;
    }
  reg_set_luid[regno] = move2add_luid;
  move2add_record_sym_value (reg, sym, off);
  return changed;
}
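
/* For example, if register 2 is known to hold (symbol_ref "x") plus 4
   and INSN is (set (reg:SI 1) (const (plus (symbol_ref "x")
   (const_int 12)))), the loop above can select register 2 and turn
   INSN into (set (reg:SI 1) (plus:SI (reg:SI 2) (const_int 8))),
   provided that form is cheaper than the original constant load.  */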

/* Convert move insns with constant inputs to additions if they are cheaper.
   Return true if any changes were made.  */

static bool
reload_cse_move2add (rtx_insn *first)
{
  int i;
  rtx_insn *insn;
  bool changed = false;

  for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
    {
      reg_set_luid[i] = 0;
      reg_offset[i] = 0;
      reg_base_reg[i] = 0;
      reg_symbol_ref[i] = NULL_RTX;
      reg_mode[i] = VOIDmode;
    }

  move2add_last_label_luid = 0;
  move2add_luid = 2;
  for (insn = first; insn; insn = NEXT_INSN (insn), move2add_luid++)
    {
      rtx pat, note;

      if (LABEL_P (insn))
        {
          move2add_last_label_luid = move2add_luid;
          /* We're going to increment move2add_luid twice after a
             label, so that we can use move2add_last_label_luid + 1 as
             the luid for constants.  */
          move2add_luid++;
          continue;
        }
      if (! INSN_P (insn))
        continue;
      pat = PATTERN (insn);
      /* For simplicity, we only perform this optimization on
         straightforward SETs.  */
      if (GET_CODE (pat) == SET
          && REG_P (SET_DEST (pat)))
        {
          rtx reg = SET_DEST (pat);
          int regno = REGNO (reg);
          rtx src = SET_SRC (pat);

          /* Check if we have valid information on the contents of this
             register in the mode of REG.  */
          if (move2add_valid_value_p (regno, GET_MODE (reg))
              && dbg_cnt (cse2_move2add))
            {
              /* Try to transform (set (REGX) (CONST_INT A))
                                  ...
                                  (set (REGX) (CONST_INT B))
                 to
                                  (set (REGX) (CONST_INT A))
                                  ...
                                  (set (REGX) (plus (REGX) (CONST_INT B-A)))
                 or
                                  (set (REGX) (CONST_INT A))
                                  ...
                                  (set (STRICT_LOW_PART (REGX)) (CONST_INT B))
              */

              if (CONST_INT_P (src)
                  && reg_base_reg[regno] < 0
                  && reg_symbol_ref[regno] == NULL_RTX)
                {
                  changed |= move2add_use_add2_insn (reg, NULL_RTX, src, insn);
                  continue;
                }

              /* Try to transform (set (REGX) (REGY))
                                  (set (REGX) (PLUS (REGX) (CONST_INT A)))
                                  ...
                                  (set (REGX) (REGY))
                                  (set (REGX) (PLUS (REGX) (CONST_INT B)))
                 to
                                  (set (REGX) (REGY))
                                  (set (REGX) (PLUS (REGX) (CONST_INT A)))
                                  ...
                                  (set (REGX) (plus (REGX) (CONST_INT B-A)))  */
              else if (REG_P (src)
                       && reg_set_luid[regno] == reg_set_luid[REGNO (src)]
                       && reg_base_reg[regno] == reg_base_reg[REGNO (src)]
                       && move2add_valid_value_p (REGNO (src), GET_MODE (reg)))
                {
                  rtx_insn *next = next_nonnote_nondebug_insn (insn);
                  rtx set = NULL_RTX;
                  if (next)
                    set = single_set (next);
                  if (set
                      && SET_DEST (set) == reg
                      && GET_CODE (SET_SRC (set)) == PLUS
                      && XEXP (SET_SRC (set), 0) == reg
                      && CONST_INT_P (XEXP (SET_SRC (set), 1)))
                    {
                      rtx src3 = XEXP (SET_SRC (set), 1);
                      unsigned HOST_WIDE_INT added_offset = UINTVAL (src3);
                      HOST_WIDE_INT base_offset = reg_offset[REGNO (src)];
                      HOST_WIDE_INT regno_offset = reg_offset[regno];
                      rtx new_src
                        = gen_int_mode (added_offset
                                        + base_offset
                                        - regno_offset,
                                        GET_MODE (reg));
                      bool success = false;
                      bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));

                      if (new_src == const0_rtx)
                        /* See above why we create (set (reg) (reg)) here.  */
                        success
                          = validate_change (next, &SET_SRC (set), reg, 0);
                      else
                        {
                          rtx old_src = SET_SRC (set);
                          struct full_rtx_costs oldcst, newcst;
                          rtx tem = gen_rtx_PLUS (GET_MODE (reg), reg, new_src);

                          get_full_set_rtx_cost (set, &oldcst);
                          SET_SRC (set) = tem;
                          get_full_set_src_cost (tem, &newcst);
                          SET_SRC (set) = old_src;
                          costs_add_n_insns (&oldcst, 1);

                          if (costs_lt_p (&newcst, &oldcst, speed)
                              && have_add2_insn (reg, new_src))
                            {
                              rtx newpat = gen_rtx_SET (VOIDmode, reg, tem);
                              success
                                = validate_change (next, &PATTERN (next),
                                                   newpat, 0);
                            }
                        }
                      if (success)
                        delete_insn (insn);
                      changed |= success;
                      insn = next;
                      move2add_record_mode (reg);
                      reg_offset[regno]
                        = trunc_int_for_mode (added_offset + base_offset,
                                              GET_MODE (reg));
                      continue;
                    }
                }
            }
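
          /* For instance, the chain handling above rewrites

                 (set (reg:SI 1) (reg:SI 2))
                 (set (reg:SI 1) (plus:SI (reg:SI 1) (const_int 4)))
                 ...
                 (set (reg:SI 1) (reg:SI 2))
                 (set (reg:SI 1) (plus:SI (reg:SI 1) (const_int 12)))

             by deleting the second copy and turning the second add into
             (set (reg:SI 1) (plus:SI (reg:SI 1) (const_int 8))), when
             that is cheaper than a copy plus an add.  */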

          /* Try to transform
             (set (REGX) (CONST (PLUS (SYMBOL_REF) (CONST_INT A))))
             ...
             (set (REGY) (CONST (PLUS (SYMBOL_REF) (CONST_INT B))))
             to
             (set (REGX) (CONST (PLUS (SYMBOL_REF) (CONST_INT A))))
             ...
             (set (REGY) (CONST (PLUS (REGX) (CONST_INT B-A))))  */
          if ((GET_CODE (src) == SYMBOL_REF
               || (GET_CODE (src) == CONST
                   && GET_CODE (XEXP (src, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (src, 0), 0)) == SYMBOL_REF
                   && CONST_INT_P (XEXP (XEXP (src, 0), 1))))
              && dbg_cnt (cse2_move2add))
            {
              rtx sym, off;

              if (GET_CODE (src) == SYMBOL_REF)
                {
                  sym = src;
                  off = const0_rtx;
                }
              else
                {
                  sym = XEXP (XEXP (src, 0), 0);
                  off = XEXP (XEXP (src, 0), 1);
                }

              /* If the reg already contains the value which is sum of
                 sym and some constant value, we can use an add2 insn.  */
              if (move2add_valid_value_p (regno, GET_MODE (reg))
                  && reg_base_reg[regno] < 0
                  && reg_symbol_ref[regno] != NULL_RTX
                  && rtx_equal_p (sym, reg_symbol_ref[regno]))
                changed |= move2add_use_add2_insn (reg, sym, off, insn);

              /* Otherwise, we have to find a register whose value is sum
                 of sym and some constant value.  */
              else
                changed |= move2add_use_add3_insn (reg, sym, off, insn);

              continue;
            }
        }

      for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
        {
          if (REG_NOTE_KIND (note) == REG_INC
              && REG_P (XEXP (note, 0)))
            {
              /* Reset the information about this register.  */
              int regno = REGNO (XEXP (note, 0));
              if (regno < FIRST_PSEUDO_REGISTER)
                {
                  move2add_record_mode (XEXP (note, 0));
                  reg_mode[regno] = VOIDmode;
                }
            }
        }
      note_stores (PATTERN (insn), move2add_note_store, insn);

      /* If INSN is a conditional branch, we try to extract an
         implicit set out of it.  */
      if (any_condjump_p (insn))
        {
          rtx cnd = fis_get_condition (insn);

          if (cnd != NULL_RTX
              && GET_CODE (cnd) == NE
              && REG_P (XEXP (cnd, 0))
              && !reg_set_p (XEXP (cnd, 0), insn)
              /* The following two checks, which are also in
                 move2add_note_store, are intended to reduce the
                 number of calls to gen_rtx_SET to avoid memory
                 allocation if possible.  */
              && SCALAR_INT_MODE_P (GET_MODE (XEXP (cnd, 0)))
              && hard_regno_nregs[REGNO (XEXP (cnd, 0))]
                                 [GET_MODE (XEXP (cnd, 0))] == 1
              && CONST_INT_P (XEXP (cnd, 1)))
            {
              rtx implicit_set
                = gen_rtx_SET (VOIDmode, XEXP (cnd, 0), XEXP (cnd, 1));
              move2add_note_store (SET_DEST (implicit_set), implicit_set, insn);
            }
        }
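
      /* For example, a jump taken when (ne (reg:SI 1) (const_int 0))
         holds guarantees that register 1 is zero on the fall-through
         path, so the code above records it as if
         (set (reg:SI 1) (const_int 0)) had just executed.  */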

      /* If this is a CALL_INSN, all call used registers are stored with
         unknown values.  */
      if (CALL_P (insn))
        {
          rtx link;

          for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
            {
              if (call_used_regs[i])
                /* Reset the information about this register.  */
                reg_mode[i] = VOIDmode;
            }

          for (link = CALL_INSN_FUNCTION_USAGE (insn); link;
               link = XEXP (link, 1))
            {
              rtx setuse = XEXP (link, 0);
              rtx usage_rtx = XEXP (setuse, 0);
              if (GET_CODE (setuse) == CLOBBER
                  && REG_P (usage_rtx))
                {
                  unsigned int end_regno = END_REGNO (usage_rtx);
                  for (unsigned int r = REGNO (usage_rtx); r < end_regno; ++r)
                    /* Reset the information about this register.  */
                    reg_mode[r] = VOIDmode;
                }
            }
        }
    }
  return changed;
}
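
/* A worked example for the recording done by move2add_note_store
   below: scanning

       (set (reg:SI 2) (const_int 16))
       (set (reg:SI 1) (plus:SI (reg:SI 2) (const_int 4)))

   first records register 2 as the constant 16; the second store then
   records register 1 as the constant 20, inheriting the base
   information from register 2 and summing the offsets.  */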

/* SET is a SET or CLOBBER that sets DST.  DATA is the insn which
   contains SET.
   Update reg_set_luid, reg_offset and reg_base_reg accordingly.
   Called from reload_cse_move2add via note_stores.  */

static void
move2add_note_store (rtx dst, const_rtx set, void *data)
{
  rtx_insn *insn = (rtx_insn *) data;
  unsigned int regno = 0;
  machine_mode mode = GET_MODE (dst);

  /* Some targets do argument pushes without adding REG_INC notes.  */

  if (MEM_P (dst))
    {
      dst = XEXP (dst, 0);
      if (GET_CODE (dst) == PRE_INC || GET_CODE (dst) == POST_INC
          || GET_CODE (dst) == PRE_DEC || GET_CODE (dst) == POST_DEC)
        reg_mode[REGNO (XEXP (dst, 0))] = VOIDmode;
      return;
    }

  if (GET_CODE (dst) == SUBREG)
    regno = subreg_regno (dst);
  else if (REG_P (dst))
    regno = REGNO (dst);
  else
    return;

  if (SCALAR_INT_MODE_P (mode)
      && GET_CODE (set) == SET)
    {
      rtx note, sym = NULL_RTX;
      rtx off;

      note = find_reg_equal_equiv_note (insn);
      if (note && GET_CODE (XEXP (note, 0)) == SYMBOL_REF)
        {
          sym = XEXP (note, 0);
          off = const0_rtx;
        }
      else if (note && GET_CODE (XEXP (note, 0)) == CONST
               && GET_CODE (XEXP (XEXP (note, 0), 0)) == PLUS
               && GET_CODE (XEXP (XEXP (XEXP (note, 0), 0), 0)) == SYMBOL_REF
               && CONST_INT_P (XEXP (XEXP (XEXP (note, 0), 0), 1)))
        {
          sym = XEXP (XEXP (XEXP (note, 0), 0), 0);
          off = XEXP (XEXP (XEXP (note, 0), 0), 1);
        }

      if (sym != NULL_RTX)
        {
          move2add_record_sym_value (dst, sym, off);
          return;
        }
    }

  if (SCALAR_INT_MODE_P (mode)
      && GET_CODE (set) == SET
      && GET_CODE (SET_DEST (set)) != ZERO_EXTRACT
      && GET_CODE (SET_DEST (set)) != STRICT_LOW_PART)
    {
      rtx src = SET_SRC (set);
      rtx base_reg;
      unsigned HOST_WIDE_INT offset;
      int base_regno;

      switch (GET_CODE (src))
        {
        case PLUS:
          if (REG_P (XEXP (src, 0)))
            {
              base_reg = XEXP (src, 0);

              if (CONST_INT_P (XEXP (src, 1)))
                offset = UINTVAL (XEXP (src, 1));
              else if (REG_P (XEXP (src, 1))
                       && move2add_valid_value_p (REGNO (XEXP (src, 1)), mode))
                {
                  if (reg_base_reg[REGNO (XEXP (src, 1))] < 0
                      && reg_symbol_ref[REGNO (XEXP (src, 1))] == NULL_RTX)
                    offset = reg_offset[REGNO (XEXP (src, 1))];
                  /* Maybe the first register is known to be a
                     constant.  */
                  else if (move2add_valid_value_p (REGNO (base_reg), mode)
                           && reg_base_reg[REGNO (base_reg)] < 0
                           && reg_symbol_ref[REGNO (base_reg)] == NULL_RTX)
                    {
                      offset = reg_offset[REGNO (base_reg)];
                      base_reg = XEXP (src, 1);
                    }
                  else
                    goto invalidate;
                }
              else
                goto invalidate;

              break;
            }

          goto invalidate;

        case REG:
          base_reg = src;
          offset = 0;
          break;

        case CONST_INT:
          /* Start tracking the register as a constant.  */
          reg_base_reg[regno] = -1;
          reg_symbol_ref[regno] = NULL_RTX;
          reg_offset[regno] = INTVAL (SET_SRC (set));
          /* We assign the same luid to all registers set to constants.  */
          reg_set_luid[regno] = move2add_last_label_luid + 1;
          move2add_record_mode (dst);
          return;

        default:
          goto invalidate;
        }

      base_regno = REGNO (base_reg);
      /* If information about the base register is not valid, set it
         up as a new base register, pretending its value is known
         starting from the current insn.  */
      if (!move2add_valid_value_p (base_regno, mode))
        {
          reg_base_reg[base_regno] = base_regno;
          reg_symbol_ref[base_regno] = NULL_RTX;
          reg_offset[base_regno] = 0;
          reg_set_luid[base_regno] = move2add_luid;
          gcc_assert (GET_MODE (base_reg) == mode);
          move2add_record_mode (base_reg);
        }

      /* Copy base information from our base register.  */
      reg_set_luid[regno] = reg_set_luid[base_regno];
      reg_base_reg[regno] = reg_base_reg[base_regno];
      reg_symbol_ref[regno] = reg_symbol_ref[base_regno];

      /* Compute the sum of the offsets or constants.  */
      reg_offset[regno]
        = trunc_int_for_mode (offset + reg_offset[base_regno], mode);

      move2add_record_mode (dst);
    }
  else
    {
    invalidate:
      /* Invalidate the contents of the register.  */
      move2add_record_mode (dst);
      reg_mode[regno] = VOIDmode;
    }
}

namespace {

const pass_data pass_data_postreload_cse =
{
  RTL_PASS, /* type */
  "postreload", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_RELOAD_CSE_REGS, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_postreload_cse : public rtl_opt_pass
{
public:
  pass_postreload_cse (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_postreload_cse, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return (optimize > 0 && reload_completed); }

  virtual unsigned int execute (function *);

}; // class pass_postreload_cse

unsigned int
pass_postreload_cse::execute (function *fun)
{
  if (!dbg_cnt (postreload_cse))
    return 0;

  /* Do a very simple CSE pass over just the hard registers.  */
  reload_cse_regs (get_insns ());
  /* Reload_cse_regs can eliminate potentially-trapping MEMs.
     Remove any EH edges associated with them.  */
  if (fun->can_throw_non_call_exceptions
      && purge_all_dead_edges ())
    cleanup_cfg (0);

  return 0;
}

} // anon namespace

rtl_opt_pass *
make_pass_postreload_cse (gcc::context *ctxt)
{
  return new pass_postreload_cse (ctxt);
}