1 /* Decompose multiword subregs. 2 Copyright (C) 2007-2013 Free Software Foundation, Inc. 3 Contributed by Richard Henderson <rth@redhat.com> 4 Ian Lance Taylor <iant@google.com> 5 6 This file is part of GCC. 7 8 GCC is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free 10 Software Foundation; either version 3, or (at your option) any later 11 version. 12 13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 14 WARRANTY; without even the implied warranty of MERCHANTABILITY or 15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16 for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with GCC; see the file COPYING3. If not see 20 <http://www.gnu.org/licenses/>. */ 21 22 #include "config.h" 23 #include "system.h" 24 #include "coretypes.h" 25 #include "machmode.h" 26 #include "tm.h" 27 #include "rtl.h" 28 #include "tm_p.h" 29 #include "flags.h" 30 #include "insn-config.h" 31 #include "obstack.h" 32 #include "basic-block.h" 33 #include "recog.h" 34 #include "bitmap.h" 35 #include "dce.h" 36 #include "expr.h" 37 #include "except.h" 38 #include "regs.h" 39 #include "tree-pass.h" 40 #include "df.h" 41 #include "lower-subreg.h" 42 43 #ifdef STACK_GROWS_DOWNWARD 44 # undef STACK_GROWS_DOWNWARD 45 # define STACK_GROWS_DOWNWARD 1 46 #else 47 # define STACK_GROWS_DOWNWARD 0 48 #endif 49 50 51 /* Decompose multi-word pseudo-registers into individual 52 pseudo-registers when possible and profitable. This is possible 53 when all the uses of a multi-word register are via SUBREG, or are 54 copies of the register to another location. Breaking apart the 55 register permits more CSE and permits better register allocation. 56 This is profitable if the machine does not have move instructions 57 to do this. 
   This pass only splits moves with modes that are wider than
   word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
   integer modes that are twice the width of word_mode.  The latter
   could be generalized if there was a need to do this, but the trend in
   architectures is to not need this.

   There are two useful preprocessor defines for use by maintainers:

   #define LOG_COSTS 1

   if you wish to see the actual cost estimates that are being used
   for each mode wider than word mode and the cost estimates for zero
   extension and the shifts.   This can be useful when port maintainers
   are tuning insn rtx costs.

   #define FORCE_LOWERING 1

   if you wish to test the pass with all the transformation forced on.
   This can be useful for finding bugs in the transformations.  */

#define LOG_COSTS 0
#define FORCE_LOWERING 0

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it can not be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
static vec<bitmap> reg_copy_graph;

struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices

/* RTXes used while computing costs.  These are allocated once in
   init_lower_subreg and then mutated in place for each cost query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};

/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
   rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */

static int
shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
	    enum machine_mode mode, int op1)
{
  /* Reconfigure the shared shift rtx in place instead of allocating a
     fresh one for every query.  */
  PUT_CODE (rtxes->shift, code);
  PUT_MODE (rtxes->shift, mode);
  PUT_MODE (rtxes->source, mode);
  XEXP (rtxes->shift, 1) = GEN_INT (op1);
  return set_src_cost (rtxes->shift, speed_p);
}

/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
   to true if it is profitable to split a double-word CODE shift
   of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
   for speed or size profitability.

   Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
   the cost of moving zero into a word-mode register.  WORD_MOVE_COST
   is the cost of moving between word registers.
*/

static void
compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
			 bool *splitting, enum rtx_code code,
			 int word_move_zero_cost, int word_move_cost)
{
  int bit;

  for (bit = 0; bit < BITS_PER_WORD; bit++)
    {
      /* Cost of performing the full double-word shift directly.  */
      int orig_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
				  bit + BITS_PER_WORD);

      /* Cost of producing the interesting word of the split form: for a
	 shift of exactly BITS_PER_WORD it degenerates to a plain move.  */
      int low_cost = (bit == 0
		      ? word_move_cost
		      : shift_cost (speed_p, rtxes, code, word_mode, bit));

      /* Cost of producing the other word: zero for logical shifts, a
	 sign copy (or full sign shift) for arithmetic right shifts.  */
      int high_cost;
      if (code != ASHIFTRT)
	high_cost = word_move_zero_cost;
      else if (bit == BITS_PER_WORD - 1)
	high_cost = word_move_cost;
      else
	high_cost = shift_cost (speed_p, rtxes, code, word_mode,
				BITS_PER_WORD - 1);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
		 bit + BITS_PER_WORD, orig_cost, low_cost, high_cost);

      if (FORCE_LOWERING || orig_cost >= low_cost + high_cost)
	splitting[bit] = true;
    }
}

/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.
*/

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Cost of (set (reg:word_mode) (const_int 0)).  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  /* Cost of a word-mode register-to-register move.  */
  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every mode wider than a word, decide whether a move in that
     mode should be split into word-sized moves.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      enum machine_mode mode = (enum machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor > 1)
	{
	  int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Decide, per shift amount, whether each double-word shift kind
	 is worth splitting.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}

/* Do one-per-target initialisation.  This involves determining
   which operations on the machine are profitable.  If none are found,
   then the pass just returns when called.
*/ 267 268 void 269 init_lower_subreg (void) 270 { 271 struct cost_rtxes rtxes; 272 273 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg)); 274 275 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode); 276 277 rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER); 278 rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1); 279 rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source); 280 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source); 281 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx); 282 283 if (LOG_COSTS) 284 fprintf (stderr, "\nSize costs\n==========\n\n"); 285 compute_costs (false, &rtxes); 286 287 if (LOG_COSTS) 288 fprintf (stderr, "\nSpeed costs\n===========\n\n"); 289 compute_costs (true, &rtxes); 290 } 291 292 static bool 293 simple_move_operand (rtx x) 294 { 295 if (GET_CODE (x) == SUBREG) 296 x = SUBREG_REG (x); 297 298 if (!OBJECT_P (x)) 299 return false; 300 301 if (GET_CODE (x) == LABEL_REF 302 || GET_CODE (x) == SYMBOL_REF 303 || GET_CODE (x) == HIGH 304 || GET_CODE (x) == CONST) 305 return false; 306 307 if (MEM_P (x) 308 && (MEM_VOLATILE_P (x) 309 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x)))) 310 return false; 311 312 return true; 313 } 314 315 /* If INSN is a single set between two objects that we want to split, 316 return the single set. SPEED_P says whether we are optimizing 317 INSN for speed or size. 318 319 INSN should have been passed to recog and extract_insn before this 320 is called. 
*/

static rtx
simple_move (rtx insn, bool speed_p)
{
  rtx op, set;
  enum machine_mode mode;

  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* Both sides must be recognized operands of the insn and be objects
     this pass can split.  */
  op = SET_DEST (set);
  if (op != recog_data.operand[0] && op != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (op))
    return NULL_RTX;

  op = SET_SRC (set);
  if (op != recog_data.operand[0] && op != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (op) != ASM_OPERANDS && !simple_move_operand (op))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_DEST (set));
  if (!SCALAR_INT_MODE_P (mode)
      && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
	  == BLKmode))
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Finally, the cost analysis must have decided this mode is worth
     splitting.  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}

/* If SET is a copy from one multi-word pseudo-register to another,
   record that in reg_copy_graph.  Return whether it is such a
   copy.
*/ 376 377 static bool 378 find_pseudo_copy (rtx set) 379 { 380 rtx dest = SET_DEST (set); 381 rtx src = SET_SRC (set); 382 unsigned int rd, rs; 383 bitmap b; 384 385 if (!REG_P (dest) || !REG_P (src)) 386 return false; 387 388 rd = REGNO (dest); 389 rs = REGNO (src); 390 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs)) 391 return false; 392 393 b = reg_copy_graph[rs]; 394 if (b == NULL) 395 { 396 b = BITMAP_ALLOC (NULL); 397 reg_copy_graph[rs] = b; 398 } 399 400 bitmap_set_bit (b, rd); 401 402 return true; 403 } 404 405 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case 406 where they are copied to another register, add the register to 407 which they are copied to DECOMPOSABLE_CONTEXT. Use 408 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track 409 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */ 410 411 static void 412 propagate_pseudo_copies (void) 413 { 414 bitmap queue, propagate; 415 416 queue = BITMAP_ALLOC (NULL); 417 propagate = BITMAP_ALLOC (NULL); 418 419 bitmap_copy (queue, decomposable_context); 420 do 421 { 422 bitmap_iterator iter; 423 unsigned int i; 424 425 bitmap_clear (propagate); 426 427 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter) 428 { 429 bitmap b = reg_copy_graph[i]; 430 if (b) 431 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context); 432 } 433 434 bitmap_and_compl (queue, propagate, decomposable_context); 435 bitmap_ior_into (decomposable_context, propagate); 436 } 437 while (!bitmap_empty_p (queue)); 438 439 BITMAP_FREE (queue); 440 BITMAP_FREE (propagate); 441 } 442 443 /* A pointer to one of these values is passed to 444 find_decomposable_subregs via for_each_rtx. */ 445 446 enum classify_move_insn 447 { 448 /* Not a simple move from one location to another. */ 449 NOT_SIMPLE_MOVE, 450 /* A simple move we want to decompose. */ 451 DECOMPOSABLE_SIMPLE_MOVE, 452 /* Any other simple move. */ 453 SIMPLE_MOVE 454 }; 455 456 /* This is called via for_each_rtx. 
   If we find a SUBREG which we
   could use to decompose a pseudo-register, set a bit in
   DECOMPOSABLE_CONTEXT.  If we find an unadorned register which is
   not a simple pseudo-register copy, DATA will point at the type of
   move, and we set a bit in DECOMPOSABLE_CONTEXT or
   NON_DECOMPOSABLE_CONTEXT as appropriate.

   Return value follows the for_each_rtx convention: 0 continues the
   walk, -1 skips the sub-rtxes of the current rtx.  */

static int
find_decomposable_subregs (rtx *px, void *data)
{
  enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
  rtx x = *px;

  if (x == NULL_RTX)
    return 0;

  if (GET_CODE (x) == SUBREG)
    {
      rtx inner = SUBREG_REG (x);
      unsigned int regno, outer_size, inner_size, outer_words, inner_words;

      if (!REG_P (inner))
	return 0;

      regno = REGNO (inner);
      if (HARD_REGISTER_NUM_P (regno))
	return -1;

      outer_size = GET_MODE_SIZE (GET_MODE (x));
      inner_size = GET_MODE_SIZE (GET_MODE (inner));
      /* Round sizes up to whole words.  */
      outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
      inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

      /* We only try to decompose single word subregs of multi-word
	 registers.  When we find one, we return -1 to avoid iterating
	 over the inner register.

	 ??? This doesn't allow, e.g., DImode subregs of TImode values
	 on 32-bit targets.  We would need to record the way the
	 pseudo-register was used, and only decompose if all the uses
	 were the same number and size of pieces.  Hopefully this
	 doesn't happen much.  */

      if (outer_words == 1 && inner_words > 1)
	{
	  bitmap_set_bit (decomposable_context, regno);
	  return -1;
	}

      /* If this is a cast from one mode to another, where the modes
	 have the same size, and they are not tieable, then mark this
	 register as non-decomposable.  If we decompose it we are
	 likely to mess up whatever the backend is trying to do.  */
      if (outer_words > 1
	  && outer_size == inner_size
	  && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
	{
	  bitmap_set_bit (non_decomposable_context, regno);
	  bitmap_set_bit (subreg_context, regno);
	  return -1;
	}
    }
  else if (REG_P (x))
    {
      unsigned int regno;

      /* We will see an outer SUBREG before we see the inner REG, so
	 when we see a plain REG here it means a direct reference to
	 the register.

	 If this is not a simple copy from one location to another,
	 then we can not decompose this register.  If this is a simple
	 copy we want to decompose, and the mode is right,
	 then we mark the register as decomposable.
	 Otherwise we don't say anything about this register --
	 it could be decomposed, but whether that would be
	 profitable depends upon how it is used elsewhere.

	 We only set bits in the bitmap for multi-word
	 pseudo-registers, since those are the only ones we care about
	 and it keeps the size of the bitmaps down.  */

      regno = REGNO (x);
      if (!HARD_REGISTER_NUM_P (regno)
	  && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	{
	  switch (*pcmi)
	    {
	    case NOT_SIMPLE_MOVE:
	      bitmap_set_bit (non_decomposable_context, regno);
	      break;
	    case DECOMPOSABLE_SIMPLE_MOVE:
	      if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
		bitmap_set_bit (decomposable_context, regno);
	      break;
	    case SIMPLE_MOVE:
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
    }
  else if (MEM_P (x))
    {
      enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

      /* Any registers used in a MEM do not participate in a
	 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	 here, and return -1 to block the parent's recursion.  */
      for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
      return -1;
    }

  return 0;
}

/* Decompose REGNO into word-sized components.  We smash the REG node
   in place.
This ensures that (1) something goes wrong quickly if we 574 fail to make some replacement, and (2) the debug information inside 575 the symbol table is automatically kept up to date. */ 576 577 static void 578 decompose_register (unsigned int regno) 579 { 580 rtx reg; 581 unsigned int words, i; 582 rtvec v; 583 584 reg = regno_reg_rtx[regno]; 585 586 regno_reg_rtx[regno] = NULL_RTX; 587 588 words = GET_MODE_SIZE (GET_MODE (reg)); 589 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 590 591 v = rtvec_alloc (words); 592 for (i = 0; i < words; ++i) 593 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD); 594 595 PUT_CODE (reg, CONCATN); 596 XVEC (reg, 0) = v; 597 598 if (dump_file) 599 { 600 fprintf (dump_file, "; Splitting reg %u ->", regno); 601 for (i = 0; i < words; ++i) 602 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i))); 603 fputc ('\n', dump_file); 604 } 605 } 606 607 /* Get a SUBREG of a CONCATN. */ 608 609 static rtx 610 simplify_subreg_concatn (enum machine_mode outermode, rtx op, 611 unsigned int byte) 612 { 613 unsigned int inner_size; 614 enum machine_mode innermode, partmode; 615 rtx part; 616 unsigned int final_offset; 617 618 gcc_assert (GET_CODE (op) == CONCATN); 619 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0); 620 621 innermode = GET_MODE (op); 622 gcc_assert (byte < GET_MODE_SIZE (innermode)); 623 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode)); 624 625 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0); 626 part = XVECEXP (op, 0, byte / inner_size); 627 partmode = GET_MODE (part); 628 629 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of 630 regular CONST_VECTORs. They have vector or integer modes, depending 631 on the capabilities of the target. Cope with them. 
*/ 632 if (partmode == VOIDmode && VECTOR_MODE_P (innermode)) 633 partmode = GET_MODE_INNER (innermode); 634 else if (partmode == VOIDmode) 635 { 636 enum mode_class mclass = GET_MODE_CLASS (innermode); 637 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0); 638 } 639 640 final_offset = byte % inner_size; 641 if (final_offset + GET_MODE_SIZE (outermode) > inner_size) 642 return NULL_RTX; 643 644 return simplify_gen_subreg (outermode, part, partmode, final_offset); 645 } 646 647 /* Wrapper around simplify_gen_subreg which handles CONCATN. */ 648 649 static rtx 650 simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op, 651 enum machine_mode innermode, unsigned int byte) 652 { 653 rtx ret; 654 655 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN. 656 If OP is a SUBREG of a CONCATN, then it must be a simple mode 657 change with the same size and offset 0, or it must extract a 658 part. We shouldn't see anything else here. */ 659 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN) 660 { 661 rtx op2; 662 663 if ((GET_MODE_SIZE (GET_MODE (op)) 664 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))) 665 && SUBREG_BYTE (op) == 0) 666 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op), 667 GET_MODE (SUBREG_REG (op)), byte); 668 669 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op), 670 SUBREG_BYTE (op)); 671 if (op2 == NULL_RTX) 672 { 673 /* We don't handle paradoxical subregs here. 
*/ 674 gcc_assert (GET_MODE_SIZE (outermode) 675 <= GET_MODE_SIZE (GET_MODE (op))); 676 gcc_assert (GET_MODE_SIZE (GET_MODE (op)) 677 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))); 678 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op), 679 byte + SUBREG_BYTE (op)); 680 gcc_assert (op2 != NULL_RTX); 681 return op2; 682 } 683 684 op = op2; 685 gcc_assert (op != NULL_RTX); 686 gcc_assert (innermode == GET_MODE (op)); 687 } 688 689 if (GET_CODE (op) == CONCATN) 690 return simplify_subreg_concatn (outermode, op, byte); 691 692 ret = simplify_gen_subreg (outermode, op, innermode, byte); 693 694 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then 695 resolve_simple_move will ask for the high part of the paradoxical 696 subreg, which does not have a value. Just return a zero. */ 697 if (ret == NULL_RTX 698 && GET_CODE (op) == SUBREG 699 && SUBREG_BYTE (op) == 0 700 && (GET_MODE_SIZE (innermode) 701 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))) 702 return CONST0_RTX (outermode); 703 704 gcc_assert (ret != NULL_RTX); 705 return ret; 706 } 707 708 /* Return whether we should resolve X into the registers into which it 709 was decomposed. */ 710 711 static bool 712 resolve_reg_p (rtx x) 713 { 714 return GET_CODE (x) == CONCATN; 715 } 716 717 /* Return whether X is a SUBREG of a register which we need to 718 resolve. */ 719 720 static bool 721 resolve_subreg_p (rtx x) 722 { 723 if (GET_CODE (x) != SUBREG) 724 return false; 725 return resolve_reg_p (SUBREG_REG (x)); 726 } 727 728 /* This is called via for_each_rtx. Look for SUBREGs which need to be 729 decomposed. */ 730 731 static int 732 resolve_subreg_use (rtx *px, void *data) 733 { 734 rtx insn = (rtx) data; 735 rtx x = *px; 736 737 if (x == NULL_RTX) 738 return 0; 739 740 if (resolve_subreg_p (x)) 741 { 742 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x), 743 SUBREG_BYTE (x)); 744 745 /* It is possible for a note to contain a reference which we can 746 decompose. 
In this case, return 1 to the caller to indicate 747 that the note must be removed. */ 748 if (!x) 749 { 750 gcc_assert (!insn); 751 return 1; 752 } 753 754 validate_change (insn, px, x, 1); 755 return -1; 756 } 757 758 if (resolve_reg_p (x)) 759 { 760 /* Return 1 to the caller to indicate that we found a direct 761 reference to a register which is being decomposed. This can 762 happen inside notes, multiword shift or zero-extend 763 instructions. */ 764 return 1; 765 } 766 767 return 0; 768 } 769 770 /* This is called via for_each_rtx. Look for SUBREGs which can be 771 decomposed and decomposed REGs that need copying. */ 772 773 static int 774 adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED) 775 { 776 rtx x = *px; 777 778 if (x == NULL_RTX) 779 return 0; 780 781 if (resolve_subreg_p (x)) 782 { 783 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x), 784 SUBREG_BYTE (x)); 785 786 if (x) 787 *px = x; 788 else 789 x = copy_rtx (*px); 790 } 791 792 if (resolve_reg_p (x)) 793 *px = copy_rtx (x); 794 795 return 0; 796 } 797 798 /* Resolve any decomposed registers which appear in register notes on 799 INSN. */ 800 801 static void 802 resolve_reg_notes (rtx insn) 803 { 804 rtx *pnote, note; 805 806 note = find_reg_equal_equiv_note (insn); 807 if (note) 808 { 809 int old_count = num_validated_changes (); 810 if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL)) 811 remove_note (insn, note); 812 else 813 if (old_count != num_validated_changes ()) 814 df_notes_rescan (insn); 815 } 816 817 pnote = ®_NOTES (insn); 818 while (*pnote != NULL_RTX) 819 { 820 bool del = false; 821 822 note = *pnote; 823 switch (REG_NOTE_KIND (note)) 824 { 825 case REG_DEAD: 826 case REG_UNUSED: 827 if (resolve_reg_p (XEXP (note, 0))) 828 del = true; 829 break; 830 831 default: 832 break; 833 } 834 835 if (del) 836 *pnote = XEXP (note, 1); 837 else 838 pnote = &XEXP (note, 1); 839 } 840 } 841 842 /* Return whether X can be decomposed into subwords. 
*/

static bool
can_decompose_p (rtx x)
{
  if (REG_P (x))
    {
      unsigned int regno = REGNO (x);

      if (HARD_REGISTER_NUM_P (regno))
	{
	  /* A hard register is decomposable only if every word of it
	     can be accessed as a word-mode register.  */
	  unsigned int byte, num_bytes;

	  num_bytes = GET_MODE_SIZE (GET_MODE (x));
	  for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
	    if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
	      return false;
	  return true;
	}
      else
	/* A pseudo is decomposable unless it was seen in a
	   mode-changing, non-tieable subreg.  */
	return !bitmap_bit_p (subreg_context, regno);
    }

  return true;
}

/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx
resolve_simple_move (rtx set, rtx insn)
{
  rtx src, dest, real_dest, insns;
  enum machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, minsn, smove;

      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      /* Recurse to split the temporary-to-temporary move.  */
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX);
      if (MEM_P (dest))
	for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);
      emit_move_insn (reg, src);
      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  /* Switch to the integer mode of the same size.  */
	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
				     MODE_INT, 0);
	  gcc_assert (dest_mode != BLKmode);
	}
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Push the words in the order in which they will end up on the
	 stack: J indexes the source word for each successive push.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Clobber the whole destination first so the word-by-word
	 stores do not look like partial updates of a live register.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  /* If we went through a temporary, emit the final copy into the real
     destination (recursively resolving that move as well).  */
  if (real_dest != NULL_RTX)
    {
      rtx mdest, minsn, smove;

      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  delete_insn (insn);

  return insns;
}

/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.  */

static bool
resolve_clobber (rtx pat, rtx insn)
{
  rtx reg;
  enum machine_mode orig_mode;
  unsigned int words, i;
  int ret;

  reg = XEXP (pat, 0);
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  words = GET_MODE_SIZE (orig_mode);
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Rewrite the existing CLOBBER to cover word 0...  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* ... and emit a separate CLOBBER for each remaining word.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}

/* A USE of a decomposed register is no longer meaningful.  Return
   whether we changed something.
*/

static bool
resolve_use (rtx pat, rtx insn)
{
  /* If the USEd register was decomposed, the USE no longer refers to
     anything meaningful; just delete the insn.  */
  if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
    {
      delete_insn (insn);
      return true;
    }

  resolve_reg_notes (insn);

  return false;
}

/* A VAR_LOCATION can be simplified.  */

static void
resolve_debug (rtx insn)
{
  /* Rewrite any decomposed registers mentioned in the debug insn's
     pattern, then tell df the insn changed.  */
  for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX);

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}

/* Check if INSN is a decomposable multiword-shift or zero-extend and
   set the decomposable_context bitmap accordingly.  SPEED_P is true
   if we are optimizing INSN for speed rather than size.  Return true
   if INSN is decomposable.  */

static bool
find_decomposable_shift_zext (rtx insn, bool speed_p)
{
  rtx set;
  rtx op;
  rtx op_operand;

  set = single_set (insn);
  if (!set)
    return false;

  /* Only shifts and zero-extensions are candidates.  */
  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return false;

  /* Source and destination must both be pseudos, and the operation
     must produce a double-word (twice word_mode) result.  */
  op_operand = XEXP (op, 0);
  if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
      || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
      || HARD_REGISTER_NUM_P (REGNO (op_operand))
      || GET_MODE (op) != twice_word_mode)
    return false;

  if (GET_CODE (op) == ZERO_EXTEND)
    {
      /* Only word_mode -> twice_word_mode extensions are split, and
	 only when the cost tables say it pays off.  The word_mode
	 input itself is not multi-word, so it is not marked.  */
      if (GET_MODE (op_operand) != word_mode
	  || !choices[speed_p].splitting_zext)
	return false;
    }
  else /* left or right shift */
    {
      /* Pick the cost table matching the shift code; shifts are only
	 split for constant counts in
	 [BITS_PER_WORD, 2 * BITS_PER_WORD - 1] that the table marks
	 as profitable.  */
      bool *splitting = (GET_CODE (op) == ASHIFT
			 ? choices[speed_p].splitting_ashift
			 : GET_CODE (op) == ASHIFTRT
			 ? choices[speed_p].splitting_ashiftrt
			 : choices[speed_p].splitting_lshiftrt);
      if (!CONST_INT_P (XEXP (op, 1))
	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
			2 * BITS_PER_WORD - 1)
	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
	return false;

      bitmap_set_bit (decomposable_context, REGNO (op_operand));
    }

  bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));

  return true;
}

/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx
resolve_shift_zext (rtx insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL_RTX;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL_RTX;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  /* dest_reg_num is the word of the result that receives the
     (possibly shifted) source word; the other word gets zeros or
     copies of the sign bit.  */
  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					  GET_MODE (SET_DEST (set)),
					  offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
					 GET_MODE (op_operand),
					 src_offset);
  /* For an arithmetic right shift, the "other" word is filled with
     copies of the sign bit of the source word, obtained by shifting
     it right by BITS_PER_WORD - 1.  A count of exactly
     2 * BITS_PER_WORD - 1 is handled below without this temporary.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      /* A residual shift of SHIFT_COUNT - BITS_PER_WORD remains to be
	 done within the source word; none is needed when the count is
	 exactly BITS_PER_WORD.  */
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  /* Fill the other result word: zeros for logical shifts and
     zero_extend, sign bits for arithmetic right shifts.  */
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    /* SRC_REG was already shifted right by BITS_PER_WORD - 1 above,
       so both result words are the sign mask.  */
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}

/* Print to dump_file a description of what we're doing with shift code CODE.
1311 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */ 1312 1313 static void 1314 dump_shift_choices (enum rtx_code code, bool *splitting) 1315 { 1316 int i; 1317 const char *sep; 1318 1319 fprintf (dump_file, 1320 " Splitting mode %s for %s lowering with shift amounts = ", 1321 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code)); 1322 sep = ""; 1323 for (i = 0; i < BITS_PER_WORD; i++) 1324 if (splitting[i]) 1325 { 1326 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD); 1327 sep = ","; 1328 } 1329 fprintf (dump_file, "\n"); 1330 } 1331 1332 /* Print to dump_file a description of what we're doing when optimizing 1333 for speed or size; SPEED_P says which. DESCRIPTION is a description 1334 of the SPEED_P choice. */ 1335 1336 static void 1337 dump_choices (bool speed_p, const char *description) 1338 { 1339 unsigned int i; 1340 1341 fprintf (dump_file, "Choices when optimizing for %s:\n", description); 1342 1343 for (i = 0; i < MAX_MACHINE_MODE; i++) 1344 if (GET_MODE_SIZE (i) > UNITS_PER_WORD) 1345 fprintf (dump_file, " %s mode %s for copy lowering.\n", 1346 choices[speed_p].move_modes_to_split[i] 1347 ? "Splitting" 1348 : "Skipping", 1349 GET_MODE_NAME ((enum machine_mode) i)); 1350 1351 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n", 1352 choices[speed_p].splitting_zext ? "Splitting" : "Skipping", 1353 GET_MODE_NAME (twice_word_mode)); 1354 1355 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift); 1356 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt); 1357 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt); 1358 fprintf (dump_file, "\n"); 1359 } 1360 1361 /* Look for registers which are always accessed via word-sized SUBREGs 1362 or -if DECOMPOSE_COPIES is true- via copies. Decompose these 1363 registers into several word-sized pseudo-registers. 
*/

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.  */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      /* Run word-level dead code elimination first, so we do not try
	 to lower words that are in fact dead.  */
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  /* reg_copy_graph holds one lazily-allocated bitmap per pseudo;
     clear all entries before the scan below fills them in.  */
  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  /* First walk over all insns: classify each one and record in the
     context bitmaps which pseudos may (or must not) be decomposed.  */
  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB (bb)
    {
      rtx insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      for_each_rtx (&recog_data.operand[i],
			    find_decomposable_subregs,
			    &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A pseudo seen in any non-decomposable context must not be split.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      /* Blocks that end up with a control flow insn in their middle
	 (from splitting a possibly-trapping load) are collected here
	 and split after the walk below.  */
      sub_blocks = sbitmap_alloc (last_basic_block);
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Second walk: rewrite every insn that mentions a decomposed
	 pseudo.  */
      FOR_EACH_BB (bb)
	{
	  rtx insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    for_each_rtx (recog_data.operand_loc[i],
				  resolve_subreg_use,
				  insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Mirror each changed operand into its matching
			 duplicate before committing the whole group.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx insn, end;
	  edge fallthru;

	  bb = BASIC_BLOCK (i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Free the per-pseudo copy bitmaps and the context bitmaps.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}

/* Gate function for lower subreg pass.  */

static bool
gate_handle_lower_subreg (void)
{
  return flag_split_wide_types != 0;
}

/* Implement first lower subreg pass.  */

static unsigned int
rest_of_handle_lower_subreg (void)
{
  /* The first pass does not decompose pseudo-to-pseudo copies.  */
  decompose_multiword_subregs (false);
  return 0;
}

/* Implement second lower subreg pass.  */

static unsigned int
rest_of_handle_lower_subreg2 (void)
{
  /* The second pass also decomposes pseudo-to-pseudo copies.  */
  decompose_multiword_subregs (true);
  return 0;
}

struct rtl_opt_pass pass_lower_subreg =
{
 {
  RTL_PASS,
  "subreg1",				/* name */
  OPTGROUP_NONE,			/* optinfo_flags */
  gate_handle_lower_subreg,		/* gate */
  rest_of_handle_lower_subreg,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_LOWER_SUBREG,			/* tv_id */
  0,					/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_ggc_collect |
  TODO_verify_flow			/* todo_flags_finish */
 }
};

struct rtl_opt_pass pass_lower_subreg2 =
{
 {
  RTL_PASS,
  "subreg2",				/* name */
  OPTGROUP_NONE,			/* optinfo_flags */
  gate_handle_lower_subreg,		/* gate */
  rest_of_handle_lower_subreg2,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_LOWER_SUBREG,			/* tv_id */
  0,					/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_ggc_collect |
  TODO_verify_flow			/* todo_flags_finish */
 }
};