1 /* Decompose multiword subregs. 2 Copyright (C) 2007-2020 Free Software Foundation, Inc. 3 Contributed by Richard Henderson <rth@redhat.com> 4 Ian Lance Taylor <iant@google.com> 5 6 This file is part of GCC. 7 8 GCC is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free 10 Software Foundation; either version 3, or (at your option) any later 11 version. 12 13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 14 WARRANTY; without even the implied warranty of MERCHANTABILITY or 15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16 for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with GCC; see the file COPYING3. If not see 20 <http://www.gnu.org/licenses/>. */ 21 22 #include "config.h" 23 #include "system.h" 24 #include "coretypes.h" 25 #include "backend.h" 26 #include "rtl.h" 27 #include "tree.h" 28 #include "cfghooks.h" 29 #include "df.h" 30 #include "memmodel.h" 31 #include "tm_p.h" 32 #include "expmed.h" 33 #include "insn-config.h" 34 #include "emit-rtl.h" 35 #include "recog.h" 36 #include "cfgrtl.h" 37 #include "cfgbuild.h" 38 #include "dce.h" 39 #include "expr.h" 40 #include "tree-pass.h" 41 #include "lower-subreg.h" 42 #include "rtl-iter.h" 43 #include "target.h" 44 45 46 /* Decompose multi-word pseudo-registers into individual 47 pseudo-registers when possible and profitable. This is possible 48 when all the uses of a multi-word register are via SUBREG, or are 49 copies of the register to another location. Breaking apart the 50 register permits more CSE and permits better register allocation. 51 This is profitable if the machine does not have move instructions 52 to do this. 53 54 This pass only splits moves with modes that are wider than 55 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with 56 integer modes that are twice the width of word_mode. 
The latter
   could be generalized if there was a need to do this, but the trend in
   architectures is to not need this.

   There are two useful preprocessor defines for use by maintainers:

   #define LOG_COSTS 1

   if you wish to see the actual cost estimates that are being used
   for each mode wider than word mode and the cost estimates for zero
   extension and the shifts.  This can be useful when port maintainers
   are tuning insn rtx costs.

   #define FORCE_LOWERING 1

   if you wish to test the pass with all the transformations forced on.
   This can be useful for finding bugs in the transformations.  */

#define LOG_COSTS 0
#define FORCE_LOWERING 0

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it cannot be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
static vec<bitmap> reg_copy_graph;

/* Per-target state for this pass.  When SWITCHABLE_TARGET is set, the
   state is reached through the this_target_lower_subreg pointer, which
   initially points at the default instance.  */
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Shorthands for fields of the current target's state.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices

/* Return true if MODE is a mode we know how to lower.  When returning true,
   store its byte size in *BYTES and its word size in *WORDS.
*/ 108 109 static inline bool 110 interesting_mode_p (machine_mode mode, unsigned int *bytes, 111 unsigned int *words) 112 { 113 if (!GET_MODE_SIZE (mode).is_constant (bytes)) 114 return false; 115 *words = CEIL (*bytes, UNITS_PER_WORD); 116 return true; 117 } 118 119 /* RTXes used while computing costs. */ 120 struct cost_rtxes { 121 /* Source and target registers. */ 122 rtx source; 123 rtx target; 124 125 /* A twice_word_mode ZERO_EXTEND of SOURCE. */ 126 rtx zext; 127 128 /* A shift of SOURCE. */ 129 rtx shift; 130 131 /* A SET of TARGET. */ 132 rtx set; 133 }; 134 135 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the 136 rtxes in RTXES. SPEED_P selects between the speed and size cost. */ 137 138 static int 139 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code, 140 machine_mode mode, int op1) 141 { 142 PUT_CODE (rtxes->shift, code); 143 PUT_MODE (rtxes->shift, mode); 144 PUT_MODE (rtxes->source, mode); 145 XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1); 146 return set_src_cost (rtxes->shift, mode, speed_p); 147 } 148 149 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X] 150 to true if it is profitable to split a double-word CODE shift 151 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing 152 for speed or size profitability. 153 154 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is 155 the cost of moving zero into a word-mode register. WORD_MOVE_COST 156 is the cost of moving between word registers. 
*/

static void
compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
			 bool *splitting, enum rtx_code code,
			 int word_move_zero_cost, int word_move_cost)
{
  for (int count = 0; count < BITS_PER_WORD; count++)
    {
      /* Cost of doing the whole shift in twice_word_mode.  */
      int whole_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
				   count + BITS_PER_WORD);

      /* First piece of the split form: a word_mode shift by COUNT
	 bits, or just a word move when COUNT is zero.  */
      int low_cost = word_move_cost;
      if (count != 0)
	low_cost = shift_cost (speed_p, rtxes, code, word_mode, count);

      /* Second piece: a move of zero, except for ASHIFTRT, where it
	 is a word_mode shift by BITS_PER_WORD - 1 (or a word move if
	 COUNT is already BITS_PER_WORD - 1).  */
      int high_cost;
      if (code == ASHIFTRT)
	{
	  if (count == BITS_PER_WORD - 1)
	    high_cost = word_move_cost;
	  else
	    high_cost = shift_cost (speed_p, rtxes, code, word_mode,
				    BITS_PER_WORD - 1);
	}
      else
	high_cost = word_move_zero_cost;

      if (LOG_COSTS)
	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
		 count + BITS_PER_WORD, whole_cost, low_cost, high_cost);

      /* Split when that is no more expensive than the wide shift.  */
      if (FORCE_LOWERING || whole_cost >= low_cost + high_cost)
	splitting[count] = true;
    }
}

/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.
*/

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  /* Cost of setting a word_mode TARGET to zero.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  int word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  /* Cost of setting TARGET from SOURCE.  */
  SET_SRC (rtxes->set) = rtxes->source;
  int word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* Decide, for every multi-word mode, whether a move in that mode
     should be split into word moves.  */
  for (unsigned int m = 0; m < MAX_MACHINE_MODE; m++)
    {
      machine_mode mode = (machine_mode) m;
      unsigned int size, factor;

      if (!interesting_mode_p (mode, &size, &factor) || factor <= 1)
	continue;

      PUT_MODE (rtxes->target, mode);
      PUT_MODE (rtxes->source, mode);
      unsigned int mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		 GET_MODE_NAME (mode), mode_move_cost,
		 word_move_cost, factor);

      if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	{
	  choices[speed_p].move_modes_to_split[m] = true;
	  choices[speed_p].something_to_do = true;
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (!choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    return;

  /* The only thing to check for the zero extension is whether a word
     move plus a move of zero into the upper part is no more expensive
     than the zext itself.  */
  PUT_MODE (rtxes->source, word_mode);
  int zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
	     GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
	     zext_cost, word_move_cost, word_move_zero_cost);

  if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
    choices[speed_p].splitting_zext = true;

  compute_splitting_shift (speed_p, rtxes,
			   choices[speed_p].splitting_ashift, ASHIFT,
			   word_move_zero_cost, word_move_cost);
  compute_splitting_shift (speed_p, rtxes,
			   choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			   word_move_zero_cost, word_move_cost);
  compute_splitting_shift (speed_p, rtxes,
			   choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			   word_move_zero_cost, word_move_cost);
}

/* Do one-per-target initialisation.  This involves determining
   which operations on the machine are profitable.  If none are found,
   then the pass just returns when called.
*/ 275 276 void 277 init_lower_subreg (void) 278 { 279 struct cost_rtxes rtxes; 280 281 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg)); 282 283 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require (); 284 285 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1); 286 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2); 287 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source); 288 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source); 289 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx); 290 291 if (LOG_COSTS) 292 fprintf (stderr, "\nSize costs\n==========\n\n"); 293 compute_costs (false, &rtxes); 294 295 if (LOG_COSTS) 296 fprintf (stderr, "\nSpeed costs\n===========\n\n"); 297 compute_costs (true, &rtxes); 298 } 299 300 static bool 301 simple_move_operand (rtx x) 302 { 303 if (GET_CODE (x) == SUBREG) 304 x = SUBREG_REG (x); 305 306 if (!OBJECT_P (x)) 307 return false; 308 309 if (GET_CODE (x) == LABEL_REF 310 || GET_CODE (x) == SYMBOL_REF 311 || GET_CODE (x) == HIGH 312 || GET_CODE (x) == CONST) 313 return false; 314 315 if (MEM_P (x) 316 && (MEM_VOLATILE_P (x) 317 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x)))) 318 return false; 319 320 return true; 321 } 322 323 /* If X is an operator that can be treated as a simple move that we 324 can split, then return the operand that is operated on. */ 325 326 static rtx 327 operand_for_swap_move_operator (rtx x) 328 { 329 /* A word sized rotate of a register pair is equivalent to swapping 330 the registers in the register pair. */ 331 if (GET_CODE (x) == ROTATE 332 && GET_MODE (x) == twice_word_mode 333 && simple_move_operand (XEXP (x, 0)) 334 && CONST_INT_P (XEXP (x, 1)) 335 && INTVAL (XEXP (x, 1)) == BITS_PER_WORD) 336 return XEXP (x, 0); 337 338 return NULL_RTX; 339 } 340 341 /* If INSN is a single set between two objects that we want to split, 342 return the single set. 
SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called.  */

static rtx
simple_move (rtx_insn *insn, bool speed_p)
{
  if (recog_data.n_operands != 2)
    return NULL_RTX;

  rtx set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* The destination must be one of the two recognized operands and
     must be a simple move operand.  */
  rtx dest = SET_DEST (set);
  if (dest != recog_data.operand[0] && dest != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (dest))
    return NULL_RTX;

  /* Look through a word-sized rotate to the operand it acts on.  */
  rtx src = SET_SRC (set);
  rtx rotated = operand_for_swap_move_operator (src);
  if (rotated != NULL_RTX)
    src = rotated;

  if (src != recog_data.operand[0] && src != recog_data.operand[1])
    return NULL_RTX;

  /* ASM_OPERANDS is also accepted as a source; that is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (src) != ASM_OPERANDS)
    {
      if (!simple_move_operand (src))
	return NULL_RTX;
    }

  /* Decomposition is done in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  So a non-integer mode is only acceptable when an
     integer mode of the same size exists.  */
  machine_mode mode = GET_MODE (dest);
  if (!SCALAR_INT_MODE_P (mode))
    {
      if (!int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists ())
	return NULL_RTX;
    }

  /* PARTIAL_INT modes are used for processor specific purposes and
     it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Finally, splitting moves in this mode must have been selected.  */
  if (choices[speed_p].move_modes_to_split[(int) mode])
    return set;

  return NULL_RTX;
}

/* If SET is a copy from one multi-word pseudo-register to another,
   record that in reg_copy_graph.  Return whether it is such a
   copy.
*/

static bool
find_pseudo_copy (rtx set)
{
  rtx dest = SET_DEST (set);
  rtx src = SET_SRC (set);
  rtx op;
  unsigned int rd, rs;
  bitmap b;

  /* Look through a word-sized rotate to the operand it acts on.  */
  if ((op = operand_for_swap_move_operator (src)) != NULL_RTX)
    src = op;

  if (!REG_P (dest) || !REG_P (src))
    return false;

  /* Only copies between pseudo-registers are recorded.  */
  rd = REGNO (dest);
  rs = REGNO (src);
  if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
    return false;

  /* The bitmap for a source register is allocated the first time a
     copy from it is seen.  */
  b = reg_copy_graph[rs];
  if (b == NULL)
    {
      b = BITMAP_ALLOC (NULL);
      reg_copy_graph[rs] = b;
    }

  bitmap_set_bit (b, rd);

  return true;
}

/* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
   where they are copied to another register, add the register to
   which they are copied to DECOMPOSABLE_CONTEXT.  Use
   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
   copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */

static void
propagate_pseudo_copies (void)
{
  auto_bitmap queue, propagate;

  /* QUEUE holds the registers whose copies still have to be followed;
     the loop runs until a round leaves it empty.  */
  bitmap_copy (queue, decomposable_context);
  do
    {
      bitmap_iterator iter;
      unsigned int i;

      bitmap_clear (propagate);

      /* Collect into PROPAGATE the copy destinations of everything
	 in QUEUE.  */
      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
	{
	  bitmap b = reg_copy_graph[i];
	  if (b)
	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
	}

      /* Recompute QUEUE from PROPAGATE and DECOMPOSABLE_CONTEXT before
	 PROPAGATE is merged into DECOMPOSABLE_CONTEXT.  */
      bitmap_and_compl (queue, propagate, decomposable_context);
      bitmap_ior_into (decomposable_context, propagate);
    }
  while (!bitmap_empty_p (queue));
}

/* A pointer to one of these values is passed to
   find_decomposable_subregs.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};

/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   PCMI will point at the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  /* SUBREGs of hard registers are ignored, including the
	     register inside them.  */
	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      iter.skip_subrtxes ();
	      continue;
	    }

	  if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words)
	      || !interesting_mode_p (GET_MODE (inner), &inner_size,
				      &inner_words))
	    continue;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we skip its sub-rtxes to avoid
	     iterating over the inner register.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1
	      && inner_words > 1
	      /* Don't allow to decompose floating point subregs of
		 multi-word pseudos if the floating point mode does
		 not have word size, because otherwise we'd generate
		 a subreg with that floating mode from a different
		 sized integral pseudo which is not allowed by
		 validate_subreg.  */
	      && (!FLOAT_MODE_P (GET_MODE (x))
		  || outer_size == UNITS_PER_WORD))
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno, size, words;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we cannot decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && interesting_mode_p (GET_MODE (x), &size, &words)
	      && words > 1)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and skip the sub-rtxes to block the parent's
	     iteration over the address.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}

/* Decompose REGNO into word-sized components.  We smash the REG node
   in place.  This ensures that (1) something goes wrong quickly if we
   fail to make some replacement, and (2) the debug information inside
   the symbol table is automatically kept up to date.  */

static void
decompose_register (unsigned int regno)
{
  rtx reg;
  unsigned int size, words, i;
  rtvec v;

  reg = regno_reg_rtx[regno];

  regno_reg_rtx[regno] = NULL_RTX;

  if (!interesting_mode_p (GET_MODE (reg), &size, &words))
    gcc_unreachable ();

  /* One new word_mode register per word of the original register.  */
  v = rtvec_alloc (words);
  for (i = 0; i < words; ++i)
    RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);

  /* Turn the REG itself into a CONCATN of the new registers.  */
  PUT_CODE (reg, CONCATN);
  XVEC (reg, 0) = v;

  if (dump_file)
    {
      fprintf (dump_file, "; Splitting reg %u ->", regno);
      for (i = 0; i < words; ++i)
	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
      fputc ('\n', dump_file);
    }
}

/* Get a SUBREG of a CONCATN.
*/ 645 646 static rtx 647 simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte) 648 { 649 unsigned int outer_size, outer_words, inner_size, inner_words; 650 machine_mode innermode, partmode; 651 rtx part; 652 unsigned int final_offset; 653 unsigned int byte; 654 655 innermode = GET_MODE (op); 656 if (!interesting_mode_p (outermode, &outer_size, &outer_words) 657 || !interesting_mode_p (innermode, &inner_size, &inner_words)) 658 gcc_unreachable (); 659 660 /* Must be constant if interesting_mode_p passes. */ 661 byte = orig_byte.to_constant (); 662 gcc_assert (GET_CODE (op) == CONCATN); 663 gcc_assert (byte % outer_size == 0); 664 665 gcc_assert (byte < inner_size); 666 if (outer_size > inner_size) 667 return NULL_RTX; 668 669 inner_size /= XVECLEN (op, 0); 670 part = XVECEXP (op, 0, byte / inner_size); 671 partmode = GET_MODE (part); 672 673 final_offset = byte % inner_size; 674 if (final_offset + outer_size > inner_size) 675 return NULL_RTX; 676 677 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of 678 regular CONST_VECTORs. They have vector or integer modes, depending 679 on the capabilities of the target. Cope with them. */ 680 if (partmode == VOIDmode && VECTOR_MODE_P (innermode)) 681 partmode = GET_MODE_INNER (innermode); 682 else if (partmode == VOIDmode) 683 partmode = mode_for_size (inner_size * BITS_PER_UNIT, 684 GET_MODE_CLASS (innermode), 0).require (); 685 686 return simplify_gen_subreg (outermode, part, partmode, final_offset); 687 } 688 689 /* Wrapper around simplify_gen_subreg which handles CONCATN. */ 690 691 static rtx 692 simplify_gen_subreg_concatn (machine_mode outermode, rtx op, 693 machine_mode innermode, unsigned int byte) 694 { 695 rtx ret; 696 697 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN. 698 If OP is a SUBREG of a CONCATN, then it must be a simple mode 699 change with the same size and offset 0, or it must extract a 700 part. We shouldn't see anything else here. 
*/ 701 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN) 702 { 703 rtx op2; 704 705 if (known_eq (GET_MODE_SIZE (GET_MODE (op)), 706 GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))) 707 && known_eq (SUBREG_BYTE (op), 0)) 708 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op), 709 GET_MODE (SUBREG_REG (op)), byte); 710 711 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op), 712 SUBREG_BYTE (op)); 713 if (op2 == NULL_RTX) 714 { 715 /* We don't handle paradoxical subregs here. */ 716 gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op))); 717 gcc_assert (!paradoxical_subreg_p (op)); 718 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op), 719 byte + SUBREG_BYTE (op)); 720 gcc_assert (op2 != NULL_RTX); 721 return op2; 722 } 723 724 op = op2; 725 gcc_assert (op != NULL_RTX); 726 gcc_assert (innermode == GET_MODE (op)); 727 } 728 729 if (GET_CODE (op) == CONCATN) 730 return simplify_subreg_concatn (outermode, op, byte); 731 732 ret = simplify_gen_subreg (outermode, op, innermode, byte); 733 734 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then 735 resolve_simple_move will ask for the high part of the paradoxical 736 subreg, which does not have a value. Just return a zero. */ 737 if (ret == NULL_RTX 738 && paradoxical_subreg_p (op)) 739 return CONST0_RTX (outermode); 740 741 gcc_assert (ret != NULL_RTX); 742 return ret; 743 } 744 745 /* Return whether we should resolve X into the registers into which it 746 was decomposed. */ 747 748 static bool 749 resolve_reg_p (rtx x) 750 { 751 return GET_CODE (x) == CONCATN; 752 } 753 754 /* Return whether X is a SUBREG of a register which we need to 755 resolve. */ 756 757 static bool 758 resolve_subreg_p (rtx x) 759 { 760 if (GET_CODE (x) != SUBREG) 761 return false; 762 return resolve_reg_p (SUBREG_REG (x)); 763 } 764 765 /* Look for SUBREGs in *LOC which need to be decomposed. 
*/ 766 767 static bool 768 resolve_subreg_use (rtx *loc, rtx insn) 769 { 770 subrtx_ptr_iterator::array_type array; 771 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST) 772 { 773 rtx *loc = *iter; 774 rtx x = *loc; 775 if (resolve_subreg_p (x)) 776 { 777 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x), 778 SUBREG_BYTE (x)); 779 780 /* It is possible for a note to contain a reference which we can 781 decompose. In this case, return 1 to the caller to indicate 782 that the note must be removed. */ 783 if (!x) 784 { 785 gcc_assert (!insn); 786 return true; 787 } 788 789 validate_change (insn, loc, x, 1); 790 iter.skip_subrtxes (); 791 } 792 else if (resolve_reg_p (x)) 793 /* Return 1 to the caller to indicate that we found a direct 794 reference to a register which is being decomposed. This can 795 happen inside notes, multiword shift or zero-extend 796 instructions. */ 797 return true; 798 } 799 800 return false; 801 } 802 803 /* Resolve any decomposed registers which appear in register notes on 804 INSN. */ 805 806 static void 807 resolve_reg_notes (rtx_insn *insn) 808 { 809 rtx *pnote, note; 810 811 note = find_reg_equal_equiv_note (insn); 812 if (note) 813 { 814 int old_count = num_validated_changes (); 815 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX)) 816 remove_note (insn, note); 817 else 818 if (old_count != num_validated_changes ()) 819 df_notes_rescan (insn); 820 } 821 822 pnote = ®_NOTES (insn); 823 while (*pnote != NULL_RTX) 824 { 825 bool del = false; 826 827 note = *pnote; 828 switch (REG_NOTE_KIND (note)) 829 { 830 case REG_DEAD: 831 case REG_UNUSED: 832 if (resolve_reg_p (XEXP (note, 0))) 833 del = true; 834 break; 835 836 default: 837 break; 838 } 839 840 if (del) 841 *pnote = XEXP (note, 1); 842 else 843 pnote = &XEXP (note, 1); 844 } 845 } 846 847 /* Return whether X can be decomposed into subwords. 
*/

static bool
can_decompose_p (rtx x)
{
  if (REG_P (x))
    {
      unsigned int regno = REGNO (x);

      if (HARD_REGISTER_NUM_P (regno))
	{
	  unsigned int byte, num_bytes, num_words;

	  /* A hard register can be decomposed only if every word-sized
	     piece of it has a valid word_mode subreg register.  */
	  if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words))
	    return false;
	  for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
	    if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
	      return false;
	  return true;
	}
      else
	/* A pseudo can be decomposed unless it was recorded in
	   SUBREG_CONTEXT.  */
	return !bitmap_bit_p (subreg_context, regno);
    }

  /* Anything that is not a register is accepted.  */
  return true;
}

/* OPND is a concatn operand that is used with a simple move operator.
   Return a new rtx with the concatn's operands swapped.  */

static rtx
resolve_operand_for_swap_move_operator (rtx opnd)
{
  gcc_assert (GET_CODE (opnd) == CONCATN);
  /* Work on a copy so that OPND itself is left untouched.  */
  rtx concatn = copy_rtx (opnd);
  rtx op0 = XVECEXP (concatn, 0, 0);
  rtx op1 = XVECEXP (concatn, 0, 1);
  XVECEXP (concatn, 0, 0) = op1;
  XVECEXP (concatn, 0, 1) = op0;
  return concatn;
}

/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest, src_op;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int orig_size, words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  if (!interesting_mode_p (orig_mode, &orig_size, &words))
    gcc_unreachable ();
  gcc_assert (words > 1);

  /* Everything emitted from here on is collected in a sequence, which
     is inserted before INSN at the end.  */
  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if ((src_op = operand_for_swap_move_operator (src)) != NULL_RTX)
    {
      if (resolve_reg_p (dest))
	{
	  /* DEST is a CONCATN, so swap its operands and strip
	     SRC's operator.  */
	  dest = resolve_operand_for_swap_move_operator (dest);
	  src = src_op;
	}
      else if (resolve_reg_p (src_op))
	{
	  /* SRC is an operation on a CONCATN, so strip the operator and
	     swap the CONCATN's operands.  */
	  src = resolve_operand_for_swap_move_operator (src_op);
	}
    }

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (maybe_ne (SUBREG_BYTE (src), 0)
	  || maybe_ne (orig_size, GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (maybe_ne (SUBREG_BYTE (dest), 0)
	  || maybe_ne (orig_size,
		       GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Copy SRC into a fresh register first, and resolve that copy
	 recursively.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we cannot validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

      if (AUTO_INC_DEC)
	{
	  rtx_insn *move = emit_move_insn (reg, src);
	  /* Carry a REG_INC note of INSN over to the new move when SRC
	     is a MEM.  */
	  if (MEM_P (src))
	    {
	      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	      if (note)
		add_reg_note (move, REG_INC, XEXP (note, 0));
	    }
	}
      else
	emit_move_insn (reg, src);

      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	dest_mode = int_mode_for_mode (dest_mode).require ();
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (orig_size % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* J is the index of the word pushed in each iteration; it runs
	 either upwards from 0 or downwards from WORDS - 1.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  /* JINC is unsigned, so adding it to J wraps around and
	     steps J down by one.  */
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* A pseudo destination is clobbered before its words are set
	 one at a time.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  /* If the word moves went to a temporary, copy the temporary to the
     real destination and resolve that copy as well.  */
  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

      if (AUTO_INC_DEC && MEM_P (real_dest)
	  && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
	{
	  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	  if (note)
	    add_reg_note (minsn, REG_INC, XEXP (note, 0));
	}

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}

/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.  */

static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  machine_mode orig_mode;
  unsigned int orig_size, words, i;
  int ret;

  reg = XEXP (pat, 0);
  /* For clobbers we can look through paradoxical subregs which
     we do not handle in simplify_gen_subreg_concatn.  */
  if (paradoxical_subreg_p (reg))
    reg = SUBREG_REG (reg);
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  if (!interesting_mode_p (orig_mode, &orig_size, &words))
    gcc_unreachable ();

  /* The existing CLOBBER is changed to clobber the word at offset 0.  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* A new CLOBBER is emitted after INSN for each remaining word.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}

/* A USE of a decomposed register is no longer meaningful.  Return
   whether we changed something.  */

static bool
resolve_use (rtx pat, rtx_insn *insn)
{
  if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
    {
      delete_insn (insn);
      return true;
    }

  resolve_reg_notes (insn);

  return false;
}

/* A VAR_LOCATION can be simplified.  */

static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* Install the simplified piece if there is one; otherwise
	     continue with a copy of the original SUBREG.  */
	  if (x)
	    *loc = x;
	  else
	    x = copy_rtx (*loc);
	}
      /* A direct reference to a decomposed register is replaced by a
	 copy of its CONCATN.  */
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}

/* Check if INSN is a decomposable multiword-shift or zero-extend and
   set the decomposable_context bitmap accordingly.  SPEED_P is true
   if we are optimizing INSN for speed rather than size.
Return true if INSN is decomposable.  */

static bool
find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
{
  rtx set = single_set (insn);
  if (!set)
    return false;

  /* Only shifts and zero-extends are candidates.  */
  rtx op = SET_SRC (set);
  switch (GET_CODE (op))
    {
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
    case ZERO_EXTEND:
      break;

    default:
      return false;
    }

  /* Both the destination and the operand must be pseudo registers,
     and the operation itself must be done in twice_word_mode.  */
  rtx dest = SET_DEST (set);
  rtx op_operand = XEXP (op, 0);
  if (!REG_P (dest) || !REG_P (op_operand))
    return false;
  if (HARD_REGISTER_NUM_P (REGNO (dest))
      || HARD_REGISTER_NUM_P (REGNO (op_operand)))
    return false;
  if (GET_MODE (op) != twice_word_mode)
    return false;

  if (GET_CODE (op) == ZERO_EXTEND)
    {
      /* The extended value has to be a single word, and the cost
         choices must say that splitting the zero-extend is wanted.  */
      if (GET_MODE (op_operand) != word_mode)
        return false;
      if (!choices[speed_p].splitting_zext)
        return false;
    }
  else
    {
      /* Left or right shift: select the per-shift-amount table.  */
      bool *splitting;
      if (GET_CODE (op) == ASHIFT)
        splitting = choices[speed_p].splitting_ashift;
      else if (GET_CODE (op) == ASHIFTRT)
        splitting = choices[speed_p].splitting_ashiftrt;
      else
        splitting = choices[speed_p].splitting_lshiftrt;

      /* The shift count must be a constant in the range
         [BITS_PER_WORD, 2 * BITS_PER_WORD - 1] whose table entry
         (indexed by count - BITS_PER_WORD) is set.  */
      rtx amount = XEXP (op, 1);
      if (!CONST_INT_P (amount))
        return false;
      if (!IN_RANGE (INTVAL (amount), BITS_PER_WORD, 2 * BITS_PER_WORD - 1))
        return false;
      if (!splitting[INTVAL (amount) - BITS_PER_WORD])
        return false;

      /* The shifted operand is only marked for shifts, not for
         zero-extends.  */
      bitmap_set_bit (decomposable_context, REGNO (op_operand));
    }

  bitmap_set_bit (decomposable_context, REGNO (dest));

  return true;
}

/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.
*/

static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set = single_set (insn);
  if (!set)
    return NULL;

  rtx op = SET_SRC (set);
  const enum rtx_code code = GET_CODE (op);
  if (code != ASHIFT
      && code != LSHIFTRT
      && code != ASHIFTRT
      && code != ZERO_EXTEND)
    return NULL;

  rtx op_operand = XEXP (op, 0);
  scalar_int_mode inner_mode;
  if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
    return NULL;

  /* The operation can only be torn apart if at least one of the
     registers involved has already been torn apart.  */
  rtx dest = SET_DEST (set);
  if (!(resolve_reg_p (dest) || resolve_reg_p (op_operand)))
    return NULL;

  /* SRC_WORD is the index of the word-mode piece of the operand that
     is operated on.  For a left shift and a zero_extend on little
     endian machines this is word 0.  */
  int src_word = (code == LSHIFTRT || code == ASHIFTRT) ? 1 : 0;
  if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
    src_word = 1 - src_word;

  /* DEST_WORD is the index of the destination word that receives the
     (possibly shifted) source word.  */
  int dest_word;
  if (code == ZERO_EXTEND)
    dest_word = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_word = 1 - src_word;

  int dest_off = UNITS_PER_WORD * dest_word;
  int other_off = UNITS_PER_WORD * (1 - dest_word);
  int src_off = UNITS_PER_WORD * src_word;

  start_sequence ();

  rtx dest_reg = simplify_gen_subreg_concatn (word_mode, dest,
                                              GET_MODE (dest), dest_off);
  rtx dest_other = simplify_gen_subreg_concatn (word_mode, dest,
                                                GET_MODE (dest), other_off);
  rtx src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                             GET_MODE (op_operand), src_off);

  /* For an arithmetic right shift by anything but 2 * BITS_PER_WORD - 1,
     the other destination word is the source word shifted right by
     BITS_PER_WORD - 1.  Compute it before SRC_REG may be replaced
     below.  */
  rtx fill_src = NULL_RTX;
  if (code == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    fill_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
                             BITS_PER_WORD - 1, NULL_RTX, 0);

  if (code != ZERO_EXTEND)
    {
      /* Only the part of the count beyond one full word still has to
         be applied to the source word.  */
      int shift_count = INTVAL (XEXP (op, 1));
      if (shift_count > BITS_PER_WORD)
        src_reg = expand_shift (code == ASHIFT ? LSHIFT_EXPR : RSHIFT_EXPR,
                                word_mode, src_reg,
                                shift_count - BITS_PER_WORD,
                                dest_reg, code != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);

  /* Fill in the other destination word.  */
  if (code != ASHIFTRT)
    emit_move_insn (dest_other, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    emit_move_insn (dest_other, copy_rtx (src_reg));
  else
    emit_move_insn (dest_other, fill_src);

  rtx_insn *insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      fprintf (dump_file, "; Replacing insn: %d with insns: ",
               INSN_UID (insn));
      rtx_insn *in = insns;
      while (in != insn)
        {
          fprintf (dump_file, "%d ", INSN_UID (in));
          in = NEXT_INSN (in);
        }
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}

/* Print to dump_file a description of what we're doing with shift code CODE.
1398 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */ 1399 1400 static void 1401 dump_shift_choices (enum rtx_code code, bool *splitting) 1402 { 1403 int i; 1404 const char *sep; 1405 1406 fprintf (dump_file, 1407 " Splitting mode %s for %s lowering with shift amounts = ", 1408 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code)); 1409 sep = ""; 1410 for (i = 0; i < BITS_PER_WORD; i++) 1411 if (splitting[i]) 1412 { 1413 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD); 1414 sep = ","; 1415 } 1416 fprintf (dump_file, "\n"); 1417 } 1418 1419 /* Print to dump_file a description of what we're doing when optimizing 1420 for speed or size; SPEED_P says which. DESCRIPTION is a description 1421 of the SPEED_P choice. */ 1422 1423 static void 1424 dump_choices (bool speed_p, const char *description) 1425 { 1426 unsigned int size, factor, i; 1427 1428 fprintf (dump_file, "Choices when optimizing for %s:\n", description); 1429 1430 for (i = 0; i < MAX_MACHINE_MODE; i++) 1431 if (interesting_mode_p ((machine_mode) i, &size, &factor) 1432 && factor > 1) 1433 fprintf (dump_file, " %s mode %s for copy lowering.\n", 1434 choices[speed_p].move_modes_to_split[i] 1435 ? "Splitting" 1436 : "Skipping", 1437 GET_MODE_NAME ((machine_mode) i)); 1438 1439 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n", 1440 choices[speed_p].splitting_zext ? "Splitting" : "Skipping", 1441 GET_MODE_NAME (twice_word_mode)); 1442 1443 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift); 1444 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt); 1445 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt); 1446 fprintf (dump_file, "\n"); 1447 } 1448 1449 /* Look for registers which are always accessed via word-sized SUBREGs 1450 or -if DECOMPOSE_COPIES is true- via copies. Decompose these 1451 registers into several word-sized pseudo-registers. 
*/

/* Outline of the function below:

     1. Dump the cost-based choices, then return early when neither the
        size nor the speed choices have something_to_do, or when no
        pseudo register has a mode listed in move_modes_to_split.
     2. Scan every insn.  find_decomposable_shift_zext records
        decomposable shifts and zero-extends; every operand of the
        remaining insns is handed to find_decomposable_subregs together
        with the classification of the insn as a move.
     3. Remove the registers in non_decomposable_context from
        decomposable_context.  If anything is left, decompose those
        registers and rewrite the insns, then split the basic blocks
        recorded in sub_blocks.
     4. Free the copy graph and the three context bitmaps.

   DECOMPOSE_COPIES only changes how a move accepted by find_pseudo_copy
   is classified in step 2: DECOMPOSABLE_SIMPLE_MOVE when true,
   SIMPLE_MOVE when false.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.  */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
        fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
        {
          /* A mode counts as useful if either the size or the speed
             choices want moves in that mode to be split.  */
          machine_mode mode = GET_MODE (regno_reg_rtx[i]);
          if (choices[false].move_modes_to_split[(int) mode]
              || choices[true].move_modes_to_split[(int) mode])
            {
              useful_modes_seen = true;
              break;
            }
        }

    if (!useful_modes_seen)
      {
        if (dump_file)
          fprintf (dump_file, "Nothing to lower in this function.\n");
        return;
      }
  }

  /* When df is set up, defer insn rescanning and call run_word_dce
     before the insns are scanned.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  /* One (initially null) bitmap slot per register number.
     NOTE(review): safe_grow_cleared presumably already zeroes the new
     elements, which would make the memset redundant -- confirm against
     vec.h before removing it.  */
  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
        {
          rtx set;
          enum classify_move_insn cmi;
          int i, n;

          /* Bare CLOBBER and USE insns are not analyzed in this scan.  */
          if (!INSN_P (insn)
              || GET_CODE (PATTERN (insn)) == CLOBBER
              || GET_CODE (PATTERN (insn)) == USE)
            continue;

          recog_memoized (insn);

          /* A decomposable shift or zero-extend has already updated
             decomposable_context; its operands are not examined
             further.  */
          if (find_decomposable_shift_zext (insn, speed_p))
            continue;

          extract_insn (insn);

          set = simple_move (insn, speed_p);

          if (!set)
            cmi = NOT_SIMPLE_MOVE;
          else
            {
              /* We mark pseudo-to-pseudo copies as decomposable during the
                 second pass only.  The first pass is so early that there is
                 good chance such moves will be optimized away completely by
                 subsequent optimizations anyway.

                 However, we call find_pseudo_copy even during the first pass
                 so as to properly set up the reg_copy_graph.  */
              if (find_pseudo_copy (set))
                cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
              else
                cmi = SIMPLE_MOVE;
            }

          n = recog_data.n_operands;
          for (i = 0; i < n; ++i)
            {
              find_decomposable_subregs (&recog_data.operand[i], &cmi);

              /* We handle ASM_OPERANDS as a special case to support
                 things like x86 rdtsc which returns a DImode value.
                 We can decompose the output, which will certainly be
                 operand 0, but not the inputs.  */

              if (cmi == SIMPLE_MOVE
                  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
                {
                  gcc_assert (i == 0);
                  cmi = NOT_SIMPLE_MOVE;
                }
            }
        }
    }

  /* A register seen in any non-decomposable context is not decomposed.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      /* Indices of the basic blocks that need to be split after the
         rewrite loop below.  */
      auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
        decompose_register (regno);

      FOR_EACH_BB_FN (bb, cfun)
        {
          rtx_insn *insn;

          FOR_BB_INSNS (bb, insn)
            {
              rtx pat;

              if (!INSN_P (insn))
                continue;

              pat = PATTERN (insn);
              if (GET_CODE (pat) == CLOBBER)
                resolve_clobber (pat, insn);
              else if (GET_CODE (pat) == USE)
                resolve_use (pat, insn);
              else if (DEBUG_INSN_P (insn))
                resolve_debug (insn);
              else
                {
                  rtx set;
                  int i;

                  recog_memoized (insn);
                  extract_insn (insn);

                  set = simple_move (insn, speed_p);
                  if (set)
                    {
                      rtx_insn *orig_insn = insn;
                      bool cfi = control_flow_insn_p (insn);

                      /* We can end up splitting loads to multi-word pseudos
                         into separate loads to machine word size pseudos.
                         When this happens, we first had one load that can
                         throw, and after resolve_simple_move we'll have a
                         bunch of loads (at least two).  All those loads may
                         trap if we can have non-call exceptions, so they
                         all will end the current basic block.  We split the
                         block after the outer loop over all insns, but we
                         make sure here that we will be able to split the
                         basic block and still produce the correct control
                         flow graph for it.  */
                      gcc_assert (!cfi
                                  || (cfun->can_throw_non_call_exceptions
                                      && can_throw_internal (insn)));

                      /* If resolve_simple_move handed back a different
                         insn, continue with that one and re-extract it so
                         that recog_data describes it.  */
                      insn = resolve_simple_move (set, insn);
                      if (insn != orig_insn)
                        {
                          recog_memoized (insn);
                          extract_insn (insn);

                          if (cfi)
                            bitmap_set_bit (sub_blocks, bb->index);
                        }
                    }
                  else
                    {
                      rtx_insn *decomposed_shift;

                      /* Not a simple move: try the shift / zero-extend
                         replacement and, when it was done, continue with
                         the insn it returned.  */
                      decomposed_shift = resolve_shift_zext (insn);
                      if (decomposed_shift != NULL_RTX)
                        {
                          insn = decomposed_shift;
                          recog_memoized (insn);
                          extract_insn (insn);
                        }
                    }

                  for (i = recog_data.n_operands - 1; i >= 0; --i)
                    resolve_subreg_use (recog_data.operand_loc[i], insn);

                  resolve_reg_notes (insn);

                  if (num_validated_changes () > 0)
                    {
                      /* Operands were changed: make every duplicated
                         operand location agree with the operand it
                         duplicates, then commit all pending changes with
                         apply_change_group.  */
                      for (i = recog_data.n_dups - 1; i >= 0; --i)
                        {
                          rtx *pl = recog_data.dup_loc[i];
                          int dup_num = recog_data.dup_num[i];
                          rtx *px = recog_data.operand_loc[dup_num];

                          validate_unshare_change (insn, pl, *px, 1);
                        }

                      i = apply_change_group ();
                      gcc_assert (i);
                    }
                }
            }
        }

      /* If we had insns to split that caused control flow insns in the middle
         of a basic block, split those blocks now.  Note that we only handle
         the case where splitting a load has caused multiple possibly trapping
         loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
        {
          rtx_insn *insn, *end;
          edge fallthru;

          bb = BASIC_BLOCK_FOR_FN (cfun, i);
          insn = BB_HEAD (bb);
          /* END is the last insn of the block as it was before any of
             the splits below.  */
          end = BB_END (bb);

          while (insn != end)
            {
              if (control_flow_insn_p (insn))
                {
                  /* Split the block after insn.  There will be a fallthru
                     edge, which is OK so we keep it.  We have to create the
                     exception edges ourselves.  */
                  fallthru = split_block (bb, insn);
                  rtl_make_eh_edge (NULL, bb, BB_END (bb));
                  /* Continue scanning in the block that follows the
                     split point.  */
                  bb = fallthru->dest;
                  insn = BB_HEAD (bb);
                }
              else
                insn = NEXT_INSN (insn);
            }
        }
    }

  /* Release the copy graph: first every bitmap that was allocated for
     a register, then the vector itself.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
        BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}

/* Implement first lower subreg pass.  */

namespace {

/* Static description of the "subreg1" pass.  */
const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  /* The pass is enabled whenever flag_split_wide_types is nonzero.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  /* Run the decomposition with DECOMPOSE_COPIES false.  */
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg

} // anon namespace

/* Allocate a new instance of the first lower subreg pass for CTXT.  */

rtl_opt_pass *
make_pass_lower_subreg (gcc::context *ctxt)
{
  return new pass_lower_subreg (ctxt);
}

/* Implement second lower subreg pass.
*/ 1784 1785 namespace { 1786 1787 const pass_data pass_data_lower_subreg2 = 1788 { 1789 RTL_PASS, /* type */ 1790 "subreg2", /* name */ 1791 OPTGROUP_NONE, /* optinfo_flags */ 1792 TV_LOWER_SUBREG, /* tv_id */ 1793 0, /* properties_required */ 1794 0, /* properties_provided */ 1795 0, /* properties_destroyed */ 1796 0, /* todo_flags_start */ 1797 TODO_df_finish, /* todo_flags_finish */ 1798 }; 1799 1800 class pass_lower_subreg2 : public rtl_opt_pass 1801 { 1802 public: 1803 pass_lower_subreg2 (gcc::context *ctxt) 1804 : rtl_opt_pass (pass_data_lower_subreg2, ctxt) 1805 {} 1806 1807 /* opt_pass methods: */ 1808 virtual bool gate (function *) { return flag_split_wide_types 1809 && flag_split_wide_types_early; } 1810 virtual unsigned int execute (function *) 1811 { 1812 decompose_multiword_subregs (true); 1813 return 0; 1814 } 1815 1816 }; // class pass_lower_subreg2 1817 1818 } // anon namespace 1819 1820 rtl_opt_pass * 1821 make_pass_lower_subreg2 (gcc::context *ctxt) 1822 { 1823 return new pass_lower_subreg2 (ctxt); 1824 } 1825 1826 /* Implement third lower subreg pass. 
*/ 1827 1828 namespace { 1829 1830 const pass_data pass_data_lower_subreg3 = 1831 { 1832 RTL_PASS, /* type */ 1833 "subreg3", /* name */ 1834 OPTGROUP_NONE, /* optinfo_flags */ 1835 TV_LOWER_SUBREG, /* tv_id */ 1836 0, /* properties_required */ 1837 0, /* properties_provided */ 1838 0, /* properties_destroyed */ 1839 0, /* todo_flags_start */ 1840 TODO_df_finish, /* todo_flags_finish */ 1841 }; 1842 1843 class pass_lower_subreg3 : public rtl_opt_pass 1844 { 1845 public: 1846 pass_lower_subreg3 (gcc::context *ctxt) 1847 : rtl_opt_pass (pass_data_lower_subreg3, ctxt) 1848 {} 1849 1850 /* opt_pass methods: */ 1851 virtual bool gate (function *) { return flag_split_wide_types; } 1852 virtual unsigned int execute (function *) 1853 { 1854 decompose_multiword_subregs (true); 1855 return 0; 1856 } 1857 1858 }; // class pass_lower_subreg3 1859 1860 } // anon namespace 1861 1862 rtl_opt_pass * 1863 make_pass_lower_subreg3 (gcc::context *ctxt) 1864 { 1865 return new pass_lower_subreg3 (ctxt); 1866 } 1867